opencv / commit bd531bd8
authored Jan 28, 2020 by Sayed Adel
core:vsx fix inline asm constraints
generalize constraints to 'wa' for VSX registers
parent 223790e6
Showing 3 changed files with 26 additions and 31 deletions
cmake/checks/cpu_vsx_asm.cpp                            +2   -1
modules/core/include/opencv2/core/hal/intrin_vsx.hpp    +2   -2
modules/core/include/opencv2/core/vsx_utils.hpp        +22  -28
cmake/checks/cpu_vsx_asm.cpp
@@ -16,6 +16,6 @@ int main()
 {
     __vector float vf;
     __vector signed int vi;
-    __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wf" (vf) : "wa" (vi));
+    __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wa" (vf) : "wa" (vi));
     return 0;
 }
\ No newline at end of file
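Background (not part of the commit): in GCC's PowerPC inline-asm constraints, "wa" accepts any of the 64 VSX registers, while the older per-type classes such as "wf" (vector single), "wd" (vector double) and "wi" (vector integer) are narrower and are deprecated or removed in later GCC releases; the "%x" operand modifier prints the full VSX register number. A minimal standalone sketch of what the updated check exercises (the file name and build flags below are assumptions, not from this commit):

// vsx_wa_check.cpp (hypothetical name): compiles only if the toolchain accepts
// the "wa" constraint together with the %x operand modifier, which is what the
// updated cmake check probes for.
// Assumed build command: g++ -mcpu=power8 -mvsx -c vsx_wa_check.cpp
#include <altivec.h>

int main()
{
    __vector float vf;
    __vector signed int vi = { 1, 2, 3, 4 };
    // xvcvsxwsp: convert signed words to single precision; with "wa" the compiler
    // may place both operands in any VSX register.
    __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wa" (vf) : "wa" (vi));
    (void)vf;
    return 0;
}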
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
@@ -1338,7 +1338,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
     return v_float32x4(vec_extract_fp_from_shorth(vf16));
 #elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
     vec_float4 vf32;
-    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
+    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
     return v_float32x4(vf32);
 #else
     const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
@@ -1363,7 +1363,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"?
 #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
     vec_ushort8 vf16;
-    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val));
+    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
     vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
 #else
     const vec_int4 signmask = vec_int4_sp(0x80000000);
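For orientation, these are the float16 paths of OpenCV's universal intrinsics: v_load_expand() widens four half-precision values to float32 and v_pack_store() narrows them back, going through the xvcvhpsp/xvcvsphp instructions above when VSX3 is available. A rough usage sketch, assuming an OpenCV 4.x build with CV_SIMD128 enabled (illustrative only, not part of this change):

#include <opencv2/core.hpp>
#include <opencv2/core/hal/intrin.hpp>

int main()
{
    cv::float16_t src[4] = { cv::float16_t(1.0f), cv::float16_t(0.5f),
                             cv::float16_t(-2.0f), cv::float16_t(8.0f) };
    cv::float16_t dst[4] = {};
#if CV_SIMD128
    cv::v_float32x4 v = cv::v_load_expand(src); // fp16 -> fp32 (xvcvhpsp on VSX3)
    cv::v_pack_store(dst, v);                   // fp32 -> fp16 (xvcvsphp on VSX3)
#endif
    (void)dst;
    return 0;
}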
modules/core/include/opencv2/core/vsx_utils.hpp
@@ -110,9 +110,9 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
 #if defined(__GNUG__) && !defined(__clang__)
 // inline asm helper
-#define VSX_IMPL_1RG(rt, rto, rg, rgo, opc, fnm) \
-VSX_FINLINE(rt) fnm(const rg& a)                 \
-{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "="#rto (rs) : #rgo (a)); return rs; }
+#define VSX_IMPL_1RG(rt, rg, opc, fnm) \
+VSX_FINLINE(rt) fnm(const rg& a)       \
+{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; }

 #define VSX_IMPL_1VRG(rt, rg, opc, fnm) \
 VSX_FINLINE(rt) fnm(const rg& a) \
@@ -257,44 +257,38 @@ VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
 VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp)

 // converts word and doubleword to double-precision
-#ifdef vec_ctd
-#   undef vec_ctd
-#endif
-VSX_IMPL_1RG(vec_double2, wd, vec_int4,    wa, xvcvsxwdp, vec_ctdo)
-VSX_IMPL_1RG(vec_double2, wd, vec_uint4,   wa, xvcvuxwdp, vec_ctdo)
-VSX_IMPL_1RG(vec_double2, wd, vec_dword2,  wi, xvcvsxddp, vec_ctd)
-VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, vec_ctd)
+#undef vec_ctd
+VSX_IMPL_1RG(vec_double2, vec_int4,    xvcvsxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, vec_uint4,   xvcvuxwdp, vec_ctdo)
+VSX_IMPL_1RG(vec_double2, vec_dword2,  xvcvsxddp, vec_ctd)
+VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd)

 // converts word and doubleword to single-precision
 #undef vec_ctf
-VSX_IMPL_1RG(vec_float4, wf, vec_int4,    wa, xvcvsxwsp, vec_ctf)
-VSX_IMPL_1RG(vec_float4, wf, vec_uint4,   wa, xvcvuxwsp, vec_ctf)
-VSX_IMPL_1RG(vec_float4, wf, vec_dword2,  wi, xvcvsxdsp, vec_ctfo)
-VSX_IMPL_1RG(vec_float4, wf, vec_udword2, wi, xvcvuxdsp, vec_ctfo)
+VSX_IMPL_1RG(vec_float4, vec_int4,    xvcvsxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, vec_uint4,   xvcvuxwsp, vec_ctf)
+VSX_IMPL_1RG(vec_float4, vec_dword2,  xvcvsxdsp, vec_ctfo)
+VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo)

 // converts single and double precision to signed word
 #undef vec_cts
-VSX_IMPL_1RG(vec_int4, wa, vec_double2, wd, xvcvdpsxws, vec_ctso)
-VSX_IMPL_1RG(vec_int4, wa, vec_float4,  wf, xvcvspsxws, vec_cts)
+VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso)
+VSX_IMPL_1RG(vec_int4, vec_float4,  xvcvspsxws, vec_cts)

 // converts single and double precision to unsigned word
 #undef vec_ctu
-VSX_IMPL_1RG(vec_uint4, wa, vec_double2, wd, xvcvdpuxws, vec_ctuo)
-VSX_IMPL_1RG(vec_uint4, wa, vec_float4,  wf, xvcvspuxws, vec_ctu)
+VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo)
+VSX_IMPL_1RG(vec_uint4, vec_float4,  xvcvspuxws, vec_ctu)

 // converts single and double precision to signed doubleword
-#ifdef vec_ctsl
-#   undef vec_ctsl
-#endif
-VSX_IMPL_1RG(vec_dword2, wi, vec_double2, wd, xvcvdpsxds, vec_ctsl)
-VSX_IMPL_1RG(vec_dword2, wi, vec_float4,  wf, xvcvspsxds, vec_ctslo)
+#undef vec_ctsl
+VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl)
+VSX_IMPL_1RG(vec_dword2, vec_float4,  xvcvspsxds, vec_ctslo)

 // converts single and double precision to unsigned doubleword
-#ifdef vec_ctul
-#   undef vec_ctul
-#endif
-VSX_IMPL_1RG(vec_udword2, wi, vec_double2, wd, xvcvdpuxds, vec_ctul)
-VSX_IMPL_1RG(vec_udword2, wi, vec_float4,  wf, xvcvspuxds, vec_ctulo)
+#undef vec_ctul
+VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul)
+VSX_IMPL_1RG(vec_udword2, vec_float4,  xvcvspuxds, vec_ctulo)

 // just in case if GCC doesn't define it
 #ifndef vec_xl
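To make the macro change above concrete, a call such as VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo) from this block now expands to roughly the following (VSX_FINLINE written out as a plain inline function; a sketch, not literal preprocessor output):

// Approximate expansion of VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo)
// after this commit: both operands use the generic "wa" VSX constraint instead of
// the per-type classes previously passed through the removed rto/rgo parameters.
inline vec_double2 vec_ctdo(const vec_int4& a)
{
    vec_double2 rs;
    __asm__ __volatile__("xvcvsxwdp %x0,%x1" : "=wa" (rs) : "wa" (a));
    return rs;
}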