Commit bd531bd8 authored by Sayed Adel

core:vsx fix inline asm constraints

  generalize constraints to 'wa' for VSX registers
parent 223790e6
...@@ -16,6 +16,6 @@ int main() ...@@ -16,6 +16,6 @@ int main()
{ {
__vector float vf; __vector float vf;
__vector signed int vi; __vector signed int vi;
__asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wf" (vf) : "wa" (vi)); __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wa" (vf) : "wa" (vi));
return 0; return 0;
} }
\ No newline at end of file
...@@ -1338,7 +1338,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) ...@@ -1338,7 +1338,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
return v_float32x4(vec_extract_fp_from_shorth(vf16)); return v_float32x4(vec_extract_fp_from_shorth(vf16));
#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) #elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
vec_float4 vf32; vec_float4 vf32;
__asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16))); __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
return v_float32x4(vf32); return v_float32x4(vf32);
#else #else
const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000); const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
...@@ -1363,7 +1363,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) ...@@ -1363,7 +1363,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
// fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"?
#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
vec_ushort8 vf16; vec_ushort8 vf16;
__asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val)); __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
vec_st_l8(vec_mergesqe(vf16, vf16), ptr); vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
#else #else
const vec_int4 signmask = vec_int4_sp(0x80000000); const vec_int4 signmask = vec_int4_sp(0x80000000);
......
...@@ -110,9 +110,9 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); } ...@@ -110,9 +110,9 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); }
#if defined(__GNUG__) && !defined(__clang__) #if defined(__GNUG__) && !defined(__clang__)
// inline asm helper // inline asm helper
#define VSX_IMPL_1RG(rt, rto, rg, rgo, opc, fnm) \ #define VSX_IMPL_1RG(rt, rg, opc, fnm) \
VSX_FINLINE(rt) fnm(const rg& a) \ VSX_FINLINE(rt) fnm(const rg& a) \
{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "="#rto (rs) : #rgo (a)); return rs; } { rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; }
#define VSX_IMPL_1VRG(rt, rg, opc, fnm) \ #define VSX_IMPL_1VRG(rt, rg, opc, fnm) \
VSX_FINLINE(rt) fnm(const rg& a) \ VSX_FINLINE(rt) fnm(const rg& a) \
...@@ -257,44 +257,38 @@ VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) ...@@ -257,44 +257,38 @@ VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp)
VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp)
// converts word and doubleword to double-precision // converts word and doubleword to double-precision
#ifdef vec_ctd #undef vec_ctd
# undef vec_ctd VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo)
#endif VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo)
VSX_IMPL_1RG(vec_double2, wd, vec_int4, wa, xvcvsxwdp, vec_ctdo) VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd)
VSX_IMPL_1RG(vec_double2, wd, vec_uint4, wa, xvcvuxwdp, vec_ctdo) VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd)
VSX_IMPL_1RG(vec_double2, wd, vec_dword2, wi, xvcvsxddp, vec_ctd)
VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, vec_ctd)
// converts word and doubleword to single-precision // converts word and doubleword to single-precision
#undef vec_ctf #undef vec_ctf
VSX_IMPL_1RG(vec_float4, wf, vec_int4, wa, xvcvsxwsp, vec_ctf) VSX_IMPL_1RG(vec_float4, vec_int4, xvcvsxwsp, vec_ctf)
VSX_IMPL_1RG(vec_float4, wf, vec_uint4, wa, xvcvuxwsp, vec_ctf) VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf)
VSX_IMPL_1RG(vec_float4, wf, vec_dword2, wi, xvcvsxdsp, vec_ctfo) VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo)
VSX_IMPL_1RG(vec_float4, wf, vec_udword2, wi, xvcvuxdsp, vec_ctfo) VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo)
// converts single and double precision to signed word // converts single and double precision to signed word
#undef vec_cts #undef vec_cts
VSX_IMPL_1RG(vec_int4, wa, vec_double2, wd, xvcvdpsxws, vec_ctso) VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso)
VSX_IMPL_1RG(vec_int4, wa, vec_float4, wf, xvcvspsxws, vec_cts) VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts)
// converts single and double precision to unsigned word // converts single and double precision to unsigned word
#undef vec_ctu #undef vec_ctu
VSX_IMPL_1RG(vec_uint4, wa, vec_double2, wd, xvcvdpuxws, vec_ctuo) VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo)
VSX_IMPL_1RG(vec_uint4, wa, vec_float4, wf, xvcvspuxws, vec_ctu) VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu)
// converts single and double precision to signed doubleword // converts single and double precision to signed doubleword
#ifdef vec_ctsl #undef vec_ctsl
# undef vec_ctsl VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl)
#endif VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo)
VSX_IMPL_1RG(vec_dword2, wi, vec_double2, wd, xvcvdpsxds, vec_ctsl)
VSX_IMPL_1RG(vec_dword2, wi, vec_float4, wf, xvcvspsxds, vec_ctslo)
// converts single and double precision to unsigned doubleword // converts single and double precision to unsigned doubleword
#ifdef vec_ctul #undef vec_ctul
# undef vec_ctul VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul)
#endif VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo)
VSX_IMPL_1RG(vec_udword2, wi, vec_double2, wd, xvcvdpuxds, vec_ctul)
VSX_IMPL_1RG(vec_udword2, wi, vec_float4, wf, xvcvspuxds, vec_ctulo)
// just in case if GCC doesn't define it // just in case if GCC doesn't define it
#ifndef vec_xl #ifndef vec_xl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment