Commit 4e60db90 authored Mar 20, 2019 by Alexander Alekhin

Merge pull request #14110 from seiko2plus:core_vsx_fp16

parents a8e635f1 f4135968

Showing 5 changed files with 83 additions and 15 deletions
cmake/OpenCVCompilerOptions.cmake                       +8  -1
cmake/checks/cpu_vsx_asm.cpp                            +22 -0
modules/core/CMakeLists.txt                             +1  -1
modules/core/include/opencv2/core/hal/intrin_vsx.hpp    +50 -13
modules/core/include/opencv2/core/vsx_utils.hpp         +2  -0
cmake/OpenCVCompilerOptions.cmake

@@ -294,11 +294,18 @@ endif()
 # workaround gcc bug for aligned ld/st
 # https://github.com/opencv/opencv/issues/13211
 if((PPC64LE AND NOT CMAKE_CROSSCOMPILING) OR OPENCV_FORCE_COMPILER_CHECK_VSX_ALIGNED)
-  ocv_check_runtime_flag("${CPU_BASELINE_FLAGS}" "OPENCV_CHECK_VSX_ALIGNED" "${OpenCV_SOURCE_DIR}/cmake/checks/runtime/cpu_vsx_aligned.cpp")
+  ocv_check_runtime_flag("${CPU_BASELINE_FLAGS}" OPENCV_CHECK_VSX_ALIGNED "${OpenCV_SOURCE_DIR}/cmake/checks/runtime/cpu_vsx_aligned.cpp")
   if(NOT OPENCV_CHECK_VSX_ALIGNED)
     add_extra_compiler_option_force(-DCV_COMPILER_VSX_BROKEN_ALIGNED)
   endif()
 endif()
+
+# validate inline asm with fixes register number and constraints wa, wd, wf
+if(PPC64LE)
+  ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" OPENCV_CHECK_VSX_ASM "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx_asm.cpp")
+  if(NOT OPENCV_CHECK_VSX_ASM)
+    add_extra_compiler_option_force(-DCV_COMPILER_VSX_BROKEN_ASM)
+  endif()
+endif()
 
 # combine all "extra" options
 if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS)
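When the new compile check fails, the branch above forces -DCV_COMPILER_VSX_BROKEN_ASM into the build flags. A minimal sketch of how source code is then expected to key off that define (it mirrors the guards added to intrin_vsx.hpp further down; the comment bodies here are illustrative, not part of the commit):

    // Illustrative only: pick the FP16 code path from the macro this CMake branch forces.
    #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
        // fast path: the xvcvhpsp / xvcvsphp inline asm is known to assemble
    #else
        // fallback: portable bit-manipulation conversion, no inline asm
    #endif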
cmake/checks/cpu_vsx_asm.cpp  (new file, 0 → 100644)

+#if defined(__VSX__)
+    #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+        #include <altivec.h>
+    #else
+        #error "OpenCV only supports little-endian mode"
+    #endif
+#else
+    #error "VSX is not supported"
+#endif
+
+/*
+ * xlc and wide versions of clang don't support %x<n> in the inline asm template which fixes register number
+ * when using any of the register constraints wa, wd, wf
+ */
+
+int main()
+{
+    __vector float vf;
+    __vector signed int vi;
+    __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wf" (vf) : "wa" (vi));
+    return 0;
+}
\ No newline at end of file
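For context (not part of the commit): xvcvsxwsp converts four signed 32-bit words to single precision, and the %x operand modifier asks the compiler to print the full 6-bit VSX register number that such instructions encode. Compilers that reject %x can still reach the same conversion through the portable AltiVec intrinsic, roughly:

    // Hypothetical illustration, assuming a VSX/AltiVec-enabled compiler.
    #include <altivec.h>

    __vector float int32_to_float32(__vector signed int vi)
    {
        return vec_ctf(vi, 0);   // signed int32 -> float32, scale factor 0
    }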
modules/core/CMakeLists.txt

@@ -3,7 +3,7 @@ set(the_description "The Core Functionality")
 ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
 ocv_add_dispatched_file(stat SSE4_2 AVX2)
 ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
-ocv_add_dispatched_file(convert SSE2 AVX2)
+ocv_add_dispatched_file(convert SSE2 AVX2 VSX3)
 ocv_add_dispatched_file(convert_scale SSE2 AVX2)
 ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
 ocv_add_dispatched_file(matmul SSE2 AVX2)
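This adds a VSX3 build of the dispatched convert file, so the new FP16 kernels can be compiled with VSX3 flags and selected at run time. A generic sketch of the per-file dispatch idea, for illustration only (this is not OpenCV's generated dispatcher; the function names are made up):

    #include <cstdio>

    // Each listed ISA gets its own compiled variant of convert.cpp; one of them
    // is picked at run time from the detected CPU features.
    static void convert_baseline() { std::puts("baseline convert"); }
    static void convert_vsx3()     { std::puts("VSX3-optimized convert"); }

    void convert(bool cpu_has_vsx3)
    {
        if (cpu_has_vsx3) convert_vsx3();
        else              convert_baseline();
    }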
modules/core/include/opencv2/core/hal/intrin_vsx.hpp

@@ -11,11 +11,6 @@
 
 #define CV_SIMD128 1
 #define CV_SIMD128_64F 1
 
-/**
- * todo: supporting half precision for power9
- * convert instractions xvcvhpsp, xvcvsphp
- **/
-
 namespace cv
 {
@@ -1203,20 +1198,62 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec)
 
 /////// FP16 support ////////
-// [TODO] implement these 2 using VSX or universal intrinsics (copy from intrin_sse.cpp and adopt)
 
 inline v_float32x4 v_load_expand(const float16_t* ptr)
 {
-    return v_float32x4((float)ptr[0], (float)ptr[1], (float)ptr[2], (float)ptr[3]);
+    vec_ushort8 vf16 = vec_ld_l8((const ushort*)ptr);
+#if CV_VSX3 && defined(vec_extract_fp_from_shorth)
+    return v_float32x4(vec_extract_fp_from_shorth(vf16));
+#elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
+    vec_float4 vf32;
+    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
+    return v_float32x4(vf32);
+#else
+    const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);
+    const vec_int4 signmask = vec_int4_sp(0x80000000);
+    const vec_int4 maxexp = vec_int4_sp(0x7c000000);
+    const vec_float4 deltaf = vec_float4_c(vec_int4_sp(0x38800000));
+
+    vec_int4 bits = vec_int4_c(vec_mergeh(vec_short8_c(z), vec_short8_c(vf16)));
+    vec_int4 e = vec_and(bits, maxexp), sign = vec_and(bits, signmask);
+    vec_int4 t = vec_add(vec_sr(vec_xor(bits, sign), vec_uint4_sp(3)), delta); // ((h & 0x7fff) << 13) + delta
+    vec_int4 zt = vec_int4_c(vec_sub(vec_float4_c(vec_add(t, vec_int4_sp(1 << 23))), deltaf));
+
+    t = vec_add(t, vec_and(delta, vec_cmpeq(maxexp, e)));
+    vec_bint4 zmask = vec_cmpeq(e, z);
+    vec_int4 ft = vec_sel(t, zt, zmask);
+    return v_float32x4(vec_float4_c(vec_or(ft, sign)));
+#endif
 }
 
 inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 {
-    float CV_DECL_ALIGNED(32) f[4];
-    v_store_aligned(f, v);
-    ptr[0] = float16_t(f[0]);
-    ptr[1] = float16_t(f[1]);
-    ptr[2] = float16_t(f[2]);
-    ptr[3] = float16_t(f[3]);
+// fixme: Is there any buitin op or intrinsic that cover "xvcvsphp"?
+#if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
+    vec_ushort8 vf16;
+    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val));
+    vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
+#else
+    const vec_int4 signmask = vec_int4_sp(0x80000000);
+    const vec_int4 rval = vec_int4_sp(0x3f000000);
+
+    vec_int4 t = vec_int4_c(v.val);
+    vec_int4 sign = vec_sra(vec_and(t, signmask), vec_uint4_sp(16));
+    t = vec_and(vec_nor(signmask, signmask), t);
+
+    vec_bint4 finitemask = vec_cmpgt(vec_int4_sp(0x47800000), t);
+    vec_bint4 isnan = vec_cmpgt(t, vec_int4_sp(0x7f800000));
+    vec_int4 naninf = vec_sel(vec_int4_sp(0x7c00), vec_int4_sp(0x7e00), isnan);
+    vec_bint4 tinymask = vec_cmpgt(vec_int4_sp(0x38800000), t);
+    vec_int4 tt = vec_int4_c(vec_add(vec_float4_c(t), vec_float4_c(rval)));
+    tt = vec_sub(tt, rval);
+    vec_int4 odd = vec_and(vec_sr(t, vec_uint4_sp(13)), vec_int4_sp(1));
+    vec_int4 nt = vec_add(t, vec_int4_sp(0xc8000fff));
+    nt = vec_sr(vec_add(nt, odd), vec_uint4_sp(13));
+    t = vec_sel(nt, tt, tinymask);
+    t = vec_sel(naninf, t, finitemask);
+    t = vec_or(t, sign);
+    vec_st_l8(vec_packs(t, t), ptr);
+#endif
 }
 
 inline void v_cleanup() {}
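The final #else branch of v_load_expand above is the portable bit-manipulation path for compilers without usable VSX3 intrinsics or inline asm. A scalar sketch of the same trick, for illustration only (it is not part of the commit; the vector code applies this lane-wise):

    #include <cstdint>
    #include <cstring>

    // Illustrative scalar equivalent of the vector fallback above; memcpy is used
    // for the bit reinterpretations that the vector code does with vec_*_c casts.
    static float fp16_bits_to_fp32(uint16_t h)
    {
        uint32_t bits = (uint32_t)h << 16;
        uint32_t sign = bits & 0x80000000u;
        uint32_t e    = bits & 0x7c000000u;                  // half exponent field, pre-shifted
        uint32_t t    = ((bits ^ sign) >> 3) + 0x38000000u;  // ((h & 0x7fff) << 13) + delta: rebias 15 -> 127

        // zero/subnormal path: let float arithmetic renormalize the mantissa
        uint32_t u = t + (1u << 23);
        float f;  std::memcpy(&f, &u, sizeof f);
        f -= 6.103515625e-05f;                               // the float whose bits are 0x38800000 (deltaf)
        uint32_t zt;  std::memcpy(&zt, &f, sizeof zt);

        if (e == 0x7c000000u) t += 0x38000000u;              // Inf/NaN: push exponent to all-ones
        uint32_t r = ((e == 0) ? zt : t) | sign;

        float out;  std::memcpy(&out, &r, sizeof out);
        return out;
    }

The #else branch of v_pack_store is the reverse direction, float to half with rounding and Inf/NaN handling, built from the same kind of masks and selects.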
modules/core/include/opencv2/core/vsx_utils.hpp

@@ -291,6 +291,8 @@ VSX_IMPL_1RG(vec_udword2, wi, vec_float4, wf, xvcvspuxds, vec_ctulo)
  *
  * So we're not able to use inline asm and only use built-in functions that CLANG supports
  * and use __builtin_convertvector if clang missng any of vector conversions built-in functions
+ *
+ * todo: clang asm template bug is fixed, need to reconsider the current workarounds.
  */
 
 // convert vector helper
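For readers unfamiliar with the builtin named in that comment, a minimal standalone illustration of __builtin_convertvector, a clang (and recent gcc) vector extension; this snippet is not from the OpenCV sources:

    // Element-wise conversion between GCC/clang vector-extension types.
    typedef float v4f __attribute__((vector_size(16)));
    typedef int   v4i __attribute__((vector_size(16)));

    v4f int_to_float_lanes(v4i v)
    {
        return __builtin_convertvector(v, v4f);  // converts each int32 lane to float
    }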