Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d34f2cfe
Commit
d34f2cfe
authored
Sep 26, 2016
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7317 from tomoaki0705:fixIfdefFp16
parents
7f14a278
c7cb116d
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
70 additions
and
48 deletions
+70
-48
OpenCVCompilerOptions.cmake
cmake/OpenCVCompilerOptions.cmake
+29
-16
cvdef.h
modules/core/include/opencv2/core/cvdef.h
+1
-1
convert.cpp
modules/core/src/convert.cpp
+6
-5
test_intrin.cpp
modules/core/test/test_intrin.cpp
+34
-26
No files found.
cmake/OpenCVCompilerOptions.cmake
View file @
d34f2cfe
...
...
@@ -151,7 +151,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
add_extra_compiler_option
(
"-mfp16-format=ieee"
)
endif
(
ARM
)
if
(
ENABLE_NEON
)
add_extra_compiler_option
(
"-mfpu=neon-fp16"
)
add_extra_compiler_option
(
"-mfpu=neon"
)
endif
()
if
(
ENABLE_VFPV3 AND NOT ENABLE_NEON
)
add_extra_compiler_option
(
"-mfpu=vfpv3"
)
...
...
@@ -336,6 +336,34 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
add_extra_compiler_option
(
-fvisibility-inlines-hidden
)
endif
()
if
(
NOT OPENCV_FP16_DISABLE
)
if
(
ARM AND ENABLE_NEON
)
set
(
FP16_OPTION
"-mfpu=neon-fp16"
)
elseif
((
X86 OR X86_64
)
AND NOT MSVC AND ENABLE_AVX
)
set
(
FP16_OPTION
"-mf16c"
)
endif
()
try_compile
(
__VALID_FP16
"
${
OpenCV_BINARY_DIR
}
"
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/fp16.cpp"
COMPILE_DEFINITIONS
"-DCHECK_FP16"
"
${
FP16_OPTION
}
"
OUTPUT_VARIABLE TRY_OUT
)
if
(
NOT __VALID_FP16
)
if
((
X86 OR X86_64
)
AND NOT MSVC AND NOT ENABLE_AVX
)
# GCC enables AVX when mf16c is passed
message
(
STATUS
"FP16: Feature disabled"
)
else
()
message
(
STATUS
"FP16: Compiler support is not available"
)
endif
()
else
()
message
(
STATUS
"FP16: Compiler support is available"
)
set
(
HAVE_FP16 1
)
if
(
NOT
${
FP16_OPTION
}
STREQUAL
""
)
add_extra_compiler_option
(
${
FP16_OPTION
}
)
endif
()
endif
()
endif
()
#combine all "extra" options
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OPENCV_EXTRA_FLAGS
}
${
OPENCV_EXTRA_C_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OPENCV_EXTRA_FLAGS
}
${
OPENCV_EXTRA_CXX_FLAGS
}
"
)
...
...
@@ -376,21 +404,6 @@ if(MSVC)
endif
()
endif
()
if
(
NOT OPENCV_FP16_DISABLE
)
try_compile
(
__VALID_FP16
"
${
OpenCV_BINARY_DIR
}
"
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/fp16.cpp"
COMPILE_DEFINITIONS
"-DCHECK_FP16"
OUTPUT_VARIABLE TRY_OUT
)
if
(
NOT __VALID_FP16
)
message
(
STATUS
"FP16: Compiler support is not available"
)
else
()
message
(
STATUS
"FP16: Compiler support is available"
)
set
(
HAVE_FP16 1
)
endif
()
endif
()
if
(
APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS
"/usr/local/lib"
)
link_directories
(
"/usr/local/lib"
)
endif
()
modules/core/include/opencv2/core/cvdef.h
View file @
d34f2cfe
...
...
@@ -310,7 +310,7 @@ enum CpuFeatures {
typedef
union
Cv16suf
{
short
i
;
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
__fp16
h
;
#endif
struct
_fp16Format
...
...
modules/core/src/convert.cpp
View file @
d34f2cfe
...
...
@@ -44,6 +44,7 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "opencv2/core/hal/intrin.hpp"
#ifdef __APPLE__
#undef CV_NEON
...
...
@@ -4379,7 +4380,7 @@ struct Cvt_SIMD<float, int>
#endif
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) )
// const numbers for floating points format
const
unsigned
int
kShiftSignificand
=
13
;
const
unsigned
int
kMaskFp16Significand
=
0x3ff
;
...
...
@@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15;
const
unsigned
int
kBiasFp32Exponent
=
127
;
#endif
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
static
float
convertFp16SW
(
short
fp16
)
{
// Fp16 -> Fp32
...
...
@@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16)
}
#endif
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
static
short
convertFp16SW
(
float
fp32
)
{
// Fp32 -> Fp16
...
...
@@ -4557,7 +4558,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
if
(
(
(
intptr_t
)
dst
&
0xf
)
==
0
)
#endif
{
#if CV_FP16
#if CV_FP16 && CV_SIMD128
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
v_float32x4
v_src
=
v_load
(
src
+
x
);
...
...
@@ -4603,7 +4604,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
if
(
(
(
intptr_t
)
src
&
0xf
)
==
0
)
#endif
{
#if CV_FP16
#if CV_FP16 && CV_SIMD128
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
v_float16x4
v_src
=
v_load_f16
(
src
+
x
);
...
...
modules/core/test/test_intrin.cpp
View file @
d34f2cfe
...
...
@@ -711,48 +711,56 @@ template<typename R> struct TheTest
return
*
this
;
}
#if CV_FP16
TheTest
&
test_loadstore_fp16
()
{
#if CV_FP16
AlignedData
<
R
>
data
;
AlignedData
<
R
>
out
;
// check if addresses are aligned and unaligned respectively
EXPECT_EQ
((
size_t
)
0
,
(
size_t
)
&
data
.
a
.
d
%
16
);
EXPECT_NE
((
size_t
)
0
,
(
size_t
)
&
data
.
u
.
d
%
16
);
EXPECT_EQ
((
size_t
)
0
,
(
size_t
)
&
out
.
a
.
d
%
16
);
EXPECT_NE
((
size_t
)
0
,
(
size_t
)
&
out
.
u
.
d
%
16
);
// check some initialization methods
R
r1
=
data
.
u
;
R
r2
=
v_load_f16
(
data
.
a
.
d
);
R
r3
(
r2
);
EXPECT_EQ
(
data
.
u
[
0
],
r1
.
get0
());
EXPECT_EQ
(
data
.
a
[
0
],
r2
.
get0
());
EXPECT_EQ
(
data
.
a
[
0
],
r3
.
get0
());
// check some store methods
out
.
a
.
clear
();
v_store_f16
(
out
.
a
.
d
,
r1
);
EXPECT_EQ
(
data
.
a
,
out
.
a
);
if
(
checkHardwareSupport
(
CV_CPU_FP16
))
{
// check if addresses are aligned and unaligned respectively
EXPECT_EQ
((
size_t
)
0
,
(
size_t
)
&
data
.
a
.
d
%
16
);
EXPECT_NE
((
size_t
)
0
,
(
size_t
)
&
data
.
u
.
d
%
16
);
EXPECT_EQ
((
size_t
)
0
,
(
size_t
)
&
out
.
a
.
d
%
16
);
EXPECT_NE
((
size_t
)
0
,
(
size_t
)
&
out
.
u
.
d
%
16
);
// check some initialization methods
R
r1
=
data
.
u
;
R
r2
=
v_load_f16
(
data
.
a
.
d
);
R
r3
(
r2
);
EXPECT_EQ
(
data
.
u
[
0
],
r1
.
get0
());
EXPECT_EQ
(
data
.
a
[
0
],
r2
.
get0
());
EXPECT_EQ
(
data
.
a
[
0
],
r3
.
get0
());
// check some store methods
out
.
a
.
clear
();
v_store_f16
(
out
.
a
.
d
,
r1
);
EXPECT_EQ
(
data
.
a
,
out
.
a
);
}
return
*
this
;
#endif
}
TheTest
&
test_float_cvt_fp16
()
{
#if CV_FP16
AlignedData
<
v_float32x4
>
data
;
// check conversion
v_float32x4
r1
=
v_load
(
data
.
a
.
d
);
v_float16x4
r2
=
v_cvt_f16
(
r1
);
v_float32x4
r3
=
v_cvt_f32
(
r2
);
EXPECT_EQ
(
0x3c00
,
r2
.
get0
());
EXPECT_EQ
(
r3
.
get0
(),
r1
.
get0
());
if
(
checkHardwareSupport
(
CV_CPU_FP16
))
{
// check conversion
v_float32x4
r1
=
v_load
(
data
.
a
.
d
);
v_float16x4
r2
=
v_cvt_f16
(
r1
);
v_float32x4
r3
=
v_cvt_f32
(
r2
);
EXPECT_EQ
(
0x3c00
,
r2
.
get0
());
EXPECT_EQ
(
r3
.
get0
(),
r1
.
get0
());
}
return
*
this
;
}
#endif
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment