Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
48b74790
Commit
48b74790
authored
Jul 18, 2016
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #6830 from tomoaki0705:featureSupportFp16
parents
a86a5c79
addb1538
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
101 additions
and
5 deletions
+101
-5
OpenCVCompilerOptions.cmake
cmake/OpenCVCompilerOptions.cmake
+7
-1
core.hpp
modules/core/include/opencv2/core.hpp
+10
-0
cvdef.h
modules/core/include/opencv2/core/cvdef.h
+13
-2
convert.cpp
modules/core/src/convert.cpp
+0
-0
precomp.hpp
modules/core/src/precomp.hpp
+1
-0
system.cpp
modules/core/src/system.cpp
+10
-2
test_arithm.cpp
modules/core/test/test_arithm.cpp
+57
-0
ts_func.cpp
modules/ts/src/ts_func.cpp
+3
-0
No files found.
cmake/OpenCVCompilerOptions.cmake
View file @
48b74790
...
...
@@ -146,8 +146,11 @@ if(CMAKE_COMPILER_IS_GNUCXX)
elseif
(
X86 OR X86_64
)
add_extra_compiler_option
(
-mno-sse2
)
endif
()
if
(
ARM
)
add_extra_compiler_option
(
"-mfp16-format=ieee"
)
endif
(
ARM
)
if
(
ENABLE_NEON
)
add_extra_compiler_option
(
"-mfpu=neon"
)
add_extra_compiler_option
(
"-mfpu=neon
-fp16
"
)
endif
()
if
(
ENABLE_VFPV3 AND NOT ENABLE_NEON
)
add_extra_compiler_option
(
"-mfpu=vfpv3"
)
...
...
@@ -198,6 +201,9 @@ if(CMAKE_COMPILER_IS_GNUCXX)
add_extra_compiler_option
(
-mpopcnt
)
endif
()
endif
()
if
((
X86 OR X86_64
)
AND NOT MSVC AND NOT APPLE
)
add_extra_compiler_option
(
-mf16c
)
endif
((
X86 OR X86_64
)
AND NOT MSVC AND NOT APPLE
)
endif
(
NOT MINGW
)
if
(
X86 OR X86_64
)
...
...
modules/core/include/opencv2/core.hpp
View file @
48b74790
...
...
@@ -524,6 +524,16 @@ For example:
CV_EXPORTS_W
void
convertScaleAbs
(
InputArray
src
,
OutputArray
dst
,
double
alpha
=
1
,
double
beta
=
0
);
/** @brief Converts an array to or from half-precision floating point.
convertFp16 converts FP32 to FP16 or FP16 to FP32. The input array must have type CV_32F (FP32 data) or
CV_16S (FP16 data stored as 16-bit values). If the input array has neither type, the function does nothing.
@param src input array.
@param dst output array.
*/
CV_EXPORTS_W
void
convertFp16
(
InputArray
src
,
OutputArray
dst
);
/** @brief Performs a look-up table transform of an array.
The function LUT fills the output array with values from the look-up table. Indices of the entries
...
...
modules/core/include/opencv2/core/cvdef.h
View file @
48b74790
...
...
@@ -112,7 +112,7 @@
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_FP16 9
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_FMA3 12
...
...
@@ -143,7 +143,7 @@ enum CpuFeatures {
CPU_SSE4_1
=
6
,
CPU_SSE4_2
=
7
,
CPU_POPCNT
=
8
,
CPU_FP16
=
9
,
CPU_AVX
=
10
,
CPU_AVX2
=
11
,
CPU_FMA3
=
12
,
...
...
@@ -193,6 +193,10 @@ enum CpuFeatures {
# endif
# define CV_POPCNT 1
# endif
# if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)
# include <immintrin.h>
# define CV_FP16 1
# endif
# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
...
...
@@ -223,6 +227,10 @@ enum CpuFeatures {
# define CV_NEON 1
#endif
#if defined __GNUC__ && ((defined (__arm__) && (__ARM_FP & 0x2)) || defined(__aarch64__))
# define CV_FP16 1
#endif
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
# define CV_VFP 1
#endif
...
...
@@ -253,6 +261,9 @@ enum CpuFeatures {
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_FP16
# define CV_FP16 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
#endif
...
...
modules/core/src/convert.cpp
View file @
48b74790
This diff is collapsed.
Click to expand it.
modules/core/src/precomp.hpp
View file @
48b74790
...
...
@@ -135,6 +135,7 @@ typedef void (*BinaryFuncC)(const uchar* src1, size_t step1,
uchar
*
dst
,
size_t
step
,
int
width
,
int
height
,
void
*
);
BinaryFunc
getConvertFuncFp16
(
int
ddepth
);
BinaryFunc
getConvertFunc
(
int
sdepth
,
int
ddepth
);
BinaryFunc
getCopyMaskFunc
(
size_t
esz
);
...
...
modules/core/src/system.cpp
View file @
48b74790
...
...
@@ -291,6 +291,7 @@ struct HWFeatures
f
.
have
[
CV_CPU_SSE4_2
]
=
(
cpuid_data
[
2
]
&
(
1
<<
20
))
!=
0
;
f
.
have
[
CV_CPU_POPCNT
]
=
(
cpuid_data
[
2
]
&
(
1
<<
23
))
!=
0
;
f
.
have
[
CV_CPU_AVX
]
=
(((
cpuid_data
[
2
]
&
(
1
<<
28
))
!=
0
)
&&
((
cpuid_data
[
2
]
&
(
1
<<
27
))
!=
0
));
//OS uses XSAVE_XRSTORE and CPU support AVX
f
.
have
[
CV_CPU_FP16
]
=
(
cpuid_data
[
2
]
&
(
1
<<
29
))
!=
0
;
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
...
...
@@ -338,7 +339,8 @@ struct HWFeatures
#if defined ANDROID || defined __linux__
#ifdef __aarch64__
f
.
have
[
CV_CPU_NEON
]
=
true
;
#else
f
.
have
[
CV_CPU_FP16
]
=
true
;
#elif defined __arm__
int
cpufile
=
open
(
"/proc/self/auxv"
,
O_RDONLY
);
if
(
cpufile
>=
0
)
...
...
@@ -351,6 +353,7 @@ struct HWFeatures
if
(
auxv
.
a_type
==
AT_HWCAP
)
{
f
.
have
[
CV_CPU_NEON
]
=
(
auxv
.
a_un
.
a_val
&
4096
)
!=
0
;
f
.
have
[
CV_CPU_FP16
]
=
(
auxv
.
a_un
.
a_val
&
2
)
!=
0
;
break
;
}
}
...
...
@@ -358,8 +361,13 @@ struct HWFeatures
close
(
cpufile
);
}
#endif
#elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
#elif (defined __clang__ || defined __APPLE__)
#if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
f
.
have
[
CV_CPU_NEON
]
=
true
;
#endif
#if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__))
f
.
have
[
CV_CPU_FP16
]
=
true
;
#endif
#endif
return
f
;
...
...
modules/core/test/test_arithm.cpp
View file @
48b74790
...
...
@@ -737,6 +737,62 @@ struct ConvertScaleOp : public BaseElemWiseOp
int
ddepth
;
};
struct
ConvertScaleFp16Op
:
public
BaseElemWiseOp
{
ConvertScaleFp16Op
()
:
BaseElemWiseOp
(
1
,
FIX_BETA
+
REAL_GAMMA
,
1
,
1
,
Scalar
::
all
(
0
)),
nextRange
(
0
)
{
}
void
op
(
const
vector
<
Mat
>&
src
,
Mat
&
dst
,
const
Mat
&
)
{
Mat
m
;
convertFp16
(
src
[
0
],
m
);
convertFp16
(
m
,
dst
);
}
void
refop
(
const
vector
<
Mat
>&
src
,
Mat
&
dst
,
const
Mat
&
)
{
cvtest
::
copy
(
src
[
0
],
dst
);
}
int
getRandomType
(
RNG
&
)
{
// 0: FP32 -> FP16 -> FP32
// 1: FP16 -> FP32 -> FP16
int
srctype
=
(
nextRange
&
1
)
==
0
?
CV_32F
:
CV_16S
;
return
srctype
;
}
void
getValueRange
(
int
,
double
&
minval
,
double
&
maxval
)
{
// 0: FP32 -> FP16 -> FP32
// 1: FP16 -> FP32 -> FP16
if
(
(
nextRange
&
1
)
==
0
)
{
// largest integer number that fp16 can express exactly
maxval
=
2048.
f
;
minval
=
-
maxval
;
}
else
{
// 0: positive number range
// 1: negative number range
if
(
(
nextRange
&
2
)
==
0
)
{
minval
=
0
;
// 0x0000 +0
maxval
=
31744
;
// 0x7C00 +Inf
}
else
{
minval
=
-
32768
;
// 0x8000 -0
maxval
=
-
1024
;
// 0xFC00 -Inf
}
}
}
double
getMaxErr
(
int
)
{
return
0.5
f
;
}
void
generateScalars
(
int
,
RNG
&
rng
)
{
nextRange
=
rng
.
next
();
}
int
nextRange
;
};
struct
ConvertScaleAbsOp
:
public
BaseElemWiseOp
{
...
...
@@ -1371,6 +1427,7 @@ INSTANTIATE_TEST_CASE_P(Core_Copy, ElemWiseTest, ::testing::Values(ElemWiseOpPtr
INSTANTIATE_TEST_CASE_P
(
Core_Set
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
SetOp
)));
INSTANTIATE_TEST_CASE_P
(
Core_SetZero
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
SetZeroOp
)));
INSTANTIATE_TEST_CASE_P
(
Core_ConvertScale
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
ConvertScaleOp
)));
INSTANTIATE_TEST_CASE_P
(
Core_ConvertScaleFp16
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
ConvertScaleFp16Op
)));
INSTANTIATE_TEST_CASE_P
(
Core_ConvertScaleAbs
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
ConvertScaleAbsOp
)));
INSTANTIATE_TEST_CASE_P
(
Core_Add
,
ElemWiseTest
,
::
testing
::
Values
(
ElemWiseOpPtr
(
new
cvtest
::
AddOp
)));
...
...
modules/ts/src/ts_func.cpp
View file @
48b74790
...
...
@@ -3064,6 +3064,9 @@ void printVersionInfo(bool useStdOut)
#if CV_NEON
if
(
checkHardwareSupport
(
CV_CPU_NEON
))
cpu_features
+=
" neon"
;
#endif
#if CV_FP16
if
(
checkHardwareSupport
(
CV_CPU_FP16
))
cpu_features
+=
" fp16"
;
#endif
cpu_features
.
erase
(
0
,
1
);
// erase initial space
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment