Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
898ca382
Commit
898ca382
authored
Dec 28, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cmake: AVX512 -> AVX_512F
parent
2938860b
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
34 additions
and
33 deletions
+34
-33
OpenCVCompilerOptimizations.cmake
cmake/OpenCVCompilerOptimizations.cmake
+9
-8
cv_cpu_dispatch.h
modules/core/include/opencv2/core/cv_cpu_dispatch.h
+2
-2
cv_cpu_helper.h
modules/core/include/opencv2/core/cv_cpu_helper.h
+12
-12
CMakeLists.txt
modules/dnn/CMakeLists.txt
+1
-1
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+6
-6
fully_connected_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
+3
-3
layers_common.simd.hpp
modules/dnn/src/layers/layers_common.simd.hpp
+1
-1
No files found.
cmake/OpenCVCompilerOptimizations.cmake
View file @
898ca382
...
...
@@ -2,7 +2,7 @@
# SSE / SSE2 (always available on 64-bit CPUs)
# SSE3 / SSSE3
# SSE4_1 / SSE4_2 / POPCNT
# AVX / AVX2 / AVX
512
# AVX / AVX2 / AVX
_512F
# FMA3
# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
...
...
@@ -26,7 +26,7 @@
#
# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (<name>.avx2.cpp)
set
(
CPU_ALL_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX
512
"
)
set
(
CPU_ALL_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX
_512F
"
)
list
(
APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16
)
list
(
APPEND CPU_ALL_OPTIMIZATIONS VSX
)
list
(
REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS
)
...
...
@@ -145,7 +145,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
endif
()
if
(
X86 OR X86_64
)
ocv_update
(
CPU_KNOWN_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX
512
"
)
ocv_update
(
CPU_KNOWN_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX
_512F
"
)
ocv_update
(
CPU_SSE_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_sse.cpp"
)
ocv_update
(
CPU_SSE2_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_sse2.cpp"
)
...
...
@@ -157,11 +157,11 @@ if(X86 OR X86_64)
ocv_update
(
CPU_AVX_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx.cpp"
)
ocv_update
(
CPU_AVX2_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx2.cpp"
)
ocv_update
(
CPU_FP16_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_fp16.cpp"
)
ocv_update
(
CPU_AVX
512
_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx512.cpp"
)
ocv_update
(
CPU_AVX
_512F
_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx512.cpp"
)
if
(
NOT OPENCV_CPU_OPT_IMPLIES_IGNORE
)
ocv_update
(
CPU_AVX
512
_IMPLIES
"AVX2"
)
ocv_update
(
CPU_AVX
512
_FORCE
""
)
# Don't force other optimizations
ocv_update
(
CPU_AVX
_512F
_IMPLIES
"AVX2"
)
ocv_update
(
CPU_AVX
_512F
_FORCE
""
)
# Don't force other optimizations
ocv_update
(
CPU_AVX2_IMPLIES
"AVX;FMA3;FP16"
)
ocv_update
(
CPU_FMA3_IMPLIES
"AVX2"
)
ocv_update
(
CPU_FMA3_FORCE
""
)
# Don't force other optimizations
...
...
@@ -205,7 +205,7 @@ if(X86 OR X86_64)
if
(
NOT X86_64
)
# x64 compiler doesn't support /arch:sse
ocv_intel_compiler_optimization_option
(
SSE
"-msse"
"/arch:SSE"
)
endif
()
#ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx
512")
ocv_intel_compiler_optimization_option
(
AVX_512F
"-march=common-avx512"
"/arch:COMMON-AVX
512"
)
elseif
(
CMAKE_COMPILER_IS_GNUCXX
)
ocv_update
(
CPU_AVX2_FLAGS_ON
"-mavx2"
)
ocv_update
(
CPU_FP16_FLAGS_ON
"-mf16c"
)
...
...
@@ -219,7 +219,8 @@ if(X86 OR X86_64)
ocv_update
(
CPU_SSE2_FLAGS_ON
"-msse2"
)
ocv_update
(
CPU_SSE_FLAGS_ON
"-msse"
)
if
(
NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS
"5.0"
)
ocv_update
(
CPU_AVX512_FLAGS_ON
"-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi"
)
# -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi
ocv_update
(
CPU_AVX_512F_FLAGS_ON
"-mavx512f"
)
endif
()
elseif
(
MSVC
)
ocv_update
(
CPU_AVX2_FLAGS_ON
"/arch:AVX2"
)
...
...
modules/core/include/opencv2/core/cv_cpu_dispatch.h
View file @
898ca382
...
...
@@ -82,9 +82,9 @@
# include <immintrin.h>
# define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_AVX
512
#ifdef CV_CPU_COMPILE_AVX
_512F
# include <immintrin.h>
# define CV_AVX
512
1
# define CV_AVX
_512F
1
#endif
#ifdef CV_CPU_COMPILE_FMA3
# define CV_FMA3 1
...
...
modules/core/include/opencv2/core/cv_cpu_helper.h
View file @
898ca382
...
...
@@ -165,20 +165,20 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
512
# define CV_TRY_AVX
512
1
# define CV_CPU_HAS_SUPPORT_AVX
512
1
# define CV_CPU_CALL_AVX
512(fn, args) return (opt_AVX512
::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
512
# define CV_TRY_AVX
512
1
# define CV_CPU_HAS_SUPPORT_AVX
512 (cv::checkHardwareSupport(CV_CPU_AVX512
))
# define CV_CPU_CALL_AVX
512(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512) return (opt_AVX512
::fn args)
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
_512F
# define CV_TRY_AVX
_512F
1
# define CV_CPU_HAS_SUPPORT_AVX
_512F
1
# define CV_CPU_CALL_AVX
_512F(fn, args) return (opt_AVX_512F
::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
_512F
# define CV_TRY_AVX
_512F
1
# define CV_CPU_HAS_SUPPORT_AVX
_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F
))
# define CV_CPU_CALL_AVX
_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F
::fn args)
#else
# define CV_TRY_AVX
512
0
# define CV_CPU_HAS_SUPPORT_AVX
512
0
# define CV_CPU_CALL_AVX
512
(fn, args)
# define CV_TRY_AVX
_512F
0
# define CV_CPU_HAS_SUPPORT_AVX
_512F
0
# define CV_CPU_CALL_AVX
_512F
(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX
512(fn, args, mode, ...) CV_CPU_CALL_AVX512
(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_CHAIN_AVX
_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F
(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_TRY_NEON 1
...
...
modules/dnn/CMakeLists.txt
View file @
898ca382
...
...
@@ -13,7 +13,7 @@ endif()
set
(
the_description
"Deep neural network module. It allows to load models from different frameworks and to make forward pass"
)
ocv_add_dispatched_file
(
"layers/layers_common"
AVX AVX2 AVX
512
)
ocv_add_dispatched_file
(
"layers/layers_common"
AVX AVX2 AVX
_512F
)
ocv_add_module
(
dnn opencv_core opencv_imgproc WRAP python matlab java js
)
ocv_warnings_disable
(
CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
898ca382
...
...
@@ -384,7 +384,7 @@ public:
p
.
is1x1_
=
kernel
==
Size
(
0
,
0
)
&&
pad
==
Size
(
0
,
0
);
p
.
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
p
.
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
p
.
useAVX512
=
checkHardwareSupport
(
CPU_AVX_512DQ
)
;
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX_512F
;
int
ncn
=
std
::
min
(
inpCn
,
(
int
)
BLK_SIZE_CN
);
p
.
ofstab_
.
resize
(
kernel
.
width
*
kernel
.
height
*
ncn
);
...
...
@@ -564,10 +564,10 @@ public:
// now compute dot product of the weights
// and im2row-transformed part of the tensor
int
bsz
=
ofs1
-
ofs0
;
#if CV_TRY_AVX
512
#if CV_TRY_AVX
_512F
/* AVX512 convolution requires an alignment of 16, and ROI is only there for larger vector sizes */
if
(
useAVX512
)
opt_AVX
512
::
fastConv
(
wptr
,
wstep
,
biasptr
,
rowbuf0
,
data_out0
+
ofs0
,
opt_AVX
_512F
::
fastConv
(
wptr
,
wstep
,
biasptr
,
rowbuf0
,
data_out0
+
ofs0
,
outShape
,
bsz
,
vsz
,
vsz_a
,
relu
,
cn0
==
0
);
else
#endif
...
...
@@ -1102,7 +1102,7 @@ public:
nstripes_
=
nstripes
;
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
useAVX512
=
checkHardwareSupport
(
CPU_AVX_512DQ
)
;
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX_512F
;
}
void
operator
()(
const
Range
&
range_
)
const
...
...
@@ -1120,9 +1120,9 @@ public:
size_t
bstep
=
b_
->
step1
();
size_t
cstep
=
c_
->
step1
();
#if CV_TRY_AVX
512
#if CV_TRY_AVX
_512F
if
(
useAVX512
)
opt_AVX
512
::
fastGEMM
(
aptr
,
astep
,
bptr
,
bstep
,
cptr
,
cstep
,
mmax
,
kmax
,
nmax
);
opt_AVX
_512F
::
fastGEMM
(
aptr
,
astep
,
bptr
,
bstep
,
cptr
,
cstep
,
mmax
,
kmax
,
nmax
);
else
#endif
#if CV_TRY_AVX2
...
...
modules/dnn/src/layers/fully_connected_layer.cpp
View file @
898ca382
...
...
@@ -161,7 +161,7 @@ public:
p
.
activ
=
activ
;
p
.
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
p
.
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
p
.
useAVX512
=
checkHardwareSupport
(
CPU_AVX_512DQ
)
;
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX_512F
;
parallel_for_
(
Range
(
0
,
nstripes
),
p
,
nstripes
);
}
...
...
@@ -196,9 +196,9 @@ public:
memcpy
(
sptr
,
sptr_
,
vecsize
*
sizeof
(
sptr
[
0
]));
#if CV_TRY_AVX
512
#if CV_TRY_AVX
_512F
if
(
useAVX512
)
opt_AVX
512
::
fastGEMM1T
(
sptr
,
wptr
,
wstep
,
biasptr
,
dptr
,
nw
,
vecsize
);
opt_AVX
_512F
::
fastGEMM1T
(
sptr
,
wptr
,
wstep
,
biasptr
,
dptr
,
nw
,
vecsize
);
else
#endif
#if CV_TRY_AVX2
...
...
modules/dnn/src/layers/layers_common.simd.hpp
View file @
898ca382
...
...
@@ -301,7 +301,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
{
int
n
=
0
;
#if
def CV_AVX512
#if
CV_AVX_512F
for
(
;
n
<=
nb
-
32
;
n
+=
32
)
{
for
(
int
m
=
0
;
m
<
ma
;
m
+=
4
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment