Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7d67d60f
Commit
7d67d60f
authored
Dec 29, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cmake(opt): AVX512_SKX
parent
fc1d85bf
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
88 additions
and
18 deletions
+88
-18
OpenCVCompilerOptimizations.cmake
cmake/OpenCVCompilerOptimizations.cmake
+20
-2
cpu_avx512skx.cpp
cmake/checks/cpu_avx512skx.cpp
+14
-0
cv_cpu_dispatch.h
modules/core/include/opencv2/core/cv_cpu_dispatch.h
+7
-0
cv_cpu_helper.h
modules/core/include/opencv2/core/cv_cpu_helper.h
+15
-0
cvdef.h
modules/core/include/opencv2/core/cvdef.h
+13
-4
system.cpp
modules/core/src/system.cpp
+8
-1
CMakeLists.txt
modules/dnn/CMakeLists.txt
+1
-1
convolution_layer.cpp
modules/dnn/src/layers/convolution_layer.cpp
+6
-6
fully_connected_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
+3
-3
layers_common.simd.hpp
modules/dnn/src/layers/layers_common.simd.hpp
+1
-1
No files found.
cmake/OpenCVCompilerOptimizations.cmake
View file @
7d67d60f
...
...
@@ -8,6 +8,7 @@
# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
# CPU_{opt}_IMPLIES=<list>
# CPU_{opt}_FORCE=<list> - subset of "implies" list
# CPU_{opt}_GROUP=<list> - similar to "implies" list, but additionally merges compiler flags
# CPU_{opt}_FLAGS_ON=""
# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum
...
...
@@ -26,7 +27,7 @@
#
# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (<name>.avx2.cpp)
set
(
CPU_ALL_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F"
)
set
(
CPU_ALL_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F
;AVX512_SKX
"
)
list
(
APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16
)
list
(
APPEND CPU_ALL_OPTIMIZATIONS VSX
)
list
(
REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS
)
...
...
@@ -145,7 +146,9 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
endif
()
if
(
X86 OR X86_64
)
ocv_update
(
CPU_KNOWN_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F"
)
ocv_update
(
CPU_KNOWN_OPTIMIZATIONS
"SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_SKX"
)
ocv_update
(
CPU_AVX512_SKX_GROUP
"AVX_512F;AVX_512CD;AVX_512BW;AVX_512DQ;AVX_512VL"
)
ocv_update
(
CPU_SSE_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_sse.cpp"
)
ocv_update
(
CPU_SSE2_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_sse2.cpp"
)
...
...
@@ -158,6 +161,7 @@ if(X86 OR X86_64)
ocv_update
(
CPU_AVX2_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx2.cpp"
)
ocv_update
(
CPU_FP16_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_fp16.cpp"
)
ocv_update
(
CPU_AVX_512F_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx512.cpp"
)
ocv_update
(
CPU_AVX512_SKX_TEST_FILE
"
${
OpenCV_SOURCE_DIR
}
/cmake/checks/cpu_avx512skx.cpp"
)
if
(
NOT OPENCV_CPU_OPT_IMPLIES_IGNORE
)
ocv_update
(
CPU_AVX_512F_IMPLIES
"AVX2"
)
...
...
@@ -206,6 +210,7 @@ if(X86 OR X86_64)
ocv_intel_compiler_optimization_option
(
SSE
"-msse"
"/arch:SSE"
)
endif
()
ocv_intel_compiler_optimization_option
(
AVX_512F
"-march=common-avx512"
"/arch:COMMON-AVX512"
)
ocv_intel_compiler_optimization_option
(
AVX512_SKX
"-march=core-avx512"
"/arch:CORE-AVX512"
)
elseif
(
CMAKE_COMPILER_IS_GNUCXX
)
ocv_update
(
CPU_AVX2_FLAGS_ON
"-mavx2"
)
ocv_update
(
CPU_FP16_FLAGS_ON
"-mf16c"
)
...
...
@@ -221,6 +226,7 @@ if(X86 OR X86_64)
if
(
NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS
"5.0"
)
# -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi
ocv_update
(
CPU_AVX_512F_FLAGS_ON
"-mavx512f"
)
ocv_update
(
CPU_AVX512_SKX_FLAGS_ON
"-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq"
)
endif
()
elseif
(
MSVC
)
ocv_update
(
CPU_AVX2_FLAGS_ON
"/arch:AVX2"
)
...
...
@@ -348,6 +354,18 @@ endmacro()
foreach
(
OPT
${
CPU_KNOWN_OPTIMIZATIONS
}
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT 0 CACHE INTERNAL
""
)
if
(
DEFINED CPU_
${
OPT
}
_GROUP
)
if
(
NOT DEFINED CPU_
${
OPT
}
_IMPLIES
)
set
(
CPU_
${
OPT
}
_IMPLIES
"
${
CPU_
${
OPT
}
_GROUP
}
"
)
endif
()
if
(
NOT DEFINED CPU_
${
OPT
}
_FLAGS_ON
)
set
(
__flags
""
)
foreach
(
OPT2
${
CPU_
${
OPT
}
_GROUP
}
)
set
(
__flags
"
${
__flags
}
${
CPU_
${
OPT2
}
_FLAGS_ON
}
"
)
endforeach
()
set
(
CPU_
${
OPT
}
_FLAGS_ON
"
${
__flags
}
"
)
endif
()
endif
()
if
(
NOT DEFINED CPU_
${
OPT
}
_FORCE
)
set
(
CPU_
${
OPT
}
_FORCE
"
${
CPU_
${
OPT
}
_IMPLIES
}
"
)
endif
()
...
...
cmake/checks/cpu_avx512skx.cpp
0 → 100644
View file @
7d67d60f
#if defined __AVX512__ || defined __AVX512F__
#include <immintrin.h>
void
test
()
{
__m512i
zmm
=
_mm512_setzero_si512
();
__m256i
a
=
_mm256_setzero_si256
();
__m256i
b
=
_mm256_abs_epi64
(
a
);
// VL
__m512i
c
=
_mm512_abs_epi8
(
zmm
);
// BW
__m512i
d
=
_mm512_broadcast_i32x8
(
b
);
// DQ
}
#else
#error "AVX512-SKX is not supported"
#endif
int
main
()
{
return
0
;
}
modules/core/include/opencv2/core/cv_cpu_dispatch.h
View file @
7d67d60f
...
...
@@ -86,6 +86,10 @@
# include <immintrin.h>
# define CV_AVX_512F 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
# include <immintrin.h>
# define CV_AVX512_SKX 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
# define CV_FMA3 1
#endif
...
...
@@ -222,6 +226,9 @@ struct VZeroUpperGuard {
#ifndef CV_AVX_512VL
# define CV_AVX_512VL 0
#endif
#ifndef CV_AVX512_SKX
# define CV_AVX512_SKX 0
#endif
#ifndef CV_NEON
# define CV_NEON 0
...
...
modules/core/include/opencv2/core/cv_cpu_helper.h
View file @
7d67d60f
...
...
@@ -180,6 +180,21 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
# define CV_CPU_CALL_AVX512_SKX(fn, args) return (opt_AVX512_SKX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
#else
# define CV_TRY_AVX512_SKX 0
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
# define CV_CPU_CALL_AVX512_SKX(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_TRY_NEON 1
# define CV_CPU_HAS_SUPPORT_NEON 1
...
...
modules/core/include/opencv2/core/cvdef.h
View file @
7d67d60f
...
...
@@ -146,7 +146,8 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_AVX_512CD 15
#define CV_CPU_AVX_512DQ 16
#define CV_CPU_AVX_512ER 17
#define CV_CPU_AVX_512IFMA512 18
#define CV_CPU_AVX_512IFMA512 18 // deprecated
#define CV_CPU_AVX_512IFMA 18
#define CV_CPU_AVX_512PF 19
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
...
...
@@ -155,8 +156,11 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_VSX 200
// CPU features groups
#define CV_CPU_AVX512_SKX 256
// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE
255
#define CV_HARDWARE_MAX_FEATURE
512
/** @brief Available CPU features.
*/
...
...
@@ -179,14 +183,19 @@ enum CpuFeatures {
CPU_AVX_512CD
=
15
,
CPU_AVX_512DQ
=
16
,
CPU_AVX_512ER
=
17
,
CPU_AVX_512IFMA512
=
18
,
CPU_AVX_512IFMA512
=
18
,
// deprecated
CPU_AVX_512IFMA
=
18
,
CPU_AVX_512PF
=
19
,
CPU_AVX_512VBMI
=
20
,
CPU_AVX_512VL
=
21
,
CPU_NEON
=
100
,
CPU_VSX
=
200
CPU_VSX
=
200
,
CPU_AVX512_SKX
=
256
,
//!< Skylake-X with AVX-512F/CD/BW/DQ/VL
CPU_MAX_FEATURE
=
512
// see CV_HARDWARE_MAX_FEATURE
};
...
...
modules/core/src/system.cpp
View file @
7d67d60f
...
...
@@ -301,7 +301,7 @@ struct HWFeatures
g_hwFeatureNames
[
CPU_AVX_512CD
]
=
"AVX512CD"
;
g_hwFeatureNames
[
CPU_AVX_512DQ
]
=
"AVX512DQ"
;
g_hwFeatureNames
[
CPU_AVX_512ER
]
=
"AVX512ER"
;
g_hwFeatureNames
[
CPU_AVX_512IFMA
512
]
=
"AVX512IFMA"
;
g_hwFeatureNames
[
CPU_AVX_512IFMA
]
=
"AVX512IFMA"
;
g_hwFeatureNames
[
CPU_AVX_512PF
]
=
"AVX512PF"
;
g_hwFeatureNames
[
CPU_AVX_512VBMI
]
=
"AVX512VBMI"
;
g_hwFeatureNames
[
CPU_AVX_512VL
]
=
"AVX512VL"
;
...
...
@@ -309,6 +309,8 @@ struct HWFeatures
g_hwFeatureNames
[
CPU_NEON
]
=
"NEON"
;
g_hwFeatureNames
[
CPU_VSX
]
=
"VSX"
;
g_hwFeatureNames
[
CPU_AVX512_SKX
]
=
"AVX512-SKX"
;
}
void
initialize
(
void
)
...
...
@@ -456,6 +458,11 @@ struct HWFeatures
have
[
CV_CPU_AVX_512VBMI
]
=
false
;
have
[
CV_CPU_AVX_512VL
]
=
false
;
}
if
(
have
[
CV_CPU_AVX_512F
])
{
have
[
CV_CPU_AVX512_SKX
]
=
have
[
CV_CPU_AVX_512F
]
&
have
[
CV_CPU_AVX_512CD
]
&
have
[
CV_CPU_AVX_512BW
]
&
have
[
CV_CPU_AVX_512DQ
]
&
have
[
CV_CPU_AVX_512VL
];
}
}
#else
CV_UNUSED
(
cpuid_data
);
...
...
modules/dnn/CMakeLists.txt
View file @
7d67d60f
...
...
@@ -13,7 +13,7 @@ endif()
set
(
the_description
"Deep neural network module. It allows to load models from different frameworks and to make forward pass"
)
ocv_add_dispatched_file
(
"layers/layers_common"
AVX AVX2 AVX
_512F
)
ocv_add_dispatched_file
(
"layers/layers_common"
AVX AVX2 AVX
512_SKX
)
ocv_add_module
(
dnn opencv_core opencv_imgproc WRAP python matlab java js
)
ocv_warnings_disable
(
CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
...
...
modules/dnn/src/layers/convolution_layer.cpp
View file @
7d67d60f
...
...
@@ -384,7 +384,7 @@ public:
p
.
is1x1_
=
kernel
==
Size
(
0
,
0
)
&&
pad
==
Size
(
0
,
0
);
p
.
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
p
.
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
_512F
;
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
512_SKX
;
int
ncn
=
std
::
min
(
inpCn
,
(
int
)
BLK_SIZE_CN
);
p
.
ofstab_
.
resize
(
kernel
.
width
*
kernel
.
height
*
ncn
);
...
...
@@ -564,10 +564,10 @@ public:
// now compute dot product of the weights
// and im2row-transformed part of the tensor
int
bsz
=
ofs1
-
ofs0
;
#if CV_TRY_AVX
_512F
#if CV_TRY_AVX
512_SKX
/* AVX512 convolution requires an alignment of 16, and ROI is only there for larger vector sizes */
if
(
useAVX512
)
opt_AVX
_512F
::
fastConv
(
wptr
,
wstep
,
biasptr
,
rowbuf0
,
data_out0
+
ofs0
,
opt_AVX
512_SKX
::
fastConv
(
wptr
,
wstep
,
biasptr
,
rowbuf0
,
data_out0
+
ofs0
,
outShape
,
bsz
,
vsz
,
vsz_a
,
relu
,
cn0
==
0
);
else
#endif
...
...
@@ -1102,7 +1102,7 @@ public:
nstripes_
=
nstripes
;
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
_512F
;
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
512_SKX
;
}
void
operator
()(
const
Range
&
range_
)
const
...
...
@@ -1120,9 +1120,9 @@ public:
size_t
bstep
=
b_
->
step1
();
size_t
cstep
=
c_
->
step1
();
#if CV_TRY_AVX
_512F
#if CV_TRY_AVX
512_SKX
if
(
useAVX512
)
opt_AVX
_512F
::
fastGEMM
(
aptr
,
astep
,
bptr
,
bstep
,
cptr
,
cstep
,
mmax
,
kmax
,
nmax
);
opt_AVX
512_SKX
::
fastGEMM
(
aptr
,
astep
,
bptr
,
bstep
,
cptr
,
cstep
,
mmax
,
kmax
,
nmax
);
else
#endif
#if CV_TRY_AVX2
...
...
modules/dnn/src/layers/fully_connected_layer.cpp
View file @
7d67d60f
...
...
@@ -161,7 +161,7 @@ public:
p
.
activ
=
activ
;
p
.
useAVX
=
checkHardwareSupport
(
CPU_AVX
);
p
.
useAVX2
=
checkHardwareSupport
(
CPU_AVX2
);
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
_512F
;
p
.
useAVX512
=
CV_CPU_HAS_SUPPORT_AVX
512_SKX
;
parallel_for_
(
Range
(
0
,
nstripes
),
p
,
nstripes
);
}
...
...
@@ -196,9 +196,9 @@ public:
memcpy
(
sptr
,
sptr_
,
vecsize
*
sizeof
(
sptr
[
0
]));
#if CV_TRY_AVX
_512F
#if CV_TRY_AVX
512_SKX
if
(
useAVX512
)
opt_AVX
_512F
::
fastGEMM1T
(
sptr
,
wptr
,
wstep
,
biasptr
,
dptr
,
nw
,
vecsize
);
opt_AVX
512_SKX
::
fastGEMM1T
(
sptr
,
wptr
,
wstep
,
biasptr
,
dptr
,
nw
,
vecsize
);
else
#endif
#if CV_TRY_AVX2
...
...
modules/dnn/src/layers/layers_common.simd.hpp
View file @
7d67d60f
...
...
@@ -301,7 +301,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
{
int
n
=
0
;
#if CV_AVX
_512F
#if CV_AVX
512_SKX // AVX512VL is necessary to avoid register spilling
for
(
;
n
<=
nb
-
32
;
n
+=
32
)
{
for
(
int
m
=
0
;
m
<
ma
;
m
+=
4
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment