Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
297ba853
Commit
297ba853
authored
Apr 03, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #8441 from alalek:dispatch_mathfuncs_core
parents
36e80175
1e6ce1d2
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
617 additions
and
207 deletions
+617
-207
OpenCVCompilerOptimizations.cmake
cmake/OpenCVCompilerOptimizations.cmake
+92
-11
OpenCVModule.cmake
cmake/OpenCVModule.cmake
+11
-2
OpenCVPCHSupport.cmake
cmake/OpenCVPCHSupport.cmake
+6
-4
CMakeLists.txt
modules/core/CMakeLists.txt
+3
-0
cv_cpu_dispatch.h
modules/core/include/opencv2/core/cv_cpu_dispatch.h
+27
-0
cv_cpu_helper.h
modules/core/include/opencv2/core/cv_cpu_helper.h
+51
-36
cvdef.h
modules/core/include/opencv2/core/cvdef.h
+11
-0
intrin.hpp
modules/core/include/opencv2/core/hal/intrin.hpp
+30
-0
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+7
-1
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+5
-1
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+5
-1
private.hpp
modules/core/include/opencv2/core/private.hpp
+8
-2
cv_cpu_include_simd_declarations.hpp
...opencv2/core/private/cv_cpu_include_simd_declarations.hpp
+30
-0
mathfuncs_core.dispatch.cpp
modules/core/src/mathfuncs_core.dispatch.cpp
+215
-0
mathfuncs_core.simd.hpp
modules/core/src/mathfuncs_core.simd.hpp
+115
-149
CMakeLists.txt
modules/world/CMakeLists.txt
+1
-0
No files found.
cmake/OpenCVCompilerOptimizations.cmake
View file @
297ba853
...
...
@@ -275,6 +275,11 @@ set(CPU_BASELINE_FLAGS "")
set
(
CPU_BASELINE_FINAL
""
)
set
(
CPU_DISPATCH_FINAL
""
)
if
(
CV_DISABLE_OPTIMIZATION
)
set
(
CPU_DISPATCH
""
)
set
(
CPU_DISPATCH_REQUIRE
""
)
endif
()
macro
(
ocv_check_compiler_optimization OPT
)
if
(
NOT DEFINED CPU_
${
OPT
}
_SUPPORTED
)
if
((
DEFINED CPU_
${
OPT
}
_FLAGS_ON AND NOT
"x
${
CPU_
${
OPT
}
_FLAGS_ON
}
"
STREQUAL
"x"
)
OR CPU_
${
OPT
}
_TEST_FILE
)
...
...
@@ -319,7 +324,7 @@ macro(ocv_check_compiler_optimization OPT)
endmacro
()
foreach
(
OPT
${
CPU_KNOWN_OPTIMIZATIONS
}
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT 0 CACHE INTERNAL
""
FORCE
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT 0 CACHE INTERNAL
""
)
if
(
NOT DEFINED CPU_
${
OPT
}
_FORCE
)
set
(
CPU_
${
OPT
}
_FORCE
"
${
CPU_
${
OPT
}
_IMPLIES
}
"
)
endif
()
...
...
@@ -515,15 +520,27 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
endforeach
()
foreach
(
fname
${${
SOURCES_VAR_NAME
}}
)
string
(
TOLOWER
"
${
fname
}
"
fname_LOWER
)
if
(
fname_LOWER MATCHES
"[.]opt_.*[.]cpp$"
)
if
(
CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS
)
message
(
STATUS
"Excluding from source files list:
${
fname
}
"
)
if
(
fname_LOWER MATCHES
"
\\
.(.*)
\\
.cpp$"
)
string
(
TOUPPER
"
${
CMAKE_MATCH_1
}
"
OPT_
)
if
(
OPT_ MATCHES
"(CUDA.*|DISPATCH.*|OCL)"
)
# don't touch files like filename.cuda.cpp
list
(
APPEND __result
"
${
fname
}
"
)
#continue()
elseif
(
CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS
)
message
(
STATUS
"Excluding from source files list (optimization is disabled):
${
fname
}
"
)
#continue()
else
()
get_source_file_property
(
__definitions
"
${
fname
}
"
COMPILE_DEFINITIONS
)
if
(
__definitions
)
list
(
APPEND __definitions
"CV_CPU_DISPATCH_MODE=
${
OPT_
}
"
)
else
()
set
(
__definitions
"CV_CPU_DISPATCH_MODE=
${
OPT_
}
"
)
endif
()
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
__definitions
}
"
)
set
(
__opt_found 0
)
foreach
(
OPT
${
CPU_BASELINE_FINAL
}
)
string
(
TOLOWER
"
${
OPT
}
"
OPT_LOWER
)
if
(
fname_LOWER MATCHES
"
_
${
OPT_LOWER
}
[.]
cpp$"
)
if
(
fname_LOWER MATCHES
"
\\
.
${
OPT_LOWER
}
\\
.
cpp$"
)
#message("${fname} BASELINE-${OPT}")
set
(
__opt_found 1
)
list
(
APPEND __result
"
${
fname
}
"
)
...
...
@@ -533,11 +550,11 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
foreach
(
OPT
${
CPU_DISPATCH_FINAL
}
)
foreach
(
OPT2
${
CPU_DISPATCH_
${
OPT
}
_FORCED
}
)
string
(
TOLOWER
"
${
OPT2
}
"
OPT2_LOWER
)
if
(
fname_LOWER MATCHES
"
_
${
OPT2_LOWER
}
[.]
cpp$"
)
if
(
fname_LOWER MATCHES
"
\\
.
${
OPT2_LOWER
}
\\
.
cpp$"
)
list
(
APPEND __result_
${
OPT
}
"
${
fname
}
"
)
math
(
EXPR CPU_
${
OPT
}
_USAGE_COUNT
"
${
CPU_
${
OPT
}
_USAGE_COUNT
}
+1"
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT
"
${
CPU_
${
OPT
}
_USAGE_COUNT
}
"
CACHE INTERNAL
""
FORCE
)
#message("${fname} ${OPT}")
#message("
(${CPU_${OPT}_USAGE_COUNT})
${fname} ${OPT}")
#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
...
...
@@ -573,7 +590,13 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
list
(
APPEND __result
"$<TARGET_OBJECTS:
${
TARGET_BASE_NAME
}
_
${
OPT
}
>"
)
else
()
foreach
(
fname
${
__result_
${
OPT
}}
)
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
get_source_file_property
(
__definitions
"
${
fname
}
"
COMPILE_DEFINITIONS
)
if
(
__definitions
)
list
(
APPEND __definitions
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
else
()
set
(
__definitions
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
endif
()
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
__definitions
}
"
)
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_FLAGS
"
${
CPU_DISPATCH_FLAGS_
${
OPT
}}
"
)
endforeach
()
list
(
APPEND __result
${
__result_
${
OPT
}}
)
...
...
@@ -620,18 +643,25 @@ macro(ocv_compiler_optimization_fill_cpu_config)
set
(
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
"
${
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
}
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_
${
OPT
}
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
1
# define CV_CPU_CALL_
${
OPT
}
(
...) return __VA_ARGS__
# define CV_CPU_CALL_
${
OPT
}
(
fn, args) return (opt_
${
OPT
}
::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_
${
OPT
}
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
(cv::checkHardwareSupport(CV_CPU_
${
OPT
}
))
# define CV_CPU_CALL_
${
OPT
}
(
...) if (CV_CPU_HAS_SUPPORT_
${
OPT
}
) return __VA_ARGS__
# define CV_CPU_CALL_
${
OPT
}
(
fn, args) if (CV_CPU_HAS_SUPPORT_
${
OPT
}
) return (opt_
${
OPT
}
::fn args)
#else
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
0
# define CV_CPU_CALL_
${
OPT
}
(
...
)
# define CV_CPU_CALL_
${
OPT
}
(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_
${
OPT
}
(fn, args, mode, ...) CV_CPU_CALL_
${
OPT
}
(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
"
)
endif
()
endforeach
()
set
(
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
"
${
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
}
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
"
)
set
(
__file
"
${
CMAKE_SOURCE_DIR
}
/modules/core/include/opencv2/core/cv_cpu_helper.h"
)
if
(
EXISTS
"
${
__file
}
"
)
file
(
READ
"
${
__file
}
"
__content
)
...
...
@@ -644,6 +674,57 @@ macro(ocv_compiler_optimization_fill_cpu_config)
endif
()
endmacro
()
# ocv_add_dispatched_file(<filename> [<OPT>...])
#
# Generates the helper files for a CPU-dispatched source of the current module
# (identified by ${the_module}). Does nothing while OPENCV_INITIAL_PASS is set.
#
#   filename - base name; "<filename>.simd.hpp" is the header referenced by the generated files.
#   ARGN     - optimization names (e.g. SSE2 AVX AVX2). Ignored when CV_DISABLE_OPTIMIZATION
#              is set or CV_ENABLE_INTRINSICS is not.
#
# For every optimization OPT:
#   * writes ${CMAKE_CURRENT_BINARY_DIR}/<filename>.<opt>.cpp (includes precomp.hpp and
#     <filename>.simd.hpp) and appends it to OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED;
#   * appends a "#define CV_CPU_DISPATCH_MODE <OPT>" + include of
#     cv_cpu_include_simd_declarations.hpp pair to the declarations header.
# Finally writes ${CMAKE_CURRENT_BINARY_DIR}/<filename>.simd_declarations.hpp, which also
# defines CV_CPU_DISPATCH_MODES_ALL (last listed optimization first, BASELINE last).
#
# Files are only rewritten when their content differs from what would be generated.
macro(ocv_add_dispatched_file filename)
  if(NOT OPENCV_INITIAL_PASS)
    set(__codestr "
#include \"precomp.hpp\"
#include \"${filename}.simd.hpp\"
")

    set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${filename}.simd.hpp\"")
    set(__dispatch_modes "BASELINE")

    set(__optimizations "${ARGN}")
    if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
      set(__optimizations "")
    endif()

    foreach(OPT ${__optimizations})
      string(TOLOWER "${OPT}" OPT_LOWER)
      set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp")
      # Macros share the caller's variable scope, so __content must be reset when the file
      # is missing. Otherwise the value read for a previous file (identical __codestr for
      # every OPT) would make a missing file look up-to-date and it would never be written.
      if(EXISTS "${__file}")
        file(READ "${__file}" __content)
      else()
        set(__content "")
      endif()
      if(__content STREQUAL __codestr)
        #message(STATUS "${__file} contains up-to-date content")
      else()
        file(WRITE "${__file}" "${__codestr}")
      endif()
      list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}")

      set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODE ${OPT}
#include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\"
")
      # Prepend, so the most recently listed optimization is tried first and BASELINE stays last.
      set(__dispatch_modes "${OPT}, ${__dispatch_modes}")
    endforeach()

    set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
")

    set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp")
    # Same stale-value hazard as above: never compare against content read for another file.
    if(EXISTS "${__file}")
      file(READ "${__file}" __content)
    else()
      set(__content "")
    endif()
    if(__content STREQUAL __declarations_str)
      #message(STATUS "${__file} contains up-to-date content")
    else()
      file(WRITE "${__file}" "${__declarations_str}")
    endif()
  endif()
endmacro()
if
(
CV_DISABLE_OPTIMIZATION OR CV_ICC
)
ocv_update
(
CV_ENABLE_UNROLLED 0
)
else
()
...
...
cmake/OpenCVModule.cmake
View file @
297ba853
...
...
@@ -314,6 +314,7 @@ macro(ocv_glob_modules)
set
(
OPENCV_INITIAL_PASS OFF
)
if
(
${
BUILD_opencv_world
}
)
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
"
${
m
}
"
)
if
(
"
${
m
}
"
STREQUAL opencv_world
)
add_subdirectory
(
"
${
OPENCV_MODULE_opencv_world_LOCATION
}
"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/world"
)
elseif
(
NOT OPENCV_MODULE_
${
m
}
_IS_PART_OF_WORLD AND NOT
${
m
}
STREQUAL opencv_world
)
...
...
@@ -329,6 +330,7 @@ macro(ocv_glob_modules)
endforeach
()
else
()
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
"
${
m
}
"
)
if
(
m MATCHES
"^opencv_"
)
string
(
REGEX REPLACE
"^opencv_"
""
__shortname
"
${
m
}
"
)
add_subdirectory
(
"
${
OPENCV_MODULE_
${
m
}
_LOCATION
}
"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
__shortname
}
"
)
...
...
@@ -646,11 +648,13 @@ macro(ocv_set_module_sources)
ocv_get_module_external_sources
()
endif
()
if
(
OPENCV_MODULE_
${
the_module
}
_SOURCES_DISPATCHED
)
list
(
APPEND OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES_DISPATCHED
}
)
endif
()
# use full paths for module to be independent from the module location
ocv_convert_to_full_paths
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
)
ocv_compiler_optimization_process_sources
(
OPENCV_MODULE_
${
the_module
}
_SOURCES OPENCV_MODULE_
${
the_module
}
_DEPS_EXT
${
the_module
}
)
set
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
${
OPENCV_MODULE_
${
the_module
}
_HEADERS
}
CACHE INTERNAL
"List of header files for
${
the_module
}
"
)
set
(
OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES
}
CACHE INTERNAL
"List of source files for
${
the_module
}
"
)
endmacro
()
...
...
@@ -766,6 +770,11 @@ macro(ocv_create_module)
endmacro
()
macro
(
_ocv_create_module
)
ocv_compiler_optimization_process_sources
(
OPENCV_MODULE_
${
the_module
}
_SOURCES OPENCV_MODULE_
${
the_module
}
_DEPS_EXT
${
the_module
}
)
set
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
${
OPENCV_MODULE_
${
the_module
}
_HEADERS
}
CACHE INTERNAL
"List of header files for
${
the_module
}
"
)
set
(
OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES
}
CACHE INTERNAL
"List of source files for
${
the_module
}
"
)
# The condition we ought to be testing here is whether ocv_add_precompiled_headers will
# be called at some point in the future. We can't look into the future, though,
# so this will have to do.
...
...
cmake/OpenCVPCHSupport.cmake
View file @
297ba853
...
...
@@ -288,11 +288,12 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
foreach
(
src
${
_sources
}
)
if
(
NOT
"
${
src
}
"
MATCHES
"
\\
.mm$"
)
get_source_file_property
(
oldProps
"
${
src
}
"
COMPILE_FLAGS
)
if
(
NOT oldProps
)
get_source_file_property
(
oldProps2
"
${
src
}
"
COMPILE_DEFINITIONS
)
if
(
NOT oldProps AND NOT oldProps2
)
set
(
newProperties
"-include
\"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
_name
}
\"
"
)
set_source_files_properties
(
"
${
src
}
"
PROPERTIES COMPILE_FLAGS
"
${
newProperties
}
"
)
else
()
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
, file:
${
src
}
"
)
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
defines:
${
oldProps2
}
, file:
${
src
}
"
)
endif
()
endif
()
endforeach
()
...
...
@@ -339,11 +340,12 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
AND NOT
"
${
src
}
"
MATCHES
"^
\$
"
# CMake generator expressions
)
get_source_file_property
(
oldProps
"
${
src
}
"
COMPILE_FLAGS
)
if
(
NOT oldProps
)
get_source_file_property
(
oldProps2
"
${
src
}
"
COMPILE_DEFINITIONS
)
if
(
NOT oldProps AND NOT oldProps2
)
set
(
newProperties
"/Yu
\"
${
_input
}
\"
/FI
\"
${
_input
}
\"
"
)
set_source_files_properties
(
"
${
src
}
"
PROPERTIES COMPILE_FLAGS
"
${
newProperties
}
"
)
else
()
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
, file:
${
src
}
"
)
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
defines:
${
oldProps2
}
, file:
${
src
}
"
)
endif
()
endif
()
endforeach
()
...
...
modules/core/CMakeLists.txt
View file @
297ba853
set
(
the_description
"The Core Functionality"
)
ocv_add_dispatched_file
(
mathfuncs_core SSE2 AVX AVX2
)
ocv_add_module
(
core
"
${
OPENCV_HAL_LINKER_LIBS
}
"
OPTIONAL opencv_cudev
...
...
modules/core/include/opencv2/core/cv_cpu_dispatch.h
View file @
297ba853
...
...
@@ -7,6 +7,23 @@
#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"
// Namespace that wraps the optimized implementations in the current translation unit:
// opt_<MODE> when CV_CPU_DISPATCH_MODE is defined, cpu_baseline otherwise.
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#endif
// Chain terminator selected by the END mode: expands to nothing.
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)
/* done */
// Pastes the first mode name onto __CV_CPU_DISPATCH_CHAIN_ and passes the remaining modes on as __VA_ARGS__.
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// Intermediate step: the mode list reaches __CV_CPU_DISPATCH through one more macro expansion.
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
// Entry point: CV_CPU_DISPATCH(fn, (args), MODE1, MODE2, ...). END is appended as the final mode.
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
#if defined CV_ENABLE_INTRINSICS \
&& !defined CV_DISABLE_OPTIMIZATION \
&& !defined __CUDACC__
/* do not include SSE/AVX/NEON headers for NVCC compiler */
\
...
...
@@ -76,6 +93,16 @@
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
// Only provided when AVX code is compiled (CV_CPU_COMPILE_AVX) but AVX is not part of the
// baseline (CV_CPU_BASELINE_COMPILE_AVX undefined).
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
// Scope guard: its destructor calls _mm256_zeroupper() when the guard object goes out of scope.
struct
VZeroUpperGuard
{
#ifdef __GNUC__
__attribute__
((
always_inline
))
#endif
inline
~
VZeroUpperGuard
()
{
_mm256_zeroupper
();
}
};
// Declares a local guard object; note that the macro already ends with ';'.
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard;
#endif
#endif // __OPENCV_BUILD
...
...
modules/core/include/opencv2/core/cv_cpu_helper.h
View file @
297ba853
...
...
@@ -2,132 +2,147 @@
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE 1
# define CV_CPU_CALL_SSE(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE(
fn, args) return (opt_SSE::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
# define CV_CPU_CALL_SSE(
...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
# define CV_CPU_CALL_SSE(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE 0
# define CV_CPU_CALL_SSE(
...
)
# define CV_CPU_CALL_SSE(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 1
# define CV_CPU_CALL_SSE2(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE2(
fn, args) return (opt_SSE2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
# define CV_CPU_CALL_SSE2(
...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
# define CV_CPU_CALL_SSE2(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE2 0
# define CV_CPU_CALL_SSE2(
...
)
# define CV_CPU_CALL_SSE2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 1
# define CV_CPU_CALL_SSE3(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE3(
fn, args) return (opt_SSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
# define CV_CPU_CALL_SSE3(
...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
# define CV_CPU_CALL_SSE3(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE3 0
# define CV_CPU_CALL_SSE3(
...
)
# define CV_CPU_CALL_SSE3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 1
# define CV_CPU_CALL_SSSE3(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSSE3(
fn, args) return (opt_SSSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
# define CV_CPU_CALL_SSSE3(
...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
# define CV_CPU_CALL_SSSE3(
fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSSE3 0
# define CV_CPU_CALL_SSSE3(
...
)
# define CV_CPU_CALL_SSSE3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
# define CV_CPU_CALL_SSE4_1(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_1(
fn, args) return (opt_SSE4_1::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
# define CV_CPU_CALL_SSE4_1(
...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_1(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
# define CV_CPU_CALL_SSE4_1(
...
)
# define CV_CPU_CALL_SSE4_1(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
# define CV_CPU_CALL_SSE4_2(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_2(
fn, args) return (opt_SSE4_2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
# define CV_CPU_CALL_SSE4_2(
...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_2(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
# define CV_CPU_CALL_SSE4_2(
...
)
# define CV_CPU_CALL_SSE4_2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT 1
# define CV_CPU_CALL_POPCNT(
...) return __VA_ARGS__
# define CV_CPU_CALL_POPCNT(
fn, args) return (opt_POPCNT::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
# define CV_CPU_CALL_POPCNT(
...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
# define CV_CPU_CALL_POPCNT(
fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
#else
# define CV_CPU_HAS_SUPPORT_POPCNT 0
# define CV_CPU_CALL_POPCNT(
...
)
# define CV_CPU_CALL_POPCNT(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX 1
# define CV_CPU_CALL_AVX(
...) return __VA_ARGS__
# define CV_CPU_CALL_AVX(
fn, args) return (opt_AVX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
# define CV_CPU_CALL_AVX(
...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
# define CV_CPU_CALL_AVX(
fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
#else
# define CV_CPU_HAS_SUPPORT_AVX 0
# define CV_CPU_CALL_AVX(
...
)
# define CV_CPU_CALL_AVX(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 1
# define CV_CPU_CALL_FP16(
...) return __VA_ARGS__
# define CV_CPU_CALL_FP16(
fn, args) return (opt_FP16::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
# define CV_CPU_CALL_FP16(
...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
# define CV_CPU_CALL_FP16(
fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
#else
# define CV_CPU_HAS_SUPPORT_FP16 0
# define CV_CPU_CALL_FP16(
...
)
# define CV_CPU_CALL_FP16(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 1
# define CV_CPU_CALL_AVX2(
...) return __VA_ARGS__
# define CV_CPU_CALL_AVX2(
fn, args) return (opt_AVX2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
# define CV_CPU_CALL_AVX2(
...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
# define CV_CPU_CALL_AVX2(
fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_AVX2 0
# define CV_CPU_CALL_AVX2(
...
)
# define CV_CPU_CALL_AVX2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 1
# define CV_CPU_CALL_FMA3(
...) return __VA_ARGS__
# define CV_CPU_CALL_FMA3(
fn, args) return (opt_FMA3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
# define CV_CPU_CALL_FMA3(
...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
# define CV_CPU_CALL_FMA3(
fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_FMA3 0
# define CV_CPU_CALL_FMA3(
...
)
# define CV_CPU_CALL_FMA3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON 1
# define CV_CPU_CALL_NEON(
...) return __VA_ARGS__
# define CV_CPU_CALL_NEON(
fn, args) return (opt_NEON::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
# define CV_CPU_CALL_NEON(
...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
# define CV_CPU_CALL_NEON(
fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
#else
# define CV_CPU_HAS_SUPPORT_NEON 0
# define CV_CPU_CALL_NEON(
...
)
# define CV_CPU_CALL_NEON(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args)
/* last in sequence */
modules/core/include/opencv2/core/cvdef.h
View file @
297ba853
...
...
@@ -52,6 +52,17 @@
#include "cvconfig.h"
#endif
// Identity macro: forces one more expansion pass over its argument.
#ifndef __CV_EXPAND
#define __CV_EXPAND(x) x
#endif
// Token concatenation. The extra indirection levels let macro arguments be expanded
// before they reach the '##' operator in __CV_CAT__.
#ifndef __CV_CAT
#define __CV_CAT__(x, y) x ## y
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
#define __CV_CAT(x, y) __CV_CAT_(x, y)
#endif
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE
/* to avoid multiple Visual Studio warnings */
#endif
...
...
modules/core/include/opencv2/core/hal/intrin.hpp
View file @
297ba853
...
...
@@ -60,6 +60,25 @@
// access from within opencv code more accessible
namespace
cv
{
#ifndef CV_DOXYGEN
// Per-mode namespace for the intrinsics: hal_<MODE> when CV_CPU_DISPATCH_MODE is defined,
// hal_baseline otherwise.
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
// Open and immediately close the namespace so that it is declared before the using-directive.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
// Make the names of the per-mode namespace visible from the enclosing namespace.
using
namespace
CV_CPU_OPTIMIZATION_HAL_NAMESPACE
;
// Re-open the namespace; it is left open here and closed further down in the header.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
//! @addtogroup core_hal_intrin
//! @{
...
...
@@ -281,6 +300,9 @@ template <typename T> struct V_SIMD128Traits
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
#ifdef CV_DOXYGEN
...
...
@@ -323,6 +345,10 @@ template <typename T> struct V_SIMD128Traits
namespace
cv
{
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
template
<
typename
R
>
struct
V_RegTrait128
;
template
<>
struct
V_RegTrait128
<
uchar
>
{
...
...
@@ -407,6 +433,10 @@ template <> struct V_RegTrait128<double> {
};
#endif
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
// cv::
//! @endcond
...
...
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
297ba853
...
...
@@ -53,6 +53,10 @@
namespace
cv
{
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
/** @addtogroup core_hal_intrin
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
...
...
@@ -1827,7 +1831,9 @@ static inline bool hasSIMD128()
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
#endif
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
297ba853
...
...
@@ -53,6 +53,8 @@ namespace cv
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define CV_SIMD128 1
#if defined(__aarch64__)
#define CV_SIMD128_64F 1
...
...
@@ -1238,11 +1240,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
//! @brief Check CPU capability of SIMD operation
static
inline
bool
hasSIMD128
()
{
return
checkHardwareSupport
(
CV_CPU_NEON
)
;
return
(
CV_CPU_HAS_SUPPORT_NEON
)
?
true
:
false
;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
}
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
297ba853
...
...
@@ -56,6 +56,8 @@ namespace cv
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
struct
v_uint8x16
{
typedef
uchar
lane_type
;
...
...
@@ -1791,11 +1793,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
//! @brief Check CPU capability of SIMD operation
static
inline
bool
hasSIMD128
()
{
return
checkHardwareSupport
(
CV_CPU_SSE2
)
;
return
(
CV_CPU_HAS_SUPPORT_SSE2
)
?
true
:
false
;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
}
...
...
modules/core/include/opencv2/core/private.hpp
View file @
297ba853
...
...
@@ -540,7 +540,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// General instrumentation
// General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION
()
CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
#define CV_INSTRUMENT_REGION
_()
CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Custom OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Instrumentation for parallel_for_ or other regions which forks and gathers threads
...
...
@@ -566,7 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
#else
#define CV_INSTRUMENT_REGION_META(...)
#define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION
_
()
#define CV_INSTRUMENT_REGION_NAME(...)
#define CV_INSTRUMENT_REGION_MT_FORK()
...
...
@@ -580,6 +580,12 @@ CV_EXPORTS InstrNode* getCurrentNode();
#define CV_INSTRUMENT_MARK_OPENCL(...)
#endif
// CV_INSTRUMENT_REGION() forwards to CV_INSTRUMENT_REGION_(); when __CV_AVX_GUARD is defined
// its expansion is placed in front, so both end up in the calling scope.
#ifdef __CV_AVX_GUARD
#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_()
#else
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_()
#endif
//! @endcond
#endif // OPENCV_CORE_PRIVATE_HPP
modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp
0 → 100644
View file @
297ba853
// Helper file to include dispatched functions declaration:
//
// Usage:
// #define CV_CPU_SIMD_FILENAME "<filename>.simd.hpp"
// #define CV_CPU_DISPATCH_MODE AVX2
// #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
// #define CV_CPU_DISPATCH_MODE SSE2
// #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
//
// Intentionally has no include guard: it is meant to be included once per dispatch mode.
// CV_CPU_DISPATCH_MODE is consumed (undefined) by each inclusion; CV_CPU_SIMD_FILENAME is kept.
#ifndef CV_DISABLE_OPTIMIZATION
#ifdef _MSC_VER
#pragma warning(disable: 4702) // unreachable code
#endif
#endif
// NOTE(review): presumably checked by the included *.simd.hpp to emit declarations only - confirm there.
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#endif
// Rebind the namespace macros to opt_<CV_CPU_DISPATCH_MODE> for the duration of the include below.
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#include CV_CPU_SIMD_FILENAME
// Leave the namespace macros and the dispatch mode undefined so the next inclusion starts clean.
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END
#undef CV_CPU_DISPATCH_MODE
modules/core/src/mathfuncs_core.dispatch.cpp
0 → 100644
View file @
297ba853
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "mathfuncs_core.simd.hpp"
#include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
// Float ATAN2 entry point of cv::hal.
// Passes the arguments to the cv_hal_fastAtan32f hook through CALL_HAL and then
// to the fastAtan32f implementation selected by CV_CPU_DISPATCH among
// CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): CALL_HAL is defined outside this file; presumably it returns
// early when the hook handles the request - confirm.
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);
    CV_CPU_DISPATCH(fastAtan32f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double ATAN2 entry point of cv::hal.
// Passes the arguments to the cv_hal_fastAtan64f hook through CALL_HAL and then
// to the fastAtan64f implementation selected by CV_CPU_DISPATCH among
// CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): CALL_HAL is defined outside this file; presumably it returns
// early when the hook handles the request - confirm.
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);
    CV_CPU_DISPATCH(fastAtan64f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// deprecated
// Array form kept under the old name; it only forwards its arguments,
// unchanged, to fastAtan32f.
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    fastAtan32f(Y, X, angle, len, angleInDegrees);
}
// Float magnitude entry point of cv::hal.
// Order of attempts: the cv_hal_magnitude32f hook (CALL_HAL), the IPP routine
// ippsMagnitude_32f (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// magnitude32f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void magnitude32f(const float* x, const float* y, float* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
    CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double magnitude entry point of cv::hal.
// Order of attempts: the cv_hal_magnitude64f hook (CALL_HAL), the IPP routine
// ippsMagnitude_64f (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// magnitude64f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void magnitude64f(const double* x, const double* y, double* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
    CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Float inverse-square-root entry point of cv::hal.
// Order of attempts: the cv_hal_invSqrt32f hook (CALL_HAL), the IPP routine
// ippsInvSqrt_32f_A21 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// invSqrt32f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void invSqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);
    CV_CPU_DISPATCH(invSqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double inverse-square-root entry point of cv::hal.
// Order of attempts: the cv_hal_invSqrt64f hook (CALL_HAL), the IPP routine
// ippsInvSqrt_64f_A50 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// invSqrt64f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void invSqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);
    CV_CPU_DISPATCH(invSqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Float square-root entry point of cv::hal.
// Order of attempts: the cv_hal_sqrt32f hook (CALL_HAL), the IPP routine
// ippsSqrt_32f_A21 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// sqrt32f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void sqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
    CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double square-root entry point of cv::hal.
// Order of attempts: the cv_hal_sqrt64f hook (CALL_HAL), the IPP routine
// ippsSqrt_64f_A50 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// sqrt64f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void sqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
    CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Float exp entry point of cv::hal.
// Order of attempts: the cv_hal_exp32f hook (CALL_HAL), the IPP routine
// ippsExp_32f_A21 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// exp32f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void exp32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);
    CV_CPU_DISPATCH(exp32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double exp entry point of cv::hal.
// Order of attempts: the cv_hal_exp64f hook (CALL_HAL), the IPP routine
// ippsExp_64f_A50 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// exp64f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void exp64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);
    CV_CPU_DISPATCH(exp64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Float log entry point of cv::hal.
// Order of attempts: the cv_hal_log32f hook (CALL_HAL), the IPP routine
// ippsLn_32f_A21 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// log32f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void log32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);
    CV_CPU_DISPATCH(log32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Double log entry point of cv::hal.
// Order of attempts: the cv_hal_log64f hook (CALL_HAL), the IPP routine
// ippsLn_64f_A50 (CV_IPP_RUN_FAST, given "status >= 0"), and finally the
// log64f implementation selected by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_IPP_RUN_FAST are defined elsewhere; presumably
// each returns early on success - confirm.
void log64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);
    CV_CPU_DISPATCH(log64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
//=============================================================================
// for compatibility with 3.0
// 3.0-compatible float overload: forwards its arguments unchanged to exp32f.
void exp(const float* src, float* dst, int n)
{
    exp32f(src, dst, n);
}
// 3.0-compatible double overload: forwards its arguments unchanged to exp64f.
void exp(const double* src, double* dst, int n)
{
    exp64f(src, dst, n);
}
// 3.0-compatible float overload: forwards its arguments unchanged to log32f.
void log(const float* src, float* dst, int n)
{
    log32f(src, dst, n);
}
// 3.0-compatible double overload: forwards its arguments unchanged to log64f.
void log(const double* src, double* dst, int n)
{
    log64f(src, dst, n);
}
// 3.0-compatible float overload: forwards its arguments unchanged to magnitude32f.
void magnitude(const float* x, const float* y, float* dst, int n)
{
    magnitude32f(x, y, dst, n);
}
// 3.0-compatible double overload: forwards its arguments unchanged to magnitude64f.
void magnitude(const double* x, const double* y, double* dst, int n)
{
    magnitude64f(x, y, dst, n);
}
// 3.0-compatible float overload: forwards its arguments unchanged to sqrt32f.
void sqrt(const float* src, float* dst, int len)
{
    sqrt32f(src, dst, len);
}
// 3.0-compatible double overload: forwards its arguments unchanged to sqrt64f.
void sqrt(const double* src, double* dst, int len)
{
    sqrt64f(src, dst, len);
}
// 3.0-compatible float overload: forwards its arguments unchanged to invSqrt32f.
void invSqrt(const float* src, float* dst, int len)
{
    invSqrt32f(src, dst, len);
}
// 3.0-compatible double overload: forwards its arguments unchanged to invSqrt64f.
void invSqrt(const double* src, double* dst, int len)
{
    invSqrt64f(src, dst, len);
}
}}
// namespace cv::hal::
// Scalar cv::fastAtan2: brings cv::hal into scope and invokes the baseline
// fastAtan2(y, x) through CV_CPU_CALL_BASELINE.
// NOTE(review): there is no explicit return statement here; presumably the
// macro (defined elsewhere) expands to one - confirm.
float cv::fastAtan2(float y, float x)
{
    using namespace cv::hal;

    CV_CPU_CALL_BASELINE(fastAtan2, (y, x));
}
modules/core/src/mathfuncs_core.
c
pp
→
modules/core/src/mathfuncs_core.
simd.h
pp
View file @
297ba853
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Forward declarations.
// They are placed ahead of the CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY guard that
// follows, so they stay visible when only declarations are requested.

// ATAN2 (array forms)
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees);
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);

// magnitude
void magnitude32f(const float* x, const float* y, float* mag, int len);
void magnitude64f(const double* x, const double* y, double* mag, int len);

// inverse square root / square root
void invSqrt32f(const float* src, float* dst, int len);
void invSqrt64f(const double* src, double* dst, int len);
void sqrt32f(const float* src, float* dst, int len);
void sqrt64f(const double* src, double* dst, int len);

// exp / log
void exp32f(const float* src, float* dst, int n);
void exp64f(const double* src, double* dst, int n);
void log32f(const float* src, float* dst, int n);
void log64f(const double* src, double* dst, int n);

// ATAN2 (scalar form)
float fastAtan2(float y, float x);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
using
namespace
std
;
...
...
@@ -197,23 +180,17 @@ static inline void atanImpl(const T *Y, const T *X, T *angle, int len, bool angl
}
// anonymous::
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
void
fastAtan32f
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan32f
,
cv_hal_fastAtan32f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
float
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
void
fastAtan64f
(
const
double
*
Y
,
const
double
*
X
,
double
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan64f
,
cv_hal_fastAtan64f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
double
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -221,7 +198,6 @@ void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool
void
fastAtan2
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
fastAtan32f
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -229,9 +205,6 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude32f
,
cv_hal_magnitude32f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_32f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -257,9 +230,6 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude64f
,
cv_hal_magnitude64f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_64f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -286,9 +256,6 @@ void invSqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt32f
,
cv_hal_invSqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -310,9 +277,6 @@ void invSqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt64f
,
cv_hal_invSqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SSE2
...
...
@@ -330,9 +294,6 @@ void sqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt32f
,
cv_hal_sqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -354,9 +315,6 @@ void sqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt64f
,
cv_hal_sqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -381,9 +339,6 @@ void exp32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -394,9 +349,6 @@ void exp64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -407,9 +359,6 @@ void log32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -419,9 +368,6 @@ void log64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -534,9 +480,6 @@ void exp32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
A4
=
(
float
)(
1.000000000000002438532970795181890933776
/
EXPPOLY_32F_A0
),
A3
=
(
float
)(
.6931471805521448196800669615864773144641
/
EXPPOLY_32F_A0
),
...
...
@@ -551,7 +494,90 @@ void exp32f( const float *_x, float *y, int n )
const
Cv32suf
*
x
=
(
const
Cv32suf
*
)
_x
;
Cv32suf
buf
[
4
];
#if CV_SSE2
#if CV_AVX2
if
(
n
>=
8
)
{
static
const
__m256d
prescale4
=
_mm256_set1_pd
(
exp_prescale
);
static
const
__m256
postscale8
=
_mm256_set1_ps
((
float
)
exp_postscale
);
static
const
__m128
maxval4
=
_mm_set1_ps
((
float
)(
exp_max_val
/
exp_prescale
));
static
const
__m128
minval4
=
_mm_set1_ps
((
float
)(
-
exp_max_val
/
exp_prescale
));
static
const
__m256
mA1
=
_mm256_set1_ps
(
A1
);
static
const
__m256
mA2
=
_mm256_set1_ps
(
A2
);
static
const
__m256
mA3
=
_mm256_set1_ps
(
A3
);
static
const
__m256
mA4
=
_mm256_set1_ps
(
A4
);
bool
y_aligned
=
(
size_t
)(
void
*
)
y
%
32
==
0
;
ushort
CV_DECL_ALIGNED
(
32
)
tab_idx
[
16
];
for
(
;
i
<=
n
-
8
;
i
+=
8
)
{
__m128i
xi0
,
xi1
;
__m256d
xd0
=
_mm256_cvtps_pd
(
_mm_min_ps
(
_mm_max_ps
(
_mm_loadu_ps
(
&
x
[
i
].
f
),
minval4
),
maxval4
));
__m256d
xd1
=
_mm256_cvtps_pd
(
_mm_min_ps
(
_mm_max_ps
(
_mm_loadu_ps
(
&
x
[
i
+
4
].
f
),
minval4
),
maxval4
));
xd0
=
_mm256_mul_pd
(
xd0
,
prescale4
);
xd1
=
_mm256_mul_pd
(
xd1
,
prescale4
);
xi0
=
_mm256_cvtpd_epi32
(
xd0
);
xi1
=
_mm256_cvtpd_epi32
(
xd1
);
xd0
=
_mm256_sub_pd
(
xd0
,
_mm256_cvtepi32_pd
(
xi0
));
xd1
=
_mm256_sub_pd
(
xd1
,
_mm256_cvtepi32_pd
(
xi1
));
// gcc does not support _mm256_set_m128
//xf = _mm256_set_m128(_mm256_cvtpd_ps(xd1), _mm256_cvtpd_ps(xd0));
__m256
xf
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
_mm256_cvtpd_ps
(
xd0
)),
_mm256_cvtpd_ps
(
xd1
),
1
);
xf
=
_mm256_mul_ps
(
xf
,
postscale8
);
xi0
=
_mm_packs_epi32
(
xi0
,
xi1
);
_mm_store_si128
((
__m128i
*
)
tab_idx
,
_mm_and_si128
(
xi0
,
_mm_set1_epi16
(
EXPTAB_MASK
)));
xi0
=
_mm_add_epi16
(
_mm_srai_epi16
(
xi0
,
EXPTAB_SCALE
),
_mm_set1_epi16
(
127
));
xi0
=
_mm_max_epi16
(
xi0
,
_mm_setzero_si128
());
xi0
=
_mm_min_epi16
(
xi0
,
_mm_set1_epi16
(
255
));
xi1
=
_mm_unpackhi_epi16
(
xi0
,
_mm_setzero_si128
());
xi0
=
_mm_unpacklo_epi16
(
xi0
,
_mm_setzero_si128
());
__m256d
yd0
=
_mm256_set_pd
(
expTab
[
tab_idx
[
3
]],
expTab
[
tab_idx
[
2
]],
expTab
[
tab_idx
[
1
]],
expTab
[
tab_idx
[
0
]]);
__m256d
yd1
=
_mm256_set_pd
(
expTab
[
tab_idx
[
7
]],
expTab
[
tab_idx
[
6
]],
expTab
[
tab_idx
[
5
]],
expTab
[
tab_idx
[
4
]]);
// gcc does not support _mm256_set_m128
//__m256 yf = _mm256_set_m128(_mm256_cvtpd_ps(yd1), _mm256_cvtpd_ps(yd0));
__m256
yf
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
_mm256_cvtpd_ps
(
yd0
)),
_mm256_cvtpd_ps
(
yd1
),
1
);
//_mm256_set_m128i(xi1, xi0)
__m256i
temp
=
(
__m256i
)
_mm256_insertf128_ps
(
_mm256_castps128_ps256
((
__m128
)
xi0
),
(
__m128
)
xi1
,
1
);
yf
=
_mm256_mul_ps
(
yf
,
_mm256_castsi256_ps
(
_mm256_slli_epi32
(
temp
,
23
)));
__m256
zf
=
_mm256_add_ps
(
xf
,
mA1
);
#if CV_FMA3
zf
=
_mm256_fmadd_ps
(
zf
,
xf
,
mA2
);
zf
=
_mm256_fmadd_ps
(
zf
,
xf
,
mA3
);
zf
=
_mm256_fmadd_ps
(
zf
,
xf
,
mA4
);
#else
zf
=
_mm256_add_ps
(
_mm256_mul_ps
(
zf
,
xf
),
mA2
);
zf
=
_mm256_add_ps
(
_mm256_mul_ps
(
zf
,
xf
),
mA3
);
zf
=
_mm256_add_ps
(
_mm256_mul_ps
(
zf
,
xf
),
mA4
);
#endif
zf
=
_mm256_mul_ps
(
zf
,
yf
);
if
(
y_aligned
)
{
_mm256_store_ps
(
y
+
i
,
zf
);
}
else
{
_mm256_storeu_ps
(
y
+
i
,
zf
);
}
}
}
#elif CV_SSE2
if
(
n
>=
8
)
{
static
const
__m128d
prescale2
=
_mm_set1_pd
(
exp_prescale
);
...
...
@@ -738,9 +764,6 @@ void exp64f( const double *_x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
_x
,
y
,
n
)
>=
0
);
static
const
double
A5
=
.99999999999999999998285227504999
/
EXPPOLY_32F_A0
,
A4
=
.69314718055994546743029643825322
/
EXPPOLY_32F_A0
,
...
...
@@ -1187,9 +1210,6 @@ void log32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
shift
[]
=
{
0
,
-
1.
f
/
512
};
static
const
float
A0
=
0.3333333333333333333333333
f
,
...
...
@@ -1336,9 +1356,6 @@ void log64f( const double *x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
x
,
y
,
n
)
>=
0
);
static
const
double
shift
[]
=
{
0
,
-
1.
/
512
};
static
const
double
A7
=
1.0
,
...
...
@@ -1524,64 +1541,13 @@ void log64f( const double *x, double *y, int n )
#endif // issue 7795
//=============================================================================
// for compatibility with 3.0
void
exp
(
const
float
*
src
,
float
*
dst
,
int
n
)
{
exp32f
(
src
,
dst
,
n
);
}
void
exp
(
const
double
*
src
,
double
*
dst
,
int
n
)
{
exp64f
(
src
,
dst
,
n
);
}
void
log
(
const
float
*
src
,
float
*
dst
,
int
n
)
float
fastAtan2
(
float
y
,
float
x
)
{
log32f
(
src
,
dst
,
n
);
}
void
log
(
const
double
*
src
,
double
*
dst
,
int
n
)
{
log64f
(
src
,
dst
,
n
);
}
void
magnitude
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
n
)
{
magnitude32f
(
x
,
y
,
dst
,
n
);
}
void
magnitude
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
n
)
{
magnitude64f
(
x
,
y
,
dst
,
n
);
}
void
sqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
sqrt32f
(
src
,
dst
,
len
);
}
void
sqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
sqrt64f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
invSqrt32f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
invSqrt64f
(
src
,
dst
,
len
);
return
atanImpl
<
float
>
(
y
,
x
);
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
}
// cv::hal::
}
// cv::
CV_CPU_OPTIMIZATION_NAMESPACE_END
float
cv
::
fastAtan2
(
float
y
,
float
x
)
{
return
atanImpl
<
float
>
(
y
,
x
);
}
}}
// namespace cv::hal
modules/world/CMakeLists.txt
View file @
297ba853
...
...
@@ -24,6 +24,7 @@ if(NOT OPENCV_INITIAL_PASS)
message
(
STATUS
"Processing WORLD modules..."
)
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
${
m
}
)
if
(
OPENCV_MODULE_
${
m
}
_IS_PART_OF_WORLD
)
message
(
STATUS
" module
${
m
}
..."
)
set
(
CMAKE_CURRENT_SOURCE_DIR
"
${
OPENCV_MODULE_
${
m
}
_LOCATION
}
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment