Commit e16227b5 authored by Alexander Alekhin

cmake: support multiple CPU targets

parent 47ae5f14
...@@ -91,6 +91,10 @@ if(POLICY CMP0042) ...@@ -91,6 +91,10 @@ if(POLICY CMP0042)
cmake_policy(SET CMP0042 NEW) cmake_policy(SET CMP0042 NEW)
endif() endif()
if(POLICY CMP0051)
cmake_policy(SET CMP0051 NEW)
endif()
include(cmake/OpenCVUtils.cmake) include(cmake/OpenCVUtils.cmake)
# must go before the project command # must go before the project command
...@@ -280,16 +284,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" ...@@ -280,16 +284,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov"
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) ) OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
...@@ -299,6 +293,9 @@ OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function c ...@@ -299,6 +293,9 @@ OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function c
OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF ) OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF )
OCV_OPTION(ENABLE_GNU_STL_DEBUG "Enable GNU STL Debug mode (defines _GLIBCXX_DEBUG)" OFF IF ((NOT CMAKE_VERSION VERSION_LESS "2.8.11") AND CMAKE_COMPILER_IS_GNUCXX) ) OCV_OPTION(ENABLE_GNU_STL_DEBUG "Enable GNU STL Debug mode (defines _GLIBCXX_DEBUG)" OFF IF ((NOT CMAKE_VERSION VERSION_LESS "2.8.11") AND CMAKE_COMPILER_IS_GNUCXX) )
OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX)
OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON )
OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF )
OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF ) OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF )
...@@ -499,6 +496,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL ...@@ -499,6 +496,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
endif() endif()
# --- Python Support ---
include(cmake/OpenCVDetectPython.cmake)
include(cmake/OpenCVCompilerOptions.cmake) include(cmake/OpenCVCompilerOptions.cmake)
...@@ -576,9 +576,6 @@ else() ...@@ -576,9 +576,6 @@ else()
unset(DOXYGEN_FOUND CACHE) unset(DOXYGEN_FOUND CACHE)
endif() endif()
# --- Python Support ---
include(cmake/OpenCVDetectPython.cmake)
# --- Java Support --- # --- Java Support ---
include(cmake/OpenCVDetectApacheAnt.cmake) include(cmake/OpenCVDetectApacheAnt.cmake)
if(ANDROID) if(ANDROID)
...@@ -867,6 +864,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") ...@@ -867,6 +864,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio")
status(" Configuration:" ${CMAKE_BUILD_TYPE}) status(" Configuration:" ${CMAKE_BUILD_TYPE})
endif() endif()
# ========================= CPU code generation mode =========================
# Summary of the CPU feature configuration, emitted through the project's status() helper.
# Baseline part: prints the final baseline set, the requested set when it differs
# from the final one, and the required / disabled lists when they are non-empty.
status("")
status(" CPU/HW features:")
status(" Baseline:" "${CPU_BASELINE_FINAL}")
if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL)
status(" requested:" "${CPU_BASELINE}")
endif()
if(CPU_BASELINE_REQUIRE)
status(" required:" "${CPU_BASELINE_REQUIRE}")
endif()
if(CPU_BASELINE_DISABLE)
status(" disabled:" "${CPU_BASELINE_DISABLE}")
endif()
# Dispatch part: printed only when a dispatch set was requested or finally selected.
if(CPU_DISPATCH_FINAL OR CPU_DISPATCH)
status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}")
if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL)
status(" requested:" "${CPU_DISPATCH}")
endif()
if(CPU_DISPATCH_REQUIRE)
status(" required:" "${CPU_DISPATCH_REQUIRE}")
endif()
# One line per dispatched feature: CPU_<OPT>_USAGE_COUNT (labelled "files") and
# the contents of CPU_DISPATCH_<OPT>_INCLUDED.
foreach(OPT ${CPU_DISPATCH_FINAL})
status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}")
endforeach()
endif()
# ========================== C/C++ options ========================== # ========================== C/C++ options ==========================
if(CMAKE_CXX_COMPILER_VERSION) if(CMAKE_CXX_COMPILER_VERSION)
set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})") set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})")
......
This diff is collapsed.
...@@ -31,24 +31,21 @@ endif() ...@@ -31,24 +31,21 @@ endif()
if(MINGW OR (X86 AND UNIX AND NOT APPLE)) if(MINGW OR (X86 AND UNIX AND NOT APPLE))
# mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) foreach(flags
string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG
endforeach() CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
endif()
if(CMAKE_COMPILER_IS_GNUCC)
foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG)
string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") string(REPLACE "-O3" "-O2" ${flags} "${${flags}}")
endforeach() endforeach()
endif() endif()
endif() endif()
if(MSVC) if(MSVC)
string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS)
string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}") string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT)
if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT) if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT)
# override cmake default exception handling option # override cmake default exception handling option
string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE)
endif() endif()
endif() endif()
...@@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "") ...@@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "")
set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "") set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "")
macro(add_extra_compiler_option option) macro(add_extra_compiler_option option)
if(CMAKE_BUILD_TYPE)
set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
endif()
ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}") ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}")
if(${_varname}) if(${_varname})
set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}")
...@@ -77,6 +71,12 @@ macro(add_extra_compiler_option option) ...@@ -77,6 +71,12 @@ macro(add_extra_compiler_option option)
endif() endif()
endmacro() endmacro()
# Appends `option` to both OPENCV_EXTRA_CXX_FLAGS and OPENCV_EXTRA_C_FLAGS
# unconditionally: unlike add_extra_compiler_option(), no compiler support
# check is performed before the flag is added.
macro(add_extra_compiler_option_force option)
  foreach(__ocv_flags_var OPENCV_EXTRA_CXX_FLAGS OPENCV_EXTRA_C_FLAGS)
    set(${__ocv_flags_var} "${${__ocv_flags_var}} ${option}")
  endforeach()
endmacro()
# Gets environment variable and puts its value to the corresponding preprocessor definition # Gets environment variable and puts its value to the corresponding preprocessor definition
# Useful for WINRT that has no access to environment variables # Useful for WINRT that has no access to environment variables
macro(add_env_definitions option) macro(add_env_definitions option)
...@@ -102,7 +102,11 @@ if(MINGW) ...@@ -102,7 +102,11 @@ if(MINGW)
endif() endif()
if(CV_ICC AND NOT ENABLE_FAST_MATH) if(CV_ICC AND NOT ENABLE_FAST_MATH)
if(MSVC)
add_extra_compiler_option("/fp:precise")
else()
add_extra_compiler_option("-fp-model precise") add_extra_compiler_option("-fp-model precise")
endif()
endif() endif()
if(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCXX)
...@@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) ...@@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif() endif()
# We need pthread's # We need pthread's
if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO
add_extra_compiler_option(-pthread) add_extra_compiler_option(-pthread)
endif() endif()
...@@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) ...@@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_FAST_MATH) if(ENABLE_FAST_MATH)
add_extra_compiler_option(-ffast-math) add_extra_compiler_option(-ffast-math)
endif() endif()
if(ENABLE_POWERPC)
add_extra_compiler_option("-mcpu=G3 -mtune=G5")
endif()
if(ENABLE_SSE)
add_extra_compiler_option(-msse)
endif()
if(ENABLE_SSE2)
add_extra_compiler_option(-msse2)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-sse2)
endif()
if(ARM)
add_extra_compiler_option("-mfp16-format=ieee")
endif(ARM)
if(ENABLE_NEON)
add_extra_compiler_option("-mfpu=neon")
endif()
if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
add_extra_compiler_option("-mfpu=vfpv3")
endif()
# SSE3 and further should be disabled under MingW because it generates compiler errors
if(NOT MINGW)
if(ENABLE_AVX)
add_extra_compiler_option(-mavx)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-avx)
endif()
if(ENABLE_AVX2)
add_extra_compiler_option(-mavx2)
if(ENABLE_FMA3)
add_extra_compiler_option(-mfma)
endif()
endif()
# GCC suppresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalents for all SSEx instructions when needed.
if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx")
if(ENABLE_SSE3)
add_extra_compiler_option(-msse3)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-sse3)
endif()
if(ENABLE_SSSE3)
add_extra_compiler_option(-mssse3)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-ssse3)
endif()
if(ENABLE_SSE41)
add_extra_compiler_option(-msse4.1)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-sse4.1)
endif()
if(ENABLE_SSE42)
add_extra_compiler_option(-msse4.2)
elseif(X86 OR X86_64)
add_extra_compiler_option(-mno-sse4.2)
endif()
if(ENABLE_POPCNT)
add_extra_compiler_option(-mpopcnt)
endif()
endif()
endif(NOT MINGW)
if(X86 OR X86_64)
if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4)
if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)")
add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers
else()
add_extra_compiler_option(-mfpmath=387)
endif()
endif()
endif()
# Profiling? # Profiling?
if(ENABLE_PROFILING) if(ENABLE_PROFILING)
...@@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) ...@@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}") string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}")
string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}") string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}")
endforeach() endforeach()
elseif(NOT APPLE AND NOT ANDROID) elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS))
# Remove unreferenced functions: function level linking # Remove unreferenced functions: function level linking
add_extra_compiler_option(-ffunction-sections) add_extra_compiler_option(-ffunction-sections)
endif() endif()
...@@ -296,41 +223,6 @@ if(MSVC) ...@@ -296,41 +223,6 @@ if(MSVC)
set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi") set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi")
endif() endif()
if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2")
endif()
if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX")
endif()
if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1")
endif()
if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3")
endif()
if(NOT MSVC64)
# 64-bit MSVC compiler uses SSE/SSE2 by default
if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2")
endif()
if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:")
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE")
endif()
endif()
if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi")
endif()
if(X86 OR X86_64)
if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers
endif()
endif()
if(OPENCV_WARNINGS_ARE_ERRORS) if(OPENCV_WARNINGS_ARE_ERRORS)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX") set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX")
endif() endif()
...@@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) ...@@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID)
set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}") set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}")
endif() endif()
# Load the CPU optimization module, then run its two hooks in order:
# first the options step, then the finalize step.
# Each hook is called only if a command with that name exists (if(COMMAND ...)),
# so a missing hook is silently skipped.
# NOTE(review): the hooks are presumably defined by the included file - not visible here.
include(cmake/OpenCVCompilerOptimizations.cmake)
if(COMMAND ocv_compiler_optimization_options)
ocv_compiler_optimization_options()
endif()
if(COMMAND ocv_compiler_optimization_options_finalize)
ocv_compiler_optimization_options_finalize()
endif()
# Add user supplied extra options (optimization, etc...) # Add user supplied extra options (optimization, etc...)
# ========================================================== # ==========================================================
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options")
...@@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) ...@@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
add_extra_compiler_option(-fvisibility-inlines-hidden) add_extra_compiler_option(-fvisibility-inlines-hidden)
endif() endif()
# TODO !!!!!
if(NOT OPENCV_FP16_DISABLE AND NOT IOS) if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
if(ARM AND ENABLE_NEON) if(ARM AND ENABLE_NEON)
set(FP16_OPTION "-mfpu=neon-fp16") set(FP16_OPTION "-mfpu=neon-fp16")
...@@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS) ...@@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
endif() endif()
try_compile(__VALID_FP16 try_compile(__VALID_FP16
"${OpenCV_BINARY_DIR}" "${OpenCV_BINARY_DIR}"
"${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp"
COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
OUTPUT_VARIABLE TRY_OUT OUTPUT_VARIABLE TRY_OUT
) )
......
...@@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO ...@@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h") configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h")
install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev) install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev)
# platform-specific config file
ocv_compiler_optimization_fill_cpu_config()
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h")
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
# opencv_modules.hpp based on actual modules list # opencv_modules.hpp based on actual modules list
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
......
...@@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD ...@@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE)
unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE) unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE)
unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE) unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE)
unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE)
endforeach() endforeach()
# clean modules info which needs to be recalculated # clean modules info which needs to be recalculated
...@@ -648,6 +649,8 @@ macro(ocv_set_module_sources) ...@@ -648,6 +649,8 @@ macro(ocv_set_module_sources)
# use full paths for module to be independent from the module location # use full paths for module to be independent from the module location
ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
endmacro() endmacro()
......
...@@ -328,7 +328,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) ...@@ -328,7 +328,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
get_target_property(_sources ${_targetName} SOURCES) get_target_property(_sources ${_targetName} SOURCES)
foreach(src ${_sources}) foreach(src ${_sources})
if(NOT "${src}" MATCHES "\\.mm$") if(NOT "${src}" MATCHES "\\.mm$"
AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files
AND NOT "${src}" MATCHES "^\$" # CMake generator expressions
)
get_source_file_property(oldProps "${src}" COMPILE_FLAGS) get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
if(NOT oldProps) if(NOT oldProps)
set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"") set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
......
...@@ -37,7 +37,11 @@ endmacro() ...@@ -37,7 +37,11 @@ endmacro()
macro(ocv_update VAR) macro(ocv_update VAR)
if(NOT DEFINED ${VAR}) if(NOT DEFINED ${VAR})
if("x${ARGN}" STREQUAL "x")
set(${VAR} "")
else()
set(${VAR} ${ARGN}) set(${VAR} ${ARGN})
endif()
else() else()
#ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}") #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}")
endif() endif()
...@@ -151,8 +155,15 @@ function(ocv_append_target_property target prop) ...@@ -151,8 +155,15 @@ function(ocv_append_target_property target prop)
endif() endif()
endfunction() endfunction()
# Registers extra targets as "dependants" of `target`.
#
# The registered names are stored in the internal cache variable
# OPENCV_DEPENDANT_TARGETS_<target>. ocv_target_include_directories() and
# _ocv_append_target_includes() iterate over that variable and apply the same
# PRIVATE include directories to every registered target.
#
#   target - owning target name; passed through _ocv_fix_target() first
#   ARGN   - names of the targets to register (may be empty)
function(ocv_append_dependant_targets target)
  #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})")
  _ocv_fix_target(target)
  # Build the value with list(APPEND) rather than "${old};${new}" string gluing,
  # which stored a leading empty element (and a trailing one for an empty ARGN).
  set(__dependants ${OPENCV_DEPENDANT_TARGETS_${target}})
  list(APPEND __dependants ${ARGN})
  set(OPENCV_DEPENDANT_TARGETS_${target} "${__dependants}" CACHE INTERNAL "" FORCE)
endfunction()
# adds include directories in such way that directories from the OpenCV source tree go first # adds include directories in such way that directories from the OpenCV source tree go first
function(ocv_target_include_directories target) function(ocv_target_include_directories target)
#ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})")
_ocv_fix_target(target) _ocv_fix_target(target)
set(__params "") set(__params "")
if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND
...@@ -173,6 +184,11 @@ function(ocv_target_include_directories target) ...@@ -173,6 +184,11 @@ function(ocv_target_include_directories target)
else() else()
if(TARGET ${target}) if(TARGET ${target})
target_include_directories(${target} PRIVATE ${__params}) target_include_directories(${target} PRIVATE ${__params})
if(OPENCV_DEPENDANT_TARGETS_${target})
foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
target_include_directories(${t} PRIVATE ${__params})
endforeach()
endif()
else() else()
set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}") set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}")
set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "") set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "")
...@@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX ...@@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX
) )
MACRO(ocv_check_compiler_flag LANG FLAG RESULT) MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
set(_fname "${ARGN}")
if(NOT DEFINED ${RESULT}) if(NOT DEFINED ${RESULT})
if("_${LANG}_" MATCHES "_CXX_") if(_fname)
# nothing
elseif("_${LANG}_" MATCHES "_CXX_")
set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx") set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx")
if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ") if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ")
FILE(WRITE "${_fname}" "int main() { return 0; }\n") FILE(WRITE "${_fname}" "int main() { return 0; }\n")
...@@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) ...@@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
unset(_fname) unset(_fname)
endif() endif()
if(_fname) if(_fname)
MESSAGE(STATUS "Performing Test ${RESULT}") if(NOT "x${ARGN}" STREQUAL "x")
file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}")
set(__msg " (check file: ${__msg})")
else()
set(__msg "")
endif()
MESSAGE(STATUS "Performing Test ${RESULT}${__msg}")
TRY_COMPILE(${RESULT} TRY_COMPILE(${RESULT}
"${CMAKE_BINARY_DIR}" "${CMAKE_BINARY_DIR}"
"${_fname}" "${_fname}"
...@@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) ...@@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT)
endif() endif()
ENDMACRO() ENDMACRO()
macro(ocv_check_flag_support lang flag varname) macro(ocv_check_flag_support lang flag varname base_options)
if(CMAKE_BUILD_TYPE)
set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
endif()
if("_${lang}_" MATCHES "_CXX_") if("_${lang}_" MATCHES "_CXX_")
set(_lang CXX) set(_lang CXX)
elseif("_${lang}_" MATCHES "_C_") elseif("_${lang}_" MATCHES "_C_")
...@@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname) ...@@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname)
string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}")
string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}") string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}")
ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}}) ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN})
endmacro() endmacro()
# turns off warnings # turns off warnings
...@@ -327,7 +356,7 @@ macro(ocv_warnings_disable) ...@@ -327,7 +356,7 @@ macro(ocv_warnings_disable)
string(REPLACE "${warning}" "" ${var} "${${var}}") string(REPLACE "${warning}" "" ${var} "${${var}}")
string(REPLACE "-W" "-Wno-" warning "${warning}") string(REPLACE "-W" "-Wno-" warning "${warning}")
endif() endif()
ocv_check_flag_support(${var} "${warning}" _varname) ocv_check_flag_support(${var} "${warning}" _varname "")
if(${_varname}) if(${_varname})
set(${var} "${${var}} ${warning}") set(${var} "${${var}} ${warning}")
endif() endif()
...@@ -342,7 +371,7 @@ macro(ocv_warnings_disable) ...@@ -342,7 +371,7 @@ macro(ocv_warnings_disable)
else() else()
string(REPLACE "-wd" "-Qwd" warning "${warning}") string(REPLACE "-wd" "-Qwd" warning "${warning}")
endif() endif()
ocv_check_flag_support(${var} "${warning}" _varname) ocv_check_flag_support(${var} "${warning}" _varname "")
if(${_varname}) if(${_varname})
set(${var} "${${var}} ${warning}") set(${var} "${${var}} ${warning}")
endif() endif()
...@@ -357,7 +386,7 @@ macro(ocv_warnings_disable) ...@@ -357,7 +386,7 @@ macro(ocv_warnings_disable)
endmacro() endmacro()
macro(add_apple_compiler_options the_module) macro(add_apple_compiler_options the_module)
ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS) ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "")
if(HAVE_OBJC_EXCEPTIONS) if(HAVE_OBJC_EXCEPTIONS)
foreach(source ${OPENCV_MODULE_${the_module}_SOURCES}) foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
if("${source}" MATCHES "\\.mm$") if("${source}" MATCHES "\\.mm$")
...@@ -903,6 +932,11 @@ function(_ocv_append_target_includes target) ...@@ -903,6 +932,11 @@ function(_ocv_append_target_includes target)
if (TARGET ${target}_object) if (TARGET ${target}_object)
target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
endif() endif()
if(OPENCV_DEPENDANT_TARGETS_${target})
foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}})
target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}})
endforeach()
endif()
unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE) unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE)
endif() endif()
endfunction() endfunction()
......
// AVX probe: compiles only when the compiler defines __AVX__ and the 256-bit
// float intrinsic used in test() is available from <immintrin.h>.
// test() is never called; it only has to compile.
#if !defined __AVX__ // MSVC supports this flag since MSVS 2013
#error "__AVX__ define is missing"
#endif
#include <immintrin.h>
void test()
{
__m256 a = _mm256_set1_ps(0.0f);
}
int main() { return 0; }
// AVX2 probe: compiles only when the compiler defines __AVX2__ and the 256-bit
// integer load used in test() is available from <immintrin.h>.
// test() is never called; it only has to compile.
#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013
#error "__AVX2__ define is missing"
#endif
#include <immintrin.h>
void test()
{
int data[8] = {0,0,0,0, 0,0,0,0};
__m256i a = _mm256_loadu_si256((const __m256i *)data);
}
int main() { return 0; }
// AVX-512 probe: requires __AVX512__ or __AVX512F__ to be defined by the compiler
// and a 512-bit intrinsic to compile; otherwise the #error below stops the build.
// test() is never called; it only has to compile.
#if defined __AVX512__ || defined __AVX512F__
#include <immintrin.h>
void test()
{
__m512i zmm = _mm512_setzero_si512();
}
#else
#error "AVX512 is not supported"
#endif
int main() { return 0; }
#include <stdio.h> #include <stdio.h>
#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) #if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__)
#include <immintrin.h> #include <immintrin.h>
int test() int test()
{ {
......
// POPCNT probe.
// Uses the 32-bit intrinsic on purpose: _mm_popcnt_u64 is only provided when
// targeting x86-64, so probing it would wrongly reject POPCNT on 32-bit x86.
#include <nmmintrin.h>
#ifndef _MSC_VER
#include <popcntintrin.h>
#endif
int main() {
    int i = _mm_popcnt_u32(1);
    (void)i; // silence "unused variable"; the call only has to compile
    return 0;
}
// SSE probe: only verifies that <xmmintrin.h> can be included; no intrinsic is used.
#include <xmmintrin.h>
int main() { return 0; }
// SSE2 probe: only verifies that <emmintrin.h> can be included; no intrinsic is used.
#include <emmintrin.h>
int main() { return 0; }
// SSE3 probe: includes <pmmintrin.h> and requires _mm_moveldup_ps to compile.
#include <pmmintrin.h>
int main() {
__m128 u, v;
u = _mm_set1_ps(0.0f);
v = _mm_moveldup_ps(u); // SSE3
return 0;
}
// SSE4.1 probe: includes <smmintrin.h> and requires _mm_packus_epi32 to compile.
#include <smmintrin.h>
int main() {
__m128i a = _mm_setzero_si128(), b = _mm_setzero_si128();
__m128i c = _mm_packus_epi32(a, b);
return 0;
}
// SSE4.2 probe.
// Uses the SSE4.2 CRC32 intrinsic. The previous body called _mm_popcnt_u64,
// which belongs to POPCNT and is unavailable on 32-bit x86 targets, so it
// did not test SSE4.2 itself.
#include <nmmintrin.h>
int main() {
    unsigned int crc = _mm_crc32_u8(1, 2);
    (void)crc; // silence "unused variable"; the call only has to compile
    return 0;
}
// SSSE3 probe: includes <tmmintrin.h> and requires _mm_abs_epi32 to compile.
#include <tmmintrin.h>
// NOTE(review): `v` is not referenced anywhere in this file - purpose unclear.
const double v = 0;
int main() {
__m128i a = _mm_setzero_si128();
__m128i b = _mm_abs_epi32(a);
return 0;
}
// OpenCV CPU baseline features
@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@
// OpenCV supported CPU dispatched features
@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@
#ifndef OPENCV_CVCONFIG_H_INCLUDED
#define OPENCV_CVCONFIG_H_INCLUDED
/* OpenCV compiled as static or dynamic libs */ /* OpenCV compiled as static or dynamic libs */
#cmakedefine BUILD_SHARED_LIBS #cmakedefine BUILD_SHARED_LIBS
/* OpenCV intrinsics optimized code */
#cmakedefine CV_ENABLE_INTRINSICS
/* OpenCV additional optimized code */
#cmakedefine CV_DISABLE_OPTIMIZATION
/* Compile for 'real' NVIDIA GPU architectures */ /* Compile for 'real' NVIDIA GPU architectures */
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
...@@ -206,3 +215,7 @@ ...@@ -206,3 +215,7 @@
/* OpenVX */ /* OpenVX */
#cmakedefine HAVE_OPENVX #cmakedefine HAVE_OPENVX
#endif // OPENCV_CVCONFIG_H_INCLUDED
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Section used when compiling OpenCV itself (__OPENCV_BUILD defined).
//
// For every CV_CPU_COMPILE_<X> macro that is defined, the matching intrinsics
// header is included and CV_<X> is defined to 1. Everything is skipped when
// intrinsics are disabled (CV_ENABLE_INTRINSICS undefined), when
// CV_DISABLE_OPTIMIZATION is defined, or when compiling with NVCC (__CUDACC__).
//
// Note: a conditional must not end with a line-continuation backslash - it would
// splice the next directive into the #if expression and break the nesting.
#if defined __OPENCV_BUILD

#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"

#if defined CV_ENABLE_INTRINSICS \
    && !defined CV_DISABLE_OPTIMIZATION \
    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */

#ifdef CV_CPU_COMPILE_SSE2
#  include <emmintrin.h>
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#endif
#ifdef CV_CPU_COMPILE_SSE3
#  include <pmmintrin.h>
#  define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
#  include <tmmintrin.h>
#  define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
#  include <smmintrin.h>
#  define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
#  include <nmmintrin.h>
#  define CV_SSE4_2 1
#endif
// POPCNT: the 64-bit variant is exposed only on 64-bit x86 targets.
#ifdef CV_CPU_COMPILE_POPCNT
#  ifdef _MSC_VER
#    include <nmmintrin.h>
#    if defined(_M_X64)
#      define CV_POPCNT_U64 _mm_popcnt_u64
#    endif
#    define CV_POPCNT_U32 _mm_popcnt_u32
#  else
#    include <popcntintrin.h>
#    if defined(__x86_64__)
#      define CV_POPCNT_U64 __builtin_popcountll
#    endif
#    define CV_POPCNT_U32 __builtin_popcount
#  endif
#  define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
#  include <immintrin.h>
#  define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
#  include <immintrin.h>
#  define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
#  define CV_FMA3 1
#endif

// NEON is detected from compiler-predefined macros, not from CV_CPU_COMPILE_*.
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
#  include <arm_neon.h>
#  define CV_NEON 1
#endif

#if defined(__ARM_NEON__) || defined(__aarch64__)
#  include <arm_neon.h>
#endif

#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__

#endif // __OPENCV_BUILD
// Compatibility section: used when __OPENCV_BUILD is NOT defined.
// Feature flags are derived from compiler-predefined macros instead of the
// CV_CPU_COMPILE_* configuration; only SSE2 (x86) and NEON (ARM) are detected here.
#if !defined __OPENCV_BUILD // Compatibility code
// SSE2 is assumed when __SSE2__ or _M_X64 is defined, or when _M_IX86_FP >= 2.
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
// Windows on ARM (_M_ARM): NEON is enabled unconditionally.
#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
// Other ARM targets: NEON only when the compiler advertises it.
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#endif
#endif // !__OPENCV_BUILD (Compatibility code)
// Default values: any CV_<feature> flag that has not been defined by this
// point is defined as 0, so every flag can be used directly in `#if CV_<feature>`.

// --- MMX / SSE family ---
#if !defined(CV_MMX)
# define CV_MMX 0
#endif
#if !defined(CV_SSE)
# define CV_SSE 0
#endif
#if !defined(CV_SSE2)
# define CV_SSE2 0
#endif
#if !defined(CV_SSE3)
# define CV_SSE3 0
#endif
#if !defined(CV_SSSE3)
# define CV_SSSE3 0
#endif
#if !defined(CV_SSE4_1)
# define CV_SSE4_1 0
#endif
#if !defined(CV_SSE4_2)
# define CV_SSE4_2 0
#endif
#if !defined(CV_POPCNT)
# define CV_POPCNT 0
#endif

// --- AVX / AVX2 / FMA3 ---
#if !defined(CV_AVX)
# define CV_AVX 0
#endif
#if !defined(CV_AVX2)
# define CV_AVX2 0
#endif
#if !defined(CV_FMA3)
# define CV_FMA3 0
#endif

// --- AVX-512 subsets ---
#if !defined(CV_AVX_512F)
# define CV_AVX_512F 0
#endif
#if !defined(CV_AVX_512BW)
# define CV_AVX_512BW 0
#endif
#if !defined(CV_AVX_512CD)
# define CV_AVX_512CD 0
#endif
#if !defined(CV_AVX_512DQ)
# define CV_AVX_512DQ 0
#endif
#if !defined(CV_AVX_512ER)
# define CV_AVX_512ER 0
#endif
#if !defined(CV_AVX_512IFMA512)
# define CV_AVX_512IFMA512 0
#endif
#if !defined(CV_AVX_512PF)
# define CV_AVX_512PF 0
#endif
#if !defined(CV_AVX_512VBMI)
# define CV_AVX_512VBMI 0
#endif
#if !defined(CV_AVX_512VL)
# define CV_AVX_512VL 0
#endif

// --- ARM NEON ---
#if !defined(CV_NEON)
# define CV_NEON 0
#endif
// AUTOGENERATED, DO NOT EDIT
//
// For every CPU feature <F> listed below, two macros are defined:
//   CV_CPU_HAS_SUPPORT_<F>
//     - 1 when CV_CPU_COMPILE_<F> is defined;
//     - a call to cv::checkHardwareSupport(CV_CPU_<F>) when only
//       CV_CPU_DISPATCH_COMPILE_<F> is defined;
//     - 0 otherwise.
//   CV_CPU_CALL_<F>(...)
//     - `return <args>` unconditionally, `if (CV_CPU_HAS_SUPPORT_<F>) return <args>`,
//       or nothing, for the same three cases respectively.
// Both non-zero cases additionally require CV_ENABLE_INTRINSICS to be defined
// and CV_DISABLE_OPTIMIZATION to be undefined.
// NOTE(review): the dispatched CV_CPU_CALL_<F> form expands to a bare `if (...) return ...`,
// so a following `else` at the call site would bind to it - confirm call sites account for this.
// NOTE(review): the file is marked autogenerated; lasting comment changes presumably belong in its generator.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE 1
# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSE 0
# define CV_CPU_CALL_SSE(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 1
# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSE2 0
# define CV_CPU_CALL_SSE2(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 1
# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSE3 0
# define CV_CPU_CALL_SSE3(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 1
# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSSE3 0
# define CV_CPU_CALL_SSSE3(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
# define CV_CPU_CALL_SSE4_1(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
# define CV_CPU_CALL_SSE4_2(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT 1
# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_POPCNT 0
# define CV_CPU_CALL_POPCNT(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX 1
# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_AVX 0
# define CV_CPU_CALL_AVX(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 1
# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_FP16 0
# define CV_CPU_CALL_FP16(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 1
# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_AVX2 0
# define CV_CPU_CALL_AVX2(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 1
# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_FMA3 0
# define CV_CPU_CALL_FMA3(...)
#endif
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON 1
# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
#else
# define CV_CPU_HAS_SUPPORT_NEON 0
# define CV_CPU_CALL_NEON(...)
#endif
...@@ -48,6 +48,10 @@ ...@@ -48,6 +48,10 @@
//! @addtogroup core_utils //! @addtogroup core_utils
//! @{ //! @{
#ifdef __OPENCV_BUILD
#include "cvconfig.h"
#endif
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif #endif
...@@ -59,10 +63,6 @@ ...@@ -59,10 +63,6 @@
#undef abs #undef abs
#undef Complex #undef Complex
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
#endif
#include <limits.h> #include <limits.h>
#include "opencv2/core/hal/interface.h" #include "opencv2/core/hal/interface.h"
...@@ -88,7 +88,7 @@ ...@@ -88,7 +88,7 @@
# endif # endif
#endif #endif
#if defined CV_ICC && !defined CV_ENABLE_UNROLLED #if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0 # define CV_ENABLE_UNROLLED 0
#else #else
# define CV_ENABLE_UNROLLED 1 # define CV_ENABLE_UNROLLED 1
...@@ -161,150 +161,9 @@ enum CpuFeatures { ...@@ -161,150 +161,9 @@ enum CpuFeatures {
CPU_NEON = 100 CPU_NEON = 100
}; };
// do not include SSE/AVX/NEON headers for NVCC compiler
#ifndef __CUDACC__
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <pmmintrin.h>
# define CV_SSE3 1
# endif
# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <tmmintrin.h>
# define CV_SSSE3 1
# endif
# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <smmintrin.h>
# define CV_SSE4_1 1
# endif
# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500)
# include <nmmintrin.h>
# define CV_SSE4_2 1
# endif
# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500)
# ifdef _MSC_VER
# include <nmmintrin.h>
# if defined(_M_X64)
# define CV_POPCNT_U64 _mm_popcnt_u64
# endif
# define CV_POPCNT_U32 _mm_popcnt_u32
# else
# include <popcntintrin.h>
# if defined(__x86_64__)
# define CV_POPCNT_U64 __builtin_popcountll
# endif
# define CV_POPCNT_U32 __builtin_popcount
# endif
# define CV_POPCNT 1
# endif
# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0)
// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX
// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32
# include <immintrin.h>
# define CV_AVX 1
# if defined(_XCR_XFEATURE_ENABLED_MASK)
# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
# else
# define __xgetbv() 0
# endif
# endif
# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0)
# include <immintrin.h>
# define CV_AVX2 1
# if defined __FMA__
# define CV_FMA3 1
# endif
# endif
#endif
#if (defined WIN32 || defined _WIN32) && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
# define CPU_HAS_NEON_FEATURE (true)
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#endif
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
# define CV_VFP 1
#endif
#endif // __CUDACC__ #include "cv_cpu_dispatch.h"
#ifndef CV_POPCNT
#define CV_POPCNT 0
#endif
#ifndef CV_MMX
# define CV_MMX 0
#endif
#ifndef CV_SSE
# define CV_SSE 0
#endif
#ifndef CV_SSE2
# define CV_SSE2 0
#endif
#ifndef CV_SSE3
# define CV_SSE3 0
#endif
#ifndef CV_SSSE3
# define CV_SSSE3 0
#endif
#ifndef CV_SSE4_1
# define CV_SSE4_1 0
#endif
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
#endif
#ifndef CV_AVX2
# define CV_AVX2 0
#endif
#ifndef CV_FMA3
# define CV_FMA3 0
#endif
#ifndef CV_AVX_512F
# define CV_AVX_512F 0
#endif
#ifndef CV_AVX_512BW
# define CV_AVX_512BW 0
#endif
#ifndef CV_AVX_512CD
# define CV_AVX_512CD 0
#endif
#ifndef CV_AVX_512DQ
# define CV_AVX_512DQ 0
#endif
#ifndef CV_AVX_512ER
# define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA512
# define CV_AVX_512IFMA512 0
#endif
#ifndef CV_AVX_512PF
# define CV_AVX_512PF 0
#endif
#ifndef CV_AVX_512VBMI
# define CV_AVX_512VBMI 0
#endif
#ifndef CV_AVX_512VL
# define CV_AVX_512VL 0
#endif
#ifndef CV_NEON
# define CV_NEON 0
#endif
#ifndef CV_VFP
# define CV_VFP 0
#endif
/* fundamental constants */ /* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795 #define CV_PI 3.1415926535897932384626433832795
......
...@@ -47,6 +47,12 @@ ...@@ -47,6 +47,12 @@
#include "opencv2/core/cvdef.h" #include "opencv2/core/cvdef.h"
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif
//! @addtogroup core_utils //! @addtogroup core_utils
//! @{ //! @{
...@@ -66,7 +72,7 @@ ...@@ -66,7 +72,7 @@
# include "tegra_round.hpp" # include "tegra_round.hpp"
#endif #endif
#if CV_VFP #if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
// 1. general scheme // 1. general scheme
#define ARM_ROUND(_value, _asm_string) \ #define ARM_ROUND(_value, _asm_string) \
int res; \ int res; \
...@@ -82,7 +88,7 @@ ...@@ -82,7 +88,7 @@
#endif #endif
// 3. version for float // 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#endif // CV_VFP #endif
/** @brief Rounds floating-point number to the nearest integer /** @brief Rounds floating-point number to the nearest integer
...@@ -93,7 +99,7 @@ CV_INLINE int ...@@ -93,7 +99,7 @@ CV_INLINE int
cvRound( double value ) cvRound( double value )
{ {
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value ); __m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t); return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
...@@ -108,7 +114,7 @@ cvRound( double value ) ...@@ -108,7 +114,7 @@ cvRound( double value )
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_DBL(value); TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__ #elif defined CV_ICC || defined __GNUC__
# if CV_VFP # if defined ARM_ROUND_DBL
ARM_ROUND_DBL(value); ARM_ROUND_DBL(value);
# else # else
return (int)lrint(value); return (int)lrint(value);
...@@ -130,18 +136,8 @@ cvRound( double value ) ...@@ -130,18 +136,8 @@ cvRound( double value )
*/ */
CV_INLINE int cvFloor( double value ) CV_INLINE int cvFloor( double value )
{ {
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
int i = _mm_cvtsd_si32(t);
return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i)));
#elif defined __GNUC__
int i = (int)value; int i = (int)value;
return i - (i > value); return i - (i > value);
#else
int i = cvRound(value);
float diff = (float)(value - i);
return i - (diff < 0);
#endif
} }
/** @brief Rounds floating-point number to the nearest integer not smaller than the original. /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
...@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value ) ...@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value )
*/ */
CV_INLINE int cvCeil( double value ) CV_INLINE int cvCeil( double value )
{ {
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
int i = _mm_cvtsd_si32(t);
return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t));
#elif defined __GNUC__
int i = (int)value; int i = (int)value;
return i + (i < value); return i + (i < value);
#else
int i = cvRound(value);
float diff = (float)(i - value);
return i + (diff < 0);
#endif
} }
/** @brief Determines if the argument is Not A Number. /** @brief Determines if the argument is Not A Number.
...@@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value ) ...@@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value )
/** @overload */ /** @overload */
CV_INLINE int cvRound(float value) CV_INLINE int cvRound(float value)
{ {
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value ); __m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t); return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
...@@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value) ...@@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value)
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_FLT(value); TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__ #elif defined CV_ICC || defined __GNUC__
# if CV_VFP # if defined ARM_ROUND_FLT
ARM_ROUND_FLT(value); ARM_ROUND_FLT(value);
# else # else
return (int)lrintf(value); return (int)lrintf(value);
...@@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value ) ...@@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value )
/** @overload */ /** @overload */
CV_INLINE int cvFloor( float value ) CV_INLINE int cvFloor( float value )
{ {
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
int i = _mm_cvtss_si32(t);
return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i)));
#elif defined __GNUC__
int i = (int)value; int i = (int)value;
return i - (i > value); return i - (i > value);
#else
int i = cvRound(value);
float diff = (float)(value - i);
return i - (diff < 0);
#endif
} }
/** @overload */ /** @overload */
...@@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value ) ...@@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value )
/** @overload */ /** @overload */
CV_INLINE int cvCeil( float value ) CV_INLINE int cvCeil( float value )
{ {
#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
int i = _mm_cvtss_si32(t);
return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t));
#elif defined __GNUC__
int i = (int)value; int i = (int)value;
return i + (i < value); return i + (i < value);
#else
int i = cvRound(value);
float diff = (float)(i - value);
return i + (diff < 0);
#endif
} }
/** @overload */ /** @overload */
......
This diff is collapsed.
...@@ -65,7 +65,7 @@ elseif(HAVE_QT) ...@@ -65,7 +65,7 @@ elseif(HAVE_QT)
list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES})
list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES})
ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag) ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "")
if(${_have_flag}) if(${_have_flag})
set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations)
endif() endif()
......
...@@ -1649,7 +1649,7 @@ struct VResizeLanczos4 ...@@ -1649,7 +1649,7 @@ struct VResizeLanczos4
{ {
CastOp castOp; CastOp castOp;
VecOp vecOp; VecOp vecOp;
int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED
for( ; x <= width - 4; x += 4 ) for( ; x <= width - 4; x += 4 )
{ {
...@@ -1657,7 +1657,7 @@ struct VResizeLanczos4 ...@@ -1657,7 +1657,7 @@ struct VResizeLanczos4
const WT* S = src[0]; const WT* S = src[0];
WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b; WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
for( k = 1; k < 8; k++ ) for( int k = 1; k < 8; k++ )
{ {
b = beta[k]; S = src[k]; b = beta[k]; S = src[k];
s0 += S[x]*b; s1 += S[x+1]*b; s0 += S[x]*b; s1 += S[x+1]*b;
......
...@@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, ...@@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
CvPoint pt, double& stage_sum, int start_stage ) CvPoint pt, double& stage_sum, int start_stage )
{ {
#ifdef CV_HAAR_USE_AVX #ifdef CV_HAAR_USE_AVX
bool haveAVX = false; bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX);
if(cv::checkHardwareSupport(CV_CPU_AVX))
if(__xgetbv()&0x6)// Check if the OS will save the YMM registers
haveAVX = true;
#else #else
# ifdef CV_HAAR_USE_SSE # ifdef CV_HAAR_USE_SSE
bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment