Commit b4bcdd10 authored by Maksim Shabunin's avatar Maksim Shabunin

HAL: improvements

- added new functions from core module: split, merge, add, sub, mul, div, ...
- added function replacement mechanism
- added example of HAL replacement library
parent e8742be3
......@@ -587,6 +587,11 @@ include(cmake/OpenCVFindMatlab.cmake)
include(cmake/OpenCVDetectVTK.cmake)
if (OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS)
get_filename_component(OPENCV_HAL_HEADERS "${OPENCV_HAL_HEADERS}" ABSOLUTE)
get_filename_component(OPENCV_HAL_LIBS "${OPENCV_HAL_LIBS}" ABSOLUTE)
endif()
# ----------------------------------------------------------------------------
# Add CUDA libraries (needed for apps/tools, samples)
# ----------------------------------------------------------------------------
......
#ifndef _CUSTOM_HAL_INCLUDED_
#define _CUSTOM_HAL_INCLUDED_
@OPENCV_HAL_HEADERS_INCLUDES@
#endif
......@@ -762,6 +762,4 @@ inline float32x2_t cv_vsqrt_f32(float32x2_t val)
} // cv
#include "sse_utils.hpp"
#endif //__OPENCV_CORE_BASE_HPP__
......@@ -277,37 +277,6 @@ execution time.
*/
CV_EXPORTS_W int64 getCPUTickCount();
/** @brief Available CPU features.
remember to keep this list identical to the one in cvdef.h
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100
};
/** @brief Returns true if the specified feature is supported by the host hardware.
The function returns true if the host hardware supports the specified feature. When user calls
......
This diff is collapsed.
This diff is collapsed.
......@@ -83,6 +83,11 @@ typedef void (*BinaryFunc)(const uchar* src1, size_t step1,
uchar* dst, size_t step, Size sz,
void*);
typedef void (*BinaryFuncC)(const uchar* src1, size_t step1,
const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height,
void*);
BinaryFunc getConvertFunc(int sdepth, int ddepth);
BinaryFunc getCopyMaskFunc(size_t esz);
......@@ -114,46 +119,6 @@ extern const uchar g_Saturate8u[];
void deleteThreadAllocData();
#endif
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a + b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(a - b); }
};
template<typename T1, typename T2=T1, typename T3=T1> struct OpRSub
{
typedef T1 type1;
typedef T2 type2;
typedef T3 rtype;
T3 operator ()(const T1 a, const T2 b) const { return saturate_cast<T3>(b - a); }
};
template<typename T> struct OpMin
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::min(a, b); }
};
template<typename T> struct OpMax
{
typedef T type1;
typedef T type2;
typedef T rtype;
T operator ()(const T a, const T b) const { return std::max(a, b); }
};
inline Size getContinuousSize_( int flags, int cols, int rows, int widthScale )
{
int64 sz = (int64)cols * rows * widthScale;
......@@ -201,11 +166,6 @@ struct NoVec
size_t operator()(const void*, const void*, void*, size_t) const { return 0; }
};
extern volatile bool USE_SSE2;
extern volatile bool USE_SSE4_2;
extern volatile bool USE_AVX;
extern volatile bool USE_AVX2;
enum { BLOCK_SIZE = 1024 };
#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700)
......
......@@ -86,45 +86,6 @@ Mutex* __initialization_mutex_initializer = &getInitializationMutex();
#undef max
#undef abs
#include <tchar.h>
#if defined _MSC_VER
#if _MSC_VER >= 1400
#include <intrin.h>
#elif defined _M_IX86
static void __cpuid(int* cpuid_data, int)
{
__asm
{
push ebx
push edi
mov edi, cpuid_data
mov eax, 1
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
pop ebx
}
}
static void __cpuidex(int* cpuid_data, int, int)
{
__asm
{
push edi
mov edi, cpuid_data
mov eax, 7
mov ecx, 0
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
}
}
#endif
#endif
#ifdef WINRT
#include <wrl/client.h>
......@@ -237,160 +198,15 @@ void Exception::formatMessage()
msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str());
}
struct HWFeatures
{
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
HWFeatures(void)
{
memset( have, 0, sizeof(have) );
x86_family = 0;
}
static HWFeatures initialize(void)
{
HWFeatures f;
int cpuid_data[4] = { 0, 0, 0, 0 };
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
__cpuid(cpuid_data, 1);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
#ifdef __x86_64__
asm __volatile__
(
"movl $1, %%eax\n\t"
"cpuid\n\t"
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $1,%%eax\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3])
:
: "cc"
);
#endif
#endif
f.x86_family = (cpuid_data[0] >> 8) & 15;
if( f.x86_family >= 6 )
{
f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
__cpuidex(cpuid_data, 7, 0);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
#ifdef __x86_64__
asm __volatile__
(
"movl $7, %%eax\n\t"
"movl $0, %%ecx\n\t"
"cpuid\n\t"
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $7,%%eax\n\t"
"movl $0,%%ecx\n\t"
"cpuid\n\t"
"movl %%ebx, %0\n\t"
"popl %%ebx\n\t"
: "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
:
: "cc"
);
#endif
#endif
f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
}
#if defined ANDROID || defined __linux__
#ifdef __aarch64__
f.have[CV_CPU_NEON] = true;
#else
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
{
Elf32_auxv_t auxv;
const size_t size_auxv_t = sizeof(auxv);
while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
{
if (auxv.a_type == AT_HWCAP)
{
f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
break;
}
}
close(cpufile);
}
#endif
#elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
f.have[CV_CPU_NEON] = true;
#endif
return f;
}
int x86_family;
bool have[MAX_FEATURE+1];
};
static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
static HWFeatures* currentFeatures = &featuresEnabled;
bool checkHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
return currentFeatures->have[feature];
return cv::hal::checkHardwareSupport(feature);
}
volatile bool useOptimizedFlag = true;
volatile bool USE_SSE2 = featuresEnabled.have[CV_CPU_SSE2];
volatile bool USE_SSE4_2 = featuresEnabled.have[CV_CPU_SSE4_2];
volatile bool USE_AVX = featuresEnabled.have[CV_CPU_AVX];
volatile bool USE_AVX2 = featuresEnabled.have[CV_CPU_AVX2];
void setUseOptimized( bool flag )
{
useOptimizedFlag = flag;
currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
USE_SSE2 = currentFeatures->have[CV_CPU_SSE2];
cv::hal::setUseOptimized(flag);
ipp::setUseIPP(flag);
#ifdef HAVE_OPENCL
......@@ -403,7 +219,7 @@ void setUseOptimized( bool flag )
bool useOptimized(void)
{
return useOptimizedFlag;
return cv::hal::useOptimized();
}
int64 getTickCount(void)
......@@ -683,12 +499,12 @@ redirectError( CvErrorCallback errCallback, void* userdata, void** prevUserdata)
CV_IMPL int cvCheckHardwareSupport(int feature)
{
CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
return cv::currentFeatures->have[feature];
return cv::hal::checkHardwareSupport(feature);
}
CV_IMPL int cvUseOptimized( int flag )
{
int prevMode = cv::useOptimizedFlag;
int prevMode = cv::useOptimized();
cv::setUseOptimized( flag != 0 );
return prevMode;
}
......
......@@ -2,10 +2,20 @@ set(the_description "The Hardware Acceleration Layer (HAL) module")
set(OPENCV_MODULE_TYPE STATIC)
if(OPENCV_HAL_HEADERS AND OPENCV_HAL_LIBS)
set(OPENCV_HAL_HEADERS_INCLUDES "#include \"${OPENCV_HAL_HEADERS}\"")
set(DEPS "${OPENCV_HAL_LIBS}")
else()
set(OPENCV_HAL_HEADERS_INCLUDES "// using default HAL")
set(DEPS "")
endif()
configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY)
if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
endif()
endif()
ocv_define_module(hal)
ocv_define_module(hal ${DEPS})
This diff is collapsed.
......@@ -53,6 +53,7 @@
#endif
#include <limits.h>
#include "opencv2/hal/interface.hpp"
#if defined __ICL
# define CV_ICC __ICL
......@@ -117,9 +118,38 @@
#define CV_CPU_NEON 100
// when adding to this list remember to update the enum in core/utility.cpp
// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 255
/** @brief Available CPU features.
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100
};
// do not include SSE/AVX/NEON headers for NVCC compiler
#ifndef __CUDACC__
......@@ -257,49 +287,6 @@
# define CV_VFP 0
#endif
/* primitive types */
/*
schar - signed 1 byte integer
uchar - unsigned 1 byte integer
short - signed 2 byte integer
ushort - unsigned 2 byte integer
int - signed 4 byte integer
uint - unsigned 4 byte integer
int64 - signed 8 byte integer
uint64 - unsigned 8 byte integer
*/
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
# include <cstdint>
typedef std::uint32_t uint;
# else
# include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
# define CV_BIG_INT(n) n##I64
# define CV_BIG_UINT(n) n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
# define CV_BIG_INT(n) n##LL
# define CV_BIG_UINT(n) n##ULL
#endif
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
#define CV_2PI 6.283185307179586476925286766559
......@@ -321,6 +308,19 @@ typedef union Cv64suf
}
Cv64suf;
namespace cv { namespace hal {
bool checkHardwareSupport(int feature);
void setUseOptimized(bool onoff);
bool useOptimized();
}}
#define USE_SSE2 (cv::hal::checkHardwareSupport(CV_CPU_SSE))
#define USE_SSE4_2 (cv::hal::checkHardwareSupport(CV_CPU_SSE4_2))
#define USE_AVX (cv::hal::checkHardwareSupport(CV_CPU_AVX))
#define USE_AVX2 (cv::hal::checkHardwareSupport(CV_CPU_AVX2))
/****************************************************************************************\
* fast math *
......
#ifndef _HAL_INTERFACE_HPP_INCLUDED_
#define _HAL_INTERFACE_HPP_INCLUDED_
#define CV_HAL_ERROR_OK 0
#define CV_HAL_ERROR_NI 1
#define CV_HAL_ERROR_UNKNOWN -1
#define CV_HAL_CMP_EQ 0
#define CV_HAL_CMP_GT 1
#define CV_HAL_CMP_GE 2
#define CV_HAL_CMP_LT 3
#define CV_HAL_CMP_LE 4
#define CV_HAL_CMP_NE 5
#ifdef __cplusplus
namespace cv { namespace hal {
namespace Error {
enum
{
Ok = 0,
NotImplemented = 1,
Unknown = -1
};
}
enum
{
CMP_EQ = 0,
CMP_GT = 1,
CMP_GE = 2,
CMP_LT = 3,
CMP_LE = 4,
CMP_NE = 5
};
}}
#endif
#ifdef __cplusplus
#include <cstddef>
#else
#include <stddef.h>
#endif
/* primitive types */
/*
schar - signed 1 byte integer
uchar - unsigned 1 byte integer
short - signed 2 byte integer
ushort - unsigned 2 byte integer
int - signed 4 byte integer
uint - unsigned 4 byte integer
int64 - signed 8 byte integer
uint64 - unsigned 8 byte integer
*/
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
# include <cstdint>
typedef std::uint32_t uint;
# else
# include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
# define CV_BIG_INT(n) n##I64
# define CV_BIG_UINT(n) n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
# define CV_BIG_INT(n) n##LL
# define CV_BIG_UINT(n) n##ULL
#endif
#endif
......@@ -46,6 +46,8 @@
# error sse_utils.hpp header must be compiled as C++
#endif
#include "opencv2/hal/defs.h"
#if CV_SSE2
inline void _mm_deinterleave_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, __m128i & v_g1)
......
cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
endif()
endif()
add_library(simple_hal simple.cpp)
set(OPENCV_HAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..")
target_include_directories(simple_hal PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_HAL_DIR}/include)
#include "simple.hpp"
int slow_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
{
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] & src2[x];
return cv::hal::Error::Ok;
}
int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
{
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] | src2[x];
return cv::hal::Error::Ok;
}
int slow_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
{
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = src1[x] ^ src2[x];
return cv::hal::Error::Ok;
}
int slow_not8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height)
{
for(; height--; src1 = src1 + step1, src2 = src2 + step2, dst = dst + step)
for(int x = 0 ; x < width; x++ )
dst[x] = ~src1[x];
return cv::hal::Error::Ok;
}
#ifndef _SIMPLE_HPP_INCLUDED_
#define _SIMPLE_HPP_INCLUDED_
#include "opencv2/hal/interface.hpp"
int slow_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);
int slow_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);
int slow_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);
int slow_not8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height);
#undef hal_and8u
#define hal_and8u slow_and8u
#undef hal_or8u
#define hal_or8u slow_or8u
#undef hal_xor8u
#define hal_xor8u slow_xor8u
#undef hal_not8u
#define hal_not8u slow_not8u
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#include "precomp.hpp"
#if defined WIN32 || defined _WIN32 || defined WINCE
#include <windows.h>
#if defined _MSC_VER
#if _MSC_VER >= 1400
#include <intrin.h>
#elif defined _M_IX86
static void __cpuid(int* cpuid_data, int)
{
__asm
{
push ebx
push edi
mov edi, cpuid_data
mov eax, 1
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
pop ebx
}
}
static void __cpuidex(int* cpuid_data, int, int)
{
__asm
{
push edi
mov edi, cpuid_data
mov eax, 7
mov ecx, 0
cpuid
mov [edi], eax
mov [edi + 4], ebx
mov [edi + 8], ecx
mov [edi + 12], edx
pop edi
}
}
#endif
#endif
#endif
#if defined ANDROID || defined __linux__
# include <unistd.h>
# include <fcntl.h>
# include <elf.h>
# include <linux/auxvec.h>
#endif
#if defined __linux__ || defined __APPLE__ || defined __EMSCRIPTEN__
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#if defined ANDROID
#include <sys/sysconf.h>
#endif
#endif
#ifdef ANDROID
# include <android/log.h>
#endif
struct HWFeatures
{
enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE };
HWFeatures(void)
{
memset( have, 0, sizeof(have) );
x86_family = 0;
}
static HWFeatures initialize(void)
{
HWFeatures f;
int cpuid_data[4] = { 0, 0, 0, 0 };
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
__cpuid(cpuid_data, 1);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
#ifdef __x86_64__
asm __volatile__
(
"movl $1, %%eax\n\t"
"cpuid\n\t"
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $1,%%eax\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(cpuid_data[0]), "=c"(cpuid_data[2]), "=d"(cpuid_data[3])
:
: "cc"
);
#endif
#endif
f.x86_family = (cpuid_data[0] >> 8) & 15;
if( f.x86_family >= 6 )
{
f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0;
f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0;
f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0;
f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0;
f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0;
f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0;
f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0;
f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0;
f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0;
f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX
// make the second call to the cpuid command in order to get
// information about extended features like AVX2
#if defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
__cpuidex(cpuid_data, 7, 0);
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
#ifdef __x86_64__
asm __volatile__
(
"movl $7, %%eax\n\t"
"movl $0, %%ecx\n\t"
"cpuid\n\t"
:[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3])
:
: "cc"
);
#else
asm volatile
(
"pushl %%ebx\n\t"
"movl $7,%%eax\n\t"
"movl $0,%%ecx\n\t"
"cpuid\n\t"
"movl %%ebx, %0\n\t"
"popl %%ebx\n\t"
: "=r"(cpuid_data[1]), "=c"(cpuid_data[2])
:
: "cc"
);
#endif
#endif
f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0;
f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0;
f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0;
f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0;
f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0;
f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0;
f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0;
f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0;
f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0;
f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0;
}
#if defined ANDROID || defined __linux__
#ifdef __aarch64__
f.have[CV_CPU_NEON] = true;
#else
int cpufile = open("/proc/self/auxv", O_RDONLY);
if (cpufile >= 0)
{
Elf32_auxv_t auxv;
const size_t size_auxv_t = sizeof(auxv);
while ((size_t)read(cpufile, &auxv, size_auxv_t) == size_auxv_t)
{
if (auxv.a_type == AT_HWCAP)
{
f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0;
break;
}
}
close(cpufile);
}
#endif
#elif (defined __clang__ || defined __APPLE__) && (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__))
f.have[CV_CPU_NEON] = true;
#endif
return f;
}
int x86_family;
bool have[MAX_FEATURE+1];
};
static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures();
static HWFeatures* currentFeatures = &featuresEnabled;
volatile bool useOptimizedFlag = true;
namespace cv { namespace hal {
bool checkHardwareSupport(int feature)
{
// CV_DbgAssert( 0 <= feature && feature <= CV_HARDWARE_MAX_FEATURE );
return currentFeatures->have[feature];
}
void setUseOptimized( bool flag )
{
useOptimizedFlag = flag;
currentFeatures = flag ? &featuresEnabled : &featuresDisabled;
}
bool useOptimized(void)
{
return useOptimizedFlag;
}
}}
This diff is collapsed.
......@@ -47,3 +47,13 @@
#include <cstdlib>
#include <limits>
#include <float.h>
#include <cstring>
#include <cassert>
#include "opencv2/hal/sse_utils.hpp"
#if defined HAVE_IPP && (IPP_VERSION_X100 >= 700)
#define ARITHM_USE_IPP 1
#else
#define ARITHM_USE_IPP 0
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -94,4 +94,6 @@ extern const float icv8x32fSqrTab[];
#include "_geom.h"
#include "filterengine.hpp"
#include "opencv2/hal/sse_utils.hpp"
#endif /*__OPENCV_CV_INTERNAL_H_*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment