Commit 1e667de1 authored by Maksim Shabunin

HAL math interfaces: fastAtan2, magnitude, sqrt, invSqrt, log, exp

parent f6fa1cee
...@@ -85,7 +85,8 @@ CV_EXPORTS void exp64f(const double* src, double* dst, int n); ...@@ -85,7 +85,8 @@ CV_EXPORTS void exp64f(const double* src, double* dst, int n);
CV_EXPORTS void log32f(const float* src, float* dst, int n); CV_EXPORTS void log32f(const float* src, float* dst, int n);
CV_EXPORTS void log64f(const double* src, double* dst, int n); CV_EXPORTS void log64f(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees); CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n); CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n); CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len); CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
...@@ -228,6 +229,7 @@ CV_EXPORTS void exp(const double* src, double* dst, int n); ...@@ -228,6 +229,7 @@ CV_EXPORTS void exp(const double* src, double* dst, int n);
CV_EXPORTS void log(const float* src, float* dst, int n); CV_EXPORTS void log(const float* src, float* dst, int n);
CV_EXPORTS void log(const double* src, double* dst, int n); CV_EXPORTS void log(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n); CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n); CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt(const float* src, float* dst, int len); CV_EXPORTS void sqrt(const float* src, float* dst, int len);
......
...@@ -317,4 +317,98 @@ template <typename T> struct V_SIMD128Traits ...@@ -317,4 +317,98 @@ template <typename T> struct V_SIMD128Traits
//! @} //! @}
//==================================================================================================
//! @cond IGNORED
namespace cv {

// Trait table keyed by a scalar lane type. Each specialization names the
// 128-bit register type holding lanes of that type ("reg") together with
// related register types, and provides two factory helpers:
//   zero()   - forwards to the matching v_setzero_* call
//   all(val) - forwards to the matching v_setall_* call
// Related typedefs, where a specialization provides them:
//   w_reg   - register whose type name indicates lanes twice as wide
//   q_reg   - register whose type name indicates lanes four times as wide
//   u_reg   - unsigned register of the same lane width (the register itself
//             for the floating-point specializations)
//   int_reg - signed-integer register type associated with the lane type
// Only the specializations below exist; the primary template is left undefined.
template <typename R> struct V_RegTrait128;

// 8-bit unsigned lanes
template <> struct V_RegTrait128<uchar>
{
    typedef v_uint8x16 reg;
    typedef v_uint8x16 u_reg;
    typedef v_uint16x8 w_reg;
    typedef v_uint32x4 q_reg;

    static v_uint8x16 all(uchar val)
    {
        return v_setall_u8(val);
    }
    static v_uint8x16 zero()
    {
        return v_setzero_u8();
    }
};

// 8-bit signed lanes
template <> struct V_RegTrait128<schar>
{
    typedef v_int8x16 reg;
    typedef v_uint8x16 u_reg;
    typedef v_int16x8 w_reg;
    typedef v_int32x4 q_reg;

    static v_int8x16 all(schar val)
    {
        return v_setall_s8(val);
    }
    static v_int8x16 zero()
    {
        return v_setzero_s8();
    }
};

// 16-bit unsigned lanes
template <> struct V_RegTrait128<ushort>
{
    typedef v_uint16x8 reg;
    typedef v_uint16x8 u_reg;
    typedef v_int16x8 int_reg;
    typedef v_uint32x4 w_reg;

    static v_uint16x8 all(ushort val)
    {
        return v_setall_u16(val);
    }
    static v_uint16x8 zero()
    {
        return v_setzero_u16();
    }
};

// 16-bit signed lanes
template <> struct V_RegTrait128<short>
{
    typedef v_int16x8 reg;
    typedef v_uint16x8 u_reg;
    typedef v_int32x4 w_reg;

    static v_int16x8 all(short val)
    {
        return v_setall_s16(val);
    }
    static v_int16x8 zero()
    {
        return v_setzero_s16();
    }
};

// 32-bit unsigned lanes
template <> struct V_RegTrait128<unsigned>
{
    typedef v_uint32x4 reg;
    typedef v_uint32x4 u_reg;
    typedef v_int32x4 int_reg;
    typedef v_uint64x2 w_reg;

    static v_uint32x4 all(unsigned val)
    {
        return v_setall_u32(val);
    }
    static v_uint32x4 zero()
    {
        return v_setzero_u32();
    }
};

// 32-bit signed lanes
template <> struct V_RegTrait128<int>
{
    typedef v_int32x4 reg;
    typedef v_uint32x4 u_reg;
    typedef v_int64x2 w_reg;

    static v_int32x4 all(int val)
    {
        return v_setall_s32(val);
    }
    static v_int32x4 zero()
    {
        return v_setzero_s32();
    }
};

// 64-bit unsigned lanes (no related register typedefs are provided)
template <> struct V_RegTrait128<uint64>
{
    typedef v_uint64x2 reg;

    static v_uint64x2 all(uint64 val)
    {
        return v_setall_u64(val);
    }
    static v_uint64x2 zero()
    {
        return v_setzero_u64();
    }
};

// 64-bit signed lanes (no related register typedefs are provided)
template <> struct V_RegTrait128<int64>
{
    typedef v_int64x2 reg;

    static v_int64x2 all(int64 val)
    {
        return v_setall_s64(val);
    }
    static v_int64x2 zero()
    {
        return v_setzero_s64();
    }
};

// Single-precision floating-point lanes
template <> struct V_RegTrait128<float>
{
    typedef v_float32x4 reg;
    typedef v_float32x4 u_reg;
    typedef v_int32x4 int_reg;

    static v_float32x4 all(float val)
    {
        return v_setall_f32(val);
    }
    static v_float32x4 zero()
    {
        return v_setzero_f32();
    }
};

#if CV_SIMD128_64F
// Double-precision floating-point lanes; only declared when CV_SIMD128_64F is set
template <> struct V_RegTrait128<double>
{
    typedef v_float64x2 reg;
    typedef v_float64x2 u_reg;
    typedef v_int32x4 int_reg;

    static v_float64x2 all(double val)
    {
        return v_setall_f64(val);
    }
    static v_float64x2 zero()
    {
        return v_setzero_f64();
    }
};
#endif

} // cv::
//! @endcond
#endif #endif
...@@ -25,6 +25,20 @@ PERF_TEST_P(VectorLength, phase32f, testing::Values(128, 1000, 128*1024, 512*102 ...@@ -25,6 +25,20 @@ PERF_TEST_P(VectorLength, phase32f, testing::Values(128, 1000, 128*1024, 512*102
SANITY_CHECK(angle, 5e-5); SANITY_CHECK(angle, 5e-5);
} }
// Performance test: cv::phase on double-precision vectors of several lengths.
PERF_TEST_P(VectorLength, phase64f, testing::Values(128, 1000, 128*1024, 512*1024, 1024*1024))
{
    // Element count for this run, taken from the test parameter.
    const size_t length = GetParam();

    vector<double> X(length), Y(length), angle(length);

    // X and Y are inputs (filled via WARMUP_RNG); angle receives the result.
    declare.in(X, Y, WARMUP_RNG).out(angle);

    // Last argument requests angles in degrees.
    TEST_CYCLE_N(200)
        cv::phase(X, Y, angle, true);

    SANITY_CHECK(angle, 5e-5);
}
PERF_TEST_P( MaxDim_MaxPoints, kmeans, PERF_TEST_P( MaxDim_MaxPoints, kmeans,
testing::Combine( testing::Values( 16, 32, 64 ), testing::Combine( testing::Values( 16, 32, 64 ),
testing::Values( 300, 400, 500) ) ) testing::Values( 300, 400, 500) ) )
......
...@@ -376,6 +376,110 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int ...@@ -376,6 +376,110 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int
#define cv_hal_merge64s hal_ni_merge64s #define cv_hal_merge64s hal_ni_merge64s
//! @endcond //! @endcond
/**
@param y,x source Y and X arrays
@param dst destination array
@param len length of arrays
@param angleInDegrees if set to true return angles in degrees, otherwise in radians
*/
//! @addtogroup core_hal_interface_fastAtan Atan calculation
//! @{
// Placeholder for the single-precision atan hook: ignores its arguments and
// always reports that no implementation is available.
inline int hal_ni_fastAtan32f(const float* y, const float* x, float* dst, int len, bool angleInDegrees)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision atan hook: ignores its arguments and
// always reports that no implementation is available.
inline int hal_ni_fastAtan64f(const double* y, const double* x, double* dst, int len, bool angleInDegrees)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_fastAtan32f hal_ni_fastAtan32f
#define cv_hal_fastAtan64f hal_ni_fastAtan64f
//! @endcond
/**
@param x,y source X and Y arrays
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_magnitude Magnitude calculation
//! @{
// Placeholder for the single-precision magnitude hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_magnitude32f(const float *x, const float *y, float *dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision magnitude hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_magnitude64f(const double *x, const double *y, double *dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_magnitude32f hal_ni_magnitude32f
#define cv_hal_magnitude64f hal_ni_magnitude64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_invSqrt Inverse square root calculation
//! @{
// Placeholder for the single-precision inverse-square-root hook: ignores its
// arguments and always reports that no implementation is available.
inline int hal_ni_invSqrt32f(const float* src, float* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision inverse-square-root hook: ignores its
// arguments and always reports that no implementation is available.
inline int hal_ni_invSqrt64f(const double* src, double* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_invSqrt32f hal_ni_invSqrt32f
#define cv_hal_invSqrt64f hal_ni_invSqrt64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_sqrt Square root calculation
//! @{
// Placeholder for the single-precision square-root hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_sqrt32f(const float* src, float* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision square-root hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_sqrt64f(const double* src, double* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_sqrt32f hal_ni_sqrt32f
#define cv_hal_sqrt64f hal_ni_sqrt64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_log Natural logarithm calculation
//! @{
// Placeholder for the single-precision natural-logarithm hook: ignores its
// arguments and always reports that no implementation is available.
inline int hal_ni_log32f(const float* src, float* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision natural-logarithm hook: ignores its
// arguments and always reports that no implementation is available.
inline int hal_ni_log64f(const double* src, double* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_log32f hal_ni_log32f
#define cv_hal_log64f hal_ni_log64f
//! @endcond
/**
@param src source array
@param dst destination array
@param len length of arrays
*/
//! @addtogroup core_hal_interface_exp Exponent calculation
//! @{
// Placeholder for the single-precision exponent hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_exp32f(const float* src, float* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
// Placeholder for the double-precision exponent hook: ignores its arguments
// and always reports that no implementation is available.
inline int hal_ni_exp64f(const double* src, double* dst, int len)
{
    return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
//! @}
//! @cond IGNORED
#define cv_hal_exp32f hal_ni_exp32f
#define cv_hal_exp64f hal_ni_exp64f
//! @endcond
/** /**
@brief Dummy structure storing DFT/DCT context @brief Dummy structure storing DFT/DCT context
......
...@@ -51,11 +51,6 @@ namespace cv ...@@ -51,11 +51,6 @@ namespace cv
typedef void (*MathFunc)(const void* src, void* dst, int len); typedef void (*MathFunc)(const void* src, void* dst, int len);
// Coefficients of the odd-power polynomial (c, c^3, c^5, c^7 terms) evaluated
// by fastAtan2. Each one is pre-multiplied by 180/CV_PI, so the polynomial
// yields its result in degrees rather than radians.
static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 }; enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
...@@ -100,29 +95,6 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in ...@@ -100,29 +95,6 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
#endif #endif
// Polynomial approximation of the angle of the vector (x, y), in degrees.
//
// The ratio of the smaller to the larger absolute coordinate is fed to a
// 7th-degree odd polynomial (coefficients atan2_p1..atan2_p7, already scaled
// to degrees). The result is then mirrored according to which coordinate
// dominates and to the signs of x and y.
float fastAtan2( float y, float x )
{
    const float absX = std::abs(x);
    const float absY = std::abs(y);
    float angle;

    if( absX >= absY )
    {
        // (float)DBL_EPSILON keeps the division defined when both inputs are zero.
        const float ratio = absY/(absX + (float)DBL_EPSILON);
        const float ratioSq = ratio*ratio;
        angle = (((atan2_p7*ratioSq + atan2_p5)*ratioSq + atan2_p3)*ratioSq + atan2_p1)*ratio;
    }
    else
    {
        // |y| dominates: evaluate on x/y and take the complement to 90 degrees.
        const float ratio = absX/(absY + (float)DBL_EPSILON);
        const float ratioSq = ratio*ratio;
        angle = 90.f - (((atan2_p7*ratioSq + atan2_p5)*ratioSq + atan2_p3)*ratioSq + atan2_p1)*ratio;
    }

    // Reflect for negative x, then for negative y.
    if( x < 0 )
    {
        angle = 180.f - angle;
    }
    if( y < 0 )
    {
        angle = 360.f - angle;
    }

    return angle;
}
/* ************************************************************************** *\ /* ************************************************************************** *\
Fast cube root by Ken Turkowski Fast cube root by Ken Turkowski
(http://www.worldserver.com/turk/computergraphics/papers.html) (http://www.worldserver.com/turk/computergraphics/papers.html)
...@@ -202,7 +174,6 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst ) ...@@ -202,7 +174,6 @@ void magnitude( InputArray src1, InputArray src2, OutputArray dst )
} }
} }
void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees ) void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
{ {
int type = src1.type(), depth = src1.depth(), cn = src1.channels(); int type = src1.type(), depth = src1.depth(), cn = src1.channels();
...@@ -218,19 +189,8 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre ...@@ -218,19 +189,8 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
const Mat* arrays[] = {&X, &Y, &Angle, 0}; const Mat* arrays[] = {&X, &Y, &Angle, 0};
uchar* ptrs[3]; uchar* ptrs[3];
NAryMatIterator it(arrays, ptrs); NAryMatIterator it(arrays, ptrs);
cv::AutoBuffer<float> _buf; int j, total = (int)(it.size*cn), blockSize = total;
float* buf[2] = {0, 0};
int j, k, total = (int)(it.size*cn), blockSize = total;
size_t esz1 = X.elemSize1(); size_t esz1 = X.elemSize1();
if( depth == CV_64F )
{
blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
_buf.allocate(blockSize*2);
buf[0] = _buf;
buf[1] = buf[0] + blockSize;
}
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
for( j = 0; j < total; j += blockSize ) for( j = 0; j < total; j += blockSize )
...@@ -240,53 +200,13 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre ...@@ -240,53 +200,13 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
{ {
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
float *angle = (float*)ptrs[2]; float *angle = (float*)ptrs[2];
hal::fastAtan2( y, x, angle, len, angleInDegrees ); hal::fastAtan32f( y, x, angle, len, angleInDegrees );
} }
else else
{ {
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
double *angle = (double*)ptrs[2]; double *angle = (double*)ptrs[2];
k = 0; hal::fastAtan64f(y, x, angle, len, angleInDegrees);
#if CV_SSE2
if (USE_SSE2)
{
for ( ; k <= len - 4; k += 4)
{
__m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
_mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
__m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
_mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
_mm_storeu_ps(buf[0] + k, v_dst0);
_mm_storeu_ps(buf[1] + k, v_dst1);
}
}
#endif
for( ; k < len; k++ )
{
buf[0][k] = (float)x[k];
buf[1][k] = (float)y[k];
}
hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
k = 0;
#if CV_SSE2
if (USE_SSE2)
{
for ( ; k <= len - 4; k += 4)
{
__m128 v_src = _mm_loadu_ps(buf[0] + k);
_mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
_mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
}
}
#endif
for( ; k < len; k++ )
angle[k] = buf[0][k];
} }
ptrs[0] += len*esz1; ptrs[0] += len*esz1;
ptrs[1] += len*esz1; ptrs[1] += len*esz1;
...@@ -353,18 +273,9 @@ void cartToPolar( InputArray src1, InputArray src2, ...@@ -353,18 +273,9 @@ void cartToPolar( InputArray src1, InputArray src2,
const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0}; const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
uchar* ptrs[4]; uchar* ptrs[4];
NAryMatIterator it(arrays, ptrs); NAryMatIterator it(arrays, ptrs);
cv::AutoBuffer<float> _buf; int j, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
float* buf[2] = {0, 0};
int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
size_t esz1 = X.elemSize1(); size_t esz1 = X.elemSize1();
if( depth == CV_64F )
{
_buf.allocate(blockSize*2);
buf[0] = _buf;
buf[1] = buf[0] + blockSize;
}
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
for( j = 0; j < total; j += blockSize ) for( j = 0; j < total; j += blockSize )
...@@ -375,55 +286,14 @@ void cartToPolar( InputArray src1, InputArray src2, ...@@ -375,55 +286,14 @@ void cartToPolar( InputArray src1, InputArray src2,
const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3]; float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
hal::magnitude32f( x, y, mag, len ); hal::magnitude32f( x, y, mag, len );
hal::fastAtan2( y, x, angle, len, angleInDegrees ); hal::fastAtan32f( y, x, angle, len, angleInDegrees );
} }
else else
{ {
const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
double *angle = (double*)ptrs[3]; double *angle = (double*)ptrs[3];
hal::magnitude64f(x, y, (double*)ptrs[2], len); hal::magnitude64f(x, y, (double*)ptrs[2], len);
k = 0; hal::fastAtan64f(y, x, angle, len, angleInDegrees);
#if CV_SSE2
if (USE_SSE2)
{
for ( ; k <= len - 4; k += 4)
{
__m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
_mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
__m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
_mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
_mm_storeu_ps(buf[0] + k, v_dst0);
_mm_storeu_ps(buf[1] + k, v_dst1);
}
}
#endif
for( ; k < len; k++ )
{
buf[0][k] = (float)x[k];
buf[1][k] = (float)y[k];
}
hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
k = 0;
#if CV_SSE2
if (USE_SSE2)
{
for ( ; k <= len - 4; k += 4)
{
__m128 v_src = _mm_loadu_ps(buf[0] + k);
_mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
_mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
}
}
#endif
for( ; k < len; k++ )
angle[k] = buf[0][k];
} }
ptrs[0] += len*esz1; ptrs[0] += len*esz1;
ptrs[1] += len*esz1; ptrs[1] += len*esz1;
......
This diff is collapsed.
...@@ -69,8 +69,8 @@ template<typename R> struct TheTest ...@@ -69,8 +69,8 @@ template<typename R> struct TheTest
EXPECT_EQ(d, res); EXPECT_EQ(d, res);
// zero, all // zero, all
Data<R> resZ = RegTrait<R>::zero(); Data<R> resZ = V_RegTrait128<LaneType>::zero();
Data<R> resV = RegTrait<R>::all(8); Data<R> resV = V_RegTrait128<LaneType>::all(8);
for (int i = 0; i < R::nlanes; ++i) for (int i = 0; i < R::nlanes; ++i)
{ {
EXPECT_EQ((LaneType)0, resZ[i]); EXPECT_EQ((LaneType)0, resZ[i]);
...@@ -135,7 +135,7 @@ template<typename R> struct TheTest ...@@ -135,7 +135,7 @@ template<typename R> struct TheTest
// v_expand and v_load_expand // v_expand and v_load_expand
TheTest & test_expand() TheTest & test_expand()
{ {
typedef typename RegTrait<R>::w_reg Rx2; typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
Data<R> dataA; Data<R> dataA;
R a = dataA; R a = dataA;
...@@ -158,7 +158,7 @@ template<typename R> struct TheTest ...@@ -158,7 +158,7 @@ template<typename R> struct TheTest
TheTest & test_expand_q() TheTest & test_expand_q()
{ {
typedef typename RegTrait<R>::q_reg Rx4; typedef typename V_RegTrait128<LaneType>::q_reg Rx4;
Data<R> data; Data<R> data;
Data<Rx4> out = v_load_expand_q(data.d); Data<Rx4> out = v_load_expand_q(data.d);
const int n = Rx4::nlanes; const int n = Rx4::nlanes;
...@@ -232,7 +232,7 @@ template<typename R> struct TheTest ...@@ -232,7 +232,7 @@ template<typename R> struct TheTest
TheTest & test_mul_expand() TheTest & test_mul_expand()
{ {
typedef typename RegTrait<R>::w_reg Rx2; typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
Data<R> dataA, dataB(2); Data<R> dataA, dataB(2);
R a = dataA, b = dataB; R a = dataA, b = dataB;
Rx2 c, d; Rx2 c, d;
...@@ -295,7 +295,7 @@ template<typename R> struct TheTest ...@@ -295,7 +295,7 @@ template<typename R> struct TheTest
TheTest & test_dot_prod() TheTest & test_dot_prod()
{ {
typedef typename RegTrait<R>::w_reg Rx2; typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
Data<R> dataA, dataB(2); Data<R> dataA, dataB(2);
R a = dataA, b = dataB; R a = dataA, b = dataB;
...@@ -361,7 +361,7 @@ template<typename R> struct TheTest ...@@ -361,7 +361,7 @@ template<typename R> struct TheTest
TheTest & test_absdiff() TheTest & test_absdiff()
{ {
typedef typename RegTrait<R>::u_reg Ru; typedef typename V_RegTrait128<LaneType>::u_reg Ru;
typedef typename Ru::lane_type u_type; typedef typename Ru::lane_type u_type;
Data<R> dataA(std::numeric_limits<LaneType>::max()), Data<R> dataA(std::numeric_limits<LaneType>::max()),
dataB(std::numeric_limits<LaneType>::min()); dataB(std::numeric_limits<LaneType>::min());
...@@ -445,7 +445,7 @@ template<typename R> struct TheTest ...@@ -445,7 +445,7 @@ template<typename R> struct TheTest
template <int s> template <int s>
TheTest & test_pack() TheTest & test_pack()
{ {
typedef typename RegTrait<R>::w_reg Rx2; typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
typedef typename Rx2::lane_type w_type; typedef typename Rx2::lane_type w_type;
Data<Rx2> dataA, dataB; Data<Rx2> dataA, dataB;
dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10; dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10;
...@@ -480,8 +480,8 @@ template<typename R> struct TheTest ...@@ -480,8 +480,8 @@ template<typename R> struct TheTest
template <int s> template <int s>
TheTest & test_pack_u() TheTest & test_pack_u()
{ {
typedef typename RegTrait<R>::w_reg Rx2; typedef typename V_TypeTraits<LaneType>::w_type LaneType_w;
typedef typename RegTrait<Rx2>::int_reg Ri2; typedef typename V_RegTrait128<LaneType_w>::int_reg Ri2;
typedef typename Ri2::lane_type w_type; typedef typename Ri2::lane_type w_type;
Data<Ri2> dataA, dataB; Data<Ri2> dataA, dataB;
...@@ -572,7 +572,7 @@ template<typename R> struct TheTest ...@@ -572,7 +572,7 @@ template<typename R> struct TheTest
TheTest & test_float_math() TheTest & test_float_math()
{ {
typedef typename RegTrait<R>::int_reg Ri; typedef typename V_RegTrait128<LaneType>::int_reg Ri;
Data<R> data1, data2, data3; Data<R> data1, data2, data3;
data1 *= 1.1; data1 *= 1.1;
data2 += 10; data2 += 10;
......
...@@ -155,80 +155,4 @@ template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R ...@@ -155,80 +155,4 @@ template <typename R> std::ostream & operator<<(std::ostream & out, const Data<R
return out; return out;
} }
//==================================================================================================
// Trait table keyed by a register type. Each specialization provides
// zero() / all(val) helpers forwarding to the matching cv::v_setzero_* /
// cv::v_setall_* call and, where available, related register typedefs:
//   w_reg   - register whose type name indicates lanes twice as wide
//   q_reg   - register whose type name indicates lanes four times as wide
//   u_reg   - unsigned register of the same lane width (the register itself
//             for the floating-point specializations)
//   int_reg - signed-integer register type associated with the register
// The primary template is intentionally left undefined.
template <typename R> struct RegTrait;

// 16 x 8-bit unsigned
template <> struct RegTrait<cv::v_uint8x16>
{
    typedef cv::v_uint8x16 u_reg;
    typedef cv::v_uint16x8 w_reg;
    typedef cv::v_uint32x4 q_reg;

    static cv::v_uint8x16 all(uchar val)
    {
        return cv::v_setall_u8(val);
    }
    static cv::v_uint8x16 zero()
    {
        return cv::v_setzero_u8();
    }
};

// 16 x 8-bit signed
template <> struct RegTrait<cv::v_int8x16>
{
    typedef cv::v_uint8x16 u_reg;
    typedef cv::v_int16x8 w_reg;
    typedef cv::v_int32x4 q_reg;

    static cv::v_int8x16 all(schar val)
    {
        return cv::v_setall_s8(val);
    }
    static cv::v_int8x16 zero()
    {
        return cv::v_setzero_s8();
    }
};

// 8 x 16-bit unsigned
template <> struct RegTrait<cv::v_uint16x8>
{
    typedef cv::v_uint16x8 u_reg;
    typedef cv::v_int16x8 int_reg;
    typedef cv::v_uint32x4 w_reg;

    static cv::v_uint16x8 all(ushort val)
    {
        return cv::v_setall_u16(val);
    }
    static cv::v_uint16x8 zero()
    {
        return cv::v_setzero_u16();
    }
};

// 8 x 16-bit signed
template <> struct RegTrait<cv::v_int16x8>
{
    typedef cv::v_uint16x8 u_reg;
    typedef cv::v_int32x4 w_reg;

    static cv::v_int16x8 all(short val)
    {
        return cv::v_setall_s16(val);
    }
    static cv::v_int16x8 zero()
    {
        return cv::v_setzero_s16();
    }
};

// 4 x 32-bit unsigned
template <> struct RegTrait<cv::v_uint32x4>
{
    typedef cv::v_uint32x4 u_reg;
    typedef cv::v_int32x4 int_reg;
    typedef cv::v_uint64x2 w_reg;

    static cv::v_uint32x4 all(unsigned val)
    {
        return cv::v_setall_u32(val);
    }
    static cv::v_uint32x4 zero()
    {
        return cv::v_setzero_u32();
    }
};

// 4 x 32-bit signed
template <> struct RegTrait<cv::v_int32x4>
{
    typedef cv::v_uint32x4 u_reg;
    typedef cv::v_int64x2 w_reg;

    static cv::v_int32x4 all(int val)
    {
        return cv::v_setall_s32(val);
    }
    static cv::v_int32x4 zero()
    {
        return cv::v_setzero_s32();
    }
};

// 2 x 64-bit unsigned (helpers only)
template <> struct RegTrait<cv::v_uint64x2>
{
    static cv::v_uint64x2 all(uint64 val)
    {
        return cv::v_setall_u64(val);
    }
    static cv::v_uint64x2 zero()
    {
        return cv::v_setzero_u64();
    }
};

// 2 x 64-bit signed (helpers only)
template <> struct RegTrait<cv::v_int64x2>
{
    static cv::v_int64x2 all(int64 val)
    {
        return cv::v_setall_s64(val);
    }
    static cv::v_int64x2 zero()
    {
        return cv::v_setzero_s64();
    }
};

// 4 x single-precision float
template <> struct RegTrait<cv::v_float32x4>
{
    typedef cv::v_float32x4 u_reg;
    typedef cv::v_int32x4 int_reg;

    static cv::v_float32x4 all(float val)
    {
        return cv::v_setall_f32(val);
    }
    static cv::v_float32x4 zero()
    {
        return cv::v_setzero_f32();
    }
};

#if CV_SIMD128_64F
// 2 x double-precision float; only declared when CV_SIMD128_64F is set
template <> struct RegTrait<cv::v_float64x2>
{
    typedef cv::v_float64x2 u_reg;
    typedef cv::v_int32x4 int_reg;

    static cv::v_float64x2 all(double val)
    {
        return cv::v_setall_f64(val);
    }
    static cv::v_float64x2 zero()
    {
        return cv::v_setzero_f64();
    }
};
#endif
#endif #endif
...@@ -2411,8 +2411,9 @@ TEST(Core_SolvePoly, regression_5599) ...@@ -2411,8 +2411,9 @@ TEST(Core_SolvePoly, regression_5599)
class Core_PhaseTest : public cvtest::BaseTest class Core_PhaseTest : public cvtest::BaseTest
{ {
int t;
public: public:
Core_PhaseTest() {} Core_PhaseTest(int t_) : t(t_) {}
~Core_PhaseTest() {} ~Core_PhaseTest() {}
protected: protected:
virtual void run(int) virtual void run(int)
...@@ -2421,9 +2422,9 @@ protected: ...@@ -2421,9 +2422,9 @@ protected:
const int axisCount = 8; const int axisCount = 8;
const int dim = theRNG().uniform(1,10); const int dim = theRNG().uniform(1,10);
const float scale = theRNG().uniform(1.f, 100.f); const float scale = theRNG().uniform(1.f, 100.f);
Mat x(axisCount + 1, dim, CV_32FC1), Mat x(axisCount + 1, dim, t),
y(axisCount + 1, dim, CV_32FC1); y(axisCount + 1, dim, t);
Mat anglesInDegrees(axisCount + 1, dim, CV_32FC1); Mat anglesInDegrees(axisCount + 1, dim, t);
// fill the data // fill the data
x.row(0).setTo(Scalar(0)); x.row(0).setTo(Scalar(0));
...@@ -2696,8 +2697,8 @@ TEST(Core_SVD, accuracy) { Core_SVDTest test; test.safe_run(); } ...@@ -2696,8 +2697,8 @@ TEST(Core_SVD, accuracy) { Core_SVDTest test; test.safe_run(); }
TEST(Core_SVBkSb, accuracy) { Core_SVBkSbTest test; test.safe_run(); } TEST(Core_SVBkSb, accuracy) { Core_SVBkSbTest test; test.safe_run(); }
TEST(Core_Trace, accuracy) { Core_TraceTest test; test.safe_run(); } TEST(Core_Trace, accuracy) { Core_TraceTest test; test.safe_run(); }
TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); } TEST(Core_SolvePoly, accuracy) { Core_SolvePolyTest test; test.safe_run(); }
TEST(Core_Phase, accuracy) { Core_PhaseTest test; test.safe_run(); } TEST(Core_Phase, accuracy32f) { Core_PhaseTest test(CV_32FC1); test.safe_run(); }
TEST(Core_Phase, accuracy64f) { Core_PhaseTest test(CV_64FC1); test.safe_run(); }
TEST(Core_SVD, flt) TEST(Core_SVD, flt)
{ {
......
...@@ -812,7 +812,7 @@ void AKAZEFeatures::Compute_Main_Orientation(KeyPoint& kpt, const std::vector<TE ...@@ -812,7 +812,7 @@ void AKAZEFeatures::Compute_Main_Orientation(KeyPoint& kpt, const std::vector<TE
} }
} }
} }
hal::fastAtan2(resY, resX, Ang, ang_size, false); hal::fastAtan32f(resY, resX, Ang, ang_size, false);
// Loop slides pi/3 window around feature point // Loop slides pi/3 window around feature point
for (ang1 = 0; ang1 < (float)(2.0 * CV_PI); ang1 += 0.15f) { for (ang1 = 0; ang1 < (float)(2.0 * CV_PI); ang1 += 0.15f) {
ang2 = (ang1 + (float)(CV_PI / 3.0) >(float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0)); ang2 = (ang1 + (float)(CV_PI / 3.0) >(float)(2.0*CV_PI) ? ang1 - (float)(5.0*CV_PI / 3.0) : ang1 + (float)(CV_PI / 3.0));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment