Commit 5efad375 authored by Vadim Pisarevsky

Merge pull request #3384 from ilya-lavrenov:neon_new

parents 72419c37 1fe54411
...@@ -605,6 +605,48 @@ inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v) ...@@ -605,6 +605,48 @@ inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
return vcvtq_u32_f32(vaddq_f32(v, v_05)); return vcvtq_u32_f32(vaddq_f32(v, v_05));
} }
// Approximates the per-lane reciprocal (1 / val) of four packed floats.
// Starts from the hardware estimate (vrecpeq_f32) and applies two
// refinement steps: each step multiplies the current estimate by the
// correction factor produced by vrecpsq_f32(val, estimate).
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
{
    float32x4_t estimate = vrecpeq_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        const float32x4_t correction = vrecpsq_f32(val, estimate);
        estimate = vmulq_f32(correction, estimate);
    }
    return estimate;
}
// Two-lane counterpart of the reciprocal helper: approximates 1 / val for
// two packed floats. The initial vrecpe_f32 estimate is refined twice by
// multiplying it with the vrecps_f32(val, estimate) correction factor.
inline float32x2_t cv_vrecp_f32(float32x2_t val)
{
    float32x2_t estimate = vrecpe_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        const float32x2_t correction = vrecps_f32(val, estimate);
        estimate = vmul_f32(correction, estimate);
    }
    return estimate;
}
// Approximates the per-lane reciprocal square root (1 / sqrt(val)) of four
// packed floats. Begins with the vrsqrteq_f32 estimate and refines it twice;
// each pass feeds (estimate * estimate, val) to vrsqrtsq_f32 and scales the
// estimate by the returned correction factor.
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
{
    float32x4_t estimate = vrsqrteq_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        const float32x4_t squared = vmulq_f32(estimate, estimate);
        const float32x4_t correction = vrsqrtsq_f32(squared, val);
        estimate = vmulq_f32(correction, estimate);
    }
    return estimate;
}
// Two-lane counterpart of the reciprocal square root helper: approximates
// 1 / sqrt(val) for two packed floats. The vrsqrte_f32 estimate is refined
// twice using the correction factor vrsqrts_f32(estimate * estimate, val).
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
{
    float32x2_t estimate = vrsqrte_f32(val);
    for (int step = 0; step < 2; ++step)
    {
        const float32x2_t squared = vmul_f32(estimate, estimate);
        const float32x2_t correction = vrsqrts_f32(squared, val);
        estimate = vmul_f32(correction, estimate);
    }
    return estimate;
}
// Approximates the per-lane square root of four packed floats as the
// reciprocal of the reciprocal square root: sqrt(val) = 1 / (1 / sqrt(val)).
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
{
    const float32x4_t inv_root = cv_vrsqrtq_f32(val);
    return cv_vrecpq_f32(inv_root);
}
// Two-lane square root approximation: takes the reciprocal of the
// reciprocal square root of val, i.e. 1 / (1 / sqrt(val)).
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
{
    const float32x2_t inv_root = cv_vrsqrt_f32(val);
    return cv_vrecp_f32(inv_root);
}
#endif #endif
} // cv } // cv
......
This diff is collapsed.
...@@ -69,7 +69,7 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst ) ...@@ -69,7 +69,7 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
if( simd ) if( simd )
{ {
__m128 half = _mm_set1_ps(0.5f); __m128 half = _mm_set1_ps(0.5f);
for( ; j <= size.width - 5; j += 4 ) for( ; j <= size.width - 4; j += 4 )
{ {
__m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
__m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
...@@ -90,6 +90,19 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst ) ...@@ -90,6 +90,19 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
_mm_storeu_ps(dst + j, a); _mm_storeu_ps(dst + j, a);
} }
} }
#elif CV_NEON
float32x4_t v_half = vdupq_n_f32(0.5f);
for( ; j <= size.width - 4; j += 4 )
{
float32x4x3_t v_src = vld3q_f32(cov + j * 3);
float32x4_t v_a = vmulq_f32(v_src.val[0], v_half);
float32x4_t v_b = v_src.val[1];
float32x4_t v_c = vmulq_f32(v_src.val[2], v_half);
float32x4_t v_t = vsubq_f32(v_a, v_c);
v_t = vmlaq_f32(vmulq_f32(v_t, v_t), v_b, v_b);
vst1q_f32(dst + j, vsubq_f32(vaddq_f32(v_a, v_c), cv_vsqrtq_f32(v_t)));
}
#endif #endif
for( ; j < size.width; j++ ) for( ; j < size.width; j++ )
{ {
...@@ -290,8 +303,24 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size, ...@@ -290,8 +303,24 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
float* cov_data = cov.ptr<float>(i); float* cov_data = cov.ptr<float>(i);
const float* dxdata = Dx.ptr<float>(i); const float* dxdata = Dx.ptr<float>(i);
const float* dydata = Dy.ptr<float>(i); const float* dydata = Dy.ptr<float>(i);
j = 0;
for( j = 0; j < size.width; j++ ) #if CV_NEON
for( ; j <= size.width - 4; j += 4 )
{
float32x4_t v_dx = vld1q_f32(dxdata + j);
float32x4_t v_dy = vld1q_f32(dydata + j);
float32x4x3_t v_dst;
v_dst.val[0] = vmulq_f32(v_dx, v_dx);
v_dst.val[1] = vmulq_f32(v_dx, v_dy);
v_dst.val[2] = vmulq_f32(v_dy, v_dy);
vst3q_f32(cov_data + j * 3, v_dst);
}
#endif
for( ; j < size.width; j++ )
{ {
float dx = dxdata[j]; float dx = dxdata[j];
float dy = dydata[j]; float dy = dydata[j];
......
...@@ -2316,7 +2316,16 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method ) ...@@ -2316,7 +2316,16 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
} }
else if( method == CV_COMP_INTERSECT ) else if( method == CV_COMP_INTERSECT )
{ {
for( j = 0; j < len; j++ ) j = 0;
#if CV_NEON
float32x4_t v_result = vdupq_n_f32(0.0f);
for( ; j <= len - 4; j += 4 )
v_result = vaddq_f32(v_result, vminq_f32(vld1q_f32(h1 + j), vld1q_f32(h2 + j)));
float CV_DECL_ALIGNED(16) ar[4];
vst1q_f32(ar, v_result);
result += ar[0] + ar[1] + ar[2] + ar[3];
#endif
for( ; j < len; j++ )
result += std::min(h1[j], h2[j]); result += std::min(h1[j], h2[j]);
} }
else if( method == CV_COMP_BHATTACHARYYA ) else if( method == CV_COMP_BHATTACHARYYA )
......
This diff is collapsed.
This diff is collapsed.
...@@ -1100,7 +1100,7 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ ) ...@@ -1100,7 +1100,7 @@ int CV_CompareHistTest::validate_test_results( int /*test_case_idx*/ )
code = cvtest::TS::FAIL_INVALID_OUTPUT; code = cvtest::TS::FAIL_INVALID_OUTPUT;
break; break;
} }
else if( fabs(v0 - v) > FLT_EPSILON*10*MAX(fabs(v0),0.1) ) else if( fabs(v0 - v) > FLT_EPSILON*14*MAX(fabs(v0),0.1) )
{ {
ts->printf( cvtest::TS::LOG, "The comparison result using the method #%d (%s)\n\tis inaccurate (=%g, should be =%g)\n", ts->printf( cvtest::TS::LOG, "The comparison result using the method #%d (%s)\n\tis inaccurate (=%g, should be =%g)\n",
i, method_name, v, v0 ); i, method_name, v, v0 );
......
...@@ -1548,9 +1548,28 @@ TEST(Imgproc_GetQuadSubPix, accuracy) { CV_GetQuadSubPixTest test; test.safe_run ...@@ -1548,9 +1548,28 @@ TEST(Imgproc_GetQuadSubPix, accuracy) { CV_GetQuadSubPixTest test; test.safe_run
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
template <typename T, typename WT> template <typename T, typename WT>
// Cast functor for integer accumulators: shifts the working-type value right
// by two bits and converts the result to T through cv::saturate_cast.
// Supplied as the CastOp template argument of resizeArea for the integer
// depths exercised by the Resize.Area_half test.
struct IntCast
{
// val: accumulated value in the working type WT.
T operator() (WT val) const
{
// val >> 2 is the divide-by-four step applied to the accumulated sum.
return cv::saturate_cast<T>(val >> 2);
}
};
// Cast functor for floating-point accumulators: scales the working-type
// value by one quarter and converts the product to T through
// cv::saturate_cast.
template <typename T, typename WT>
struct FltCast
{
    T operator() (WT val) const
    {
        return cv::saturate_cast<T>(0.25 * val);
    }
};
template <typename T, typename WT, int one, typename CastOp>
void resizeArea(const cv::Mat & src, cv::Mat & dst) void resizeArea(const cv::Mat & src, cv::Mat & dst)
{ {
int cn = src.channels(); int cn = src.channels();
CastOp castOp;
for (int y = 0; y < dst.rows; ++y) for (int y = 0; y < dst.rows; ++y)
{ {
...@@ -1565,9 +1584,9 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst) ...@@ -1565,9 +1584,9 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
for (int c = 0; c < cn; ++c) for (int c = 0; c < cn; ++c)
{ {
WT sum = WT(sptr0[x1 + c]) + WT(sptr0[x1 + c + cn]); WT sum = WT(sptr0[x1 + c]) + WT(sptr0[x1 + c + cn]);
sum += WT(sptr1[x1 + c]) + WT(sptr1[x1 + c + cn]) + (WT)(2); sum += WT(sptr1[x1 + c]) + WT(sptr1[x1 + c + cn]) + (WT)(one);
dptr[x + c] = cv::saturate_cast<T>(sum >> 2); dptr[x + c] = castOp(sum);
} }
} }
} }
...@@ -1575,32 +1594,38 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst) ...@@ -1575,32 +1594,38 @@ void resizeArea(const cv::Mat & src, cv::Mat & dst)
TEST(Resize, Area_half) TEST(Resize, Area_half)
{ {
const int size = 10; const int size = 1000;
int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4 }; int types[] = { CV_8UC1, CV_8UC4, CV_16UC1, CV_16UC4, CV_16SC1, CV_16SC4, CV_32FC1, CV_32FC4 };
cv::RNG rng(17); cv::RNG rng(17);
for (int i = 0, _size = sizeof(types) / sizeof(types[0]); i < _size; ++i) for (int i = 0, _size = sizeof(types) / sizeof(types[0]); i < _size; ++i)
{ {
int type = types[i], depth = CV_MAT_DEPTH(type); int type = types[i], depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
const float eps = depth <= CV_32S ? 0 : 7e-5f;
SCOPED_TRACE(depth); SCOPED_TRACE(depth);
SCOPED_TRACE(cn);
cv::Mat src(size, size, type), dst_actual(size >> 1, size >> 1, type), cv::Mat src(size, size, type), dst_actual(size >> 1, size >> 1, type),
dst_reference(size >> 1, size >> 1, type); dst_reference(size >> 1, size >> 1, type);
rng.fill(src, cv::RNG::UNIFORM, 0, 1000, true); rng.fill(src, cv::RNG::UNIFORM, -1000, 1000, true);
if (depth == CV_8U) if (depth == CV_8U)
resizeArea<uchar, ushort>(src, dst_reference); resizeArea<uchar, ushort, 2, IntCast<uchar, ushort> >(src, dst_reference);
else if (depth == CV_16U) else if (depth == CV_16U)
resizeArea<ushort, int>(src, dst_reference); resizeArea<ushort, uint, 2, IntCast<ushort, uint> >(src, dst_reference);
else if (depth == CV_16S)
resizeArea<short, int, 2, IntCast<short, int> >(src, dst_reference);
else if (depth == CV_32F)
resizeArea<float, float, 0, FltCast<float, float> >(src, dst_reference);
else else
CV_Assert(0); CV_Assert(0);
cv::resize(src, dst_actual, dst_actual.size(), 0, 0, cv::INTER_AREA); cv::resize(src, dst_actual, dst_actual.size(), 0, 0, cv::INTER_AREA);
ASSERT_EQ(0, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF)); ASSERT_GE(eps, cvtest::norm(dst_reference, dst_actual, cv::NORM_INF));
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment