Commit 18d10d6b authored by Vitaly Tuzov's avatar Vitaly Tuzov

Fixed v_reduce_sad intrinsics implementation and added tests

parent 5c0a98cf
......@@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
{
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val));
__m256i half = _mm256_sad_epu8(a.val, b.val);
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
{
__m256i half = _mm256_set1_epi8(0x7f);
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)));
half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half));
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
{
......
......@@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val));
__m128i half = _mm_sad_epu8(a.val, b.val);
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
__m128i half = _mm_set1_epi8(0x7f);
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half),
_mm_add_epi8(b.val, half)));
half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half));
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{
......
......@@ -770,6 +770,15 @@ template<typename R> struct TheTest
return *this;
}
TheTest & test_reduce_sad()
{
Data<R> dataA, dataB(R::nlanes/2);
R a = dataA;
R b = dataB;
EXPECT_EQ((unsigned)(R::nlanes*R::nlanes/4), v_reduce_sad(a, b));
return *this;
}
TheTest & test_mask()
{
typedef typename V_RegTraits<R>::int_reg int_reg;
......@@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
.test_logic()
.test_min_max()
.test_absdiff()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
......@@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
......@@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
......@@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
.test_absdiffs()
.test_abs()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
......@@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
......@@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack()
......@@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
.test_min_max()
.test_float_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_unpack()
.test_float_math()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment