Commit 841cccca authored by Tomoaki Teshima, committed by Tomoaki Teshima

use universal intrinsic in canny

  * add v_abs for universal intrinsic
  * add test of v_abs in test_intrin
  * fix compile error on gcc
  * fix bool OR operation
parent 69704692
......@@ -549,6 +549,13 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x)
}
#endif
// Generates v_abs() for a signed integer NEON vector type.
//   _Tpuvec  - unsigned vector type returned by v_abs
//   _Tpsvec  - signed vector type accepted by v_abs
//   usuffix  - suffix selecting v_reinterpret_as_<usuffix> for the result
//   ssuffix  - suffix selecting the vabsq_<ssuffix> intrinsic
// The lane-wise absolute value is computed in the signed type and the bits
// are then reinterpreted (not value-converted) as the unsigned type.
#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
inline _Tpuvec v_abs(const _Tpsvec& a) \
{ \
    const _Tpsvec magnitude(vabsq_##ssuffix(a.val)); \
    return v_reinterpret_as_##usuffix(magnitude); \
}

// 8-, 16- and 32-bit signed lanes.
OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)
// Lane-wise absolute value of a float32x4 vector via vabsq_f32.
// The argument is taken by const reference so the signature matches the
// other v_abs overloads (integer NEON variants and the SSE float variant).
inline v_float32x4 v_abs(const v_float32x4& x)
{
    return v_float32x4(vabsq_f32(x.val));
}
......
......@@ -739,6 +739,18 @@ inline v_float64x2 v_invsqrt(const v_float64x2& x)
return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
}
// Generates v_abs() for a signed integer SSE vector type.
//   _Tpuvec  - unsigned vector type returned by v_abs
//   _Tpsvec  - signed vector type accepted by v_abs
//   func     - min or max, selecting the _mm_<func>_ep<suffix> intrinsic
//   suffix   - lane suffix of that min/max intrinsic (e.g. u8, i16)
//   subWidth - lane suffix of the _mm_sub_ep<subWidth> used to negate x
// The result is func(x, 0 - x) evaluated per lane.
#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \
inline _Tpuvec v_abs(const _Tpsvec& x) \
{ \
    const __m128i negated = _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val); \
    return _Tpuvec(_mm_##func##_ep##suffix(x.val, negated)); \
}

// 8-bit lanes: unsigned min of x and -x (a negative value has the larger
// unsigned bit pattern, so the minimum is the non-negative one).
OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8)
// 16-bit lanes: signed max of x and -x.
OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16)
// Lane-wise absolute value of four signed 32-bit integers, returned as
// unsigned lanes. Computed as (x XOR mask) + bit, where per lane
// mask = x >> 31 (arithmetic) and bit = x >> 31 (logical).
inline v_uint32x4 v_abs(const v_int32x4& x)
{
    // Arithmetic shift: all ones in negative lanes, zero otherwise.
    const __m128i signMask = _mm_srai_epi32(x.val, 31);
    // Logical shift: 1 in negative lanes, zero otherwise.
    const __m128i signBit = _mm_srli_epi32(x.val, 31);
    // Invert the bits of negative lanes, then add 1 to finish the negation.
    const __m128i inverted = _mm_xor_si128(x.val, signMask);
    return v_uint32x4(_mm_add_epi32(inverted, signBit));
}
// Lane-wise absolute value of four floats: AND each lane with 0x7fffffff,
// which clears the top (sign) bit and keeps the remaining bits.
inline v_float32x4 v_abs(const v_float32x4& x)
{
    const __m128 keepAllButSign = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return v_float32x4(_mm_and_ps(x.val, keepAllButSign));
}
inline v_float64x2 v_abs(const v_float64x2& x)
......
......@@ -277,6 +277,24 @@ template<typename R> struct TheTest
return *this;
}
// Checks v_abs on register type R: computes v_abs(a - b) with the vector
// API and compares every lane against std::abs of the scalar difference.
// Returns *this so calls can be chained.
TheTest & test_abs()
{
    // Unsigned register type paired with LaneType, and its lane type;
    // v_abs's result is stored as Data<Ru>.
    typedef typename V_RegTrait128<LaneType>::u_reg Ru;
    typedef typename Ru::lane_type u_type;

    Data<R> dataA;
    Data<R> dataB(10);  // NOTE(review): presumably fills lanes starting at 10 - confirm against Data

    R lhs = dataA;
    R rhs = dataB;
    lhs = lhs - rhs;

    Data<Ru> resC = v_abs(lhs);

    for (int i = 0; i < Ru::nlanes; ++i)
    {
        EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]);
    }

    return *this;
}
template <int s>
TheTest & test_shift()
{
......@@ -799,6 +817,7 @@ TEST(hal_intrin, int8x16) {
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_unpack()
......@@ -847,6 +866,7 @@ TEST(hal_intrin, int16x8) {
.test_logic()
.test_min_max()
.test_absdiff()
.test_abs()
.test_mask()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_unpack()
......@@ -886,6 +906,7 @@ TEST(hal_intrin, int32x4) {
.test_expand()
.test_addsub()
.test_mul()
.test_abs()
.test_cmp()
.test_shift<1>().test_shift<8>()
.test_logic()
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment