use universal intrinsic in canny

* add v_abs for universal intrinsic * add test of v_abs in test_intrin * fix compile error on gcc * fix bool OR operation

use universal intrinsic in canny
* add v_abs for universal intrinsic * add test of v_abs in test_intrin * fix compile error on gcc * fix bool OR operation
841cccca · Tomoaki Teshima · Tomoaki Teshima · 69704692 · 841cccca · 841cccca
Commit 841cccca authored Sep 30, 2016 by Tomoaki Teshima Committed by Tomoaki Teshima Oct 03, 2016
4 changed files
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -549,6 +549,13 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x)
 }
 #endif

+#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
+inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); } // vabsq on the signed lanes, then the result is reinterpreted as the _Tpuvec type
+
+OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8) // v_abs(v_int8x16) -> v_uint8x16 via vabsq_s8
+OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16) // v_abs(v_int16x8) -> v_uint16x8 via vabsq_s16
+OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32) // v_abs(v_int32x4) -> v_uint32x4 via vabsq_s32
+
 inline v_float32x4 v_abs(v_float32x4 x)
 { return v_float32x4(vabsq_f32(x.val)); }


--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@@ -739,6 +739,18 @@ inline v_float64x2 v_invsqrt(const v_float64x2& x)
    return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
 }

+#define OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(_Tpuvec, _Tpsvec, func, suffix, subWidth) \
+inline _Tpuvec v_abs(const _Tpsvec& x) \
+{ return _Tpuvec(_mm_##func##_ep##suffix(x.val, _mm_sub_ep##subWidth(_mm_setzero_si128(), x.val))); } // combines x with (0 - x) using the min/max intrinsic chosen by the instantiation
+
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint8x16, v_int8x16, min, u8, i8) // expands to _mm_min_epu8(x, 0 - x): unsigned minimum of x and its negation
+OPENCV_HAL_IMPL_SSE_ABS_INT_FUNC(v_uint16x8, v_int16x8, max, i16, i16) // expands to _mm_max_epi16(x, 0 - x): signed maximum of x and its negation
+inline v_uint32x4 v_abs(const v_int32x4& x) // absolute value of 32-bit signed lanes, returned as v_uint32x4
+{
+    __m128i s = _mm_srli_epi32(x.val, 31); // logical shift: sign bit moved to bit 0, so 1 for negative lanes and 0 otherwise
+    __m128i f = _mm_srai_epi32(x.val, 31); // arithmetic shift: all ones for negative lanes, zero otherwise
+    return v_uint32x4(_mm_add_epi32(_mm_xor_si128(x.val, f), s)); // (x ^ f) + s: two's-complement negation for negative lanes, identity for the rest
+}
 inline v_float32x4 v_abs(const v_float32x4& x)
 { return v_float32x4(_mm_and_ps(x.val, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); }
 inline v_float64x2 v_abs(const v_float64x2& x)

--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -277,6 +277,24 @@ template<typename R> struct TheTest
        return *this;
    }

+    TheTest & test_abs() // compares v_abs(a - b) lane by lane with std::abs computed on the scalar data
+    {
+        typedef typename V_RegTrait128<LaneType>::u_reg Ru; // register type used to hold the v_abs result (presumably the unsigned counterpart of R; confirm in V_RegTrait128)
+        typedef typename Ru::lane_type u_type; // lane type of that result register
+        Data<R> dataA, dataB(10);
+        R a = dataA, b = dataB;
+        a = a - b; // v_abs is applied to this lane-wise difference
+
+        Data<Ru> resC = v_abs(a);
+
+        for (int i = 0; i < Ru::nlanes; ++i)
+        {
+            EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]); // scalar reference: std::abs of the same difference, cast to the result lane type
+        }
+
+        return *this; // returns the fixture so test calls can be chained
+    }
+
    template <int s>
    TheTest & test_shift()
    {
@@ -799,6 +817,7 @@ TEST(hal_intrin, int8x16) {
        .test_logic()
        .test_min_max()
        .test_absdiff()
+        .test_abs()
        .test_mask()
        .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
        .test_unpack()
@@ -847,6 +866,7 @@ TEST(hal_intrin, int16x8) {
        .test_logic()
        .test_min_max()
        .test_absdiff()
+        .test_abs()
        .test_mask()
        .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
        .test_unpack()
@@ -886,6 +906,7 @@ TEST(hal_intrin, int32x4) {
        .test_expand()
        .test_addsub()
        .test_mul()
+        .test_abs()
        .test_cmp()
        .test_shift<1>().test_shift<8>()
        .test_logic()

--- a/modules/imgproc/src/canny.cpp
+++ b/modules/imgproc/src/canny.cpp