add 64F intrinsic in HAL NEON

* use universal intrinsics for the accumulate series using float/double
* accumulate, accumulateSquare, accumulateProduct and accumulateWeighted
* add v_cvt_f64_high in both SSE/NEON
* add test for conversion v_cvt_f64_high in test_intrin.cpp
* improve some existing universal intrinsics by using new instructions in AArch64
* add workaround for Android build in intrin_neon.hpp

add 64F intrinsic in HAL NEON
* use universal intrinsics for the accumulate series using float/double
* accumulate, accumulateSquare, accumulateProduct and accumulateWeighted
* add v_cvt_f64_high in both SSE/NEON
* add test for conversion v_cvt_f64_high in test_intrin.cpp
* improve some existing universal intrinsics by using new instructions in AArch64
* add workaround for Android build in intrin_neon.hpp
7fef96be · Tomoaki Teshima · da5ead2c · 7fef96be · 7fef96be · 7fef96be
Commit 7fef96be authored Aug 30, 2016 by Tomoaki Teshima
4 changed files
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@@ -1587,11 +1587,21 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a)
    return v_float64x2(_mm_cvtepi32_pd(a.val));
 }
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) // upper-half counterpart of v_cvt_f64(const v_int32x4&): converts int32 lanes 2 and 3 to double
+{
+    return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8))); // the 8-byte right shift moves lanes 2 and 3 into the low 64 bits, the only part _mm_cvtepi32_pd converts
+}
 inline v_float64x2 v_cvt_f64(const v_float32x4& a) // widens float lanes 0 and 1 to double
 {
    return v_float64x2(_mm_cvtps_pd(a.val)); // _mm_cvtps_pd reads only the two low floats of its argument
 }
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) // upper-half counterpart of v_cvt_f64(const v_float32x4&): widens float lanes 2 and 3 to double
+{
+    return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8)))); // casts only reinterpret the register so the integer byte shift can move lanes 2 and 3 into the low half before conversion
+}
 //! @endcond
 }

--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -626,12 +626,18 @@ template<typename R> struct TheTest
        dataA *= 1.1;
        R a = dataA;
        Rt b = v_cvt_f64(a);
+        Rt c = v_cvt_f64_high(a);
        Data<Rt> resB = b;
+        Data<Rt> resC = c;
        int n = std::min<int>(Rt::nlanes, R::nlanes);
        for (int i = 0; i < n; ++i)
        {
            EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
        }
+        for (int i = 0; i < n; ++i)
+        {
+            EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]);
+        }
 #endif
        return *this;
    }

--- a/modules/imgproc/src/accum.cpp
+++ b/modules/imgproc/src/accum.cpp