Commit 7fef96be authored by Tomoaki Teshima's avatar Tomoaki Teshima

add 64F intrinsic in HAL NEON

  * use universal intrinsic for accumulate series using float/double
  * accumulate, accumulateSquare, accumulateProduct and accumulateWeighted
  * add v_cvt_f64_high in both SSE/NEON
  * add test for conversion v_cvt_f64_high in test_intrin.cpp
  * improve some existing universal intrinsic by using new instructions in Aarch64
  * add workaround for Android build in intrin_neon.hpp
parent da5ead2c
......@@ -1587,11 +1587,21 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a)
return v_float64x2(_mm_cvtepi32_pd(a.val));
}
inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
{
return v_float64x2(_mm_cvtepi32_pd(_mm_srli_si128(a.val,8)));
}
inline v_float64x2 v_cvt_f64(const v_float32x4& a)
{
return v_float64x2(_mm_cvtps_pd(a.val));
}
inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{
return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8))));
}
//! @endcond
}
......
......@@ -626,12 +626,18 @@ template<typename R> struct TheTest
dataA *= 1.1;
R a = dataA;
Rt b = v_cvt_f64(a);
Rt c = v_cvt_f64_high(a);
Data<Rt> resB = b;
Data<Rt> resC = c;
int n = std::min<int>(Rt::nlanes, R::nlanes);
for (int i = 0; i < n; ++i)
{
EXPECT_EQ((typename Rt::lane_type)dataA[i], resB[i]);
}
for (int i = 0; i < n; ++i)
{
EXPECT_EQ((typename Rt::lane_type)dataA[i+n], resC[i]);
}
#endif
return *this;
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment