Commit 301626ba, authored by Chip Kerchner, committed by Alexander Alekhin

Merge pull request #15488 from ChipKerchner:vectorizeMinMax2

Vectorize minMaxIdx functions

* Updated documentation and intrinsic tests for v_reduce

* Add other files back in from the forced push

* Prevent a constant overflow with v_reduce for int8 type

* Another alternative to fix constant overflow warning.

* Fix another compiler warning.

* Update comments and change comparison form to be consistent with other vectorized loops.

* Change return type of v_reduce_min & max for v_uint8 and v_uint16 to be same as lane type.

* Cast v_reduce functions to int to avoid overflow. Reduce number of parameters in MINMAXIDX_REDUCE macro.

* Restore cast type for v_reduce_min & max to LaneType
parent 886220b9
...@@ -213,7 +213,7 @@ Regular integers: ...@@ -213,7 +213,7 @@ Regular integers:
|min, max | x | x | x | x | x | x | |min, max | x | x | x | x | x | x |
|absdiff | x | x | x | x | x | x | |absdiff | x | x | x | x | x | x |
|absdiffs | | x | | x | | | |absdiffs | | x | | x | | |
|reduce | | | | | x | x | |reduce | x | x | x | x | x | x |
|mask | x | x | x | x | x | x | |mask | x | x | x | x | x | x |
|pack | x | x | x | x | x | x | |pack | x | x | x | x | x | x |
|pack_u | x | | x | | | | |pack_u | x | | x | | | |
...@@ -670,7 +670,7 @@ Scheme: ...@@ -670,7 +670,7 @@ Scheme:
@code @code
{A1 A2 A3 ...} => min(A1,A2,A3,...) {A1 A2 A3 ...} => min(A1,A2,A3,...)
@endcode @endcode
For 32-bit integer and 32-bit floating point types. */ For all types except 64-bit integer and 64-bit floating point types. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
/** @brief Find one max value /** @brief Find one max value
...@@ -679,7 +679,7 @@ Scheme: ...@@ -679,7 +679,7 @@ Scheme:
@code @code
{A1 A2 A3 ...} => max(A1,A2,A3,...) {A1 A2 A3 ...} => max(A1,A2,A3,...)
@endcode @endcode
For 32-bit integer and 32-bit floating point types. */ For all types except 64-bit integer and 64-bit floating point types. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
static const unsigned char popCountTable[] = static const unsigned char popCountTable[] =
...@@ -1219,7 +1219,7 @@ Scheme: ...@@ -1219,7 +1219,7 @@ Scheme:
@code @code
{A1 A2 A3 ...} => sum{A1,A2,A3,...} {A1 A2 A3 ...} => sum{A1,A2,A3,...}
@endcode @endcode
For 32-bit integer and 32-bit floating point types.*/ */
template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
{ {
typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
......
...@@ -1241,6 +1241,20 @@ inline int v_reduce_sum(const v_int16x8& a) ...@@ -1241,6 +1241,20 @@ inline int v_reduce_sum(const v_int16x8& a)
return vget_lane_s32(vpadd_s32(t1, t1), 0); return vget_lane_s32(vpadd_s32(t1, t1), 0);
} }
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \ inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \ { \
...@@ -1249,10 +1263,10 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \ ...@@ -1249,10 +1263,10 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
} }
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, max, max, u16) OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, min, min, u16) OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, max, max, s16) OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, min, min, s16) OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \ inline scalartype v_reduce_##func(const _Tpvec& a) \
......
This diff is collapsed.
...@@ -894,13 +894,18 @@ template<typename R> struct TheTest ...@@ -894,13 +894,18 @@ template<typename R> struct TheTest
TheTest & test_reduce() TheTest & test_reduce()
{ {
Data<R> dataA; Data<R> dataA;
int sum = 0;
for (int i = 0; i < R::nlanes; ++i)
{
sum += (int)(dataA[i]); // To prevent a constant overflow with int8
}
R a = dataA; R a = dataA;
EXPECT_EQ((LaneType)1, v_reduce_min(a)); EXPECT_EQ((LaneType)1, (LaneType)v_reduce_min(a));
EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a)); EXPECT_EQ((LaneType)(R::nlanes), (LaneType)v_reduce_max(a));
EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a)); EXPECT_EQ((int)(sum), (int)v_reduce_sum(a));
dataA[0] += R::nlanes; dataA[0] += R::nlanes;
R an = dataA; R an = dataA;
EXPECT_EQ((LaneType)2, v_reduce_min(an)); EXPECT_EQ((LaneType)2, (LaneType)v_reduce_min(an));
return *this; return *this;
} }
...@@ -1588,6 +1593,7 @@ void test_hal_intrin_uint8() ...@@ -1588,6 +1593,7 @@ void test_hal_intrin_uint8()
.test_dotprod_expand() .test_dotprod_expand()
.test_min_max() .test_min_max()
.test_absdiff() .test_absdiff()
.test_reduce()
.test_reduce_sad() .test_reduce_sad()
.test_mask() .test_mask()
.test_popcount() .test_popcount()
...@@ -1629,6 +1635,7 @@ void test_hal_intrin_int8() ...@@ -1629,6 +1635,7 @@ void test_hal_intrin_int8()
.test_absdiff() .test_absdiff()
.test_absdiffs() .test_absdiffs()
.test_abs() .test_abs()
.test_reduce()
.test_reduce_sad() .test_reduce_sad()
.test_mask() .test_mask()
.test_popcount() .test_popcount()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment