Commit 301626ba authored by Chip Kerchner's avatar Chip Kerchner Committed by Alexander Alekhin

Merge pull request #15488 from ChipKerchner:vectorizeMinMax2

Vectorize minMaxIdx functions

* Updated documentation and intrinsic tests for v_reduce

* Add other files back in from the forced push

* Prevent an constant overflow with v_reduce for int8 type

* Another alternative to fix constant overflow warning.

* Fix another compiler warning.

* Update comments and change comparison form to be consistent with other vectorized loops.

* Change return type of v_reduce_min & max for v_uint8 and v_uint16 to be same as lane type.

* Cast v_reduce functions to int to avoid overflow. Reduce number of parameters in MINMAXIDX_REDUCE macro.

* Restore cast type for v_reduce_min & max to LaneType
parent 886220b9
......@@ -213,7 +213,7 @@ Regular integers:
|min, max | x | x | x | x | x | x |
|absdiff | x | x | x | x | x | x |
|absdiffs | | x | | x | | |
|reduce | | | | | x | x |
|reduce | x | x | x | x | x | x |
|mask | x | x | x | x | x | x |
|pack | x | x | x | x | x | x |
|pack_u | x | | x | | | |
......@@ -670,7 +670,7 @@ Scheme:
@code
{A1 A2 A3 ...} => min(A1,A2,A3,...)
@endcode
For 32-bit integer and 32-bit floating point types. */
For all types except 64-bit integer and 64-bit floating point types. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
/** @brief Find one max value
......@@ -679,7 +679,7 @@ Scheme:
@code
{A1 A2 A3 ...} => max(A1,A2,A3,...)
@endcode
For 32-bit integer and 32-bit floating point types. */
For all types except 64-bit integer and 64-bit floating point types. */
OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
static const unsigned char popCountTable[] =
......@@ -1219,7 +1219,7 @@ Scheme:
@code
{A1 A2 A3 ...} => sum{A1,A2,A3,...}
@endcode
For 32-bit integer and 32-bit floating point types.*/
*/
template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
{
typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
......
......@@ -1241,6 +1241,20 @@ inline int v_reduce_sum(const v_int16x8& a)
return vget_lane_s32(vpadd_s32(t1, t1), 0);
}
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
_Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
a0 = vp##vectorfunc##_##suffix(a0, a0); \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
......@@ -1249,10 +1263,10 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, min, min, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
......
This diff is collapsed.
......@@ -894,13 +894,18 @@ template<typename R> struct TheTest
TheTest & test_reduce()
{
Data<R> dataA;
int sum = 0;
for (int i = 0; i < R::nlanes; ++i)
{
sum += (int)(dataA[i]); // To prevent a constant overflow with int8
}
R a = dataA;
EXPECT_EQ((LaneType)1, v_reduce_min(a));
EXPECT_EQ((LaneType)R::nlanes, v_reduce_max(a));
EXPECT_EQ((LaneType)((1 + R::nlanes)*R::nlanes/2), v_reduce_sum(a));
EXPECT_EQ((LaneType)1, (LaneType)v_reduce_min(a));
EXPECT_EQ((LaneType)(R::nlanes), (LaneType)v_reduce_max(a));
EXPECT_EQ((int)(sum), (int)v_reduce_sum(a));
dataA[0] += R::nlanes;
R an = dataA;
EXPECT_EQ((LaneType)2, v_reduce_min(an));
EXPECT_EQ((LaneType)2, (LaneType)v_reduce_min(an));
return *this;
}
......@@ -1588,6 +1593,7 @@ void test_hal_intrin_uint8()
.test_dotprod_expand()
.test_min_max()
.test_absdiff()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
......@@ -1629,6 +1635,7 @@ void test_hal_intrin_int8()
.test_absdiff()
.test_absdiffs()
.test_abs()
.test_reduce()
.test_reduce_sad()
.test_mask()
.test_popcount()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment