Commit 3cf91851 authored by Alexander Alekhin

Merge pull request #15538 from terfendail:wui_checkany

parents c4d2e3c0 66842f5a
...@@ -1241,6 +1241,11 @@ inline int v_signmask(const v_int32x8& a) ...@@ -1241,6 +1241,11 @@ inline int v_signmask(const v_int32x8& a)
inline int v_signmask(const v_uint32x8& a) inline int v_signmask(const v_uint32x8& a)
{ return v_signmask(v_reinterpret_as_f32(a)); } { return v_signmask(v_reinterpret_as_f32(a)); }
// Sign mask for v_int64x4: the register is reinterpreted with v_reinterpret_as_f64
// and the result is taken from the v_signmask overload for that reinterpreted type.
inline int v_signmask(const v_int64x4& a)
{
    return v_signmask(v_reinterpret_as_f64(a));
}
// Sign mask for v_uint64x4: same forwarding as the signed variant, i.e. reinterpret
// with v_reinterpret_as_f64 and reuse the v_signmask overload for that type.
inline int v_signmask(const v_uint64x4& a)
{
    return v_signmask(v_reinterpret_as_f64(a));
}
inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; } inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
...@@ -1253,40 +1258,23 @@ inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signma ...@@ -1253,40 +1258,23 @@ inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signma
inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; } inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
/** Checks **/ /** Checks **/
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, and_op, allmask) \ #define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) \ inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \
{ \ inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; }
int mask = v_signmask(v_reinterpret_as_s8(a)); \ OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1)
return and_op(mask, allmask) == allmask; \ OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1)
} \ OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255)
inline bool v_check_any(const _Tpvec& a) \ OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255)
{ \ OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15)
int mask = v_signmask(v_reinterpret_as_s8(a)); \ OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15)
return and_op(mask, allmask) != 0; \ OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255)
} OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, OPENCV_HAL_1ST, -1) #define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, OPENCV_HAL_1ST, -1) inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \
OPENCV_HAL_IMPL_AVX_CHECK(v_uint16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa) inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK(v_int16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa) OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, OPENCV_HAL_AND, (int)0x88888888) OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, OPENCV_HAL_AND, (int)0x88888888)
// Generates v_check_all / v_check_any for a vector type whose v_signmask(a) result
// can be compared directly:
//   v_check_all(a) is true when the sign mask equals `allmask`;
//   v_check_any(a) is true when the sign mask is non-zero.
#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask)                            \
    inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \
    inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float64x4, 15)
////////// Other math ///////// ////////// Other math /////////
......
...@@ -1080,7 +1080,7 @@ Example: ...@@ -1080,7 +1080,7 @@ Example:
v_int32x4 r; // set to {-1, -1, 1, 1} v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011 int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
@endcode @endcode
For all types except 64-bit. */ */
template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a) template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
{ {
int mask = 0; int mask = 0;
...@@ -1109,7 +1109,7 @@ template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a) ...@@ -1109,7 +1109,7 @@ template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
/** @brief Check if all packed values are less than zero /** @brief Check if all packed values are less than zero
Unsigned values will be casted to signed: `uchar 254 => char -2`. Unsigned values will be casted to signed: `uchar 254 => char -2`.
For all types except 64-bit. */ */
template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a) template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
{ {
for( int i = 0; i < n; i++ ) for( int i = 0; i < n; i++ )
...@@ -1121,7 +1121,7 @@ template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a) ...@@ -1121,7 +1121,7 @@ template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
/** @brief Check if any of packed values is less than zero /** @brief Check if any of packed values is less than zero
Unsigned values will be casted to signed: `uchar 254 => char -2`. Unsigned values will be casted to signed: `uchar 254 => char -2`.
For all types except 64-bit. */ */
template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a) template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
{ {
for( int i = 0; i < n; i++ ) for( int i = 0; i < n; i++ )
......
...@@ -1139,9 +1139,17 @@ inline bool v_check_any(const v_##_Tpvec& a) \ ...@@ -1139,9 +1139,17 @@ inline bool v_check_any(const v_##_Tpvec& a) \
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7) OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15) OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31) OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63) inline bool v_check_all(const v_uint64x2& a)
#endif {
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1;
}
inline bool v_check_any(const v_uint64x2& a)
{
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0;
}
inline bool v_check_all(const v_int8x16& a) inline bool v_check_all(const v_int8x16& a)
{ return v_check_all(v_reinterpret_as_u8(a)); } { return v_check_all(v_reinterpret_as_u8(a)); }
...@@ -1161,13 +1169,13 @@ inline bool v_check_any(const v_int32x4& a) ...@@ -1161,13 +1169,13 @@ inline bool v_check_any(const v_int32x4& a)
inline bool v_check_any(const v_float32x4& a) inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_u32(a)); } { return v_check_any(v_reinterpret_as_u32(a)); }
#if CV_SIMD128_64F
inline bool v_check_all(const v_int64x2& a) inline bool v_check_all(const v_int64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); } { return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_int64x2& a) inline bool v_check_any(const v_int64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); } { return v_check_any(v_reinterpret_as_u64(a)); }
#if CV_SIMD128_64F
// v_check_all for v_float64x2: the register is reinterpreted with
// v_reinterpret_as_u64 and the check is delegated to that overload.
inline bool v_check_all(const v_float64x2& a)
{
    return v_check_all(v_reinterpret_as_u64(a));
}
inline bool v_check_any(const v_float64x2& a) inline bool v_check_any(const v_float64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); } { return v_check_any(v_reinterpret_as_u64(a)); }
#endif #endif
......
...@@ -1591,31 +1591,25 @@ inline v_uint32x4 v_popcount(const v_int32x4& a) ...@@ -1591,31 +1591,25 @@ inline v_uint32x4 v_popcount(const v_int32x4& a)
inline v_uint64x2 v_popcount(const v_int64x2& a) inline v_uint64x2 v_popcount(const v_int64x2& a)
{ return v_popcount(v_reinterpret_as_u64(a)); } { return v_popcount(v_reinterpret_as_u64(a)); }
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \ #define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \
inline int v_signmask(const _Tpvec& a) \ inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \
{ \ inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \
return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \ inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; }
} \ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535)
inline bool v_check_all(const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535)
{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15)
inline bool v_check_any(const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15)
{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; } OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3)
#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a) OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15)
inline __m128i v_packq_epi32(__m128i a) OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3)
{
__m128i b = _mm_packs_epi32(a, a); #define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \
return _mm_packs_epi16(b, b); inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \
} inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \
inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; }
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535) OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535) OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); } inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
......
...@@ -899,6 +899,8 @@ inline bool v_check_all(const v_uint16x8& a) ...@@ -899,6 +899,8 @@ inline bool v_check_all(const v_uint16x8& a)
{ return v_check_all(v_reinterpret_as_s16(a)); } { return v_check_all(v_reinterpret_as_s16(a)); }
inline bool v_check_all(const v_uint32x4& a) inline bool v_check_all(const v_uint32x4& a)
{ return v_check_all(v_reinterpret_as_s32(a)); } { return v_check_all(v_reinterpret_as_s32(a)); }
// v_check_all for v_uint64x2: the register is reinterpreted with
// v_reinterpret_as_s64 and the check is delegated to that overload.
inline bool v_check_all(const v_uint64x2& a)
{
    return v_check_all(v_reinterpret_as_s64(a));
}
inline bool v_check_all(const v_float32x4& a) inline bool v_check_all(const v_float32x4& a)
{ return v_check_all(v_reinterpret_as_s32(a)); } { return v_check_all(v_reinterpret_as_s32(a)); }
inline bool v_check_all(const v_float64x2& a) inline bool v_check_all(const v_float64x2& a)
...@@ -913,6 +915,8 @@ inline bool v_check_any(const v_uint16x8& a) ...@@ -913,6 +915,8 @@ inline bool v_check_any(const v_uint16x8& a)
{ return v_check_any(v_reinterpret_as_s16(a)); } { return v_check_any(v_reinterpret_as_s16(a)); }
inline bool v_check_any(const v_uint32x4& a) inline bool v_check_any(const v_uint32x4& a)
{ return v_check_any(v_reinterpret_as_s32(a)); } { return v_check_any(v_reinterpret_as_s32(a)); }
// v_check_any for v_uint64x2: the register is reinterpreted with
// v_reinterpret_as_s64 and the check is delegated to that overload.
inline bool v_check_any(const v_uint64x2& a)
{
    return v_check_any(v_reinterpret_as_s64(a));
}
inline bool v_check_any(const v_float32x4& a) inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_s32(a)); } { return v_check_any(v_reinterpret_as_s32(a)); }
inline bool v_check_any(const v_float64x2& a) inline bool v_check_any(const v_float64x2& a)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment