Commit 3cf91851 authored by Alexander Alekhin

Merge pull request #15538 from terfendail:wui_checkany

parents c4d2e3c0 66842f5a
......@@ -1241,6 +1241,11 @@ inline int v_signmask(const v_int32x8& a)
/** Sign masks for 32- and 64-bit integer vectors.
    The register is reinterpreted as f32 / f64 and handed on to the
    v_signmask overload for that floating-point vector type. */
inline int v_signmask(const v_uint32x8& a)
{
    return v_signmask(v_reinterpret_as_f32(a));
}

inline int v_signmask(const v_int64x4& a)
{
    return v_signmask(v_reinterpret_as_f64(a));
}

inline int v_signmask(const v_uint64x4& a)
{
    return v_signmask(v_reinterpret_as_f64(a));
}
/** Counts the trailing zeros of the byte-wise sign mask of the vector.
    For 16-bit lanes the resulting byte position is divided by 2 to turn it
    into a lane position. */
inline int v_scan_forward(const v_int8x32& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns);
}

inline int v_scan_forward(const v_uint8x32& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns);
}

inline int v_scan_forward(const v_int16x16& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns) / 2;
}
......@@ -1253,40 +1258,23 @@ inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signma
/** Counts the trailing zeros of the byte-wise sign mask and divides the byte
    position by 8 (bytes per 64-bit lane) to obtain a lane position. */
inline int v_scan_forward(const v_float64x4& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns) / 8;
}
/** Checks **/
// Generates v_check_all / v_check_any for the vector type `_Tpvec`.
// Both take the sign mask of the vector viewed as bytes and combine it with
// `allmask` through `and_op`: v_check_all requires the result to equal
// `allmask`, v_check_any requires it to be non-zero.
// NOTE(review): OPENCV_HAL_1ST / OPENCV_HAL_AND are defined outside this
// excerpt; presumably they yield "first argument" and "bitwise AND" - confirm.
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, and_op, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
int mask = v_signmask(v_reinterpret_as_s8(a)); \
return and_op(mask, allmask) != 0; \
}
// 0xaaaaaaaa has every second bit set and 0x88888888 every fourth bit set;
// presumably these pick the byte that carries the sign of each 16- / 32-bit
// lane out of the byte-wise mask - TODO confirm against v_signmask(v_int8x32).
OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_int16x16, OPENCV_HAL_AND, (int)0xaaaaaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, OPENCV_HAL_AND, (int)0x88888888)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, OPENCV_HAL_AND, (int)0x88888888)
// Generates v_check_all / v_check_any for a floating-point vector type.
// v_check_all compares the vector's own sign mask with `allmask`;
// v_check_any tests the sign mask for being non-zero.
#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \
inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; }

// 255 == 8 bits set, 15 == 4 bits set.
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float64x4, 15)
// Generates v_check_all / v_check_any for `_Tpvec` directly from that type's
// v_signmask overload: "all" means the mask equals `allmask`, "any" means the
// mask is non-zero.
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    const int signBits = v_signmask(a); \
    return signBits == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    const int signBits = v_signmask(a); \
    return signBits != 0; \
}

// -1 == all 32 bits set, 255 == 8 bits set, 15 == 4 bits set.
OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15)
// Generates v_check_all / v_check_any for 16-bit vector types.
// The vector is viewed as bytes and only the odd bit positions of the
// byte-wise sign mask (0xaaaaaaaa) are examined.
// NOTE(review): this presumably selects the high byte of every 16-bit lane;
// confirm against the 8-bit v_signmask implementation.
#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    const unsigned oddBits = v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa; \
    return oddBits == 0xaaaaaaaa; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    const unsigned oddBits = v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa; \
    return oddBits != 0; \
}

OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16)
OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16)
////////// Other math /////////
......
......@@ -1080,7 +1080,7 @@ Example:
v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
@endcode
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
{
int mask = 0;
......@@ -1109,7 +1109,7 @@ template <typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
/** @brief Check if all packed values are less than zero
Unsigned values will be cast to signed: `uchar 254 => char -2`.
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
{
for( int i = 0; i < n; i++ )
......@@ -1121,7 +1121,7 @@ template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
/** @brief Check if any of packed values is less than zero
Unsigned values will be cast to signed: `uchar 254 => char -2`.
For all types except 64-bit. */
*/
template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
{
for( int i = 0; i < n; i++ )
......
......@@ -1139,9 +1139,17 @@ inline bool v_check_any(const v_##_Tpvec& a) \
// Instantiations of OPENCV_HAL_IMPL_NEON_CHECK_ALLANY for the unsigned vector
// types; the 64-bit instantiation is only compiled when CV_SIMD128_64F is set.
// NOTE(review): the macro body lies outside this excerpt. The last argument
// (7, 15, 31, 63) is one less than the lane width in bits - presumably the
// shift that isolates the sign bit; confirm against the macro definition.
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
#endif
inline bool v_check_all(const v_uint64x2& a)
{
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1;
}
inline bool v_check_any(const v_uint64x2& a)
{
uint64x2_t v0 = vshrq_n_u64(a.val, 63);
return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0;
}
/** Signed 8-bit variant: forwards to the unsigned overload on the same bits. */
inline bool v_check_all(const v_int8x16& a)
{
    return v_check_all(v_reinterpret_as_u8(a));
}
......@@ -1161,13 +1169,13 @@ inline bool v_check_any(const v_int32x4& a)
/** Single-precision variant: forwards to the 32-bit unsigned overload on the same bits. */
inline bool v_check_any(const v_float32x4& a)
{
    return v_check_any(v_reinterpret_as_u32(a));
}
#if CV_SIMD128_64F
/** 64-bit lane variants: each forwards to the v_uint64x2 overload after
    reinterpreting the register as unsigned 64-bit lanes. */
inline bool v_check_all(const v_int64x2& a)
{
    return v_check_all(v_reinterpret_as_u64(a));
}

inline bool v_check_all(const v_float64x2& a)
{
    return v_check_all(v_reinterpret_as_u64(a));
}

inline bool v_check_any(const v_int64x2& a)
{
    return v_check_any(v_reinterpret_as_u64(a));
}
#if CV_SIMD128_64F
/** Double-precision variants, compiled only when CV_SIMD128_64F is set.
    Both forward to the v_uint64x2 overloads on the same bits. */
inline bool v_check_all(const v_float64x2& a)
{
    return v_check_all(v_reinterpret_as_u64(a));
}

inline bool v_check_any(const v_float64x2& a)
{
    return v_check_any(v_reinterpret_as_u64(a));
}
#endif
......
......@@ -1591,31 +1591,25 @@ inline v_uint32x4 v_popcount(const v_int32x4& a)
/** Signed 64-bit variant: forwards to the v_uint64x2 overload on the same bits. */
inline v_uint64x2 v_popcount(const v_int64x2& a)
{
    return v_popcount(v_reinterpret_as_u64(a));
}
// Generates v_signmask / v_check_all / v_check_any for the vector type `_Tpvec`.
//   suffix   - selects the intrinsic _mm_movemask_<suffix>
//   pack_op  - applied to the register before the movemask in v_signmask only
//   and_op   - combines the movemask result with `signmask` / `allmask`
//   signmask - second operand of and_op in v_signmask
//   allmask  - second operand of and_op in v_check_all / v_check_any;
//              v_check_all also compares the combined value against it
// NOTE(review): the `and_op` / `pack_op` helper macros (OPENCV_HAL_1ST,
// OPENCV_HAL_AND, OPENCV_HAL_NOP) are defined outside this excerpt - confirm
// their exact expansion before changing this macro.
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
inline int v_signmask(const _Tpvec& a) \
{ \
return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
} \
inline bool v_check_all(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
inline bool v_check_any(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }
// Packs register `a` with itself through _mm_packs_epi16; passed as the
// `pack_op` argument for the 16-bit vector types.
#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
/** Narrows the 32-bit lanes of @p a in two packing steps: first with
    _mm_packs_epi32, then with _mm_packs_epi16, each time packing the
    register with itself. */
inline __m128i v_packq_epi32(__m128i a)
{
    const __m128i words = _mm_packs_epi32(a, a);   // 32-bit -> 16-bit lanes
    return _mm_packs_epi16(words, words);          // 16-bit -> 8-bit lanes
}
// Instantiations of the six-argument OPENCV_HAL_IMPL_SSE_CHECK_SIGNS form.
// 8-bit types: byte movemask used as is (65535 == 16 bits set).
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
// 16-bit types: v_signmask packs to bytes and keeps 8 bits (255); the checks
// use the unpacked byte mask with 0xaaaa (every second bit set).
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
// 32-bit types: v_signmask packs twice and keeps 4 bits (15); the checks use
// the unpacked byte mask with 0x8888 (every fourth bit set).
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
// Floating-point types: _mm_movemask_ps / _mm_movemask_pd on the register itself.
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \
inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \
inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \
inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; }
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3)
#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \
inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \
inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \
inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; }
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8)
/** Counts the trailing zeros of the byte-wise sign mask of the vector. */
inline int v_scan_forward(const v_int8x16& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns);
}

inline int v_scan_forward(const v_uint8x16& a)
{
    const int byteSigns = v_signmask(v_reinterpret_as_s8(a));
    return trailingZeros32(byteSigns);
}
......
......@@ -899,6 +899,8 @@ inline bool v_check_all(const v_uint16x8& a)
{ return v_check_all(v_reinterpret_as_s16(a)); }
/** v_check_all for unsigned and single-precision vectors: each overload
    reinterprets the register as signed lanes of the same width and forwards
    to the signed overload. */
inline bool v_check_all(const v_uint32x4& a)
{
    return v_check_all(v_reinterpret_as_s32(a));
}

inline bool v_check_all(const v_uint64x2& a)
{
    return v_check_all(v_reinterpret_as_s64(a));
}

inline bool v_check_all(const v_float32x4& a)
{
    return v_check_all(v_reinterpret_as_s32(a));
}
inline bool v_check_all(const v_float64x2& a)
......@@ -913,6 +915,8 @@ inline bool v_check_any(const v_uint16x8& a)
{ return v_check_any(v_reinterpret_as_s16(a)); }
/** v_check_any for unsigned and single-precision vectors: each overload
    reinterprets the register as signed lanes of the same width and forwards
    to the signed overload. */
inline bool v_check_any(const v_uint32x4& a)
{
    return v_check_any(v_reinterpret_as_s32(a));
}

inline bool v_check_any(const v_uint64x2& a)
{
    return v_check_any(v_reinterpret_as_s64(a));
}

inline bool v_check_any(const v_float32x4& a)
{
    return v_check_any(v_reinterpret_as_s32(a));
}
inline bool v_check_any(const v_float64x2& a)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment