Commit 2a8344f7 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #10149 from mshabunin:fix-saturate-intrin

parents bc547c42 6c135261
...@@ -1762,14 +1762,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) ...@@ -1762,14 +1762,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
//! @brief Helper macro //! @brief Helper macro
//! @ingroup core_hal_intrin_impl //! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \ #define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \
inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \ { \
_Tpnvec c; \ _Tpnvec c; \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \
{ \ { \
c.s[i] = saturate_cast<_Tpn>(a.s[i]); \ c.s[i] = cast<_Tpn>(a.s[i]); \
c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>(b.s[i]); \ c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \
} \ } \
return c; \ return c; \
} }
...@@ -1783,26 +1783,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ ...@@ -1783,26 +1783,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
//! //!
//! - pack: for 16-, 32- and 64-bit integer input types //! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types //! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack) //!
OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack) //! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack) OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack) OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast)
OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u) OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast)
OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast)
OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast)
//! @} //! @}
//! @brief Helper macro //! @brief Helper macro
//! @ingroup core_hal_intrin_impl //! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \ { \
_Tpnvec c; \ _Tpnvec c; \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \
{ \ { \
c.s[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
} \ } \
return c; \ return c; \
} }
...@@ -1816,51 +1818,55 @@ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve ...@@ -1816,51 +1818,55 @@ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve
//! //!
//! - pack: for 16-, 32- and 64-bit integer input types //! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types //! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack) //!
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack) //! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
//! @} //! @}
//! @brief Helper macro //! @brief Helper macro
//! @ingroup core_hal_intrin_impl //! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \ { \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \
ptr[i] = saturate_cast<_Tpn>(a.s[i]); \ ptr[i] = cast<_Tpn>(a.s[i]); \
} }
//! @name Pack and store //! @name Pack and store
//! @{ //! @{
//! @brief Store values from the input vector into memory with pack //! @brief Store values from the input vector into memory with pack
//! //!
//! Values will be stored into memory with saturating conversion to narrower type. //! Values will be stored into memory with conversion to narrower type.
//! Variant with _u_ suffix converts to corresponding unsigned type. //! Variant with _u_ suffix converts to corresponding unsigned type.
//! //!
//! - pack: for 16-, 32- and 64-bit integer input types //! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types //! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) //!
OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) //! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
//! @} //! @}
//! @brief Helper macro //! @brief Helper macro
//! @ingroup core_hal_intrin_impl //! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \ #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \
template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \ { \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \
ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
} }
//! @name Pack and store with rounding shift //! @name Pack and store with rounding shift
...@@ -1872,14 +1878,16 @@ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec ...@@ -1872,14 +1878,16 @@ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec
//! //!
//! - pack: for 16-, 32- and 64-bit integer input types //! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types //! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) //!
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) //! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast)
//! @} //! @}
/** @brief Matrix multiplication /** @brief Matrix multiplication
......
...@@ -226,7 +226,7 @@ struct v_uint64x2 ...@@ -226,7 +226,7 @@ struct v_uint64x2
v_uint64x2() {} v_uint64x2() {}
explicit v_uint64x2(uint64x2_t v) : val(v) {} explicit v_uint64x2(uint64x2_t v) : val(v) {}
v_uint64x2(unsigned v0, unsigned v1) v_uint64x2(uint64 v0, uint64 v1)
{ {
uint64 v[] = {v0, v1}; uint64 v[] = {v0, v1};
val = vld1q_u64(v); val = vld1q_u64(v);
...@@ -245,7 +245,7 @@ struct v_int64x2 ...@@ -245,7 +245,7 @@ struct v_int64x2
v_int64x2() {} v_int64x2() {}
explicit v_int64x2(int64x2_t v) : val(v) {} explicit v_int64x2(int64x2_t v) : val(v) {}
v_int64x2(int v0, int v1) v_int64x2(int64 v0, int64 v1)
{ {
int64 v[] = {v0, v1}; int64 v[] = {v0, v1};
val = vld1q_s64(v); val = vld1q_s64(v);
...@@ -360,40 +360,40 @@ OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32) ...@@ -360,40 +360,40 @@ OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64) OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
#endif #endif
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \ #define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, pack, mov, rshr) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \ inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \ { \
hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \ hreg a1 = mov(a.val), b1 = mov(b.val); \
return _Tpvec(vcombine_##suffix(a1, b1)); \ return _Tpvec(vcombine_##suffix(a1, b1)); \
} \ } \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \ { \
hreg a1 = vqmov##op##_##wsuffix(a.val); \ hreg a1 = mov(a.val); \
vst1_##suffix(ptr, a1); \ vst1_##suffix(ptr, a1); \
} \ } \
template<int n> inline \ template<int n> inline \
_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \ _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \ { \
hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ hreg a1 = rshr(a.val, n); \
hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \ hreg b1 = rshr(b.val, n); \
return _Tpvec(vcombine_##suffix(a1, b1)); \ return _Tpvec(vcombine_##suffix(a1, b1)); \
} \ } \
template<int n> inline \ template<int n> inline \
void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \ void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \ { \
hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \ hreg a1 = rshr(a.val, n); \
vst1_##suffix(ptr, a1); \ vst1_##suffix(ptr, a1); \
} }
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, pack, vqmovn_u16, vqrshrn_n_u16)
OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, pack, vqmovn_s16, vqrshrn_n_s16)
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, pack, vqmovn_u32, vqrshrn_n_u32)
OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, pack, vqmovn_s32, vqrshrn_n_s32)
OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, pack, vmovn_u64, vrshrn_n_u64)
OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n) OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, pack, vmovn_s64, vrshrn_n_s64)
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un) OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, pack_u, vqmovun_s16, vqrshrun_n_s16)
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un) OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, pack_u, vqmovun_s32, vqrshrun_n_s32)
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& m1, const v_float32x4& m2,
......
...@@ -394,16 +394,17 @@ OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int, ...@@ -394,16 +394,17 @@ OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
vec_sra, vec_packs, vec_add, pack) vec_sra, vec_packs, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long, OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
vec_sr, vec_packs, vec_add, pack) vec_sr, vec_pack, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long, OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
vec_sra, vec_packs, vec_add, pack) vec_sra, vec_pack, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short, OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
vec_sra, vec_packsu, vec_adds, pack_u) vec_sra, vec_packsu, vec_adds, pack_u)
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int, OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
vec_sra, vec_packsu, vec_add, pack_u) vec_sra, vec_packsu, vec_add, pack_u)
OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long, // Following variant is not implemented on other platforms:
vec_sra, vec_packsu, vec_add, pack_u) //OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
// vec_sra, vec_packsu, vec_add, pack_u)
/* Recombine */ /* Recombine */
template <typename _Tpvec> template <typename _Tpvec>
......
...@@ -136,12 +136,27 @@ template<> inline short saturate_cast<short>(double v) { int iv = cvRound( ...@@ -136,12 +136,27 @@ template<> inline short saturate_cast<short>(double v) { int iv = cvRound(
template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); } template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); }
template<> inline int saturate_cast<int>(unsigned v) { return (int)std::min(v, (unsigned)INT_MAX); }
template<> inline int saturate_cast<int>(int64 v) { return (int)((uint64)(v - INT_MIN) <= (uint64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); }
template<> inline int saturate_cast<int>(uint64 v) { return (int)std::min(v, (uint64)INT_MAX); }
template<> inline int saturate_cast<int>(float v) { return cvRound(v); } template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
template<> inline int saturate_cast<int>(double v) { return cvRound(v); } template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
template<> inline unsigned saturate_cast<unsigned>(schar v) { return (unsigned)std::max(v, (schar)0); }
template<> inline unsigned saturate_cast<unsigned>(short v) { return (unsigned)std::max(v, (short)0); }
template<> inline unsigned saturate_cast<unsigned>(int v) { return (unsigned)std::max(v, (int)0); }
template<> inline unsigned saturate_cast<unsigned>(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
template<> inline unsigned saturate_cast<unsigned>(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); }
// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. // we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
template<> inline unsigned saturate_cast<unsigned>(float v) { return cvRound(v); } template<> inline unsigned saturate_cast<unsigned>(float v) { return static_cast<unsigned>(cvRound(v)); }
template<> inline unsigned saturate_cast<unsigned>(double v) { return cvRound(v); } template<> inline unsigned saturate_cast<unsigned>(double v) { return static_cast<unsigned>(cvRound(v)); }
template<> inline uint64 saturate_cast<uint64>(schar v) { return (uint64)std::max(v, (schar)0); }
template<> inline uint64 saturate_cast<uint64>(short v) { return (uint64)std::max(v, (short)0); }
template<> inline uint64 saturate_cast<uint64>(int v) { return (uint64)std::max(v, (int)0); }
template<> inline uint64 saturate_cast<uint64>(int64 v) { return (uint64)std::max(v, (int64)0); }
template<> inline int64 saturate_cast<int64>(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); }
//! @} //! @}
......
...@@ -167,6 +167,14 @@ template<> inline void EXPECT_COMPARE_EQ_<double>(const double a, const double b ...@@ -167,6 +167,14 @@ template<> inline void EXPECT_COMPARE_EQ_<double>(const double a, const double b
EXPECT_DOUBLE_EQ( a, b ); EXPECT_DOUBLE_EQ( a, b );
} }
// pack functions do not do saturation when converting from 64-bit types
template<typename T, typename W>
inline T pack_saturate_cast(W a) { return saturate_cast<T>(a); }
template<>
inline int pack_saturate_cast<int, int64>(int64 a) { return static_cast<int>(a); }
template<>
inline unsigned pack_saturate_cast<unsigned, uint64>(uint64 a) { return static_cast<unsigned>(a); }
template<typename R> struct TheTest template<typename R> struct TheTest
{ {
typedef typename R::lane_type LaneType; typedef typename R::lane_type LaneType;
...@@ -464,16 +472,19 @@ template<typename R> struct TheTest ...@@ -464,16 +472,19 @@ template<typename R> struct TheTest
template <int s> template <int s>
TheTest & test_shift() TheTest & test_shift()
{ {
SCOPED_TRACE(s);
Data<R> dataA; Data<R> dataA;
dataA[0] = static_cast<LaneType>(std::numeric_limits<LaneType>::max());
R a = dataA; R a = dataA;
Data<R> resB = a << s, resC = v_shl<s>(a), resD = a >> s, resE = v_shr<s>(a); Data<R> resB = a << s, resC = v_shl<s>(a), resD = a >> s, resE = v_shr<s>(a);
for (int i = 0; i < R::nlanes; ++i) for (int i = 0; i < R::nlanes; ++i)
{ {
EXPECT_EQ(dataA[i] << s, resB[i]); EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resB[i]);
EXPECT_EQ(dataA[i] << s, resC[i]); EXPECT_EQ(static_cast<LaneType>(dataA[i] << s), resC[i]);
EXPECT_EQ(dataA[i] >> s, resD[i]); EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resD[i]);
EXPECT_EQ(dataA[i] >> s, resE[i]); EXPECT_EQ(static_cast<LaneType>(dataA[i] >> s), resE[i]);
} }
return *this; return *this;
} }
...@@ -668,11 +679,13 @@ template<typename R> struct TheTest ...@@ -668,11 +679,13 @@ template<typename R> struct TheTest
template <int s> template <int s>
TheTest & test_pack() TheTest & test_pack()
{ {
SCOPED_TRACE(s);
typedef typename V_RegTrait128<LaneType>::w_reg Rx2; typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
typedef typename Rx2::lane_type w_type; typedef typename Rx2::lane_type w_type;
Data<Rx2> dataA, dataB; Data<Rx2> dataA, dataB;
dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10; dataA += std::numeric_limits<LaneType>::is_signed ? -10 : 10;
dataB *= 10; dataB *= 10;
dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
Rx2 a = dataA, b = dataB; Rx2 a = dataA, b = dataB;
Data<R> resC = v_pack(a, b); Data<R> resC = v_pack(a, b);
...@@ -688,13 +701,13 @@ template<typename R> struct TheTest ...@@ -688,13 +701,13 @@ template<typename R> struct TheTest
const w_type add = (w_type)1 << (s - 1); const w_type add = (w_type)1 << (s - 1);
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
{ {
EXPECT_EQ(saturate_cast<LaneType>(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
EXPECT_EQ(saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resE[i]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
EXPECT_EQ((LaneType)0, resE[i + n]); EXPECT_EQ((LaneType)0, resE[i + n]);
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
EXPECT_EQ((LaneType)0, resF[i + n]); EXPECT_EQ((LaneType)0, resF[i + n]);
} }
return *this; return *this;
...@@ -703,6 +716,7 @@ template<typename R> struct TheTest ...@@ -703,6 +716,7 @@ template<typename R> struct TheTest
template <int s> template <int s>
TheTest & test_pack_u() TheTest & test_pack_u()
{ {
SCOPED_TRACE(s);
typedef typename V_TypeTraits<LaneType>::w_type LaneType_w; typedef typename V_TypeTraits<LaneType>::w_type LaneType_w;
typedef typename V_RegTrait128<LaneType_w>::int_reg Ri2; typedef typename V_RegTrait128<LaneType_w>::int_reg Ri2;
typedef typename Ri2::lane_type w_type; typedef typename Ri2::lane_type w_type;
...@@ -710,6 +724,7 @@ template<typename R> struct TheTest ...@@ -710,6 +724,7 @@ template<typename R> struct TheTest
Data<Ri2> dataA, dataB; Data<Ri2> dataA, dataB;
dataA += -10; dataA += -10;
dataB *= 10; dataB *= 10;
dataB[0] = static_cast<w_type>(std::numeric_limits<LaneType>::max()) + 17; // to check saturation
Ri2 a = dataA, b = dataB; Ri2 a = dataA, b = dataB;
Data<R> resC = v_pack_u(a, b); Data<R> resC = v_pack_u(a, b);
...@@ -725,13 +740,13 @@ template<typename R> struct TheTest ...@@ -725,13 +740,13 @@ template<typename R> struct TheTest
const w_type add = (w_type)1 << (s - 1); const w_type add = (w_type)1 << (s - 1);
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
{ {
EXPECT_EQ(saturate_cast<LaneType>(dataA[i]), resC[i]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataA[i]), resC[i]);
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resC[i + n]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resC[i + n]);
EXPECT_EQ(saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataA[i] + add) >> s), resD[i]);
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resD[i + n]);
EXPECT_EQ(saturate_cast<LaneType>(dataB[i]), resE[i]); EXPECT_EQ(pack_saturate_cast<LaneType>(dataB[i]), resE[i]);
EXPECT_EQ((LaneType)0, resE[i + n]); EXPECT_EQ((LaneType)0, resE[i + n]);
EXPECT_EQ(saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]); EXPECT_EQ(pack_saturate_cast<LaneType>((dataB[i] + add) >> s), resF[i]);
EXPECT_EQ((LaneType)0, resF[i + n]); EXPECT_EQ((LaneType)0, resF[i + n]);
} }
return *this; return *this;
...@@ -776,6 +791,7 @@ template<typename R> struct TheTest ...@@ -776,6 +791,7 @@ template<typename R> struct TheTest
template<int s> template<int s>
TheTest & test_extract() TheTest & test_extract()
{ {
SCOPED_TRACE(s);
Data<R> dataA, dataB; Data<R> dataA, dataB;
dataB *= 10; dataB *= 10;
R a = dataA, b = dataB; R a = dataA, b = dataB;
...@@ -796,6 +812,7 @@ template<typename R> struct TheTest ...@@ -796,6 +812,7 @@ template<typename R> struct TheTest
template<int s> template<int s>
TheTest & test_rotate() TheTest & test_rotate()
{ {
SCOPED_TRACE(s);
Data<R> dataA, dataB; Data<R> dataA, dataB;
dataB *= 10; dataB *= 10;
R a = dataA, b = dataB; R a = dataA, b = dataB;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment