Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
2a8344f7
Commit
2a8344f7
authored
Nov 28, 2017
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #10149 from mshabunin:fix-saturate-intrin
parents
bc547c42
6c135261
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
122 additions
and
81 deletions
+122
-81
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+51
-43
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+16
-16
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+5
-4
saturate.hpp
modules/core/include/opencv2/core/saturate.hpp
+17
-2
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+33
-16
No files found.
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
2a8344f7
...
...
@@ -1762,14 +1762,14 @@ OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
//! @brief Helper macro
//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \
#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix
, cast
) \
inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpnvec c; \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \
{ \
c.s[i] =
saturate_
cast<_Tpn>(a.s[i]); \
c.s[i+_Tpvec::nlanes] =
saturate_
cast<_Tpn>(b.s[i]); \
c.s[i] = cast<_Tpn>(a.s[i]); \
c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \
} \
return c; \
}
...
...
@@ -1783,26 +1783,28 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
//!
//! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_PACK
(
v_uint16x8
,
v_uint8x16
,
uchar
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_int16x8
,
v_int8x16
,
schar
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_uint32x4
,
v_uint16x8
,
ushort
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_int32x4
,
v_int16x8
,
short
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_uint64x2
,
v_uint32x4
,
unsigned
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_int64x2
,
v_int32x4
,
int
,
pack
)
OPENCV_HAL_IMPL_C_PACK
(
v_int16x8
,
v_uint8x16
,
uchar
,
pack_u
)
OPENCV_HAL_IMPL_C_PACK
(
v_int32x4
,
v_uint16x8
,
ushort
,
pack_u
)
//!
//! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_PACK
(
v_uint16x8
,
v_uint8x16
,
uchar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_int16x8
,
v_int8x16
,
schar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_uint32x4
,
v_uint16x8
,
ushort
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_int32x4
,
v_int16x8
,
short
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_uint64x2
,
v_uint32x4
,
unsigned
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_int64x2
,
v_int32x4
,
int
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_int16x8
,
v_uint8x16
,
uchar
,
pack_u
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK
(
v_int32x4
,
v_uint16x8
,
ushort
,
pack_u
,
saturate_cast
)
//! @}
//! @brief Helper macro
//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix
, cast
) \
template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
{ \
_Tpnvec c; \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \
{ \
c.s[i] =
saturate_
cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
c.s[i+_Tpvec::nlanes] =
saturate_
cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
} \
return c; \
}
...
...
@@ -1816,51 +1818,55 @@ template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve
//!
//! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
)
//!
//! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
,
saturate_cast
)
//! @}
//! @brief Helper macro
//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix
, cast
) \
inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \
ptr[i] =
saturate_
cast<_Tpn>(a.s[i]); \
ptr[i] = cast<_Tpn>(a.s[i]); \
}
//! @name Pack and store
//! @{
//! @brief Store values from the input vector into memory with pack
//!
//! Values will be stored into memory with
saturating
conversion to narrower type.
//! Values will be stored into memory with conversion to narrower type.
//! Variant with _u_ suffix converts to corresponding unsigned type.
//!
//! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
)
//!
//! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
,
saturate_cast
)
OPENCV_HAL_IMPL_C_PACK_STORE
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
,
saturate_cast
)
//! @}
//! @brief Helper macro
//! @ingroup core_hal_intrin_impl
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix
, cast
) \
template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
{ \
for( int i = 0; i < _Tpvec::nlanes; i++ ) \
ptr[i] =
saturate_
cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
}
//! @name Pack and store with rounding shift
...
...
@@ -1872,14 +1878,16 @@ template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec
//!
//! - pack: for 16-, 32- and 64-bit integer input types
//! - pack_u: for 16- and 32-bit signed integer input types
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
)
//!
//! @note All variants except 64-bit use saturation.
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint16x8
,
ushort
,
v_uint8x16
,
uchar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int16x8
,
short
,
v_int8x16
,
schar
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint32x4
,
unsigned
,
v_uint16x8
,
ushort
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int32x4
,
int
,
v_int16x8
,
short
,
pack
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_uint64x2
,
uint64
,
v_uint32x4
,
unsigned
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int64x2
,
int64
,
v_int32x4
,
int
,
pack
,
static_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int16x8
,
short
,
v_uint8x16
,
uchar
,
pack_u
,
saturate_cast
)
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE
(
v_int32x4
,
int
,
v_uint16x8
,
ushort
,
pack_u
,
saturate_cast
)
//! @}
/** @brief Matrix multiplication
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
2a8344f7
...
...
@@ -226,7 +226,7 @@ struct v_uint64x2
v_uint64x2
()
{}
explicit
v_uint64x2
(
uint64x2_t
v
)
:
val
(
v
)
{}
v_uint64x2
(
u
nsigned
v0
,
unsigned
v1
)
v_uint64x2
(
u
int64
v0
,
uint64
v1
)
{
uint64
v
[]
=
{
v0
,
v1
};
val
=
vld1q_u64
(
v
);
...
...
@@ -245,7 +245,7 @@ struct v_int64x2
v_int64x2
()
{}
explicit
v_int64x2
(
int64x2_t
v
)
:
val
(
v
)
{}
v_int64x2
(
int
v0
,
int
v1
)
v_int64x2
(
int
64
v0
,
int64
v1
)
{
int64
v
[]
=
{
v0
,
v1
};
val
=
vld1q_s64
(
v
);
...
...
@@ -360,40 +360,40 @@ OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
OPENCV_HAL_IMPL_NEON_INIT_64
(
float64x2
,
f64
)
#endif
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec,
wsuffix, pack, op
) \
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec,
pack, mov, rshr
) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
hreg a1 =
vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix
(b.val); \
hreg a1 =
mov(a.val), b1 = mov
(b.val); \
return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
hreg a1 =
vqmov##op##_##wsuffix
(a.val); \
hreg a1 =
mov
(a.val); \
vst1_##suffix(ptr, a1); \
} \
template<int n> inline \
_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
hreg a1 =
vqrshr##op##_n_##wsuffix
(a.val, n); \
hreg b1 =
vqrshr##op##_n_##wsuffix
(b.val, n); \
hreg a1 =
rshr
(a.val, n); \
hreg b1 =
rshr
(b.val, n); \
return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
template<int n> inline \
void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
hreg a1 =
vqrshr##op##_n_##wsuffix
(a.val, n); \
hreg a1 =
rshr
(a.val, n); \
vst1_##suffix(ptr, a1); \
}
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_uint16x8
,
u16
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int8x16
,
schar
,
int8x8_t
,
s8
,
v_int16x8
,
s16
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_uint32x4
,
u32
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int16x8
,
short
,
int16x4_t
,
s16
,
v_int32x4
,
s32
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint32x4
,
unsigned
,
uint32x2_t
,
u32
,
v_uint64x2
,
u64
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int32x4
,
int
,
int32x2_t
,
s32
,
v_int64x2
,
s64
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_uint16x8
,
pack
,
vqmovn_u16
,
vqrshrn_n_u16
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int8x16
,
schar
,
int8x8_t
,
s8
,
v_int16x8
,
pack
,
vqmovn_s16
,
vqrshrn_n_s16
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_uint32x4
,
pack
,
vqmovn_u32
,
vqrshrn_n_u32
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int16x8
,
short
,
int16x4_t
,
s16
,
v_int32x4
,
pack
,
vqmovn_s32
,
vqrshrn_n_s32
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint32x4
,
unsigned
,
uint32x2_t
,
u32
,
v_uint64x2
,
pack
,
vmovn_u64
,
vrshrn_n_u64
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int32x4
,
int
,
int32x2_t
,
s32
,
v_int64x2
,
pack
,
vmovn_s64
,
vrshrn_n_s64
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_int16x8
,
s16
,
pack_u
,
un
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_int32x4
,
s32
,
pack_u
,
un
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_int16x8
,
pack_u
,
vqmovun_s16
,
vqrshrun_n_s16
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_int32x4
,
pack_u
,
vqmovun_s32
,
vqrshrun_n_s32
)
inline
v_float32x4
v_matmul
(
const
v_float32x4
&
v
,
const
v_float32x4
&
m0
,
const
v_float32x4
&
m1
,
const
v_float32x4
&
m2
,
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
2a8344f7
...
...
@@ -394,16 +394,17 @@ OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
vec_sra
,
vec_packs
,
vec_add
,
pack
)
OPENCV_HAL_IMPL_VSX_PACK
(
v_uint32x4
,
uint
,
v_uint64x2
,
unsigned
long
long
,
unsigned
long
long
,
vec_sr
,
vec_pack
s
,
vec_add
,
pack
)
vec_sr
,
vec_pack
,
vec_add
,
pack
)
OPENCV_HAL_IMPL_VSX_PACK
(
v_int32x4
,
int
,
v_int64x2
,
unsigned
long
long
,
long
long
,
vec_sra
,
vec_pack
s
,
vec_add
,
pack
)
vec_sra
,
vec_pack
,
vec_add
,
pack
)
OPENCV_HAL_IMPL_VSX_PACK
(
v_uint8x16
,
uchar
,
v_int16x8
,
unsigned
short
,
short
,
vec_sra
,
vec_packsu
,
vec_adds
,
pack_u
)
OPENCV_HAL_IMPL_VSX_PACK
(
v_uint16x8
,
ushort
,
v_int32x4
,
unsigned
int
,
int
,
vec_sra
,
vec_packsu
,
vec_add
,
pack_u
)
OPENCV_HAL_IMPL_VSX_PACK
(
v_uint32x4
,
uint
,
v_int64x2
,
unsigned
long
long
,
long
long
,
vec_sra
,
vec_packsu
,
vec_add
,
pack_u
)
// Following variant is not implemented on other platforms:
//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
// vec_sra, vec_packsu, vec_add, pack_u)
/* Recombine */
template
<
typename
_Tpvec
>
...
...
modules/core/include/opencv2/core/saturate.hpp
View file @
2a8344f7
...
...
@@ -136,12 +136,27 @@ template<> inline short saturate_cast<short>(double v) { int iv = cvRound(
template
<>
inline
short
saturate_cast
<
short
>
(
int64
v
)
{
return
(
short
)((
uint64
)((
int64
)
v
-
SHRT_MIN
)
<=
(
uint64
)
USHRT_MAX
?
v
:
v
>
0
?
SHRT_MAX
:
SHRT_MIN
);
}
template
<>
inline
short
saturate_cast
<
short
>
(
uint64
v
)
{
return
(
short
)
std
::
min
(
v
,
(
uint64
)
SHRT_MAX
);
}
template
<>
inline
int
saturate_cast
<
int
>
(
unsigned
v
)
{
return
(
int
)
std
::
min
(
v
,
(
unsigned
)
INT_MAX
);
}
template
<>
inline
int
saturate_cast
<
int
>
(
int64
v
)
{
return
(
int
)((
uint64
)(
v
-
INT_MIN
)
<=
(
uint64
)
UINT_MAX
?
v
:
v
>
0
?
INT_MAX
:
INT_MIN
);
}
template
<>
inline
int
saturate_cast
<
int
>
(
uint64
v
)
{
return
(
int
)
std
::
min
(
v
,
(
uint64
)
INT_MAX
);
}
template
<>
inline
int
saturate_cast
<
int
>
(
float
v
)
{
return
cvRound
(
v
);
}
template
<>
inline
int
saturate_cast
<
int
>
(
double
v
)
{
return
cvRound
(
v
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
schar
v
)
{
return
(
unsigned
)
std
::
max
(
v
,
(
schar
)
0
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
short
v
)
{
return
(
unsigned
)
std
::
max
(
v
,
(
short
)
0
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
int
v
)
{
return
(
unsigned
)
std
::
max
(
v
,
(
int
)
0
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
int64
v
)
{
return
(
unsigned
)((
uint64
)
v
<=
(
uint64
)
UINT_MAX
?
v
:
v
>
0
?
UINT_MAX
:
0
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
uint64
v
)
{
return
(
unsigned
)
std
::
min
(
v
,
(
uint64
)
UINT_MAX
);
}
// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
float
v
)
{
return
cvRound
(
v
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
double
v
)
{
return
cvRound
(
v
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
float
v
)
{
return
static_cast
<
unsigned
>
(
cvRound
(
v
));
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
double
v
)
{
return
static_cast
<
unsigned
>
(
cvRound
(
v
));
}
template
<>
inline
uint64
saturate_cast
<
uint64
>
(
schar
v
)
{
return
(
uint64
)
std
::
max
(
v
,
(
schar
)
0
);
}
template
<>
inline
uint64
saturate_cast
<
uint64
>
(
short
v
)
{
return
(
uint64
)
std
::
max
(
v
,
(
short
)
0
);
}
template
<>
inline
uint64
saturate_cast
<
uint64
>
(
int
v
)
{
return
(
uint64
)
std
::
max
(
v
,
(
int
)
0
);
}
template
<>
inline
uint64
saturate_cast
<
uint64
>
(
int64
v
)
{
return
(
uint64
)
std
::
max
(
v
,
(
int64
)
0
);
}
template
<>
inline
int64
saturate_cast
<
int64
>
(
uint64
v
)
{
return
(
int64
)
std
::
min
(
v
,
(
uint64
)
LLONG_MAX
);
}
//! @}
...
...
modules/core/test/test_intrin_utils.hpp
View file @
2a8344f7
...
...
@@ -167,6 +167,14 @@ template<> inline void EXPECT_COMPARE_EQ_<double>(const double a, const double b
EXPECT_DOUBLE_EQ
(
a
,
b
);
}
// pack functions do not do saturation when converting from 64-bit types
template
<
typename
T
,
typename
W
>
inline
T
pack_saturate_cast
(
W
a
)
{
return
saturate_cast
<
T
>
(
a
);
}
template
<>
inline
int
pack_saturate_cast
<
int
,
int64
>
(
int64
a
)
{
return
static_cast
<
int
>
(
a
);
}
template
<>
inline
unsigned
pack_saturate_cast
<
unsigned
,
uint64
>
(
uint64
a
)
{
return
static_cast
<
unsigned
>
(
a
);
}
template
<
typename
R
>
struct
TheTest
{
typedef
typename
R
::
lane_type
LaneType
;
...
...
@@ -464,16 +472,19 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_shift
()
{
SCOPED_TRACE
(
s
);
Data
<
R
>
dataA
;
dataA
[
0
]
=
static_cast
<
LaneType
>
(
std
::
numeric_limits
<
LaneType
>::
max
());
R
a
=
dataA
;
Data
<
R
>
resB
=
a
<<
s
,
resC
=
v_shl
<
s
>
(
a
),
resD
=
a
>>
s
,
resE
=
v_shr
<
s
>
(
a
);
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
{
EXPECT_EQ
(
dataA
[
i
]
<<
s
,
resB
[
i
]);
EXPECT_EQ
(
dataA
[
i
]
<<
s
,
resC
[
i
]);
EXPECT_EQ
(
dataA
[
i
]
>>
s
,
resD
[
i
]);
EXPECT_EQ
(
dataA
[
i
]
>>
s
,
resE
[
i
]);
EXPECT_EQ
(
static_cast
<
LaneType
>
(
dataA
[
i
]
<<
s
)
,
resB
[
i
]);
EXPECT_EQ
(
static_cast
<
LaneType
>
(
dataA
[
i
]
<<
s
)
,
resC
[
i
]);
EXPECT_EQ
(
static_cast
<
LaneType
>
(
dataA
[
i
]
>>
s
)
,
resD
[
i
]);
EXPECT_EQ
(
static_cast
<
LaneType
>
(
dataA
[
i
]
>>
s
)
,
resE
[
i
]);
}
return
*
this
;
}
...
...
@@ -668,11 +679,13 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_pack
()
{
SCOPED_TRACE
(
s
);
typedef
typename
V_RegTrait128
<
LaneType
>::
w_reg
Rx2
;
typedef
typename
Rx2
::
lane_type
w_type
;
Data
<
Rx2
>
dataA
,
dataB
;
dataA
+=
std
::
numeric_limits
<
LaneType
>::
is_signed
?
-
10
:
10
;
dataB
*=
10
;
dataB
[
0
]
=
static_cast
<
w_type
>
(
std
::
numeric_limits
<
LaneType
>::
max
())
+
17
;
// to check saturation
Rx2
a
=
dataA
,
b
=
dataB
;
Data
<
R
>
resC
=
v_pack
(
a
,
b
);
...
...
@@ -688,13 +701,13 @@ template<typename R> struct TheTest
const
w_type
add
=
(
w_type
)
1
<<
(
s
-
1
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataA
[
i
]),
resC
[
i
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resC
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataA
[
i
]
+
add
)
>>
s
),
resD
[
i
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resD
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resE
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataA
[
i
]),
resC
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resC
[
i
+
n
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataA
[
i
]
+
add
)
>>
s
),
resD
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resD
[
i
+
n
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resE
[
i
]);
EXPECT_EQ
((
LaneType
)
0
,
resE
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resF
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resF
[
i
]);
EXPECT_EQ
((
LaneType
)
0
,
resF
[
i
+
n
]);
}
return
*
this
;
...
...
@@ -703,6 +716,7 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_pack_u
()
{
SCOPED_TRACE
(
s
);
typedef
typename
V_TypeTraits
<
LaneType
>::
w_type
LaneType_w
;
typedef
typename
V_RegTrait128
<
LaneType_w
>::
int_reg
Ri2
;
typedef
typename
Ri2
::
lane_type
w_type
;
...
...
@@ -710,6 +724,7 @@ template<typename R> struct TheTest
Data
<
Ri2
>
dataA
,
dataB
;
dataA
+=
-
10
;
dataB
*=
10
;
dataB
[
0
]
=
static_cast
<
w_type
>
(
std
::
numeric_limits
<
LaneType
>::
max
())
+
17
;
// to check saturation
Ri2
a
=
dataA
,
b
=
dataB
;
Data
<
R
>
resC
=
v_pack_u
(
a
,
b
);
...
...
@@ -725,13 +740,13 @@ template<typename R> struct TheTest
const
w_type
add
=
(
w_type
)
1
<<
(
s
-
1
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataA
[
i
]),
resC
[
i
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resC
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataA
[
i
]
+
add
)
>>
s
),
resD
[
i
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resD
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resE
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataA
[
i
]),
resC
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resC
[
i
+
n
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataA
[
i
]
+
add
)
>>
s
),
resD
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resD
[
i
+
n
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
(
dataB
[
i
]),
resE
[
i
]);
EXPECT_EQ
((
LaneType
)
0
,
resE
[
i
+
n
]);
EXPECT_EQ
(
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resF
[
i
]);
EXPECT_EQ
(
pack_
saturate_cast
<
LaneType
>
((
dataB
[
i
]
+
add
)
>>
s
),
resF
[
i
]);
EXPECT_EQ
((
LaneType
)
0
,
resF
[
i
+
n
]);
}
return
*
this
;
...
...
@@ -776,6 +791,7 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_extract
()
{
SCOPED_TRACE
(
s
);
Data
<
R
>
dataA
,
dataB
;
dataB
*=
10
;
R
a
=
dataA
,
b
=
dataB
;
...
...
@@ -796,6 +812,7 @@ template<typename R> struct TheTest
template
<
int
s
>
TheTest
&
test_rotate
()
{
SCOPED_TRACE
(
s
);
Data
<
R
>
dataA
,
dataB
;
dataB
*=
10
;
R
a
=
dataA
,
b
=
dataB
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment