Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a275489f
Commit
a275489f
authored
Aug 27, 2015
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
HAL universal intrinsics tests and documentation
parent
190d00ea
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
372 additions
and
34 deletions
+372
-34
hal.hpp
modules/hal/include/opencv2/hal.hpp
+13
-0
defs.h
modules/hal/include/opencv2/hal/defs.h
+10
-10
intrin.hpp
modules/hal/include/opencv2/hal/intrin.hpp
+28
-0
intrin_cpp.hpp
modules/hal/include/opencv2/hal/intrin_cpp.hpp
+0
-0
intrin_neon.hpp
modules/hal/include/opencv2/hal/intrin_neon.hpp
+24
-7
intrin_sse.hpp
modules/hal/include/opencv2/hal/intrin_sse.hpp
+49
-17
test_intrin.cpp
modules/hal/test/test_intrin.cpp
+0
-0
test_intrin_utils.hpp
modules/hal/test/test_intrin_utils.hpp
+234
-0
test_main.cpp
modules/hal/test/test_main.cpp
+3
-0
test_precomp.hpp
modules/hal/test/test_precomp.hpp
+11
-0
No files found.
modules/hal/include/opencv2/hal.hpp
View file @
a275489f
...
...
@@ -49,10 +49,21 @@
/**
@defgroup hal Hardware Acceleration Layer
@{
@defgroup hal_intrin Universal intrinsics
@{
@defgroup hal_intrin_impl Private implementation helpers
@}
@defgroup hal_utils Platform-dependent utils
@}
*/
namespace
cv
{
namespace
hal
{
//! @addtogroup hal
//! @{
namespace
Error
{
enum
...
...
@@ -93,6 +104,8 @@ void sqrt(const double* src, double* dst, int len);
void
invSqrt
(
const
float
*
src
,
float
*
dst
,
int
len
);
void
invSqrt
(
const
double
*
src
,
double
*
dst
,
int
len
);
//! @}
}}
//cv::hal
#endif //__OPENCV_HAL_HPP__
modules/hal/include/opencv2/hal/defs.h
View file @
a275489f
...
...
@@ -45,6 +45,9 @@
#ifndef __OPENCV_DEF_H__
#define __OPENCV_DEF_H__
//! @addtogroup hal_utils
//! @{
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE
/* to avoid multiple Visual Studio warnings */
#endif
...
...
@@ -335,9 +338,6 @@ Cv64suf;
# include "tegra_round.hpp"
#endif
//! @addtogroup core_utils
//! @{
#if CV_VFP
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
...
...
@@ -567,15 +567,19 @@ CV_INLINE int cvIsInf( float value )
return
(
ieee754
.
u
&
0x7fffffff
)
==
0x7f800000
;
}
//! @}
#include <algorithm>
namespace
cv
{
//! @addtogroup hal_utils
//! @{
/////////////// saturate_cast (used in image & signal processing) ///////////////////
/**
Template function for accurate conversion from one primitive type to another.
/** @brief Template function for accurate conversion from one primitive type to another.
The functions saturate_cast resemble the standard C++ cast operations, such as static_cast\<T\>()
and others. They perform an efficient and accurate conversion from one primitive type to another
...
...
@@ -618,8 +622,6 @@ template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(
/** @overload */
template
<
typename
_Tp
>
static
inline
_Tp
saturate_cast
(
uint64
v
)
{
return
_Tp
(
v
);
}
//! @cond IGNORED
template
<>
inline
uchar
saturate_cast
<
uchar
>
(
schar
v
)
{
return
(
uchar
)
std
::
max
((
int
)
v
,
0
);
}
template
<>
inline
uchar
saturate_cast
<
uchar
>
(
ushort
v
)
{
return
(
uchar
)
std
::
min
((
unsigned
)
v
,
(
unsigned
)
UCHAR_MAX
);
}
template
<>
inline
uchar
saturate_cast
<
uchar
>
(
int
v
)
{
return
(
uchar
)((
unsigned
)
v
<=
UCHAR_MAX
?
v
:
v
>
0
?
UCHAR_MAX
:
0
);
}
...
...
@@ -664,12 +666,10 @@ template<> inline int saturate_cast<int>(double v) { return cvRound(v)
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
float
v
)
{
return
cvRound
(
v
);
}
template
<>
inline
unsigned
saturate_cast
<
unsigned
>
(
double
v
)
{
return
cvRound
(
v
);
}
//! @
endcond
//! @
}
}
#endif // __cplusplus
//! @} core_utils
#endif //__OPENCV_HAL_H__
modules/hal/include/opencv2/hal/intrin.hpp
View file @
a275489f
...
...
@@ -48,6 +48,7 @@
#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/hal/defs.h"
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
...
...
@@ -59,6 +60,10 @@
// access from within opencv code more accessible
namespace
cv
{
//! @addtogroup hal_intrin
//! @{
//! @cond IGNORED
template
<
typename
_Tp
>
struct
V_TypeTraits
{
typedef
_Tp
int_type
;
...
...
@@ -82,6 +87,7 @@ template<> struct V_TypeTraits<uchar>
typedef
int
sum_type
;
typedef
ushort
w_type
;
typedef
unsigned
q_type
;
enum
{
delta
=
128
,
shift
=
8
};
...
...
@@ -99,6 +105,7 @@ template<> struct V_TypeTraits<schar>
typedef
int
sum_type
;
typedef
short
w_type
;
typedef
int
q_type
;
enum
{
delta
=
128
,
shift
=
8
};
...
...
@@ -265,8 +272,22 @@ template<> struct V_TypeTraits<double>
}
};
template
<
typename
T
>
struct
V_SIMD128Traits
{
enum
{
nlanes
=
16
/
sizeof
(
T
)
};
};
//! @endcond
//! @}
}
#ifdef CV_DOXYGEN
# undef CV_SSE2
# undef CV_NEON
#endif
#if CV_SSE2
#include "opencv2/hal/intrin_sse.hpp"
...
...
@@ -281,12 +302,19 @@ template<> struct V_TypeTraits<double>
#endif
//! @addtogroup hal_intrin
//! @{
#ifndef CV_SIMD128
//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
#define CV_SIMD128 0
#endif
#ifndef CV_SIMD128_64F
//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
#define CV_SIMD128_64F 0
#endif
//! @}
#endif
modules/hal/include/opencv2/hal/intrin_cpp.hpp
View file @
a275489f
This diff is collapsed.
Click to expand it.
modules/hal/include/opencv2/hal/intrin_neon.hpp
View file @
a275489f
...
...
@@ -48,6 +48,8 @@
namespace
cv
{
//! @cond IGNORED
#define CV_SIMD128 1
struct
v_uint8x16
...
...
@@ -278,14 +280,15 @@ void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
}
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_uint16x8
,
u16
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_int16x8
,
s16
,
pack_u
,
un
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int8x16
,
schar
,
int8x8_t
,
s8
,
v_int16x8
,
s16
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_uint32x4
,
u32
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_int32x4
,
s32
,
pack_u
,
un
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int16x8
,
short
,
int16x4_t
,
s16
,
v_int32x4
,
s32
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint32x4
,
unsigned
,
uint32x2_t
,
u32
,
v_uint64x2
,
u64
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_int32x4
,
int
,
int32x2_t
,
s32
,
v_int64x2
,
s64
,
pack
,
n
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint8x16
,
uchar
,
uint8x8_t
,
u8
,
v_int16x8
,
s16
,
pack_u
,
un
)
OPENCV_HAL_IMPL_NEON_PACK
(
v_uint16x8
,
ushort
,
uint16x4_t
,
u16
,
v_int32x4
,
s32
,
pack_u
,
un
)
inline
v_float32x4
v_matmul
(
const
v_float32x4
&
v
,
const
v_float32x4
&
m0
,
const
v_float32x4
&
m1
,
const
v_float32x4
&
m2
,
const
v_float32x4
&
m3
)
...
...
@@ -374,7 +377,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
int32x4_t
c
=
vmull_s16
(
vget_low_s16
(
a
.
val
),
vget_low_s16
(
b
.
val
));
int32x4_t
d
=
vmull_s16
(
vget_high_s16
(
a
.
val
),
vget_high_s16
(
b
.
val
));
int32x4x2_t
cd
=
v
trn
q_s32
(
c
,
d
);
int32x4x2_t
cd
=
v
uzp
q_s32
(
c
,
d
);
return
v_int32x4
(
vaddq_s32
(
cd
.
val
[
0
],
cd
.
val
[
1
]));
}
...
...
@@ -497,6 +500,16 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_uint32x4
,
v_absdiff
,
vabdq_u32
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float32x4
,
v_absdiff
,
vabdq_f32
)
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec2(cast(intrin(a.val, b.val))); \
}
OPENCV_HAL_IMPL_NEON_BIN_FUNC2
(
v_int8x16
,
v_uint8x16
,
vreinterpretq_u8_s8
,
v_absdiff
,
vabdq_s8
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC2
(
v_int16x8
,
v_uint16x8
,
vreinterpretq_u16_s16
,
v_absdiff
,
vabdq_s16
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC2
(
v_int32x4
,
v_uint32x4
,
vreinterpretq_u32_s32
,
v_absdiff
,
vabdq_s32
)
inline
v_float32x4
v_magnitude
(
const
v_float32x4
&
a
,
const
v_float32x4
&
b
)
{
v_float32x4
x
(
vmlaq_f32
(
vmulq_f32
(
a
.
val
,
a
.
val
),
b
.
val
,
b
.
val
));
...
...
@@ -641,13 +654,13 @@ inline bool v_check_all(const v_float32x4& a)
{
return
v_check_all
(
v_reinterpret_as_u32
(
a
));
}
inline
bool
v_check_any
(
const
v_int8x16
&
a
)
{
return
v_check_a
ll
(
v_reinterpret_as_u8
(
a
));
}
{
return
v_check_a
ny
(
v_reinterpret_as_u8
(
a
));
}
inline
bool
v_check_any
(
const
v_int16x8
&
a
)
{
return
v_check_a
ll
(
v_reinterpret_as_u16
(
a
));
}
{
return
v_check_a
ny
(
v_reinterpret_as_u16
(
a
));
}
inline
bool
v_check_any
(
const
v_int32x4
&
a
)
{
return
v_check_a
ll
(
v_reinterpret_as_u32
(
a
));
}
{
return
v_check_a
ny
(
v_reinterpret_as_u32
(
a
));
}
inline
bool
v_check_any
(
const
v_float32x4
&
a
)
{
return
v_check_a
ll
(
v_reinterpret_as_u32
(
a
));
}
{
return
v_check_a
ny
(
v_reinterpret_as_u32
(
a
));
}
#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
...
...
@@ -678,6 +691,8 @@ OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
OPENCV_HAL_IMPL_NEON_EXPAND
(
v_int8x16
,
v_int16x8
,
schar
,
s8
)
OPENCV_HAL_IMPL_NEON_EXPAND
(
v_uint16x8
,
v_uint32x4
,
ushort
,
u16
)
OPENCV_HAL_IMPL_NEON_EXPAND
(
v_int16x8
,
v_int32x4
,
short
,
s16
)
OPENCV_HAL_IMPL_NEON_EXPAND
(
v_uint32x4
,
v_uint64x2
,
uint
,
u32
)
OPENCV_HAL_IMPL_NEON_EXPAND
(
v_int32x4
,
v_int64x2
,
int
,
s32
)
inline
v_uint32x4
v_load_expand_q
(
const
uchar
*
ptr
)
{
...
...
@@ -840,6 +855,8 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a)
return
v_float32x4
(
vcvtq_f32_s32
(
a
.
val
));
}
//! @endcond
}
#endif
modules/hal/include/opencv2/hal/intrin_sse.hpp
View file @
a275489f
...
...
@@ -51,6 +51,8 @@
namespace
cv
{
//! @cond IGNORED
struct
v_uint8x16
{
typedef
uchar
lane_type
;
...
...
@@ -296,6 +298,11 @@ OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT(v_int32x4, s32)
OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT
(
v_uint64x2
,
u64
)
OPENCV_HAL_IMPL_SSE_INIT_FROM_FLT
(
v_int64x2
,
s64
)
inline
v_float32x4
v_reinterpret_as_f32
(
const
v_float32x4
&
a
)
{
return
a
;
}
inline
v_float64x2
v_reinterpret_as_f64
(
const
v_float64x2
&
a
)
{
return
a
;
}
inline
v_float32x4
v_reinterpret_as_f32
(
const
v_float64x2
&
a
)
{
return
v_float32x4
(
_mm_castpd_ps
(
a
.
val
));
}
inline
v_float64x2
v_reinterpret_as_f64
(
const
v_float32x4
&
a
)
{
return
v_float64x2
(
_mm_castps_pd
(
a
.
val
));
}
//////////////// PACK ///////////////
inline
v_uint8x16
v_pack
(
const
v_uint16x8
&
a
,
const
v_uint16x8
&
b
)
{
...
...
@@ -430,6 +437,17 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
_mm_storel_epi64
((
__m128i
*
)
ptr
,
r
);
}
template
<
int
n
>
inline
v_uint16x8
v_rshr_pack_u
(
const
v_int32x4
&
a
,
const
v_int32x4
&
b
)
{
__m128i
delta
=
_mm_set1_epi32
(
1
<<
(
n
-
1
)),
delta32
=
_mm_set1_epi32
(
32768
);
__m128i
a1
=
_mm_sub_epi32
(
_mm_srai_epi32
(
_mm_add_epi32
(
a
.
val
,
delta
),
n
),
delta32
);
__m128i
a2
=
_mm_sub_epi16
(
_mm_packs_epi32
(
a1
,
a1
),
_mm_set1_epi16
(
-
32768
));
__m128i
b1
=
_mm_sub_epi32
(
_mm_srai_epi32
(
_mm_add_epi32
(
b
.
val
,
delta
),
n
),
delta32
);
__m128i
b2
=
_mm_sub_epi16
(
_mm_packs_epi32
(
b1
,
b1
),
_mm_set1_epi16
(
-
32768
));
return
v_uint16x8
(
_mm_unpacklo_epi64
(
a2
,
b2
));
}
template
<
int
n
>
inline
void
v_rshr_pack_u_store
(
ushort
*
ptr
,
const
v_int32x4
&
a
)
{
...
...
@@ -460,7 +478,7 @@ void v_rshr_pack_store(short* ptr, const v_int32x4& a)
{
__m128i
delta
=
_mm_set1_epi32
(
1
<<
(
n
-
1
));
__m128i
a1
=
_mm_srai_epi32
(
_mm_add_epi32
(
a
.
val
,
delta
),
n
);
_mm_storel_epi64
((
__m128i
*
)
ptr
,
a1
);
_mm_storel_epi64
((
__m128i
*
)
ptr
,
_mm_packs_epi32
(
a1
,
a1
)
);
}
...
...
@@ -469,7 +487,7 @@ inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
{
__m128i
v0
=
_mm_unpacklo_epi32
(
a
.
val
,
b
.
val
);
// a0 a1 0 0
__m128i
v1
=
_mm_unpackhi_epi32
(
a
.
val
,
b
.
val
);
// b0 b1 0 0
return
v_uint32x4
(
_mm_unpacklo_epi
64
(
v0
,
v1
));
return
v_uint32x4
(
_mm_unpacklo_epi
32
(
v0
,
v1
));
}
inline
void
v_pack_store
(
unsigned
*
ptr
,
const
v_uint64x2
&
a
)
...
...
@@ -483,7 +501,7 @@ inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
{
__m128i
v0
=
_mm_unpacklo_epi32
(
a
.
val
,
b
.
val
);
// a0 a1 0 0
__m128i
v1
=
_mm_unpackhi_epi32
(
a
.
val
,
b
.
val
);
// b0 b1 0 0
return
v_int32x4
(
_mm_unpacklo_epi
64
(
v0
,
v1
));
return
v_int32x4
(
_mm_unpacklo_epi
32
(
v0
,
v1
));
}
inline
void
v_pack_store
(
int
*
ptr
,
const
v_int64x2
&
a
)
...
...
@@ -501,7 +519,7 @@ v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
__m128i
b1
=
_mm_srli_epi64
(
_mm_add_epi64
(
b
.
val
,
delta2
.
val
),
n
);
__m128i
v0
=
_mm_unpacklo_epi32
(
a1
,
b1
);
// a0 a1 0 0
__m128i
v1
=
_mm_unpackhi_epi32
(
a1
,
b1
);
// b0 b1 0 0
return
v_uint32x4
(
_mm_unpacklo_epi
64
(
v0
,
v1
));
return
v_uint32x4
(
_mm_unpacklo_epi
32
(
v0
,
v1
));
}
template
<
int
n
>
inline
...
...
@@ -534,7 +552,7 @@ v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
__m128i
b1
=
v_srai_epi64
(
_mm_add_epi64
(
b
.
val
,
delta2
.
val
),
n
);
__m128i
v0
=
_mm_unpacklo_epi32
(
a1
,
b1
);
// a0 a1 0 0
__m128i
v1
=
_mm_unpackhi_epi32
(
a1
,
b1
);
// b0 b1 0 0
return
v_int32x4
(
_mm_unpacklo_epi
64
(
v0
,
v1
));
return
v_int32x4
(
_mm_unpacklo_epi
32
(
v0
,
v1
));
}
template
<
int
n
>
inline
...
...
@@ -630,8 +648,8 @@ inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
{
__m128i
v0
=
_mm_mullo_epi16
(
a
.
val
,
b
.
val
);
__m128i
v1
=
_mm_mulhi_epi16
(
a
.
val
,
b
.
val
);
c
.
val
=
_mm_unpacklo_epi
32
(
v0
,
v1
);
d
.
val
=
_mm_unpackhi_epi
32
(
v0
,
v1
);
c
.
val
=
_mm_unpacklo_epi
16
(
v0
,
v1
);
d
.
val
=
_mm_unpackhi_epi
16
(
v0
,
v1
);
}
inline
void
v_mul_expand
(
const
v_uint16x8
&
a
,
const
v_uint16x8
&
b
,
...
...
@@ -639,8 +657,8 @@ inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
{
__m128i
v0
=
_mm_mullo_epi16
(
a
.
val
,
b
.
val
);
__m128i
v1
=
_mm_mulhi_epu16
(
a
.
val
,
b
.
val
);
c
.
val
=
_mm_unpacklo_epi
32
(
v0
,
v1
);
d
.
val
=
_mm_unpackhi_epi
32
(
v0
,
v1
);
c
.
val
=
_mm_unpacklo_epi
16
(
v0
,
v1
);
d
.
val
=
_mm_unpackhi_epi
16
(
v0
,
v1
);
}
inline
void
v_mul_expand
(
const
v_uint32x4
&
a
,
const
v_uint32x4
&
b
,
...
...
@@ -869,6 +887,18 @@ inline _Tpuvec v_absdiff(const _Tpsvec& a, const _Tpsvec& b) \
OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16
(
v_uint8x16
,
v_int8x16
,
8
,
(
int
)
0x80808080
)
OPENCV_HAL_IMPL_SSE_ABSDIFF_8_16
(
v_uint16x8
,
v_int16x8
,
16
,
(
int
)
0x80008000
)
inline
v_uint32x4
v_absdiff
(
const
v_uint32x4
&
a
,
const
v_uint32x4
&
b
)
{
return
v_max
(
a
,
b
)
-
v_min
(
a
,
b
);
}
inline
v_uint32x4
v_absdiff
(
const
v_int32x4
&
a
,
const
v_int32x4
&
b
)
{
__m128i
d
=
_mm_sub_epi32
(
a
.
val
,
b
.
val
);
__m128i
m
=
_mm_cmpgt_epi32
(
b
.
val
,
a
.
val
);
return
v_uint32x4
(
_mm_sub_epi32
(
_mm_xor_si128
(
d
,
m
),
m
));
}
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \
...
...
@@ -1047,8 +1077,8 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_uint16x8, si128)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int16x8
,
si128
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_uint32x4
,
si128
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int32x4
,
si128
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_uint64x2
,
si128
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int64x2
,
si128
)
//
OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, si128)
//
OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float32x4
,
ps
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float64x2
,
pd
)
...
...
@@ -1257,7 +1287,7 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
__m128i
v0
=
_mm_unpacklo_epi8
(
u0
,
u2
);
// a0 a8 b0 b8 ...
__m128i
v1
=
_mm_unpackhi_epi8
(
u0
,
u2
);
// a2 a10 b2 b10 ...
__m128i
v2
=
_mm_unpacklo_epi8
(
u1
,
u3
);
// a4 a12 b4 b12 ...
__m128i
v3
=
_mm_unpackhi_epi8
(
u1
,
u3
);
// a6 a14 b
4
b14 ...
__m128i
v3
=
_mm_unpackhi_epi8
(
u1
,
u3
);
// a6 a14 b
6
b14 ...
u0
=
_mm_unpacklo_epi8
(
v0
,
v2
);
// a0 a4 a8 a12 ...
u1
=
_mm_unpacklo_epi8
(
v1
,
v3
);
// a2 a6 a10 a14 ...
...
...
@@ -1266,13 +1296,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
v0
=
_mm_unpacklo_epi8
(
u0
,
u1
);
// a0 a2 a4 a6 ...
v1
=
_mm_unpacklo_epi8
(
u2
,
u3
);
// a1 a3 a5 a7 ...
v2
=
_mm_unpackhi_epi8
(
u0
,
u1
);
//
b0 b2 b4 b
6 ...
v3
=
_mm_unpackhi_epi8
(
u2
,
u3
);
//
b1 b3 b5 b
7 ...
v2
=
_mm_unpackhi_epi8
(
u0
,
u1
);
//
c0 c2 c4 c
6 ...
v3
=
_mm_unpackhi_epi8
(
u2
,
u3
);
//
c1 c3 c5 c
7 ...
a
.
val
=
_mm_unpacklo_epi8
(
v0
,
v1
);
b
.
val
=
_mm_unpack
lo_epi8
(
v2
,
v3
);
c
.
val
=
_mm_unpack
hi_epi8
(
v0
,
v1
);
d
.
val
=
_mm_unpack
lo
_epi8
(
v2
,
v3
);
b
.
val
=
_mm_unpack
hi_epi8
(
v0
,
v1
);
c
.
val
=
_mm_unpack
lo_epi8
(
v2
,
v3
);
d
.
val
=
_mm_unpack
hi
_epi8
(
v2
,
v3
);
}
inline
void
v_load_deinterleave
(
const
ushort
*
ptr
,
v_uint16x8
&
a
,
v_uint16x8
&
b
,
v_uint16x8
&
c
)
...
...
@@ -1560,6 +1590,8 @@ inline v_float64x2 v_cvt_f64(const v_float32x4& a)
return
v_float64x2
(
_mm_cvtps_pd
(
a
.
val
));
}
//! @endcond
}
#endif
modules/hal/test/test_intrin.cpp
0 → 100644
View file @
a275489f
This diff is collapsed.
Click to expand it.
modules/hal/test/test_intrin_utils.hpp
0 → 100644
View file @
a275489f
#ifndef _TEST_UTILS_HPP_
#define _TEST_UTILS_HPP_
#include "opencv2/hal/intrin.hpp"
#include "opencv2/ts.hpp"
#include <ostream>
#include <algorithm>
template
<
typename
R
>
struct
Data
;
template
<
int
N
>
struct
initializer
;
template
<>
struct
initializer
<
16
>
{
template
<
typename
R
>
static
R
init
(
const
Data
<
R
>
&
d
)
{
return
R
(
d
[
0
],
d
[
1
],
d
[
2
],
d
[
3
],
d
[
4
],
d
[
5
],
d
[
6
],
d
[
7
],
d
[
8
],
d
[
9
],
d
[
10
],
d
[
11
],
d
[
12
],
d
[
13
],
d
[
14
],
d
[
15
]);
}
};
template
<>
struct
initializer
<
8
>
{
template
<
typename
R
>
static
R
init
(
const
Data
<
R
>
&
d
)
{
return
R
(
d
[
0
],
d
[
1
],
d
[
2
],
d
[
3
],
d
[
4
],
d
[
5
],
d
[
6
],
d
[
7
]);
}
};
template
<>
struct
initializer
<
4
>
{
template
<
typename
R
>
static
R
init
(
const
Data
<
R
>
&
d
)
{
return
R
(
d
[
0
],
d
[
1
],
d
[
2
],
d
[
3
]);
}
};
template
<>
struct
initializer
<
2
>
{
template
<
typename
R
>
static
R
init
(
const
Data
<
R
>
&
d
)
{
return
R
(
d
[
0
],
d
[
1
]);
}
};
//==================================================================================================
template
<
typename
R
>
struct
Data
{
typedef
typename
R
::
lane_type
LaneType
;
Data
()
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
d
[
i
]
=
(
LaneType
)(
i
+
1
);
}
Data
(
LaneType
val
)
{
fill
(
val
);
}
Data
(
const
R
&
r
)
{
*
this
=
r
;
}
operator
R
()
{
return
initializer
<
R
::
nlanes
>
().
init
(
*
this
);
}
Data
<
R
>
&
operator
=
(
const
R
&
r
)
{
v_store
(
d
,
r
);
return
*
this
;
}
template
<
typename
T
>
Data
<
R
>
&
operator
*=
(
T
m
)
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
d
[
i
]
*=
(
LaneType
)
m
;
return
*
this
;
}
template
<
typename
T
>
Data
<
R
>
&
operator
+=
(
T
m
)
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
d
[
i
]
+=
(
LaneType
)
m
;
return
*
this
;
}
void
fill
(
LaneType
val
)
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
d
[
i
]
=
val
;
}
void
reverse
()
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
/
2
;
++
i
)
std
::
swap
(
d
[
i
],
d
[
R
::
nlanes
-
i
-
1
]);
}
const
LaneType
&
operator
[](
int
i
)
const
{
CV_Assert
(
i
>=
0
&&
i
<
R
::
nlanes
);
return
d
[
i
];
}
LaneType
&
operator
[](
int
i
)
{
CV_Assert
(
i
>=
0
&&
i
<
R
::
nlanes
);
return
d
[
i
];
}
const
LaneType
*
mid
()
const
{
return
d
+
R
::
nlanes
/
2
;
}
LaneType
*
mid
()
{
return
d
+
R
::
nlanes
/
2
;
}
bool
operator
==
(
const
Data
<
R
>
&
other
)
const
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
if
(
d
[
i
]
!=
other
.
d
[
i
])
return
false
;
return
true
;
}
void
clear
()
{
fill
(
0
);
}
bool
isZero
()
const
{
return
isValue
(
0
);
}
bool
isValue
(
uchar
val
)
const
{
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
if
(
d
[
i
]
!=
val
)
return
false
;
return
true
;
}
LaneType
d
[
R
::
nlanes
];
};
template
<
typename
R
>
struct
AlignedData
{
Data
<
R
>
CV_DECL_ALIGNED
(
16
)
a
;
// aligned
char
dummy
;
Data
<
R
>
u
;
// unaligned
};
template
<
typename
R
>
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
Data
<
R
>
&
d
)
{
out
<<
"{ "
;
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
{
// out << std::hex << +V_TypeTraits<typename R::lane_type>::reinterpret_int(d.d[i]);
out
<<
+
d
.
d
[
i
];
if
(
i
+
1
<
R
::
nlanes
)
out
<<
", "
;
}
out
<<
" }"
;
return
out
;
}
//==================================================================================================
template
<
typename
R
>
struct
RegTrait
;
template
<>
struct
RegTrait
<
cv
::
v_uint8x16
>
{
typedef
cv
::
v_uint16x8
w_reg
;
typedef
cv
::
v_uint32x4
q_reg
;
typedef
cv
::
v_uint8x16
u_reg
;
static
cv
::
v_uint8x16
zero
()
{
return
cv
::
v_setzero_u8
();
}
static
cv
::
v_uint8x16
all
(
uchar
val
)
{
return
cv
::
v_setall_u8
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int8x16
>
{
typedef
cv
::
v_int16x8
w_reg
;
typedef
cv
::
v_int32x4
q_reg
;
typedef
cv
::
v_uint8x16
u_reg
;
static
cv
::
v_int8x16
zero
()
{
return
cv
::
v_setzero_s8
();
}
static
cv
::
v_int8x16
all
(
schar
val
)
{
return
cv
::
v_setall_s8
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint16x8
>
{
typedef
cv
::
v_uint32x4
w_reg
;
typedef
cv
::
v_int16x8
int_reg
;
typedef
cv
::
v_uint16x8
u_reg
;
static
cv
::
v_uint16x8
zero
()
{
return
cv
::
v_setzero_u16
();
}
static
cv
::
v_uint16x8
all
(
ushort
val
)
{
return
cv
::
v_setall_u16
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int16x8
>
{
typedef
cv
::
v_int32x4
w_reg
;
typedef
cv
::
v_uint16x8
u_reg
;
static
cv
::
v_int16x8
zero
()
{
return
cv
::
v_setzero_s16
();
}
static
cv
::
v_int16x8
all
(
short
val
)
{
return
cv
::
v_setall_s16
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint32x4
>
{
typedef
cv
::
v_uint64x2
w_reg
;
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_uint32x4
u_reg
;
static
cv
::
v_uint32x4
zero
()
{
return
cv
::
v_setzero_u32
();
}
static
cv
::
v_uint32x4
all
(
unsigned
val
)
{
return
cv
::
v_setall_u32
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int32x4
>
{
typedef
cv
::
v_int64x2
w_reg
;
typedef
cv
::
v_uint32x4
u_reg
;
static
cv
::
v_int32x4
zero
()
{
return
cv
::
v_setzero_s32
();
}
static
cv
::
v_int32x4
all
(
int
val
)
{
return
cv
::
v_setall_s32
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_uint64x2
>
{
static
cv
::
v_uint64x2
zero
()
{
return
cv
::
v_setzero_u64
();
}
static
cv
::
v_uint64x2
all
(
uint64
val
)
{
return
cv
::
v_setall_u64
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_int64x2
>
{
static
cv
::
v_int64x2
zero
()
{
return
cv
::
v_setzero_s64
();
}
static
cv
::
v_int64x2
all
(
int64
val
)
{
return
cv
::
v_setall_s64
(
val
);
}
};
template
<>
struct
RegTrait
<
cv
::
v_float32x4
>
{
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_float32x4
u_reg
;
static
cv
::
v_float32x4
zero
()
{
return
cv
::
v_setzero_f32
();
}
static
cv
::
v_float32x4
all
(
float
val
)
{
return
cv
::
v_setall_f32
(
val
);
}
};
#if CV_SIMD128_64F
template
<>
struct
RegTrait
<
cv
::
v_float64x2
>
{
typedef
cv
::
v_int32x4
int_reg
;
typedef
cv
::
v_float64x2
u_reg
;
static
cv
::
v_float64x2
zero
()
{
return
cv
::
v_setzero_f64
();
}
static
cv
::
v_float64x2
all
(
double
val
)
{
return
cv
::
v_setall_f64
(
val
);
}
};
#endif
#endif
modules/hal/test/test_main.cpp
0 → 100644
View file @
a275489f
#include "opencv2/ts.hpp"
CV_TEST_MAIN
(
"cv"
)
modules/hal/test/test_precomp.hpp
0 → 100644
View file @
a275489f
#ifndef __OPENCV_HAL_TEST_PRECOMP_HPP__
#define __OPENCV_HAL_TEST_PRECOMP_HPP__
#include <iostream>
#include <limits>
#include "opencv2/ts.hpp"
#include "opencv2/hal.hpp"
#include "opencv2/hal/defs.h"
#include "opencv2/hal/intrin.hpp"
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment