Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6499263b
Commit
6499263b
authored
Jul 24, 2018
by
Sayed Adel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
core:test Expand hal_intrin tests to support SIMD256
parent
5336b9ad
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
454 additions
and
292 deletions
+454
-292
intrin.hpp
modules/core/include/opencv2/core/hal/intrin.hpp
+6
-4
intrin_avx.hpp
modules/core/include/opencv2/core/hal/intrin_avx.hpp
+56
-65
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+8
-0
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+5
-0
test_intrin.avx2.cpp
modules/core/test/test_intrin.avx2.cpp
+6
-0
test_intrin.cpp
modules/core/test/test_intrin.cpp
+76
-223
test_intrin.simd.hpp
modules/core/test/test_intrin.simd.hpp
+297
-0
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+0
-0
No files found.
modules/core/include/opencv2/core/hal/intrin.hpp
View file @
6499263b
...
@@ -154,7 +154,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
...
@@ -154,7 +154,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) will get vx_ prefix
// available instruction set) will get vx_ prefix
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v2
45
_load())
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v2
56
_load())
#if CV_AVX2
#if CV_AVX2
#include "opencv2/core/hal/intrin_avx.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"
...
@@ -214,14 +214,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
...
@@ -214,14 +214,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
...
@@ -316,7 +318,7 @@ template<typename _Tp> struct V_RegTraits
...
@@ -316,7 +318,7 @@ template<typename _Tp> struct V_RegTraits
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES
(
v256
)
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES
(
v256
)
CV_INTRIN_DEFINE_WIDE_INTRIN
(
double
,
v_float64
,
f64
,
v256
,
load
)
CV_INTRIN_DEFINE_WIDE_INTRIN
(
double
,
v_float64
,
f64
,
v256
,
load
)
inline
void
vx_cleanup
()
{
v256_cleanup
();
}
inline
void
vx_cleanup
()
{
v256_cleanup
();
}
#elif CV_SIMD128
#elif CV_SIMD128
|| CV_SIMD128_CPP
typedef
v_uint8x16
v_uint8
;
typedef
v_uint8x16
v_uint8
;
typedef
v_int8x16
v_int8
;
typedef
v_int8x16
v_int8
;
typedef
v_uint16x8
v_uint16
;
typedef
v_uint16x8
v_uint16
;
...
...
modules/core/include/opencv2/core/hal/intrin_avx.hpp
View file @
6499263b
...
@@ -407,6 +407,11 @@ inline v_float16x16 v256_load_f16(const short* ptr)
...
@@ -407,6 +407,11 @@ inline v_float16x16 v256_load_f16(const short* ptr)
inline
v_float16x16
v256_load_f16_aligned
(
const
short
*
ptr
)
inline
v_float16x16
v256_load_f16_aligned
(
const
short
*
ptr
)
{
return
v_float16x16
(
_mm256_load_si256
((
const
__m256i
*
)
ptr
));
}
{
return
v_float16x16
(
_mm256_load_si256
((
const
__m256i
*
)
ptr
));
}
inline
v_float16x16
v256_load_f16_low
(
const
short
*
ptr
)
{
return
v_float16x16
(
v256_load_low
(
ptr
).
val
);
}
inline
v_float16x16
v256_load_f16_halves
(
const
short
*
ptr0
,
const
short
*
ptr1
)
{
return
v_float16x16
(
v256_load_halves
(
ptr0
,
ptr1
).
val
);
}
inline
void
v_store
(
short
*
ptr
,
const
v_float16x16
&
a
)
inline
void
v_store
(
short
*
ptr
,
const
v_float16x16
&
a
)
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
a
.
val
);
}
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
a
.
val
);
}
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x16
&
a
)
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x16
&
a
)
...
@@ -819,94 +824,80 @@ OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd)
...
@@ -819,94 +824,80 @@ OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd)
template
<
int
imm
>
template
<
int
imm
>
inline
v_uint8x32
v_rotate_left
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
inline
v_uint8x32
v_rotate_left
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
{
{
__m256i
swap
=
_mm256_permute2x128_si256
(
a
.
val
,
b
.
val
,
0x03
);
enum
{
IMM_R
=
(
16
-
imm
)
&
0xFF
};
enum
{
IMM_R2
=
(
32
-
imm
)
&
0xFF
};
switch
(
imm
)
{
case
0
:
return
a
;
case
32
:
return
b
;
case
16
:
return
v_uint8x32
(
swap
);
}
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
a
.
val
,
swap
,
16
-
imm
));
if
(
imm
==
0
)
return
a
;
if
(
imm
<
32
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
swap
,
b
.
val
,
32
-
imm
));
if
(
imm
==
32
)
return
b
;
if
(
imm
>
32
)
return
v_uint8x32
();
return
v_uint8x32
();
__m256i
swap
=
_mm256_permute2x128_si256
(
a
.
val
,
b
.
val
,
0x03
);
if
(
imm
==
16
)
return
v_uint8x32
(
swap
);
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
a
.
val
,
swap
,
IMM_R
));
return
v_uint8x32
(
_mm256_alignr_epi8
(
swap
,
b
.
val
,
IMM_R2
));
// imm < 32
}
}
template
<
int
imm
>
template
<
int
imm
>
inline
v_uint8x32
v_rotate_right
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
inline
v_uint8x32
v_rotate_right
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
{
{
__m256i
swap
=
_mm256_permute2x128_si256
(
a
.
val
,
b
.
val
,
0x21
)
;
enum
{
IMM_L
=
(
imm
-
16
)
&
0xFF
}
;
switch
(
imm
)
if
(
imm
==
0
)
return
a
;
{
if
(
imm
==
32
)
return
b
;
case
0
:
return
a
;
if
(
imm
>
32
)
return
v_uint8x32
();
case
32
:
return
b
;
case
16
:
return
v_uint8x32
(
swap
);
}
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
swap
,
a
.
val
,
imm
)
);
__m256i
swap
=
_mm256_permute2x128_si256
(
a
.
val
,
b
.
val
,
0x21
);
if
(
imm
<
32
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
b
.
val
,
swap
,
imm
-
16
)
);
if
(
imm
==
16
)
return
v_uint8x32
(
swap
);
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
swap
,
a
.
val
,
imm
));
return
v_uint8x32
();
return
v_uint8x32
(
_mm256_alignr_epi8
(
b
.
val
,
swap
,
IMM_L
)
);
}
}
template
<
int
imm
>
template
<
int
imm
>
inline
v_uint8x32
v_rotate_left
(
const
v_uint8x32
&
a
)
inline
v_uint8x32
v_rotate_left
(
const
v_uint8x32
&
a
)
{
{
v_uint8x32
res
;
enum
{
IMM_L
=
(
imm
-
16
)
&
0xFF
};
enum
{
IMM_R
=
(
16
-
imm
)
&
0xFF
};
if
(
imm
==
0
)
return
a
;
if
(
imm
>
32
)
return
v_uint8x32
();
// ESAC control[3] ? [127:0] = 0
// ESAC control[3] ? [127:0] = 0
__m256i
swapz
=
_mm256_permute2x128_si256
(
a
.
val
,
a
.
val
,
_MM_SHUFFLE
(
0
,
0
,
2
,
0
));
__m256i
swapz
=
_mm256_permute2x128_si256
(
a
.
val
,
a
.
val
,
_MM_SHUFFLE
(
0
,
0
,
2
,
0
));
if
(
imm
==
16
)
return
v_uint8x32
(
swapz
);
if
(
imm
==
0
)
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
a
.
val
,
swapz
,
IMM_R
));
return
a
;
return
v_uint8x32
(
_mm256_slli_si256
(
swapz
,
IMM_L
));
if
(
imm
==
16
)
res
.
val
=
swapz
;
else
if
(
imm
<
16
)
res
.
val
=
_mm256_alignr_epi8
(
a
.
val
,
swapz
,
16
-
imm
);
else
if
(
imm
<
32
)
res
.
val
=
_mm256_slli_si256
(
swapz
,
imm
-
16
);
else
return
v_uint8x32
();
return
res
;
}
}
template
<
int
imm
>
template
<
int
imm
>
inline
v_uint8x32
v_rotate_right
(
const
v_uint8x32
&
a
)
inline
v_uint8x32
v_rotate_right
(
const
v_uint8x32
&
a
)
{
{
v_uint8x32
res
;
enum
{
IMM_L
=
(
imm
-
16
)
&
0xFF
};
if
(
imm
==
0
)
return
a
;
if
(
imm
>
32
)
return
v_uint8x32
();
// ESAC control[3] ? [127:0] = 0
// ESAC control[3] ? [127:0] = 0
__m256i
swapz
=
_mm256_permute2x128_si256
(
a
.
val
,
a
.
val
,
_MM_SHUFFLE
(
2
,
0
,
0
,
1
));
__m256i
swapz
=
_mm256_permute2x128_si256
(
a
.
val
,
a
.
val
,
_MM_SHUFFLE
(
2
,
0
,
0
,
1
));
if
(
imm
==
16
)
return
v_uint8x32
(
swapz
);
if
(
imm
==
0
)
if
(
imm
<
16
)
return
v_uint8x32
(
_mm256_alignr_epi8
(
swapz
,
a
.
val
,
imm
));
return
a
;
return
v_uint8x32
(
_mm256_srli_si256
(
swapz
,
IMM_L
));
if
(
imm
==
16
)
}
res
.
val
=
swapz
;
else
if
(
imm
<
16
)
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
res
.
val
=
_mm256_alignr_epi8
(
swapz
,
a
.
val
,
imm
);
template<int imm> \
else
if
(
imm
<
32
)
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
res
.
val
=
_mm256_srli_si256
(
swapz
,
imm
-
16
);
{ \
else
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
return
v_uint8x32
();
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a), \
return
res
;
v_reinterpret_as_u8(b)); \
}
return _Tpvec(cast(ret.val)); \
} \
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
template<int imm> \
template<int imm> \
inline _Tpvec intrin(const _Tpvec& a) \
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
{ \
{ \
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
const int w = sizeof(typename _Tpvec::lane_type); \
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a)); \
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a), \
return _Tpvec(cast(ret.val)); \
v_reinterpret_as_u8(b)); \
return _Tpvec(cast(ret.val)); \
} \
template<int imm> \
inline _Tpvec intrin(const _Tpvec& a) \
{ \
const int w = sizeof(typename _Tpvec::lane_type); \
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a)); \
return _Tpvec(cast(ret.val)); \
}
}
#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \
#define OPENCV_HAL_IMPL_AVX_ROTATE(_Tpvec) \
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
6499263b
...
@@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a)
...
@@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a)
#endif
#endif
}
}
#ifndef vdup_n_f16
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
#endif
struct
v_float16x8
struct
v_float16x8
{
{
...
@@ -889,6 +892,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
...
@@ -889,6 +892,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
inline
v_float16x8
v_load_f16_aligned
(
const
short
*
ptr
)
inline
v_float16x8
v_load_f16_aligned
(
const
short
*
ptr
)
{
return
v_float16x8
(
cv_vld1q_f16
(
ptr
));
}
{
return
v_float16x8
(
cv_vld1q_f16
(
ptr
));
}
inline
v_float16x8
v_load_f16_low
(
const
short
*
ptr
)
{
return
v_float16x8
(
vcombine_f16
(
cv_vld1_f16
(
ptr
),
vdup_n_f16
((
float16_t
)
0
)));
}
inline
v_float16x8
v_load_f16_halves
(
const
short
*
ptr0
,
const
short
*
ptr1
)
{
return
v_float16x8
(
vcombine_f16
(
cv_vld1_f16
(
ptr0
),
cv_vld1_f16
(
ptr1
)));
}
inline
void
v_store
(
short
*
ptr
,
const
v_float16x8
&
a
)
inline
void
v_store
(
short
*
ptr
,
const
v_float16x8
&
a
)
{
cv_vst1q_f16
(
ptr
,
a
.
val
);
}
{
cv_vst1q_f16
(
ptr
,
a
.
val
);
}
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x8
&
a
)
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x8
&
a
)
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
6499263b
...
@@ -1308,6 +1308,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
...
@@ -1308,6 +1308,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
inline
v_float16x8
v_load_f16_aligned
(
const
short
*
ptr
)
inline
v_float16x8
v_load_f16_aligned
(
const
short
*
ptr
)
{
return
v_float16x8
(
_mm_load_si128
((
const
__m128i
*
)
ptr
));
}
{
return
v_float16x8
(
_mm_load_si128
((
const
__m128i
*
)
ptr
));
}
inline
v_float16x8
v_load_f16_low
(
const
short
*
ptr
)
{
return
v_float16x8
(
v_load_low
(
ptr
).
val
);
}
inline
v_float16x8
v_load_f16_halves
(
const
short
*
ptr0
,
const
short
*
ptr1
)
{
return
v_float16x8
(
v_load_halves
(
ptr0
,
ptr1
).
val
);
}
inline
void
v_store
(
short
*
ptr
,
const
v_float16x8
&
a
)
inline
void
v_store
(
short
*
ptr
,
const
v_float16x8
&
a
)
{
_mm_storeu_si128
((
__m128i
*
)
ptr
,
a
.
val
);
}
{
_mm_storeu_si128
((
__m128i
*
)
ptr
,
a
.
val
);
}
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x8
&
a
)
inline
void
v_store_aligned
(
short
*
ptr
,
const
v_float16x8
&
a
)
...
...
modules/core/test/test_intrin.avx2.cpp
0 → 100644
View file @
6499263b
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_intrin.simd.hpp"
\ No newline at end of file
modules/core/test/test_intrin.cpp
View file @
6499263b
...
@@ -2,249 +2,101 @@
...
@@ -2,249 +2,101 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_precomp.hpp"
#include "test_intrin.simd.hpp"
#include "test_intrin_utils.hpp"
#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp"
#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"
#define CV_CPU_DISPATCH_MODE FP16
#define CV_CPU_DISPATCH_MODE FP16
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
#define CV_CPU_DISPATCH_MODE AVX2
using
namespace
cv
;
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
namespace
opencv_test
{
namespace
hal
{
namespace
opencv_test
{
namespace
hal
{
using
namespace
CV_CPU_OPTIMIZATION_NAMESPACE
;
using
namespace
CV_CPU_OPTIMIZATION_NAMESPACE
;
//============= 8-bit integer =====================================================================
TEST
(
hal_intrin
,
uint8x16
)
{
test_hal_intrin_uint8
();
}
TEST
(
hal_intrin
,
uint8x16
)
{
TheTest
<
v_uint8x16
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_expand_q
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_cmp
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_pack_u
<
1
>
().
test_pack_u
<
2
>
().
test_pack_u
<
3
>
().
test_pack_u
<
8
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
8
>
().
test_extract
<
15
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
8
>
().
test_rotate
<
15
>
()
;
}
TEST
(
hal_intrin
,
int8x16
)
{
TEST
(
hal_intrin
,
int8x16
)
TheTest
<
v_int8x16
>
()
{
test_hal_intrin_int8
();
}
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_expand_q
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_cmp
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_abs
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
8
>
().
test_extract
<
15
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
8
>
().
test_rotate
<
15
>
()
;
}
//============= 16-bit integer =====================================================================
TEST
(
hal_intrin
,
uint16x8
)
{
test_hal_intrin_uint16
();
}
TEST
(
hal_intrin
,
uint16x8
)
{
TheTest
<
v_uint16x8
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_pack_u
<
1
>
().
test_pack_u
<
2
>
().
test_pack_u
<
7
>
().
test_pack_u
<
16
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
4
>
().
test_extract
<
7
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
4
>
().
test_rotate
<
7
>
()
;
}
TEST
(
hal_intrin
,
int16x8
)
{
TEST
(
hal_intrin
,
int16x8
)
TheTest
<
v_int16x8
>
()
{
test_hal_intrin_int16
();
}
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_dot_prod
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_abs
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
4
>
().
test_extract
<
7
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
4
>
().
test_rotate
<
7
>
()
;
}
//============= 32-bit integer =====================================================================
TEST
(
hal_intrin
,
int32x4
)
{
test_hal_intrin_int32
();
}
TEST
(
hal_intrin
,
uint32x4
)
{
TheTest
<
v_uint32x4
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
.
test_transpose
()
;
}
TEST
(
hal_intrin
,
int32x4
)
{
TEST
(
hal_intrin
,
uint32x4
)
TheTest
<
v_int32x4
>
()
{
test_hal_intrin_uint32
();
}
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_mul
()
.
test_abs
()
.
test_cmp
()
.
test_popcount
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
.
test_float_cvt32
()
.
test_float_cvt64
()
.
test_transpose
()
;
}
//============= 64-bit integer =====================================================================
TEST
(
hal_intrin
,
uint64x2
)
{
test_hal_intrin_uint64
();
}
TEST
(
hal_intrin
,
uint64x2
)
{
TheTest
<
v_uint64x2
>
()
.
test_loadstore
()
.
test_addsub
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
}
TEST
(
hal_intrin
,
int64x2
)
{
TEST
(
hal_intrin
,
int64x2
)
TheTest
<
v_int64x2
>
()
{
test_hal_intrin_int64
();
}
.
test_loadstore
()
.
test_addsub
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
}
//============= Floating point =====================================================================
TEST
(
hal_intrin
,
float32x4
)
{
test_hal_intrin_float32
();
}
TEST
(
hal_intrin
,
float32x4
)
{
TheTest
<
v_float32x4
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_interleave_2channel
()
.
test_addsub
()
.
test_mul
()
.
test_div
()
.
test_cmp
()
.
test_sqrt_abs
()
.
test_min_max
()
.
test_float_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_unpack
()
.
test_float_math
()
.
test_float_cvt64
()
.
test_matmul
()
.
test_transpose
()
.
test_reduce_sum4
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
;
}
#if CV_SIMD128_64F
TEST
(
hal_intrin
,
float64x2
)
TEST
(
hal_intrin
,
float64x2
)
{
{
test_hal_intrin_float64
();
}
TheTest
<
v_float64x2
>
()
.
test_loadstore
()
.
test_addsub
()
.
test_mul
()
.
test_div
()
.
test_cmp
()
.
test_sqrt_abs
()
.
test_min_max
()
.
test_float_absdiff
()
.
test_mask
()
.
test_unpack
()
.
test_float_math
()
.
test_float_cvt32
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
}
#endif
TEST
(
hal_intrin
,
float16
)
TEST
(
hal_intrin
,
float16x8
)
{
{
CV_CPU_CALL_FP16_
(
test_hal_intrin_float16
,
());
CV_CPU_CALL_FP16_
(
test_hal_intrin_float16
,
());
throw
SkipTestException
(
"Unsupported hardware: FP16 is not available"
);
throw
SkipTestException
(
"Unsupported hardware: FP16 is not available"
);
}
}
}}
#define DISPATCH_SIMD_MODES AVX2
#define DISPATCH_SIMD_NAME "SIMD256"
#define DISPATCH_SIMD(fun) \
do { \
CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \
throw SkipTestException( \
"Unsupported hardware: " \
DISPATCH_SIMD_NAME \
" is not available" \
); \
} while(0)
TEST
(
hal_intrin256
,
uint8x32
)
{
DISPATCH_SIMD
(
test_hal_intrin_uint8
);
}
TEST
(
hal_intrin256
,
int8x32
)
{
DISPATCH_SIMD
(
test_hal_intrin_int8
);
}
TEST
(
hal_intrin256
,
uint16x16
)
{
DISPATCH_SIMD
(
test_hal_intrin_uint16
);
}
TEST
(
hal_intrin256
,
int16x16
)
{
DISPATCH_SIMD
(
test_hal_intrin_int16
);
}
TEST
(
hal_intrin256
,
uint32x8
)
{
DISPATCH_SIMD
(
test_hal_intrin_uint32
);
}
TEST
(
hal_intrin256
,
int32x8
)
{
DISPATCH_SIMD
(
test_hal_intrin_int32
);
}
TEST
(
hal_intrin256
,
uint64x4
)
{
DISPATCH_SIMD
(
test_hal_intrin_uint64
);
}
TEST
(
hal_intrin256
,
int64x4
)
{
DISPATCH_SIMD
(
test_hal_intrin_int64
);
}
TEST
(
hal_intrin256
,
float32x8
)
{
DISPATCH_SIMD
(
test_hal_intrin_float32
);
}
TEST
(
hal_intrin256
,
float64x4
)
{
DISPATCH_SIMD
(
test_hal_intrin_float64
);
}
TEST
(
hal_intrin256
,
float16x16
)
{
if
(
!
CV_CPU_HAS_SUPPORT_FP16
)
throw
SkipTestException
(
"Unsupported hardware: FP16 is not available"
);
DISPATCH_SIMD
(
test_hal_intrin_float16
);
}
}}
// namespace
\ No newline at end of file
modules/core/test/test_intrin.simd.hpp
0 → 100644
View file @
6499263b
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "test_intrin_utils.hpp"
namespace
opencv_test
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void
test_hal_intrin_uint8
();
void
test_hal_intrin_int8
();
void
test_hal_intrin_uint16
();
void
test_hal_intrin_int16
();
void
test_hal_intrin_uint32
();
void
test_hal_intrin_int32
();
void
test_hal_intrin_uint64
();
void
test_hal_intrin_int64
();
void
test_hal_intrin_float32
();
void
test_hal_intrin_float64
();
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
//============= 8-bit integer =====================================================================
void
test_hal_intrin_uint8
()
{
TheTest
<
v_uint8
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_expand_q
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_cmp
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_pack_u
<
1
>
().
test_pack_u
<
2
>
().
test_pack_u
<
3
>
().
test_pack_u
<
8
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
8
>
().
test_extract
<
15
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
8
>
().
test_rotate
<
15
>
()
;
#if CV_SIMD256
TheTest
<
v_uint8
>
()
.
test_pack
<
9
>
().
test_pack
<
10
>
().
test_pack
<
13
>
().
test_pack
<
15
>
()
.
test_pack_u
<
9
>
().
test_pack_u
<
10
>
().
test_pack_u
<
13
>
().
test_pack_u
<
15
>
()
.
test_extract
<
16
>
().
test_extract
<
17
>
().
test_extract
<
23
>
().
test_extract
<
31
>
()
.
test_rotate
<
16
>
().
test_rotate
<
17
>
().
test_rotate
<
23
>
().
test_rotate
<
31
>
()
;
#endif
}
void
test_hal_intrin_int8
()
{
TheTest
<
v_int8
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_expand_q
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_cmp
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_abs
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
8
>
().
test_extract
<
15
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
8
>
().
test_rotate
<
15
>
()
;
}
//============= 16-bit integer =====================================================================
void
test_hal_intrin_uint16
()
{
TheTest
<
v_uint16
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_pack_u
<
1
>
().
test_pack_u
<
2
>
().
test_pack_u
<
7
>
().
test_pack_u
<
16
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
4
>
().
test_extract
<
7
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
4
>
().
test_rotate
<
7
>
()
;
}
void
test_hal_intrin_int16
()
{
TheTest
<
v_int16
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_addsub_wrap
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_dot_prod
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_abs
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
4
>
().
test_extract
<
7
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
4
>
().
test_rotate
<
7
>
()
;
}
//============= 32-bit integer =====================================================================
void
test_hal_intrin_uint32
()
{
TheTest
<
v_uint32
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_mul
()
.
test_mul_expand
()
.
test_cmp
()
.
test_shift
<
1
>
()
.
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
.
test_transpose
()
;
}
void
test_hal_intrin_int32
()
{
TheTest
<
v_int32
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_expand
()
.
test_addsub
()
.
test_mul
()
.
test_abs
()
.
test_cmp
()
.
test_popcount
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_min_max
()
.
test_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_unpack
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
.
test_float_cvt32
()
.
test_float_cvt64
()
.
test_transpose
()
;
}
//============= 64-bit integer =====================================================================
void
test_hal_intrin_uint64
()
{
TheTest
<
v_uint64
>
()
.
test_loadstore
()
.
test_addsub
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
}
void
test_hal_intrin_int64
()
{
TheTest
<
v_int64
>
()
.
test_loadstore
()
.
test_addsub
()
.
test_shift
<
1
>
().
test_shift
<
8
>
()
.
test_logic
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
}
//============= Floating point =====================================================================
void
test_hal_intrin_float32
()
{
TheTest
<
v_float32
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_interleave_2channel
()
.
test_addsub
()
.
test_mul
()
.
test_div
()
.
test_cmp
()
.
test_sqrt_abs
()
.
test_min_max
()
.
test_float_absdiff
()
.
test_reduce
()
.
test_mask
()
.
test_unpack
()
.
test_float_math
()
.
test_float_cvt64
()
.
test_matmul
()
.
test_transpose
()
.
test_reduce_sum4
()
.
test_extract
<
0
>
().
test_extract
<
1
>
().
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
().
test_rotate
<
2
>
().
test_rotate
<
3
>
()
;
#if CV_SIMD256
TheTest
<
v_float32
>
()
.
test_extract
<
4
>
().
test_extract
<
5
>
().
test_extract
<
6
>
().
test_extract
<
7
>
()
.
test_rotate
<
4
>
().
test_rotate
<
5
>
().
test_rotate
<
6
>
().
test_rotate
<
7
>
()
;
#endif
}
void
test_hal_intrin_float64
()
{
#if CV_SIMD_64F
TheTest
<
v_float64
>
()
.
test_loadstore
()
.
test_addsub
()
.
test_mul
()
.
test_div
()
.
test_cmp
()
.
test_sqrt_abs
()
.
test_min_max
()
.
test_float_absdiff
()
.
test_mask
()
.
test_unpack
()
.
test_float_math
()
.
test_float_cvt32
()
.
test_extract
<
0
>
().
test_extract
<
1
>
()
.
test_rotate
<
0
>
().
test_rotate
<
1
>
()
;
#if CV_SIMD256
TheTest
<
v_float64
>
()
.
test_extract
<
2
>
().
test_extract
<
3
>
()
.
test_rotate
<
2
>
().
test_rotate
<
3
>
()
;
#endif //CV_SIMD256
#endif
}
#if CV_FP16 && CV_SIMD_WIDTH > 16
void
test_hal_intrin_float16
()
{
TheTest
<
v_float16
>
()
.
test_loadstore_fp16
()
.
test_float_cvt_fp16
()
;
}
#endif
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
CV_CPU_OPTIMIZATION_NAMESPACE_END
}}
//namespace
\ No newline at end of file
modules/core/test/test_intrin_utils.hpp
View file @
6499263b
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment