Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
18d10d6b
Commit
18d10d6b
authored
Apr 22, 2019
by
Vitaly Tuzov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed v_reduce_sad intrinsics implementation and added tests
parent
5c0a98cf
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
5 deletions
+26
-5
intrin_avx.hpp
modules/core/include/opencv2/core/hal/intrin_avx.hpp
+6
-2
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+4
-3
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+16
-0
No files found.
modules/core/include/opencv2/core/hal/intrin_avx.hpp
View file @
18d10d6b
...
@@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
...
@@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
inline
unsigned
v_reduce_sad
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_uint8x32
&
a
,
const
v_uint8x32
&
b
)
{
{
return
(
unsigned
)
_v_cvtsi256_si32
(
_mm256_sad_epu8
(
a
.
val
,
b
.
val
));
__m256i
half
=
_mm256_sad_epu8
(
a
.
val
,
b
.
val
);
__m128i
quarter
=
_mm_add_epi32
(
_v256_extract_low
(
half
),
_v256_extract_high
(
half
));
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_add_epi32
(
quarter
,
_mm_unpackhi_epi64
(
quarter
,
quarter
)));
}
}
inline
unsigned
v_reduce_sad
(
const
v_int8x32
&
a
,
const
v_int8x32
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_int8x32
&
a
,
const
v_int8x32
&
b
)
{
{
__m256i
half
=
_mm256_set1_epi8
(
0x7f
);
__m256i
half
=
_mm256_set1_epi8
(
0x7f
);
return
(
unsigned
)
_v_cvtsi256_si32
(
_mm256_sad_epu8
(
_mm256_add_epi8
(
a
.
val
,
half
),
_mm256_add_epi8
(
b
.
val
,
half
)));
half
=
_mm256_sad_epu8
(
_mm256_add_epi8
(
a
.
val
,
half
),
_mm256_add_epi8
(
b
.
val
,
half
));
__m128i
quarter
=
_mm_add_epi32
(
_v256_extract_low
(
half
),
_v256_extract_high
(
half
));
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_add_epi32
(
quarter
,
_mm_unpackhi_epi64
(
quarter
,
quarter
)));
}
}
inline
unsigned
v_reduce_sad
(
const
v_uint16x16
&
a
,
const
v_uint16x16
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_uint16x16
&
a
,
const
v_uint16x16
&
b
)
{
{
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
18d10d6b
...
@@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
...
@@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
inline
unsigned
v_reduce_sad
(
const
v_uint8x16
&
a
,
const
v_uint8x16
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_uint8x16
&
a
,
const
v_uint8x16
&
b
)
{
{
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_sad_epu8
(
a
.
val
,
b
.
val
));
__m128i
half
=
_mm_sad_epu8
(
a
.
val
,
b
.
val
);
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_add_epi32
(
half
,
_mm_unpackhi_epi64
(
half
,
half
)));
}
}
inline
unsigned
v_reduce_sad
(
const
v_int8x16
&
a
,
const
v_int8x16
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_int8x16
&
a
,
const
v_int8x16
&
b
)
{
{
__m128i
half
=
_mm_set1_epi8
(
0x7f
);
__m128i
half
=
_mm_set1_epi8
(
0x7f
);
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_sad_epu8
(
_mm_add_epi8
(
a
.
val
,
half
),
half
=
_mm_sad_epu8
(
_mm_add_epi8
(
a
.
val
,
half
),
_mm_add_epi8
(
b
.
val
,
half
));
_mm_add_epi8
(
b
.
val
,
half
)));
return
(
unsigned
)
_mm_cvtsi128_si32
(
_mm_add_epi32
(
half
,
_mm_unpackhi_epi64
(
half
,
half
)));
}
}
inline
unsigned
v_reduce_sad
(
const
v_uint16x8
&
a
,
const
v_uint16x8
&
b
)
inline
unsigned
v_reduce_sad
(
const
v_uint16x8
&
a
,
const
v_uint16x8
&
b
)
{
{
...
...
modules/core/test/test_intrin_utils.hpp
View file @
18d10d6b
...
@@ -770,6 +770,15 @@ template<typename R> struct TheTest
...
@@ -770,6 +770,15 @@ template<typename R> struct TheTest
return
*
this
;
return
*
this
;
}
}
TheTest
&
test_reduce_sad
()
{
Data
<
R
>
dataA
,
dataB
(
R
::
nlanes
/
2
);
R
a
=
dataA
;
R
b
=
dataB
;
EXPECT_EQ
((
unsigned
)(
R
::
nlanes
*
R
::
nlanes
/
4
),
v_reduce_sad
(
a
,
b
));
return
*
this
;
}
TheTest
&
test_mask
()
TheTest
&
test_mask
()
{
{
typedef
typename
V_RegTraits
<
R
>::
int_reg
int_reg
;
typedef
typename
V_RegTraits
<
R
>::
int_reg
int_reg
;
...
@@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
...
@@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
.
test_logic
()
.
test_logic
()
.
test_min_max
()
.
test_min_max
()
.
test_absdiff
()
.
test_absdiff
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_popcount
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
...
@@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
...
@@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
.
test_absdiff
()
.
test_absdiff
()
.
test_absdiffs
()
.
test_absdiffs
()
.
test_abs
()
.
test_abs
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_popcount
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
3
>
().
test_pack
<
8
>
()
...
@@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
...
@@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
.
test_min_max
()
.
test_min_max
()
.
test_absdiff
()
.
test_absdiff
()
.
test_reduce
()
.
test_reduce
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_popcount
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
...
@@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
...
@@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
.
test_absdiffs
()
.
test_absdiffs
()
.
test_abs
()
.
test_abs
()
.
test_reduce
()
.
test_reduce
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_popcount
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
7
>
().
test_pack
<
16
>
()
...
@@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
...
@@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
.
test_min_max
()
.
test_min_max
()
.
test_absdiff
()
.
test_absdiff
()
.
test_reduce
()
.
test_reduce
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_popcount
()
.
test_popcount
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
...
@@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
...
@@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
.
test_min_max
()
.
test_min_max
()
.
test_absdiff
()
.
test_absdiff
()
.
test_reduce
()
.
test_reduce
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_pack
<
1
>
().
test_pack
<
2
>
().
test_pack
<
15
>
().
test_pack
<
32
>
()
.
test_unpack
()
.
test_unpack
()
...
@@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
...
@@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
.
test_min_max
()
.
test_min_max
()
.
test_float_absdiff
()
.
test_float_absdiff
()
.
test_reduce
()
.
test_reduce
()
.
test_reduce_sad
()
.
test_mask
()
.
test_mask
()
.
test_unpack
()
.
test_unpack
()
.
test_float_math
()
.
test_float_math
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment