Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
909a2557
Commit
909a2557
authored
Apr 24, 2018
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #11332 from alalek:v_select_x86
parents
5c57e6bd
65726e42
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
50 additions
and
35 deletions
+50
-35
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+11
-8
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+26
-1
perf_mat.cpp
modules/core/perf/perf_mat.cpp
+2
-2
copy.cpp
modules/core/src/copy.cpp
+2
-19
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+9
-5
No files found.
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
909a2557
...
...
@@ -1042,13 +1042,16 @@ template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
return
false
;
}
/** @brief
Bitwise select
/** @brief
Per-element select (blend operation)
Return value will be built by combining values a and b using the following scheme:
If the i-th bit in _mask_ is 1
select i-th bit from _a_
else
select i-th bit from _b_ */
Return value will be built by combining values _a_ and _b_ using the following scheme:
result[i] = mask[i] ? a[i] : b[i];
@Note: _mask_ element values are restricted to these values:
- 0: select element from _b_
- 0xff/0xffff/etc: select element from _a_
(fully compatible with bitwise-based operator)
*/
template
<
typename
_Tp
,
int
n
>
inline
v_reg
<
_Tp
,
n
>
v_select
(
const
v_reg
<
_Tp
,
n
>&
mask
,
const
v_reg
<
_Tp
,
n
>&
a
,
const
v_reg
<
_Tp
,
n
>&
b
)
{
...
...
@@ -1058,8 +1061,8 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>&
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
int_type
m
=
Traits
::
reinterpret_int
(
mask
.
s
[
i
]);
c
.
s
[
i
]
=
Traits
::
reinterpret_from_int
((
Traits
::
reinterpret_int
(
a
.
s
[
i
])
&
m
)
|
(
Traits
::
reinterpret_int
(
b
.
s
[
i
])
&
~
m
))
;
CV_DbgAssert
(
m
==
0
||
m
==
(
~
(
int_type
)
0
));
// restrict mask values: 0 or 0xff/0xffff/etc
c
.
s
[
i
]
=
m
?
a
.
s
[
i
]
:
b
.
s
[
i
]
;
}
return
c
;
}
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
909a2557
...
...
@@ -438,10 +438,14 @@ void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
}
// b
it
-wise "mask ? a : b"
// b
yte
-wise "mask ? a : b"
inline
__m128i
v_select_si128
(
__m128i
mask
,
__m128i
a
,
__m128i
b
)
{
#if CV_SSE4_1
return
_mm_blendv_epi8
(
b
,
a
,
mask
);
#else
return
_mm_xor_si128
(
b
,
_mm_and_si128
(
_mm_xor_si128
(
a
,
b
),
mask
));
#endif
}
inline
v_uint16x8
v_pack
(
const
v_uint32x4
&
a
,
const
v_uint32x4
&
b
)
...
...
@@ -1403,6 +1407,26 @@ OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND,
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS
(
v_float32x4
,
ps
,
OPENCV_HAL_NOP
,
OPENCV_HAL_1ST
,
15
,
15
)
OPENCV_HAL_IMPL_SSE_CHECK_SIGNS
(
v_float64x2
,
pd
,
OPENCV_HAL_NOP
,
OPENCV_HAL_1ST
,
3
,
3
)
#if CV_SSE4_1
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
return _Tpvec(cast_ret(_mm_blendv_##suffix(cast(b.val), cast(a.val), cast(mask.val)))); \
}
OPENCV_HAL_IMPL_SSE_SELECT
(
v_uint8x16
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
epi8
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int8x16
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
epi8
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_uint16x8
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
epi8
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int16x8
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
epi8
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_uint32x4
,
_mm_castps_si128
,
_mm_castsi128_ps
,
ps
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_int32x4
,
_mm_castps_si128
,
_mm_castsi128_ps
,
ps
)
// OPENCV_HAL_IMPL_SSE_SELECT(v_uint64x2, TBD, TBD, pd)
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, TBD, TBD, ps)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float32x4
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
ps
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float64x2
,
OPENCV_HAL_NOP
,
OPENCV_HAL_NOP
,
pd
)
#else // CV_SSE4_1
#define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, suffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
...
...
@@ -1419,6 +1443,7 @@ OPENCV_HAL_IMPL_SSE_SELECT(v_int32x4, si128)
// OPENCV_HAL_IMPL_SSE_SELECT(v_int64x2, si128)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float32x4
,
ps
)
OPENCV_HAL_IMPL_SSE_SELECT
(
v_float64x2
,
pd
)
#endif
#define OPENCV_HAL_IMPL_SSE_EXPAND(_Tpuvec, _Tpwuvec, _Tpu, _Tpsvec, _Tpwsvec, _Tps, suffix, wsuffix, shift) \
inline void v_expand(const _Tpuvec& a, _Tpwuvec& b0, _Tpwuvec& b1) \
...
...
modules/core/perf/perf_mat.cpp
View file @
909a2557
...
...
@@ -96,8 +96,8 @@ PERF_TEST_P(Size_MatType, Mat_Clone_Roi,
}
PERF_TEST_P
(
Size_MatType
,
Mat_CopyToWithMask
,
testing
::
Combine
(
testing
::
Values
(
TYPICAL_MAT_SIZES
),
testing
::
Values
(
CV_8UC1
,
CV_8UC2
))
testing
::
Combine
(
testing
::
Values
(
::
perf
::
sz1080p
,
::
perf
::
szODD
),
testing
::
Values
(
CV_8UC1
,
CV_8UC2
,
CV_8UC3
,
CV_16UC1
,
CV_32SC1
,
CV_32FC4
))
)
{
const
Size_MatType_t
params
=
GetParam
();
...
...
modules/core/src/copy.cpp
View file @
909a2557
...
...
@@ -91,11 +91,7 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
uchar
*
dst
=
(
uchar
*
)
_dst
;
int
x
=
0
;
#if CV_SIMD128
if
(
hasSIMD128
()
#if CV_SSE4_2
&&
USE_SSE4_2
#endif
)
{
{
v_uint8x16
v_zero
=
v_setzero_u8
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -104,11 +100,7 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
v_dst
=
v_load
(
dst
+
x
),
v_nmask
=
v_load
(
mask
+
x
)
==
v_zero
;
#if CV_SSE4_2
v_dst
=
v_uint8x16
(
_mm_blendv_epi8
(
v_src
.
val
,
v_dst
.
val
,
v_nmask
.
val
));
#else
v_dst
=
v_select
(
v_nmask
,
v_dst
,
v_src
);
#endif
v_store
(
dst
+
x
,
v_dst
);
}
}
...
...
@@ -130,11 +122,7 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
ushort
*
dst
=
(
ushort
*
)
_dst
;
int
x
=
0
;
#if CV_SIMD128
if
(
hasSIMD128
()
#if CV_SSE4_2
&&
USE_SSE4_2
#endif
)
{
{
v_uint8x16
v_zero
=
v_setzero_u8
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
...
...
@@ -146,13 +134,8 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
v_uint8x16
v_nmask
=
v_load
(
mask
+
x
)
==
v_zero
;
v_zip
(
v_nmask
,
v_nmask
,
v_nmask1
,
v_nmask2
);
#if CV_SSE4_2
v_dst1
=
v_uint16x8
(
_mm_blendv_epi8
(
v_src1
.
val
,
v_dst1
.
val
,
v_nmask1
.
val
));
v_dst2
=
v_uint16x8
(
_mm_blendv_epi8
(
v_src2
.
val
,
v_dst2
.
val
,
v_nmask2
.
val
));
#else
v_dst1
=
v_select
(
v_reinterpret_as_u16
(
v_nmask1
),
v_dst1
,
v_src1
);
v_dst2
=
v_select
(
v_reinterpret_as_u16
(
v_nmask2
),
v_dst2
,
v_src2
);
#endif
v_store
(
dst
+
x
,
v_dst1
);
v_store
(
dst
+
x
+
8
,
v_dst2
);
}
...
...
modules/core/test/test_intrin_utils.hpp
View file @
909a2557
...
...
@@ -657,8 +657,15 @@ template<typename R> struct TheTest
TheTest
&
test_mask
()
{
Data
<
R
>
dataA
,
dataB
,
dataC
,
dataD
(
1
),
dataE
(
2
);
typedef
V_TypeTraits
<
LaneType
>
Traits
;
typedef
typename
Traits
::
int_type
int_type
;
Data
<
R
>
dataA
,
dataB
(
0
),
dataC
,
dataD
(
1
),
dataE
(
2
);
dataA
[
1
]
*=
(
LaneType
)
-
1
;
const
LaneType
mask_one
=
Traits
::
reinterpret_from_int
(
~
(
typename
Traits
::
uint_type
)(
0
));
dataB
[
1
]
=
mask_one
;
dataB
[
R
::
nlanes
/
2
]
=
mask_one
;
dataB
[
R
::
nlanes
-
1
]
=
mask_one
;
dataC
*=
(
LaneType
)
-
1
;
R
a
=
dataA
,
b
=
dataB
,
c
=
dataC
,
d
=
dataD
,
e
=
dataE
;
...
...
@@ -670,12 +677,9 @@ template<typename R> struct TheTest
EXPECT_EQ
(
true
,
v_check_all
(
c
));
EXPECT_EQ
(
true
,
v_check_any
(
a
));
EXPECT_EQ
(
fals
e
,
v_check_any
(
b
));
EXPECT_EQ
(
tru
e
,
v_check_any
(
b
));
EXPECT_EQ
(
true
,
v_check_any
(
c
));
typedef
V_TypeTraits
<
LaneType
>
Traits
;
typedef
typename
Traits
::
int_type
int_type
;
R
f
=
v_select
(
b
,
d
,
e
);
Data
<
R
>
resF
=
f
;
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment