Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
53661d55
Commit
53661d55
authored
7 years ago
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #10406 from seiko2plus:coreUnvintrinCopy
parents
120b7a61
fd0ac962
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
89 additions
and
47 deletions
+89
-47
perf_mat.cpp
modules/core/perf/perf_mat.cpp
+42
-1
copy.cpp
modules/core/src/copy.cpp
+47
-46
No files found.
modules/core/perf/perf_mat.cpp
View file @
53661d55
...
...
@@ -55,7 +55,7 @@ PERF_TEST_P(Size_MatType, Mat_Clone,
Size
size
=
get
<
0
>
(
GetParam
());
int
type
=
get
<
1
>
(
GetParam
());
Mat
source
(
size
.
height
,
size
.
width
,
type
);
Mat
destination
(
size
.
height
,
size
.
width
,
type
);
;
Mat
destination
(
size
.
height
,
size
.
width
,
type
);
declare
.
in
(
source
,
WARMUP_RNG
).
out
(
destination
);
...
...
@@ -95,6 +95,47 @@ PERF_TEST_P(Size_MatType, Mat_Clone_Roi,
SANITY_CHECK
(
destination
,
1
);
}
// Performance test: masked Mat::copyTo for every combination of
// TYPICAL_MAT_SIZES and the element types CV_8UC1 / CV_8UC2.
PERF_TEST_P(Size_MatType, Mat_CopyToWithMask,
            testing::Combine(testing::Values(TYPICAL_MAT_SIZES),
                             testing::Values(CV_8UC1, CV_8UC2)))
{
    const Size_MatType_t testArgs = GetParam();
    const Size matSize = get<0>(testArgs);
    const int matType = get<1>(testArgs);

    // Source and destination share size and type; the mask is always
    // single-channel 8-bit.
    Mat src(matSize, matType);
    Mat dst(matSize, matType);
    Mat mask(matSize, CV_8UC1);

    // src and mask are registered as inputs (WARMUP_RNG), dst as the output.
    declare.in(src, mask, WARMUP_RNG).out(dst);

    // Measured region: only the masked copy itself.
    TEST_CYCLE()
    {
        src.copyTo(dst, mask);
    }

    SANITY_CHECK(dst);
}
// Performance test: masked Mat::setTo for every combination of
// TYPICAL_MAT_SIZES and the element types CV_8UC1 / CV_8UC2.
PERF_TEST_P(Size_MatType, Mat_SetToWithMask,
            testing::Combine(testing::Values(TYPICAL_MAT_SIZES),
                             testing::Values(CV_8UC1, CV_8UC2)))
{
    const Size_MatType_t testArgs = GetParam();
    const Size matSize = get<0>(testArgs);
    const int matType = get<1>(testArgs);

    // Value written into every channel of the selected elements.
    const Scalar fillValue = Scalar::all(27);

    // The matrix is modified in place; the mask is always single-channel 8-bit.
    Mat src(matSize, matType);
    Mat mask(matSize, CV_8UC1);

    // src is registered both as an input (together with mask, WARMUP_RNG)
    // and as the output, because setTo writes into it.
    declare.in(src, mask, WARMUP_RNG).out(src);

    // Measured region: only the masked fill itself.
    TEST_CYCLE()
    {
        src.setTo(fillValue, mask);
    }

    SANITY_CHECK(src);
}
///////////// Transform ////////////////////////
PERF_TEST_P
(
Size_MatType
,
Mat_Transform
,
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/copy.cpp
View file @
53661d55
...
...
@@ -90,28 +90,27 @@ copyMask_<uchar>(const uchar* _src, size_t sstep, const uchar* mask, size_t mste
const
uchar
*
src
=
(
const
uchar
*
)
_src
;
uchar
*
dst
=
(
uchar
*
)
_dst
;
int
x
=
0
;
#if CV_SSE4_2
if
(
USE_SSE4_2
)
//
{
__m128i
zero
=
_mm_setzero_si128
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
const
__m128i
rSrc
=
_mm_lddqu_si128
((
const
__m128i
*
)(
src
+
x
));
__m128i
_mask
=
_mm_lddqu_si128
((
const
__m128i
*
)(
mask
+
x
));
__m128i
rDst
=
_mm_lddqu_si128
((
__m128i
*
)(
dst
+
x
));
__m128i
_negMask
=
_mm_cmpeq_epi8
(
_mask
,
zero
);
rDst
=
_mm_blendv_epi8
(
rSrc
,
rDst
,
_negMask
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
rDst
);
}
}
#elif CV_NEON
uint8x16_t
v_one
=
vdupq_n_u8
(
1
);
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
uint8x16_t
v_mask
=
vcgeq_u8
(
vld1q_u8
(
mask
+
x
),
v_one
);
uint8x16_t
v_dst
=
vld1q_u8
(
dst
+
x
),
v_src
=
vld1q_u8
(
src
+
x
);
vst1q_u8
(
dst
+
x
,
vbslq_u8
(
v_mask
,
v_src
,
v_dst
));
#if CV_SIMD128
if
(
hasSIMD128
()
#if CV_SSE4_2
&&
USE_SSE4_2
#endif
)
{
v_uint8x16
v_zero
=
v_setzero_u8
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
v_uint8x16
v_src
=
v_load
(
src
+
x
),
v_dst
=
v_load
(
dst
+
x
),
v_nmask
=
v_load
(
mask
+
x
)
==
v_zero
;
#if CV_SSE4_2
v_dst
=
v_uint8x16
(
_mm_blendv_epi8
(
v_src
.
val
,
v_dst
.
val
,
v_nmask
.
val
));
#else
v_dst
=
v_select
(
v_nmask
,
v_dst
,
v_src
);
#endif
v_store
(
dst
+
x
,
v_dst
);
}
}
#endif
for
(
;
x
<
size
.
width
;
x
++
)
...
...
@@ -130,31 +129,33 @@ copyMask_<ushort>(const uchar* _src, size_t sstep, const uchar* mask, size_t mst
const
ushort
*
src
=
(
const
ushort
*
)
_src
;
ushort
*
dst
=
(
ushort
*
)
_dst
;
int
x
=
0
;
#if CV_SSE4_2
if
(
USE_SSE4_2
)
//
{
__m128i
zero
=
_mm_setzero_si128
();
for
(
;
x
<=
size
.
width
-
8
;
x
+=
8
)
#if CV_SIMD128
if
(
hasSIMD128
()
#if CV_SSE4_2
&&
USE_SSE4_2
#endif
)
{
v_uint8x16
v_zero
=
v_setzero_u8
();
for
(
;
x
<=
size
.
width
-
16
;
x
+=
16
)
{
const
__m128i
rSrc
=
_mm_lddqu_si128
((
const
__m128i
*
)(
src
+
x
));
__m128i
_mask
=
_mm_loadl_epi64
((
const
__m128i
*
)(
mask
+
x
));
_mask
=
_mm_unpacklo_epi8
(
_mask
,
_mask
);
__m128i
rDst
=
_mm_lddqu_si128
((
const
__m128i
*
)(
dst
+
x
));
__m128i
_negMask
=
_mm_cmpeq_epi8
(
_mask
,
zero
);
rDst
=
_mm_blendv_epi8
(
rSrc
,
rDst
,
_negMask
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
x
),
rDst
);
}
}
#elif CV_NEON
uint8x8_t
v_one
=
vdup_n_u8
(
1
);
for
(
;
x
<=
size
.
width
-
8
;
x
+=
8
)
{
uint8x8_t
v_mask
=
vcge_u8
(
vld1_u8
(
mask
+
x
),
v_one
);
uint8x8x2_t
v_mask2
=
vzip_u8
(
v_mask
,
v_mask
);
uint16x8_t
v_mask_res
=
vreinterpretq_u16_u8
(
vcombine_u8
(
v_mask2
.
val
[
0
],
v_mask2
.
val
[
1
]));
uint16x8_t
v_src
=
vld1q_u16
(
src
+
x
),
v_dst
=
vld1q_u16
(
dst
+
x
);
vst1q_u16
(
dst
+
x
,
vbslq_u16
(
v_mask_res
,
v_src
,
v_dst
));
v_uint16x8
v_src1
=
v_load
(
src
+
x
),
v_src2
=
v_load
(
src
+
x
+
8
),
v_dst1
=
v_load
(
dst
+
x
),
v_dst2
=
v_load
(
dst
+
x
+
8
);
v_uint8x16
v_nmask1
,
v_nmask2
;
v_uint8x16
v_nmask
=
v_load
(
mask
+
x
)
==
v_zero
;
v_zip
(
v_nmask
,
v_nmask
,
v_nmask1
,
v_nmask2
);
#if CV_SSE4_2
v_dst1
=
v_uint16x8
(
_mm_blendv_epi8
(
v_src1
.
val
,
v_dst1
.
val
,
v_nmask1
.
val
));
v_dst2
=
v_uint16x8
(
_mm_blendv_epi8
(
v_src2
.
val
,
v_dst2
.
val
,
v_nmask2
.
val
));
#else
v_dst1
=
v_select
(
v_reinterpret_as_u16
(
v_nmask1
),
v_dst1
,
v_src1
);
v_dst2
=
v_select
(
v_reinterpret_as_u16
(
v_nmask2
),
v_dst2
,
v_src2
);
#endif
v_store
(
dst
+
x
,
v_dst1
);
v_store
(
dst
+
x
+
8
,
v_dst2
);
}
}
#endif
for
(
;
x
<
size
.
width
;
x
++
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment