submodule / opencv / Commits

Commit a202dc9a, authored Jan 15, 2019 by Vitaly Tuzov
threshold() reworked to use wide universal intrinsics
parent 0e9c90a0
Showing 1 changed file with 286 additions and 166 deletions.

modules/imgproc/src/thresh.cpp  (+286, -166)
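The change replaces the fixed-width SIMD128 branches (v_uint8x16, v_setall_u8, v_load, hand-coded 16/8/4/2-element steps) with OpenCV's width-agnostic "wide" universal intrinsics (v_uint8, vx_setall_u8, vx_load, v_TYPE::nlanes), so the same source compiles to 128-, 256- or 512-bit vectors depending on the build. As orientation before the diff, here is a minimal sketch of the pattern the new code follows for an 8-bit THRESH_BINARY row; it is an illustration written for this page (threshBinaryRow8u is a hypothetical name, not a function from this commit) and assumes opencv2/core/hal/intrin.hpp is available:

#include <opencv2/core/hal/intrin.hpp>  // wide universal intrinsics: CV_SIMD, v_uint8, vx_*
using namespace cv;

// Hypothetical illustration (not code from this commit):
// dst[j] = src[j] > thresh ? maxval : 0 for one row of 'len' 8-bit pixels.
static void threshBinaryRow8u(const uchar* src, uchar* dst, int len, uchar thresh, uchar maxval)
{
    int j = 0;
#if CV_SIMD
    v_uint8 thresh_u = vx_setall_u8(thresh);    // threshold broadcast to every lane
    v_uint8 maxval16 = vx_setall_u8(maxval);    // maxval broadcast to every lane
    // v_uint8::nlanes is 16 on SSE2/NEON builds and grows with wider ISAs,
    // so the step adapts to the vector width chosen at compile time.
    for( ; j <= len - v_uint8::nlanes; j += v_uint8::nlanes )
    {
        v_uint8 v0 = vx_load(src + j);          // load one full vector of pixels
        v0 = thresh_u < v0;                     // per-lane mask: all ones where src > thresh
        v0 = v0 & maxval16;                     // mask & maxval -> maxval or 0
        v_store(dst + j, v0);
    }
#endif
    for( ; j < len; j++ )                       // scalar tail for the leftover pixels
        dst[j] = src[j] > thresh ? maxval : 0;
}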
@@ -195,22 +195,19 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     int j = 0;
     const uchar* src = _src.ptr();
     uchar* dst = _dst.ptr();
-#if CV_SIMD128
-    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
-    if( useSIMD )
-    {
-        v_uint8x16 thresh_u = v_setall_u8( thresh );
-        v_uint8x16 maxval16 = v_setall_u8( maxval );
+#if CV_SIMD
+    v_uint8 thresh_u = vx_setall_u8( thresh );
+    v_uint8 maxval16 = vx_setall_u8( maxval );

     switch( type )
     {
     case THRESH_BINARY:
         for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes )
             {
-                v_uint8x16 v0;
-                v0 = v_load( src + j );
+                v_uint8 v0;
+                v0 = vx_load( src + j );
                 v0 = thresh_u < v0;
                 v0 = v0 & maxval16;
                 v_store( dst + j, v0 );

@@ -221,10 +218,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     case THRESH_BINARY_INV:
         for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes )
             {
-                v_uint8x16 v0;
-                v0 = v_load( src + j );
+                v_uint8 v0;
+                v0 = vx_load( src + j );
                 v0 = v0 <= thresh_u;
                 v0 = v0 & maxval16;
                 v_store( dst + j, v0 );

@@ -235,10 +232,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     case THRESH_TRUNC:
         for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes )
             {
-                v_uint8x16 v0;
-                v0 = v_load( src + j );
+                v_uint8 v0;
+                v0 = vx_load( src + j );
                 v0 = v0 - ( v0 - thresh_u );
                 v_store( dst + j, v0 );
             }

@@ -248,10 +245,10 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     case THRESH_TOZERO:
         for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes )
             {
-                v_uint8x16 v0;
-                v0 = v_load( src + j );
+                v_uint8 v0;
+                v0 = vx_load( src + j );
                 v0 = ( thresh_u < v0 ) & v0;
                 v_store( dst + j, v0 );
             }

@@ -261,17 +258,16 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
     case THRESH_TOZERO_INV:
         for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - v_uint8::nlanes; j += v_uint8::nlanes )
             {
-                v_uint8x16 v0;
-                v0 = v_load( src + j );
+                v_uint8 v0;
+                v0 = vx_load( src + j );
                 v0 = ( v0 <= thresh_u ) & v0;
                 v_store( dst + j, v0 );
             }
         }
         break;
     }
-    }
 #endif

     int j_scalar = j;
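One detail worth calling out in the thresh_8u hunks above: THRESH_TRUNC keeps the expression v0 - (v0 - thresh_u) rather than calling v_min. The small sketch below spells out why that works; it relies on the saturating behaviour of unsigned 8-bit subtraction in the universal intrinsics, which is stated here as an assumption rather than something spelled out in the diff (same header and namespace assumptions as the first sketch, truncToThresh is a hypothetical name):

#if CV_SIMD
// Sketch: why  v0 - (v0 - thresh_u)  equals per-lane min(v0, thresh_u) for v_uint8,
// assuming '-' on v_uint8 is the saturating subtraction of the universal intrinsics.
static inline v_uint8 truncToThresh(const v_uint8& v0, const v_uint8& thresh_u)
{
    // where v0 <= thresh_u :  v0 - thresh_u saturates to 0, so the result is v0
    // where v0 >  thresh_u :  v0 - thresh_u is exact,       so the result is thresh_u
    return v0 - (v0 - thresh_u);
}
#endif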
@@ -362,30 +358,35 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
     const ushort* src = _src.ptr<ushort>();
     ushort* dst = _dst.ptr<ushort>();
-#if CV_SIMD128
-    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
-    if( useSIMD )
-    {
+#if CV_SIMD
     int i, j;
-    v_uint16x8 thresh_u = v_setall_u16( thresh );
-    v_uint16x8 maxval16 = v_setall_u16( maxval );
+    v_uint16 thresh_u = vx_setall_u16( thresh );
+    v_uint16 maxval16 = vx_setall_u16( maxval );

     switch( type )
     {
     case THRESH_BINARY:
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
-            for( j = 0; j <= roi.width - 16; j += 16 )
+            for( j = 0; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
             {
-                v_uint16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_uint16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_uint16::nlanes );
                 v0 = thresh_u < v0;
                 v1 = thresh_u < v1;
                 v0 = v0 & maxval16;
                 v1 = v1 & maxval16;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_uint16::nlanes, v1 );
             }
+            if( j <= roi.width - v_uint16::nlanes )
+            {
+                v_uint16 v0 = vx_load( src + j );
+                v0 = thresh_u < v0;
+                v0 = v0 & maxval16;
+                v_store( dst + j, v0 );
+                j += v_uint16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -397,17 +398,25 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
             {
-                v_uint16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_uint16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_uint16::nlanes );
                 v0 = v0 <= thresh_u;
                 v1 = v1 <= thresh_u;
                 v0 = v0 & maxval16;
                 v1 = v1 & maxval16;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_uint16::nlanes, v1 );
             }
+            if( j <= roi.width - v_uint16::nlanes )
+            {
+                v_uint16 v0 = vx_load( src + j );
+                v0 = v0 <= thresh_u;
+                v0 = v0 & maxval16;
+                v_store( dst + j, v0 );
+                j += v_uint16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -419,15 +428,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
             {
-                v_uint16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_uint16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_uint16::nlanes );
                 v0 = v_min( v0, thresh_u );
                 v1 = v_min( v1, thresh_u );
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_uint16::nlanes, v1 );
             }
+            if( j <= roi.width - v_uint16::nlanes )
+            {
+                v_uint16 v0 = vx_load( src + j );
+                v0 = v_min( v0, thresh_u );
+                v_store( dst + j, v0 );
+                j += v_uint16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -439,15 +455,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
             {
-                v_uint16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_uint16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_uint16::nlanes );
                 v0 = ( thresh_u < v0 ) & v0;
                 v1 = ( thresh_u < v1 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_uint16::nlanes, v1 );
             }
+            if( j <= roi.width - v_uint16::nlanes )
+            {
+                v_uint16 v0 = vx_load( src + j );
+                v0 = ( thresh_u < v0 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_uint16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -459,15 +482,22 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
             {
-                v_uint16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_uint16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_uint16::nlanes );
                 v0 = ( v0 <= thresh_u ) & v0;
                 v1 = ( v1 <= thresh_u ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_uint16::nlanes, v1 );
             }
+            if( j <= roi.width - v_uint16::nlanes )
+            {
+                v_uint16 v0 = vx_load( src + j );
+                v0 = ( v0 <= thresh_u ) & v0;
+                v_store( dst + j, v0 );
+                j += v_uint16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -475,12 +505,9 @@ thresh_16u(const Mat& _src, Mat& _dst, ushort thresh, ushort maxval, int type)
             }
             break;
         }
-    }
-    else
-#endif
-    {
+#else
     threshGeneric<ushort>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
-    }
+#endif
 }

 static void
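Compared with thresh_8u, the 16-bit hunks above (and the 16s/32f/64f ones that follow) also add a middle step: after the loop that consumes two vectors per iteration, a single-vector block mops up at most nlanes more pixels before the scalar loop takes the rest. A hedged sketch of that three-stage shape, using the hypothetical name processRow16u and the same header assumptions as the first sketch:

#if CV_SIMD
// Hypothetical illustration of the unroll / single-vector / scalar structure
// used by the new THRESH_BINARY code for ushort rows (not a function from this commit).
static void processRow16u(const ushort* src, ushort* dst, int width,
                          ushort thresh, ushort maxval)
{
    v_uint16 thresh_u = vx_setall_u16(thresh);
    v_uint16 maxval16 = vx_setall_u16(maxval);
    int j = 0;
    // Stage 1: two independent vectors per iteration.
    for( ; j <= width - 2*v_uint16::nlanes; j += 2*v_uint16::nlanes )
    {
        v_uint16 v0 = vx_load(src + j);
        v_uint16 v1 = vx_load(src + j + v_uint16::nlanes);
        v_store(dst + j,                    (thresh_u < v0) & maxval16);
        v_store(dst + j + v_uint16::nlanes, (thresh_u < v1) & maxval16);
    }
    // Stage 2: at most one more full vector.
    if( j <= width - v_uint16::nlanes )
    {
        v_uint16 v0 = vx_load(src + j);
        v_store(dst + j, (thresh_u < v0) & maxval16);
        j += v_uint16::nlanes;
    }
    // Stage 3: scalar tail for the last width % nlanes pixels.
    for( ; j < width; j++ )
        dst[j] = (ushort)(src[j] > thresh ? maxval : 0);
}
#endif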
@@ -556,13 +583,10 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
     }
 #endif

-#if CV_SIMD128
-    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
-    if( useSIMD )
-    {
+#if CV_SIMD
     int i, j;
-    v_int16x8 thresh8 = v_setall_s16( thresh );
-    v_int16x8 maxval8 = v_setall_s16( maxval );
+    v_int16 thresh8 = vx_setall_s16( thresh );
+    v_int16 maxval8 = vx_setall_s16( maxval );

     switch( type )
     {

@@ -570,17 +594,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
             {
-                v_int16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_int16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_int16::nlanes );
                 v0 = thresh8 < v0;
                 v1 = thresh8 < v1;
                 v0 = v0 & maxval8;
                 v1 = v1 & maxval8;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_int16::nlanes, v1 );
             }
+            if( j <= roi.width - v_int16::nlanes )
+            {
+                v_int16 v0 = vx_load( src + j );
+                v0 = thresh8 < v0;
+                v0 = v0 & maxval8;
+                v_store( dst + j, v0 );
+                j += v_int16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -592,17 +624,25 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
             {
-                v_int16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_int16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_int16::nlanes );
                 v0 = v0 <= thresh8;
                 v1 = v1 <= thresh8;
                 v0 = v0 & maxval8;
                 v1 = v1 & maxval8;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_int16::nlanes, v1 );
             }
+            if( j <= roi.width - v_int16::nlanes )
+            {
+                v_int16 v0 = vx_load( src + j );
+                v0 = v0 <= thresh8;
+                v0 = v0 & maxval8;
+                v_store( dst + j, v0 );
+                j += v_int16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -614,15 +654,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
             {
-                v_int16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_int16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_int16::nlanes );
                 v0 = v_min( v0, thresh8 );
                 v1 = v_min( v1, thresh8 );
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_int16::nlanes, v1 );
             }
+            if( j <= roi.width - v_int16::nlanes )
+            {
+                v_int16 v0 = vx_load( src + j );
+                v0 = v_min( v0, thresh8 );
+                v_store( dst + j, v0 );
+                j += v_int16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -634,15 +681,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
             {
-                v_int16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_int16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_int16::nlanes );
                 v0 = ( thresh8 < v0 ) & v0;
                 v1 = ( thresh8 < v1 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_int16::nlanes, v1 );
             }
+            if( j <= roi.width - v_int16::nlanes )
+            {
+                v_int16 v0 = vx_load( src + j );
+                v0 = ( thresh8 < v0 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_int16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -654,15 +708,22 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 16; j += 16 )
+            for( ; j <= roi.width - 2*v_int16::nlanes; j += 2*v_int16::nlanes )
             {
-                v_int16x8 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 8 );
+                v_int16 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_int16::nlanes );
                 v0 = ( v0 <= thresh8 ) & v0;
                 v1 = ( v1 <= thresh8 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 8, v1 );
+                v_store( dst + j + v_int16::nlanes, v1 );
             }
+            if( j <= roi.width - v_int16::nlanes )
+            {
+                v_int16 v0 = vx_load( src + j );
+                v0 = ( v0 <= thresh8 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_int16::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -672,12 +733,9 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
     default:
         CV_Error( CV_StsBadArg, "" ); return;
     }
-    }
-    else
-#endif
-    {
+#else
     threshGeneric<short>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
-    }
+#endif
 }
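Across all of the element types in this diff, the five threshold modes reduce to the same handful of per-lane formulas; only the broadcast constants change (thresh_u/maxval16, thresh8/maxval8, thresh4/maxval4, thresh2/maxval2). Summarised as comments, with t the broadcast threshold, m the broadcast maxval and v the loaded vector:

// Per-lane operations used by the SIMD branches in this file
// (comparison results are all-ones / all-zeros lane masks):
//   THRESH_BINARY     : dst = (t < v)  & m
//   THRESH_BINARY_INV : dst = (v <= t) & m
//   THRESH_TRUNC      : dst = v_min(v, t)      (8u keeps the saturating v - (v - t) form)
//   THRESH_TOZERO     : dst = (t < v)  & v
//   THRESH_TOZERO_INV : dst = (v <= t) & v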
@@ -736,13 +794,10 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
     }
 #endif

-#if CV_SIMD128
-    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
-    if( useSIMD )
-    {
+#if CV_SIMD
     int i, j;
-    v_float32x4 thresh4 = v_setall_f32( thresh );
-    v_float32x4 maxval4 = v_setall_f32( maxval );
+    v_float32 thresh4 = vx_setall_f32( thresh );
+    v_float32 maxval4 = vx_setall_f32( maxval );

     switch( type )
     {

@@ -750,17 +805,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 8; j += 8 )
+            for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
             {
-                v_float32x4 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 4 );
+                v_float32 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float32::nlanes );
                 v0 = thresh4 < v0;
                 v1 = thresh4 < v1;
                 v0 = v0 & maxval4;
                 v1 = v1 & maxval4;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 4, v1 );
+                v_store( dst + j + v_float32::nlanes, v1 );
             }
+            if( j <= roi.width - v_float32::nlanes )
+            {
+                v_float32 v0 = vx_load( src + j );
+                v0 = thresh4 < v0;
+                v0 = v0 & maxval4;
+                v_store( dst + j, v0 );
+                j += v_float32::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -772,17 +835,25 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 8; j += 8 )
+            for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
             {
-                v_float32x4 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 4 );
+                v_float32 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float32::nlanes );
                 v0 = v0 <= thresh4;
                 v1 = v1 <= thresh4;
                 v0 = v0 & maxval4;
                 v1 = v1 & maxval4;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 4, v1 );
+                v_store( dst + j + v_float32::nlanes, v1 );
             }
+            if( j <= roi.width - v_float32::nlanes )
+            {
+                v_float32 v0 = vx_load( src + j );
+                v0 = v0 <= thresh4;
+                v0 = v0 & maxval4;
+                v_store( dst + j, v0 );
+                j += v_float32::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -794,15 +865,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 8; j += 8 )
+            for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
             {
-                v_float32x4 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 4 );
+                v_float32 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float32::nlanes );
                 v0 = v_min( v0, thresh4 );
                 v1 = v_min( v1, thresh4 );
                 v_store( dst + j, v0 );
-                v_store( dst + j + 4, v1 );
+                v_store( dst + j + v_float32::nlanes, v1 );
             }
+            if( j <= roi.width - v_float32::nlanes )
+            {
+                v_float32 v0 = vx_load( src + j );
+                v0 = v_min( v0, thresh4 );
+                v_store( dst + j, v0 );
+                j += v_float32::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -814,15 +892,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 8; j += 8 )
+            for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
             {
-                v_float32x4 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 4 );
+                v_float32 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float32::nlanes );
                 v0 = ( thresh4 < v0 ) & v0;
                 v1 = ( thresh4 < v1 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 4, v1 );
+                v_store( dst + j + v_float32::nlanes, v1 );
             }
+            if( j <= roi.width - v_float32::nlanes )
+            {
+                v_float32 v0 = vx_load( src + j );
+                v0 = ( thresh4 < v0 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_float32::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -834,15 +919,22 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 8; j += 8 )
+            for( ; j <= roi.width - 2*v_float32::nlanes; j += 2*v_float32::nlanes )
             {
-                v_float32x4 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 4 );
+                v_float32 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float32::nlanes );
                 v0 = ( v0 <= thresh4 ) & v0;
                 v1 = ( v1 <= thresh4 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 4, v1 );
+                v_store( dst + j + v_float32::nlanes, v1 );
             }
+            if( j <= roi.width - v_float32::nlanes )
+            {
+                v_float32 v0 = vx_load( src + j );
+                v0 = ( v0 <= thresh4 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_float32::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -852,12 +944,9 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
     default:
         CV_Error( CV_StsBadArg, "" ); return;
     }
-    }
-    else
-#endif
-    {
+#else
     threshGeneric<float>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
-    }
+#endif
 }

 static void
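The float path relies on the fact that a universal-intrinsics comparison on v_float32 produces a lane-wise bit mask and that operator& on float vectors is a bitwise AND (both visible in the hunks above), so ANDing the mask with maxval4 yields either maxval or +0.0f per lane. A small hedged sketch of that idiom, illustration only, same header assumptions as above, threshBinary32f is a hypothetical name:

#if CV_SIMD
// Illustration: THRESH_BINARY for one v_float32 worth of pixels.
static inline v_float32 threshBinary32f(const v_float32& v,
                                        const v_float32& thresh4,
                                        const v_float32& maxval4)
{
    v_float32 mask = thresh4 < v;   // all-ones bit pattern where v > thresh, zeros elsewhere
    return mask & maxval4;          // bitwise AND keeps maxval only in the passing lanes
}
#endif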
@@ -876,13 +965,10 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         roi.height = 1;
     }

-#if CV_SIMD128_64F
-    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
-    if( useSIMD )
-    {
+#if CV_SIMD_64F
     int i, j;
-    v_float64x2 thresh2 = v_setall_f64( thresh );
-    v_float64x2 maxval2 = v_setall_f64( maxval );
+    v_float64 thresh2 = vx_setall_f64( thresh );
+    v_float64 maxval2 = vx_setall_f64( maxval );

     switch( type )
     {

@@ -890,17 +976,25 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 4; j += 4 )
+            for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
             {
-                v_float64x2 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 2 );
+                v_float64 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float64::nlanes );
                 v0 = thresh2 < v0;
                 v1 = thresh2 < v1;
                 v0 = v0 & maxval2;
                 v1 = v1 & maxval2;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 2, v1 );
+                v_store( dst + j + v_float64::nlanes, v1 );
             }
+            if( j <= roi.width - v_float64::nlanes )
+            {
+                v_float64 v0 = vx_load( src + j );
+                v0 = thresh2 < v0;
+                v0 = v0 & maxval2;
+                v_store( dst + j, v0 );
+                j += v_float64::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -912,17 +1006,25 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 4; j += 4 )
+            for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
             {
-                v_float64x2 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 2 );
+                v_float64 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float64::nlanes );
                 v0 = v0 <= thresh2;
                 v1 = v1 <= thresh2;
                 v0 = v0 & maxval2;
                 v1 = v1 & maxval2;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 2, v1 );
+                v_store( dst + j + v_float64::nlanes, v1 );
             }
+            if( j <= roi.width - v_float64::nlanes )
+            {
+                v_float64 v0 = vx_load( src + j );
+                v0 = v0 <= thresh2;
+                v0 = v0 & maxval2;
+                v_store( dst + j, v0 );
+                j += v_float64::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -934,15 +1036,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 4; j += 4 )
+            for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
             {
-                v_float64x2 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 2 );
+                v_float64 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float64::nlanes );
                 v0 = v_min( v0, thresh2 );
                 v1 = v_min( v1, thresh2 );
                 v_store( dst + j, v0 );
-                v_store( dst + j + 2, v1 );
+                v_store( dst + j + v_float64::nlanes, v1 );
             }
+            if( j <= roi.width - v_float64::nlanes )
+            {
+                v_float64 v0 = vx_load( src + j );
+                v0 = v_min( v0, thresh2 );
+                v_store( dst + j, v0 );
+                j += v_float64::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -954,15 +1063,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 4; j += 4 )
+            for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
             {
-                v_float64x2 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 2 );
+                v_float64 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float64::nlanes );
                 v0 = ( thresh2 < v0 ) & v0;
                 v1 = ( thresh2 < v1 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 2, v1 );
+                v_store( dst + j + v_float64::nlanes, v1 );
             }
+            if( j <= roi.width - v_float64::nlanes )
+            {
+                v_float64 v0 = vx_load( src + j );
+                v0 = ( thresh2 < v0 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_float64::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -974,15 +1090,22 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
         for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
         {
             j = 0;
-            for( ; j <= roi.width - 4; j += 4 )
+            for( ; j <= roi.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes )
             {
-                v_float64x2 v0, v1;
-                v0 = v_load( src + j );
-                v1 = v_load( src + j + 2 );
+                v_float64 v0, v1;
+                v0 = vx_load( src + j );
+                v1 = vx_load( src + j + v_float64::nlanes );
                 v0 = ( v0 <= thresh2 ) & v0;
                 v1 = ( v1 <= thresh2 ) & v1;
                 v_store( dst + j, v0 );
-                v_store( dst + j + 2, v1 );
+                v_store( dst + j + v_float64::nlanes, v1 );
             }
+            if( j <= roi.width - v_float64::nlanes )
+            {
+                v_float64 v0 = vx_load( src + j );
+                v0 = ( v0 <= thresh2 ) & v0;
+                v_store( dst + j, v0 );
+                j += v_float64::nlanes;
+            }
             for( ; j < roi.width; j++ )

@@ -992,12 +1115,9 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
     default:
         CV_Error( CV_StsBadArg, "" ); return;
     }
-    }
-    else
-#endif
-    {
+#else
     threshGeneric<double>(roi, src, src_step, dst, dst_step, thresh, maxval, type);
-    }
+#endif
 }

 #ifdef HAVE_IPP
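Finally, note the shape of the #else branches: when CV_SIMD (or CV_SIMD_64F for doubles) is not available, each function now calls a single templated scalar routine instead of carrying its own duplicated scalar branch. The diff only shows the call sites, so the following is a plausible reconstruction of such a helper inferred from those call sites, not the actual threshGeneric defined elsewhere in thresh.cpp; it assumes the OpenCV types and THRESH_* constants are in scope:

// Sketch inferred from the call sites above; the real threshGeneric<T> may differ.
// The per-pixel switch is kept only for brevity of the illustration.
template <typename T>
static void threshGeneric(Size roi, const T* src, size_t src_step,
                          T* dst, size_t dst_step, T thresh, T maxval, int type)
{
    for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        for( int j = 0; j < roi.width; j++ )
            switch( type )
            {
            case THRESH_BINARY:     dst[j] = src[j] >  thresh ? maxval : 0; break;
            case THRESH_BINARY_INV: dst[j] = src[j] <= thresh ? maxval : 0; break;
            case THRESH_TRUNC:      dst[j] = src[j] >  thresh ? thresh : src[j]; break;
            case THRESH_TOZERO:     dst[j] = src[j] >  thresh ? src[j] : 0; break;
            case THRESH_TOZERO_INV: dst[j] = src[j] <= thresh ? src[j] : 0; break;
            default: break;
            }
}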