opencv / Commits / 994815fb

Commit 994815fb, authored Oct 12, 2016 by Alexander Alekhin

Merge pull request #7413 from tomoaki0705:featureUniversalThreshold

Parents: 994c682b, ea6410d1

Showing 2 changed files with 349 additions and 567 deletions:

  modules/imgproc/perf/perf_threshold.cpp   +1    -1
  modules/imgproc/src/thresh.cpp            +348  -566
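This merge replaces the hand-written SSE2/SSE and NEON intrinsic paths in the cv::threshold row kernels (thresh_8u, thresh_16s, thresh_32f, thresh_64f) with OpenCV's universal intrinsics (v_uint8x16, v_int16x8, v_float32x4, v_float64x2) guarded by CV_SIMD128 / CV_SIMD128_64F, and it extends the threshold performance test to CV_32FC1 and CV_64FC1 inputs. As a rough orientation for the diff below, the 8-bit THRESH_BINARY case follows the pattern in this sketch; the function name and parameters here are illustrative, not part of the patch:

// Minimal sketch of the universal-intrinsic THRESH_BINARY pattern used by the patch.
// Assumes OpenCV is built with CV_SIMD128 support; binary_row_u8 and its arguments
// are made up for illustration, only the v_* calls come from the library.
#include "opencv2/core/hal/intrin.hpp"

static void binary_row_u8(const uchar* src, uchar* dst, int width,
                          uchar thresh, uchar maxval)
{
    int j = 0;
#if CV_SIMD128
    cv::v_uint8x16 v_thresh = cv::v_setall_u8(thresh);
    cv::v_uint8x16 v_maxval = cv::v_setall_u8(maxval);
    for( ; j <= width - 16; j += 16 )
    {
        cv::v_uint8x16 v0 = cv::v_load(src + j);   // 16 pixels per iteration
        v0 = (v_thresh < v0) & v_maxval;           // all-ones lane mask where src > thresh, keep maxval there
        cv::v_store(dst + j, v0);
    }
#endif
    for( ; j < width; j++ )                        // scalar tail (or the whole row without SIMD)
        dst[j] = src[j] > thresh ? maxval : 0;
}

The same load / compare / mask-with-maxval / store shape recurs in every kernel in the diff; only the lane type and lane count change.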
modules/imgproc/perf/perf_threshold.cpp (view file @ 994815fb)
...
@@ -14,7 +14,7 @@ typedef perf::TestBaseWithParam<Size_MatType_ThreshType_t> Size_MatType_ThreshTy

PERF_TEST_P( Size_MatType_ThreshType, threshold,
             testing::Combine(
                 testing::Values( TYPICAL_MAT_SIZES ),
                 testing::Values( CV_8UC1, CV_16SC1 ),
                 testing::Values( CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1 ),
                 ThreshType::all()
                 )
             )
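With the two extra matrix types in the perf parameter grid, the same public entry point is now benchmarked for floating-point inputs as well. A hedged usage sketch of that entry point (the matrix size and data are made up here; only cv::threshold and cv::randu come from the library):

// Calling cv::threshold on one of the element types the extended perf test covers.
#include <opencv2/imgproc.hpp>

int main()
{
    cv::Mat src(1080, 1920, CV_32FC1), dst;
    cv::randu(src, 0.0f, 255.0f);                               // arbitrary test data
    cv::threshold(src, dst, 127.0, 255.0, cv::THRESH_BINARY);   // dst is 255 where src > 127, else 0
    return 0;
}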
...

modules/imgproc/src/thresh.cpp (view file @ 994815fb)
...
@@ -42,23 +42,7 @@
 #include "precomp.hpp"
 #include "opencl_kernels_imgproc.hpp"

#if CV_NEON && defined(__aarch64__)
#include <arm_neon.h>
namespace cv {
// Workaround with missing definitions of vreinterpretq_u64_f64/vreinterpretq_f64_u64
template <typename T> static inline uint64x2_t vreinterpretq_u64_f64(T a)
{
    return (uint64x2_t)a;
}
template <typename T> static inline float64x2_t vreinterpretq_f64_u64(T a)
{
    return (float64x2_t)a;
}
} // namespace cv
#endif
#include "opencv2/core/hal/intrin.hpp"

namespace cv
{
...
@@ -137,38 +121,25 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
    int j = 0;
    const uchar* src = _src.ptr();
    uchar* dst = _dst.ptr();
#if CV_SSE2
    if( (roi.width >= 8) && checkHardwareSupport(CV_CPU_SSE2) )
#if CV_SIMD128
    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
    if( useSIMD )
    {
        __m128i _x80 = _mm_set1_epi8('\x80');
        __m128i thresh_u = _mm_set1_epi8(thresh);
        __m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
        __m128i maxval_ = _mm_set1_epi8(maxval);
        v_uint8x16 thresh_u = v_setall_u8( thresh );
        v_uint8x16 maxval16 = v_setall_u8( maxval );

        switch( type )
        {
        case THRESH_BINARY:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    v1 = _mm_and_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                    v_uint8x16 v0;
                    v0 = v_load( src + j );
                    v0 = thresh_u < v0;
                    v0 = v0 & maxval16;
                    v_store( dst + j, v0 );
                }
            }
            break;
...
@@ -176,25 +147,13 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
        case THRESH_BINARY_INV:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 32; j += 32 )
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    v1 = _mm_andnot_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                    v_uint8x16 v0;
                    v0 = v_load( src + j );
                    v0 = v0 <= thresh_u;
                    v0 = v0 & maxval16;
                    v_store( dst + j, v0 );
                }
            }
            break;
...
@@ -202,22 +161,12 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
        case THRESH_TRUNC:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 32; j += 32 )
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    v1 = _mm_subs_epu8( v1, _mm_subs_epu8( v1, thresh_u ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                    v_uint8x16 v0;
                    v0 = v_load( src + j );
                    v0 = v0 - ( v0 - thresh_u );
                    v_store( dst + j, v0 );
                }
            }
            break;
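Both the old _mm_subs_epu8 path and the new v_uint8x16 expression in the TRUNC hunk above lean on unsigned saturating subtraction: v - saturate(v - t) equals min(v, t). A hedged scalar illustration of that identity (the helper name is hypothetical, not part of the patch):

// If v <= t, (v - t) saturates to 0 and v - 0 == v; if v > t, v - (v - t) == t.
static unsigned char trunc_u8(unsigned char v, unsigned char t)
{
    unsigned char d = (unsigned char)(v > t ? v - t : 0);  // saturating v - t
    return (unsigned char)(v - d);                         // equals min(v, t)
}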
...
@@ -225,22 +174,12 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
        case THRESH_TOZERO:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s ));
                    v1 = _mm_and_si128( v1, _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                    v_uint8x16 v0;
                    v0 = v_load( src + j );
                    v0 = ( thresh_u < v0 ) & v0;
                    v_store( dst + j, v0 );
                }
            }
            break;
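The TOZERO variants use a mask-and-select idiom: the comparison produces an all-ones/all-zeros lane mask, and ANDing it with the source keeps or zeroes each pixel. A hedged sketch of that idiom in isolation; the function name and parameters are illustrative only:

#include "opencv2/core/hal/intrin.hpp"

static void tozero_row_u8(const uchar* src, uchar* dst, int width, uchar thresh)
{
    int j = 0;
#if CV_SIMD128
    cv::v_uint8x16 v_thresh = cv::v_setall_u8(thresh);
    for( ; j <= width - 16; j += 16 )
    {
        cv::v_uint8x16 v = cv::v_load(src + j);
        cv::v_store(dst + j, (v_thresh < v) & v);   // keep src where src > thresh, else 0
    }
#endif
    for( ; j < width; j++ )
        dst[j] = src[j] > thresh ? src[j] : 0;
}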
...
@@ -248,76 +187,12 @@ thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
        case THRESH_TOZERO_INV:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    v1 = _mm_andnot_si128( _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s ), v1 );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
            }
            break;
        }
    }
#elif CV_NEON
    if( roi.width >= 16 )
    {
        uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval);

        switch( type )
        {
        case THRESH_BINARY:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 16; j += 16 )
                    vst1q_u8(dst + j, vandq_u8(vcgtq_u8(vld1q_u8(src + j), v_thresh), v_maxval));
            }
            break;

        case THRESH_BINARY_INV:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 16; j += 16 )
                    vst1q_u8(dst + j, vandq_u8(vcleq_u8(vld1q_u8(src + j), v_thresh), v_maxval));
            }
            break;

        case THRESH_TRUNC:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 16; j += 16 )
                    vst1q_u8(dst + j, vminq_u8(vld1q_u8(src + j), v_thresh));
            }
            break;

        case THRESH_TOZERO:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    uint8x16_t v_src = vld1q_u8(src + j), v_mask = vcgtq_u8(v_src, v_thresh);
                    vst1q_u8(dst + j, vandq_u8(v_mask, v_src));
                }
            }
            break;

        case THRESH_TOZERO_INV:
            for( int i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j <= roi.width - 16; j += 16 )
                {
                    uint8x16_t v_src = vld1q_u8(src + j), v_mask = vcleq_u8(v_src, v_thresh);
                    vst1q_u8(dst + j, vandq_u8(v_mask, v_src));
                    v_uint8x16 v0;
                    v0 = v_load( src + j );
                    v0 = ( v0 <= thresh_u ) & v0;
                    v_store( dst + j, v0 );
                }
            }
            break;
...
@@ -404,10 +279,6 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
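The per-architecture `volatile bool useSIMD = checkHardwareSupport(...)` guards removed above are replaced, later in each function, by a single check behind the compile-time CV_SIMD128 macro. A hedged sketch of that compile-time + run-time combination; the helper function is illustrative, only checkHardwareSupport and the macros come from the library:

#include "opencv2/core/utility.hpp"
#include "opencv2/core/hal/intrin.hpp"

// CV_SIMD128 says universal intrinsics were compiled in; checkHardwareSupport()
// confirms at run time that the CPU actually offers SSE2 or NEON.
static bool canUseSimd128()
{
#if CV_SIMD128
    return cv::checkHardwareSupport(CV_CPU_SSE2) || cv::checkHardwareSupport(CV_CPU_NEON);
#else
    return false;
#endif
}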
...
@@ -471,38 +342,31 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
    }
#endif

#if CV_SIMD128
    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
    if( useSIMD )
    {
        v_int16x8 thresh8 = v_setall_s16( thresh );
        v_int16x8 maxval8 = v_setall_s16( maxval );

        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
            #if CV_SSE2
                if( useSIMD )
                {
                    __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                    for( ; j <= roi.width - 16; j += 16 )
                    {
                        __m128i v0, v1;
                        v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                        v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                        v0 = _mm_cmpgt_epi16( v0, thresh8 );
                        v1 = _mm_cmpgt_epi16( v1, thresh8 );
                        v0 = _mm_and_si128( v0, maxval8 );
                        v1 = _mm_and_si128( v1, maxval8 );
                        _mm_storeu_si128((__m128i*)(dst + j), v0 );
                        _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                    }
                    v_int16x8 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 8 );
                    v0 = thresh8 < v0;
                    v1 = thresh8 < v1;
                    v0 = v0 & maxval8;
                    v1 = v1 & maxval8;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 8, v1 );
                }
            #elif CV_NEON
                int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
                    vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
                }
            #endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
...
@@ -513,32 +377,18 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
            #if CV_SSE2
                if( useSIMD )
                {
                    __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                    for( ; j <= roi.width - 16; j += 16 )
                    {
                        __m128i v0, v1;
                        v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                        v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                        v0 = _mm_cmpgt_epi16( v0, thresh8 );
                        v1 = _mm_cmpgt_epi16( v1, thresh8 );
                        v0 = _mm_andnot_si128( v0, maxval8 );
                        v1 = _mm_andnot_si128( v1, maxval8 );
                        _mm_storeu_si128((__m128i*)(dst + j), v0 );
                        _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                    }
                }
            #elif CV_NEON
                int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

                for( ; j <= roi.width - 8; j += 8 )
                {
                    uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
                    vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
                    v_int16x8 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 8 );
                    v0 = v0 <= thresh8;
                    v1 = v1 <= thresh8;
                    v0 = v0 & maxval8;
                    v1 = v1 & maxval8;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 8, v1 );
                }
            #endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
...
@@ -549,30 +399,19 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
            #if CV_SSE2
                if( useSIMD )
                {
                    __m128i thresh8 = _mm_set1_epi16(thresh);
                    for( ; j <= roi.width - 16; j += 16 )
                    {
                        __m128i v0, v1;
                        v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                        v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                        v0 = _mm_min_epi16( v0, thresh8 );
                        v1 = _mm_min_epi16( v1, thresh8 );
                        _mm_storeu_si128((__m128i*)(dst + j), v0 );
                        _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                    }
                    v_int16x8 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 8 );
                    v0 = v_min( v0, thresh8 );
                    v1 = v_min( v1, thresh8 );
                    v_store( dst + j, v0 );
                    v_store( dst + j + 8, v1 );
                }
            #elif CV_NEON
                int16x8_t v_thresh = vdupq_n_s16(thresh);

                for( ; j <= roi.width - 8; j += 8 )
                    vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
            #endif

                for( ; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;
...
@@ -580,31 +419,16 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
            #if CV_SSE2
                if( useSIMD )
                {
                    __m128i thresh8 = _mm_set1_epi16(thresh);
                    for( ; j <= roi.width - 16; j += 16 )
                    {
                        __m128i v0, v1;
                        v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                        v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                        v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
                        v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
                        _mm_storeu_si128((__m128i*)(dst + j), v0 );
                        _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                    }
                }
            #elif CV_NEON
                int16x8_t v_thresh = vdupq_n_s16(thresh);

                for( ; j <= roi.width - 8; j += 8 )
                {
                    int16x8_t v_src = vld1q_s16(src + j);
                    uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
                    vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
                    v_int16x8 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 8 );
                    v0 = ( thresh8 < v0 ) & v0;
                    v1 = ( thresh8 < v1 ) & v1;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 8, v1 );
                }
            #endif

                for( ; j < roi.width; j++ )
                {
...
@@ -618,32 +442,72 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
            #if CV_SSE2
                if( useSIMD )
                {
                    __m128i thresh8 = _mm_set1_epi16(thresh);
                    for( ; j <= roi.width - 16; j += 16 )
                    {
                        __m128i v0, v1;
                        v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                        v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                        v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
                        v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
                        _mm_storeu_si128((__m128i*)(dst + j), v0 );
                        _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                        v_int16x8 v0, v1;
                        v0 = v_load( src + j );
                        v1 = v_load( src + j + 8 );
                        v0 = ( v0 <= thresh8 ) & v0;
                        v1 = ( v1 <= thresh8 ) & v1;
                        v_store( dst + j, v0 );
                        v_store( dst + j + 8, v1 );
                    }

                    for( ; j < roi.width; j++ )
                    {
                        short v = src[j];
                        dst[j] = v <= thresh ? v : 0;
                    }
                }
            #elif CV_NEON
                int16x8_t v_thresh = vdupq_n_s16(thresh);
            break;
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
    else
#endif
    {
        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
            }
            break;

                for( ; j <= roi.width - 8; j += 8 )
        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
                for( j = 0; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
            }
            #endif
            for( ; j < roi.width; j++ )
            break;

        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                {
                    short v = src[j];
                    dst[j] = v > thresh ? v : 0;
                }
            }
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                {
                    short v = src[j];
                    dst[j] = v <= thresh ? v : 0;
...
@@ -653,6 +517,7 @@ thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
}
...
@@ -667,10 +532,6 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
...
@@ -716,40 +577,31 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
    }
#endif

#if CV_SIMD128
    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
    if( useSIMD )
    {
        v_float32x4 thresh4 = v_setall_f32( thresh );
        v_float32x4 maxval4 = v_setall_f32( maxval );

        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_cmpgt_ps( v0, thresh4 );
                        v1 = _mm_cmpgt_ps( v1, thresh4 );
                        v0 = _mm_and_ps( v0, maxval4 );
                        v1 = _mm_and_ps( v1, maxval4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                    v_float32x4 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 4 );
                    v0 = thresh4 < v0;
                    v1 = thresh4 < v1;
                    v0 = v0 & maxval4;
                    v1 = v1 & maxval4;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 4, v1 );
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);
                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval);
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
...
@@ -760,34 +612,18 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_cmple_ps( v0, thresh4 );
                        v1 = _mm_cmple_ps( v1, thresh4 );
                        v0 = _mm_and_ps( v0, maxval4 );
                        v1 = _mm_and_ps( v1, maxval4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                        v_float32x4 v0, v1;
                        v0 = v_load( src + j );
                        v1 = v_load( src + j + 4 );
                        v0 = v0 <= thresh4;
                        v1 = v1 <= thresh4;
                        v0 = v0 & maxval4;
                        v1 = v1 & maxval4;
                        v_store( dst + j, v0 );
                        v_store( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);
                uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval);
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
...
@@ -798,30 +634,19 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_min_ps( v0, thresh4 );
                        v1 = _mm_min_ps( v1, thresh4 );
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                        v_float32x4 v0, v1;
                        v0 = v_load( src + j );
                        v1 = v_load( src + j + 4 );
                        v0 = v_min( v0, thresh4 );
                        v1 = v_min( v1, thresh4 );
                        v_store( dst + j, v0 );
                        v_store( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                    vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh));
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;
...
@@ -829,32 +654,16 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));
                        v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), vreinterpretq_u32_f32(v_src));
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                    v_float32x4 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 4 );
                    v0 = ( thresh4 < v0 ) & v0;
                    v1 = ( thresh4 < v1 ) & v1;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 4, v1 );
                }
#endif

                for( ; j < roi.width; j++ )
                {
...
@@ -868,33 +677,72 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE
                if( useSIMD )
                {
                    __m128 thresh4 = _mm_set1_ps(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128 v0, v1;
                        v0 = _mm_loadu_ps( src + j );
                        v1 = _mm_loadu_ps( src + j + 4 );
                        v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
                        v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
                        _mm_storeu_ps( dst + j, v0 );
                        _mm_storeu_ps( dst + j + 4, v1 );
                        v_float32x4 v0, v1;
                        v0 = v_load( src + j );
                        v1 = v_load( src + j + 4 );
                        v0 = ( v0 <= thresh4 ) & v0;
                        v1 = ( v1 <= thresh4 ) & v1;
                        v_store( dst + j, v0 );
                        v_store( dst + j + 4, v1 );
                    }
                }
#elif CV_NEON
                float32x4_t v_thresh = vdupq_n_f32(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                for( ; j < roi.width; j++ )
                {
                    float32x4_t v_src = vld1q_f32(src + j);
                    uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), vreinterpretq_u32_f32(v_src));
                    vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
                    float v = src[j];
                    dst[j] = v <= thresh ? v : 0;
                }
            }
            break;
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
    else
#endif
    for( ; j < roi.width; j++ )
    {
        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
            }
            break;

        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
            }
            break;

        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                {
                    float v = src[j];
                    dst[j] = v > thresh ? v : 0;
                }
            }
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                for( j = 0; j < roi.width; j++ )
                {
                    float v = src[j];
                    dst[j] = v <= thresh ? v : 0;
...
@@ -904,6 +752,7 @@ thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
}

static void
...
@@ -917,251 +766,183 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

    switch( type )
#if CV_SIMD128_64F
    bool useSIMD = checkHardwareSupport( CV_CPU_SSE2 ) || checkHardwareSupport( CV_CPU_NEON );
    if( useSIMD )
    {
        v_float64x2 thresh2 = v_setall_f64( thresh );
        v_float64x2 maxval2 = v_setall_f64( maxval );

        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128d thresh2 = _mm_set1_pd(thresh), maxval2 = _mm_set1_pd(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128d v0, v1, v2, v3;
                        v0 = _mm_loadu_pd( src + j );
                        v1 = _mm_loadu_pd( src + j + 2 );
                        v2 = _mm_loadu_pd( src + j + 4 );
                        v3 = _mm_loadu_pd( src + j + 6 );
                        v0 = _mm_cmpgt_pd( v0, thresh2 );
                        v1 = _mm_cmpgt_pd( v1, thresh2 );
                        v2 = _mm_cmpgt_pd( v2, thresh2 );
                        v3 = _mm_cmpgt_pd( v3, thresh2 );
                        v0 = _mm_and_pd( v0, maxval2 );
                        v1 = _mm_and_pd( v1, maxval2 );
                        v2 = _mm_and_pd( v2, maxval2 );
                        v3 = _mm_and_pd( v3, maxval2 );
                        _mm_storeu_pd( dst + j, v0 );
                        _mm_storeu_pd( dst + j + 2, v1 );
                        _mm_storeu_pd( dst + j + 4, v2 );
                        _mm_storeu_pd( dst + j + 6, v3 );
                    }
                }
#elif CV_NEON && defined(__aarch64__)
                float64x2_t v_thresh = vdupq_n_f64(thresh);
                uint64x2_t v_maxval = vreinterpretq_u64_f64(vdupq_n_f64(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float64x2_t v_src0 = vld1q_f64(src + j);
                    float64x2_t v_src1 = vld1q_f64(src + j + 2);
                    uint64x2_t v_dst0 = vandq_u64(vcgtq_f64(v_src0, v_thresh), v_maxval);
                    uint64x2_t v_dst1 = vandq_u64(vcgtq_f64(v_src1, v_thresh), v_maxval);
                    vst1q_f64(dst + j, vreinterpretq_f64_u64(v_dst0));
                    vst1q_f64(dst + j + 2, vreinterpretq_f64_u64(v_dst1));
                    v_float64x2 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 2 );
                    v0 = thresh2 < v0;
                    v1 = thresh2 < v1;
                    v0 = v0 & maxval2;
                    v1 = v1 & maxval2;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 2, v1 );
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
            }
            break;

        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128d thresh2 = _mm_set1_pd(thresh), maxval2 = _mm_set1_pd(maxval);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128d v0, v1, v2, v3;
                        v0 = _mm_loadu_pd( src + j );
                        v1 = _mm_loadu_pd( src + j + 2 );
                        v2 = _mm_loadu_pd( src + j + 4 );
                        v3 = _mm_loadu_pd( src + j + 6 );
                        v0 = _mm_cmple_pd( v0, thresh2 );
                        v1 = _mm_cmple_pd( v1, thresh2 );
                        v2 = _mm_cmple_pd( v2, thresh2 );
                        v3 = _mm_cmple_pd( v3, thresh2 );
                        v0 = _mm_and_pd( v0, maxval2 );
                        v1 = _mm_and_pd( v1, maxval2 );
                        v2 = _mm_and_pd( v2, maxval2 );
                        v3 = _mm_and_pd( v3, maxval2 );
                        _mm_storeu_pd( dst + j, v0 );
                        _mm_storeu_pd( dst + j + 2, v1 );
                        _mm_storeu_pd( dst + j + 4, v2 );
                        _mm_storeu_pd( dst + j + 6, v3 );
                    }
                }
#elif CV_NEON && defined(__aarch64__)
                float64x2_t v_thresh = vdupq_n_f64(thresh);
                uint64x2_t v_maxval = vreinterpretq_u64_f64(vdupq_n_f64(maxval));

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float64x2_t v_src0 = vld1q_f64(src + j);
                    float64x2_t v_src1 = vld1q_f64(src + j + 2);
                    uint64x2_t v_dst0 = vandq_u64(vcleq_f64(v_src0, v_thresh), v_maxval);
                    uint64x2_t v_dst1 = vandq_u64(vcleq_f64(v_src1, v_thresh), v_maxval);
                    vst1q_f64(dst + j, vreinterpretq_f64_u64(v_dst0));
                    vst1q_f64(dst + j + 2, vreinterpretq_f64_u64(v_dst1));
                    v_float64x2 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 2 );
                    v0 = v0 <= thresh2;
                    v1 = v1 <= thresh2;
                    v0 = v0 & maxval2;
                    v1 = v1 & maxval2;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 2, v1 );
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
            }
            break;

        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                {
                    __m128d thresh2 = _mm_set1_pd(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    {
                        __m128d v0, v1, v2, v3;
                        v0 = _mm_loadu_pd( src + j );
                        v1 = _mm_loadu_pd( src + j + 2 );
                        v2 = _mm_loadu_pd( src + j + 4 );
                        v3 = _mm_loadu_pd( src + j + 6 );
                        v0 = _mm_min_pd( v0, thresh2 );
                        v1 = _mm_min_pd( v1, thresh2 );
                        v2 = _mm_min_pd( v2, thresh2 );
                        v3 = _mm_min_pd( v3, thresh2 );
                        _mm_storeu_pd( dst + j, v0 );
                        _mm_storeu_pd( dst + j + 2, v1 );
                        _mm_storeu_pd( dst + j + 4, v2 );
                        _mm_storeu_pd( dst + j + 6, v3 );
                    }
                }
#elif CV_NEON && defined(__aarch64__)
                float64x2_t v_thresh = vdupq_n_f64(thresh);

                for( ; j <= roi.width - 4; j += 4 )
                {
                    float64x2_t v_src0 = vld1q_f64(src + j);
                    float64x2_t v_src1 = vld1q_f64(src + j + 2);
                    float64x2_t v_dst0 = vminq_f64(v_src0, v_thresh);
                    float64x2_t v_dst1 = vminq_f64(v_src1, v_thresh);
                    vst1q_f64(dst + j, v_dst0);
                    vst1q_f64(dst + j + 2, v_dst1);
                    v_float64x2 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 2 );
                    v0 = v_min( v0, thresh2 );
                    v1 = v_min( v1, thresh2 );
                    v_store( dst + j, v0 );
                    v_store( dst + j + 2, v1 );
                }
#endif

                for( ; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
#if CV_SSE2
                if( useSIMD )
                for( ; j <= roi.width - 4; j += 4 )
                {
                    __m128d thresh2 = _mm_set1_pd(thresh);
                    for( ; j <= roi.width - 8; j += 8 )
                    v_float64x2 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 2 );
                    v0 = ( thresh2 < v0 ) & v0;
                    v1 = ( thresh2 < v1 ) & v1;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 2, v1 );
                }

                for( ; j < roi.width; j++ )
                {
                    __m128d v0, v1, v2, v3;
                    v0 = _mm_loadu_pd( src + j );
                    v1 = _mm_loadu_pd( src + j + 2 );
                    v2 = _mm_loadu_pd( src + j + 4 );
                    v3 = _mm_loadu_pd( src + j + 6 );
                    v0 = _mm_and_pd( v0, _mm_cmpgt_pd(v0, thresh2));
                    v1 = _mm_and_pd( v1, _mm_cmpgt_pd(v1, thresh2));
                    v2 = _mm_and_pd( v2, _mm_cmpgt_pd(v2, thresh2));
                    v3 = _mm_and_pd( v3, _mm_cmpgt_pd(v3, thresh2));
                    _mm_storeu_pd( dst + j, v0 );
                    _mm_storeu_pd( dst + j + 2, v1 );
                    _mm_storeu_pd( dst + j + 4, v2 );
                    _mm_storeu_pd( dst + j + 6, v3 );
                    double v = src[j];
                    dst[j] = v > thresh ? v : 0;
                }
            }
#elif CV_NEON && defined(__aarch64__)
            float64x2_t v_thresh = vdupq_n_f64(thresh);
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
                for( ; j <= roi.width - 4; j += 4 )
                {
                    float64x2_t v_src0 = vld1q_f64(src + j);
                    float64x2_t v_src1 = vld1q_f64(src + j + 2);
                    uint64x2_t v_dst0 = vandq_u64(vcgtq_f64(v_src0, v_thresh), vreinterpretq_u64_f64(v_src0));
                    uint64x2_t v_dst1 = vandq_u64(vcgtq_f64(v_src1, v_thresh), vreinterpretq_u64_f64(v_src1));
                    vst1q_f64(dst + j, vreinterpretq_f64_u64(v_dst0));
                    vst1q_f64(dst + j + 2, vreinterpretq_f64_u64(v_dst1));
                    v_float64x2 v0, v1;
                    v0 = v_load( src + j );
                    v1 = v_load( src + j + 2 );
                    v0 = ( v0 <= thresh2 ) & v0;
                    v1 = ( v1 <= thresh2 ) & v1;
                    v_store( dst + j, v0 );
                    v_store( dst + j + 2, v1 );
                }
#endif

                for( ; j < roi.width; j++ )
                {
                    double v = src[j];
                    dst[j] = v > thresh ? v : 0;
                    dst[j] = v <= thresh ? v : 0;
                }
            }
            break;
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
    else
#endif
    {
        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
                for( ; j < roi.width; j++ )
                    dst[j] = src[j] > thresh ? maxval : 0;
            }
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                j = 0;
                for( ; j < roi.width; j++ )
                    dst[j] = src[j] <= thresh ? maxval : 0;
            }
            break;
#if CV_SSE2
            if( useSIMD )
        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                __m128d thresh2 = _mm_set1_pd(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                j = 0;
                for( ; j < roi.width; j++ )
                    dst[j] = std::min(src[j], thresh);
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                __m128d v0, v1, v2, v3;
                v0 = _mm_loadu_pd( src + j );
                v1 = _mm_loadu_pd( src + j + 2 );
                v2 = _mm_loadu_pd( src + j + 4 );
                v3 = _mm_loadu_pd( src + j + 6 );
                v0 = _mm_and_pd( v0, _mm_cmple_pd(v0, thresh2));
                v1 = _mm_and_pd( v1, _mm_cmple_pd(v1, thresh2));
                v2 = _mm_and_pd( v2, _mm_cmple_pd(v2, thresh2));
                v3 = _mm_and_pd( v3, _mm_cmple_pd(v3, thresh2));
                _mm_storeu_pd( dst + j, v0 );
                _mm_storeu_pd( dst + j + 2, v1 );
                _mm_storeu_pd( dst + j + 4, v2 );
                _mm_storeu_pd( dst + j + 6, v3 );
                j = 0;
                for( ; j < roi.width; j++ )
                {
                    double v = src[j];
                    dst[j] = v > thresh ? v : 0;
                }
            }
#elif CV_NEON && defined(__aarch64__)
            float64x2_t v_thresh = vdupq_n_f64(thresh);
            break;

            for( ; j <= roi.width - 4; j += 4 )
        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
            {
                float64x2_t v_src0 = vld1q_f64(src + j);
                float64x2_t v_src1 = vld1q_f64(src + j + 2);
                uint64x2_t v_dst0 = vandq_u64(vcleq_f64(v_src0, v_thresh), vreinterpretq_u64_f64(v_src0));
                uint64x2_t v_dst1 = vandq_u64(vcleq_f64(v_src1, v_thresh), vreinterpretq_u64_f64(v_src1));
                vst1q_f64(dst + j, vreinterpretq_f64_u64(v_dst0));
                vst1q_f64(dst + j + 2, vreinterpretq_f64_u64(v_dst1));
            }
#endif
                j = 0;
                for( ; j < roi.width; j++ )
                {
                    double v = src[j];
                    dst[j] = v <= thresh ? v : 0;
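The double-precision kernel above is only vectorized when the universal intrinsics expose 64-bit float lanes, which is what the new CV_SIMD128_64F guard checks (available with SSE2, and with NEON only on AArch64, hence the removed vreinterpretq workaround at the top of the file). A hedged sketch of the lane type involved; the function and its arguments are illustrative, not part of the patch:

#include "opencv2/core/hal/intrin.hpp"

// v_float64x2 carries two doubles per register when CV_SIMD128_64F is non-zero.
static void binary_row_f64(const double* src, double* dst, int width,
                           double thresh, double maxval)
{
    int j = 0;
#if CV_SIMD128_64F
    cv::v_float64x2 v_thresh = cv::v_setall_f64(thresh);
    cv::v_float64x2 v_maxval = cv::v_setall_f64(maxval);
    for( ; j <= width - 2; j += 2 )
    {
        cv::v_float64x2 v = cv::v_load(src + j);
        cv::v_store(dst + j, (v_thresh < v) & v_maxval);   // lane mask selects maxval
    }
#endif
    for( ; j < width; j++ )
        dst[j] = src[j] > thresh ? maxval : 0;
}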
...
@@ -1171,6 +952,7 @@ thresh_64f(const Mat& _src, Mat& _dst, double thresh, double maxval, int type)
        default:
            return CV_Error( CV_StsBadArg, "" );
        }
    }
}

#ifdef HAVE_IPP
...
...