Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5a01227a
Commit
5a01227a
authored
Mar 09, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
imgproc: dispatch box_filter
parent
ce3c92eb
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
1777 deletions
+77
-1777
CMakeLists.txt
modules/imgproc/CMakeLists.txt
+1
-0
box_filter.dispatch.cpp
modules/imgproc/src/box_filter.dispatch.cpp
+30
-1277
box_filter.simd.hpp
modules/imgproc/src/box_filter.simd.hpp
+46
-500
No files found.
modules/imgproc/CMakeLists.txt
View file @
5a01227a
set
(
the_description
"Image Processing"
)
ocv_add_dispatched_file
(
accum SSE4_1 AVX AVX2
)
ocv_add_dispatched_file
(
bilateral_filter SSE2 AVX2
)
ocv_add_dispatched_file
(
box_filter SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
filter SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
color_hsv SSE2 SSE4_1 AVX2
)
ocv_add_dispatched_file
(
color_rgb SSE2 SSE4_1 AVX2
)
...
...
modules/imgproc/src/box_filter.dispatch.cpp
View file @
5a01227a
...
...
@@ -50,1119 +50,11 @@
#include "opencv2/core/openvx/ovx_defs.hpp"
namespace
cv
{
/****************************************************************************************\
Box Filter
\****************************************************************************************/
template
<
typename
T
,
typename
ST
>
struct
RowSum
:
public
BaseRowFilter
{
RowSum
(
int
_ksize
,
int
_anchor
)
:
BaseRowFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
}
virtual
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
width
,
int
cn
)
CV_OVERRIDE
{
const
T
*
S
=
(
const
T
*
)
src
;
ST
*
D
=
(
ST
*
)
dst
;
int
i
=
0
,
k
,
ksz_cn
=
ksize
*
cn
;
width
=
(
width
-
1
)
*
cn
;
if
(
ksize
==
3
)
{
for
(
i
=
0
;
i
<
width
+
cn
;
i
++
)
{
D
[
i
]
=
(
ST
)
S
[
i
]
+
(
ST
)
S
[
i
+
cn
]
+
(
ST
)
S
[
i
+
cn
*
2
];
}
}
else
if
(
ksize
==
5
)
{
for
(
i
=
0
;
i
<
width
+
cn
;
i
++
)
{
D
[
i
]
=
(
ST
)
S
[
i
]
+
(
ST
)
S
[
i
+
cn
]
+
(
ST
)
S
[
i
+
cn
*
2
]
+
(
ST
)
S
[
i
+
cn
*
3
]
+
(
ST
)
S
[
i
+
cn
*
4
];
}
}
else
if
(
cn
==
1
)
{
ST
s
=
0
;
for
(
i
=
0
;
i
<
ksz_cn
;
i
++
)
s
+=
(
ST
)
S
[
i
];
D
[
0
]
=
s
;
for
(
i
=
0
;
i
<
width
;
i
++
)
{
s
+=
(
ST
)
S
[
i
+
ksz_cn
]
-
(
ST
)
S
[
i
];
D
[
i
+
1
]
=
s
;
}
}
else
if
(
cn
==
3
)
{
ST
s0
=
0
,
s1
=
0
,
s2
=
0
;
for
(
i
=
0
;
i
<
ksz_cn
;
i
+=
3
)
{
s0
+=
(
ST
)
S
[
i
];
s1
+=
(
ST
)
S
[
i
+
1
];
s2
+=
(
ST
)
S
[
i
+
2
];
}
D
[
0
]
=
s0
;
D
[
1
]
=
s1
;
D
[
2
]
=
s2
;
for
(
i
=
0
;
i
<
width
;
i
+=
3
)
{
s0
+=
(
ST
)
S
[
i
+
ksz_cn
]
-
(
ST
)
S
[
i
];
s1
+=
(
ST
)
S
[
i
+
ksz_cn
+
1
]
-
(
ST
)
S
[
i
+
1
];
s2
+=
(
ST
)
S
[
i
+
ksz_cn
+
2
]
-
(
ST
)
S
[
i
+
2
];
D
[
i
+
3
]
=
s0
;
D
[
i
+
4
]
=
s1
;
D
[
i
+
5
]
=
s2
;
}
}
else
if
(
cn
==
4
)
{
ST
s0
=
0
,
s1
=
0
,
s2
=
0
,
s3
=
0
;
for
(
i
=
0
;
i
<
ksz_cn
;
i
+=
4
)
{
s0
+=
(
ST
)
S
[
i
];
s1
+=
(
ST
)
S
[
i
+
1
];
s2
+=
(
ST
)
S
[
i
+
2
];
s3
+=
(
ST
)
S
[
i
+
3
];
}
D
[
0
]
=
s0
;
D
[
1
]
=
s1
;
D
[
2
]
=
s2
;
D
[
3
]
=
s3
;
for
(
i
=
0
;
i
<
width
;
i
+=
4
)
{
s0
+=
(
ST
)
S
[
i
+
ksz_cn
]
-
(
ST
)
S
[
i
];
s1
+=
(
ST
)
S
[
i
+
ksz_cn
+
1
]
-
(
ST
)
S
[
i
+
1
];
s2
+=
(
ST
)
S
[
i
+
ksz_cn
+
2
]
-
(
ST
)
S
[
i
+
2
];
s3
+=
(
ST
)
S
[
i
+
ksz_cn
+
3
]
-
(
ST
)
S
[
i
+
3
];
D
[
i
+
4
]
=
s0
;
D
[
i
+
5
]
=
s1
;
D
[
i
+
6
]
=
s2
;
D
[
i
+
7
]
=
s3
;
}
}
else
for
(
k
=
0
;
k
<
cn
;
k
++
,
S
++
,
D
++
)
{
ST
s
=
0
;
for
(
i
=
0
;
i
<
ksz_cn
;
i
+=
cn
)
s
+=
(
ST
)
S
[
i
];
D
[
0
]
=
s
;
for
(
i
=
0
;
i
<
width
;
i
+=
cn
)
{
s
+=
(
ST
)
S
[
i
+
ksz_cn
]
-
(
ST
)
S
[
i
];
D
[
i
+
cn
]
=
s
;
}
}
}
};
template
<
typename
ST
,
typename
T
>
struct
ColumnSum
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
i
;
ST
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
ST
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
ST
*
Sp
=
(
const
ST
*
)
src
[
0
];
for
(
i
=
0
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
ST
*
Sp
=
(
const
ST
*
)
src
[
0
];
const
ST
*
Sm
=
(
const
ST
*
)
src
[
1
-
ksize
];
T
*
D
=
(
T
*
)
dst
;
if
(
haveScale
)
{
for
(
i
=
0
;
i
<=
width
-
2
;
i
+=
2
)
{
ST
s0
=
SUM
[
i
]
+
Sp
[
i
],
s1
=
SUM
[
i
+
1
]
+
Sp
[
i
+
1
];
D
[
i
]
=
saturate_cast
<
T
>
(
s0
*
_scale
);
D
[
i
+
1
]
=
saturate_cast
<
T
>
(
s1
*
_scale
);
s0
-=
Sm
[
i
];
s1
-=
Sm
[
i
+
1
];
SUM
[
i
]
=
s0
;
SUM
[
i
+
1
]
=
s1
;
}
for
(
;
i
<
width
;
i
++
)
{
ST
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
T
>
(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
for
(
i
=
0
;
i
<=
width
-
2
;
i
+=
2
)
{
ST
s0
=
SUM
[
i
]
+
Sp
[
i
],
s1
=
SUM
[
i
+
1
]
+
Sp
[
i
+
1
];
D
[
i
]
=
saturate_cast
<
T
>
(
s0
);
D
[
i
+
1
]
=
saturate_cast
<
T
>
(
s1
);
s0
-=
Sm
[
i
];
s1
-=
Sm
[
i
+
1
];
SUM
[
i
]
=
s0
;
SUM
[
i
+
1
]
=
s1
;
}
for
(
;
i
<
width
;
i
++
)
{
ST
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
T
>
(
s0
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
}
double
scale
;
int
sumCount
;
std
::
vector
<
ST
>
sum
;
};
template
<>
struct
ColumnSum
<
int
,
uchar
>
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
int
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
int
i
=
0
;
#if CV_SIMD
for
(;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
const
int
*
Sm
=
(
const
int
*
)
src
[
1
-
ksize
];
uchar
*
D
=
(
uchar
*
)
dst
;
if
(
haveScale
)
{
int
i
=
0
;
#if CV_SIMD
v_float32
_v_scale
=
vx_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_uint16
::
nlanes
;
i
+=
v_uint16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_uint32
v_s0d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s0
)
*
_v_scale
));
v_uint32
v_s01d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s01
)
*
_v_scale
));
v_uint16
v_dst
=
v_pack
(
v_s0d
,
v_s01d
);
v_pack_store
(
D
+
i
,
v_dst
);
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
v_float32x4
v_scale
=
v_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_uint16x8
::
nlanes
;
i
+=
v_uint16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_uint32x4
v_s0d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s0
)
*
v_scale
));
v_uint32x4
v_s01d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s01
)
*
v_scale
));
v_uint16x8
v_dst
=
v_pack
(
v_s0d
,
v_s01d
);
v_pack_store
(
D
+
i
,
v_dst
);
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
uchar
>
(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_uint16
::
nlanes
;
i
+=
v_uint16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_uint16
v_dst
=
v_pack
(
v_reinterpret_as_u32
(
v_s0
),
v_reinterpret_as_u32
(
v_s01
));
v_pack_store
(
D
+
i
,
v_dst
);
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_uint16x8
::
nlanes
;
i
+=
v_uint16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_uint16x8
v_dst
=
v_pack
(
v_reinterpret_as_u32
(
v_s0
),
v_reinterpret_as_u32
(
v_s01
));
v_pack_store
(
D
+
i
,
v_dst
);
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
uchar
>
(
s0
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
std
::
vector
<
int
>
sum
;
};
template
<>
struct
ColumnSum
<
ushort
,
uchar
>
:
public
BaseColumnFilter
{
enum
{
SHIFT
=
23
};
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
divDelta
=
0
;
divScale
=
1
;
if
(
scale
!=
1
)
{
int
d
=
cvRound
(
1.
/
scale
);
double
scalef
=
((
double
)(
1
<<
SHIFT
))
/
d
;
divScale
=
cvFloor
(
scalef
);
scalef
-=
divScale
;
divDelta
=
d
/
2
;
if
(
scalef
<
0.5
)
divDelta
++
;
else
divScale
++
;
}
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
const
int
ds
=
divScale
;
const
int
dd
=
divDelta
;
ushort
*
SUM
;
const
bool
haveScale
=
scale
!=
1
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
SUM
[
0
]));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
ushort
*
Sp
=
(
const
ushort
*
)
src
[
0
];
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_uint16
::
nlanes
;
i
+=
v_uint16
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_uint16x8
::
nlanes
;
i
+=
v_uint16x8
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
ushort
*
Sp
=
(
const
ushort
*
)
src
[
0
];
const
ushort
*
Sm
=
(
const
ushort
*
)
src
[
1
-
ksize
];
uchar
*
D
=
(
uchar
*
)
dst
;
if
(
haveScale
)
{
int
i
=
0
;
#if CV_SIMD
v_uint32
_ds4
=
vx_setall_u32
((
unsigned
)
ds
);
v_uint16
_dd8
=
vx_setall_u16
((
ushort
)
dd
);
for
(
;
i
<=
width
-
v_uint8
::
nlanes
;
i
+=
v_uint8
::
nlanes
)
{
v_uint16
_sm0
=
vx_load
(
Sm
+
i
);
v_uint16
_sm1
=
vx_load
(
Sm
+
i
+
v_uint16
::
nlanes
);
v_uint16
_s0
=
v_add_wrap
(
vx_load
(
SUM
+
i
),
vx_load
(
Sp
+
i
));
v_uint16
_s1
=
v_add_wrap
(
vx_load
(
SUM
+
i
+
v_uint16
::
nlanes
),
vx_load
(
Sp
+
i
+
v_uint16
::
nlanes
));
v_uint32
_s00
,
_s01
,
_s10
,
_s11
;
v_expand
(
_s0
+
_dd8
,
_s00
,
_s01
);
v_expand
(
_s1
+
_dd8
,
_s10
,
_s11
);
_s00
=
v_shr
<
SHIFT
>
(
_s00
*
_ds4
);
_s01
=
v_shr
<
SHIFT
>
(
_s01
*
_ds4
);
_s10
=
v_shr
<
SHIFT
>
(
_s10
*
_ds4
);
_s11
=
v_shr
<
SHIFT
>
(
_s11
*
_ds4
);
v_int16
r0
=
v_pack
(
v_reinterpret_as_s32
(
_s00
),
v_reinterpret_as_s32
(
_s01
));
v_int16
r1
=
v_pack
(
v_reinterpret_as_s32
(
_s10
),
v_reinterpret_as_s32
(
_s11
));
_s0
=
v_sub_wrap
(
_s0
,
_sm0
);
_s1
=
v_sub_wrap
(
_s1
,
_sm1
);
v_store
(
D
+
i
,
v_pack_u
(
r0
,
r1
));
v_store
(
SUM
+
i
,
_s0
);
v_store
(
SUM
+
i
+
v_uint16
::
nlanes
,
_s1
);
}
#if CV_SIMD_WIDTH > 16
v_uint32x4
ds4
=
v_setall_u32
((
unsigned
)
ds
);
v_uint16x8
dd8
=
v_setall_u16
((
ushort
)
dd
);
for
(
;
i
<=
width
-
v_uint8x16
::
nlanes
;
i
+=
v_uint8x16
::
nlanes
)
{
v_uint16x8
_sm0
=
v_load
(
Sm
+
i
);
v_uint16x8
_sm1
=
v_load
(
Sm
+
i
+
v_uint16x8
::
nlanes
);
v_uint16x8
_s0
=
v_add_wrap
(
v_load
(
SUM
+
i
),
v_load
(
Sp
+
i
));
v_uint16x8
_s1
=
v_add_wrap
(
v_load
(
SUM
+
i
+
v_uint16x8
::
nlanes
),
v_load
(
Sp
+
i
+
v_uint16x8
::
nlanes
));
v_uint32x4
_s00
,
_s01
,
_s10
,
_s11
;
v_expand
(
_s0
+
dd8
,
_s00
,
_s01
);
v_expand
(
_s1
+
dd8
,
_s10
,
_s11
);
_s00
=
v_shr
<
SHIFT
>
(
_s00
*
ds4
);
_s01
=
v_shr
<
SHIFT
>
(
_s01
*
ds4
);
_s10
=
v_shr
<
SHIFT
>
(
_s10
*
ds4
);
_s11
=
v_shr
<
SHIFT
>
(
_s11
*
ds4
);
v_int16x8
r0
=
v_pack
(
v_reinterpret_as_s32
(
_s00
),
v_reinterpret_as_s32
(
_s01
));
v_int16x8
r1
=
v_pack
(
v_reinterpret_as_s32
(
_s10
),
v_reinterpret_as_s32
(
_s11
));
_s0
=
v_sub_wrap
(
_s0
,
_sm0
);
_s1
=
v_sub_wrap
(
_s1
,
_sm1
);
v_store
(
D
+
i
,
v_pack_u
(
r0
,
r1
));
v_store
(
SUM
+
i
,
_s0
);
v_store
(
SUM
+
i
+
v_uint16x8
::
nlanes
,
_s1
);
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
(
uchar
)((
s0
+
dd
)
*
ds
>>
SHIFT
);
SUM
[
i
]
=
(
ushort
)(
s0
-
Sm
[
i
]);
}
}
else
{
int
i
=
0
;
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
uchar
>
(
s0
);
SUM
[
i
]
=
(
ushort
)(
s0
-
Sm
[
i
]);
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
int
divDelta
;
int
divScale
;
std
::
vector
<
ushort
>
sum
;
};
template
<>
struct
ColumnSum
<
int
,
short
>
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
i
;
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
int
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
#include "box_filter.simd.hpp"
#include "box_filter.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
for
(
;
count
--
;
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
const
int
*
Sm
=
(
const
int
*
)
src
[
1
-
ksize
];
short
*
D
=
(
short
*
)
dst
;
if
(
haveScale
)
{
i
=
0
;
#if CV_SIMD
v_float32
_v_scale
=
vx_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_int16
::
nlanes
;
i
+=
v_int16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_int32
v_s0d
=
v_round
(
v_cvt_f32
(
v_s0
)
*
_v_scale
);
v_int32
v_s01d
=
v_round
(
v_cvt_f32
(
v_s01
)
*
_v_scale
);
v_store
(
D
+
i
,
v_pack
(
v_s0d
,
v_s01d
));
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
v_float32x4
v_scale
=
v_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_int16x8
::
nlanes
;
i
+=
v_int16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_int32x4
v_s0d
=
v_round
(
v_cvt_f32
(
v_s0
)
*
v_scale
);
v_int32x4
v_s01d
=
v_round
(
v_cvt_f32
(
v_s01
)
*
v_scale
);
v_store
(
D
+
i
,
v_pack
(
v_s0d
,
v_s01d
));
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
short
>
(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int16
::
nlanes
;
i
+=
v_int16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_store
(
D
+
i
,
v_pack
(
v_s0
,
v_s01
));
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int16x8
::
nlanes
;
i
+=
v_int16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_store
(
D
+
i
,
v_pack
(
v_s0
,
v_s01
));
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
short
>
(
s0
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
std
::
vector
<
int
>
sum
;
};
template
<>
struct
ColumnSum
<
int
,
ushort
>
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
int
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
int
i
=
0
;
#if CV_SIMD
for
(;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
const
int
*
Sm
=
(
const
int
*
)
src
[
1
-
ksize
];
ushort
*
D
=
(
ushort
*
)
dst
;
if
(
haveScale
)
{
int
i
=
0
;
#if CV_SIMD
v_float32
_v_scale
=
vx_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_uint16
::
nlanes
;
i
+=
v_uint16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_uint32
v_s0d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s0
)
*
_v_scale
));
v_uint32
v_s01d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s01
)
*
_v_scale
));
v_store
(
D
+
i
,
v_pack
(
v_s0d
,
v_s01d
));
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
v_float32x4
v_scale
=
v_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_uint16x8
::
nlanes
;
i
+=
v_uint16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_uint32x4
v_s0d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s0
)
*
v_scale
));
v_uint32x4
v_s01d
=
v_reinterpret_as_u32
(
v_round
(
v_cvt_f32
(
v_s01
)
*
v_scale
));
v_store
(
D
+
i
,
v_pack
(
v_s0d
,
v_s01d
));
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
ushort
>
(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_uint16
::
nlanes
;
i
+=
v_uint16
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s01
=
vx_load
(
SUM
+
i
+
v_int32
::
nlanes
)
+
vx_load
(
Sp
+
i
+
v_int32
::
nlanes
);
v_store
(
D
+
i
,
v_pack
(
v_reinterpret_as_u32
(
v_s0
),
v_reinterpret_as_u32
(
v_s01
)));
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32
::
nlanes
,
v_s01
-
vx_load
(
Sm
+
i
+
v_int32
::
nlanes
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_uint16x8
::
nlanes
;
i
+=
v_uint16x8
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s01
=
v_load
(
SUM
+
i
+
v_int32x4
::
nlanes
)
+
v_load
(
Sp
+
i
+
v_int32x4
::
nlanes
);
v_store
(
D
+
i
,
v_pack
(
v_reinterpret_as_u32
(
v_s0
),
v_reinterpret_as_u32
(
v_s01
)));
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
v_store
(
SUM
+
i
+
v_int32x4
::
nlanes
,
v_s01
-
v_load
(
Sm
+
i
+
v_int32x4
::
nlanes
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
ushort
>
(
s0
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
std
::
vector
<
int
>
sum
;
};
template
<>
struct
ColumnSum
<
int
,
int
>
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
int
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
const
int
*
Sm
=
(
const
int
*
)
src
[
1
-
ksize
];
int
*
D
=
(
int
*
)
dst
;
if
(
haveScale
)
{
int
i
=
0
;
#if CV_SIMD
v_float32
_v_scale
=
vx_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_int32
v_s0d
=
v_round
(
v_cvt_f32
(
v_s0
)
*
_v_scale
);
v_store
(
D
+
i
,
v_s0d
);
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
}
#if CV_SIMD_WIDTH > 16
v_float32x4
v_scale
=
v_setall_f32
((
float
)
_scale
);
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_int32x4
v_s0d
=
v_round
(
v_cvt_f32
(
v_s0
)
*
v_scale
);
v_store
(
D
+
i
,
v_s0d
);
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
saturate_cast
<
int
>
(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_s0
);
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_s0
);
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
s0
;
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
std
::
vector
<
int
>
sum
;
};
template
<>
struct
ColumnSum
<
int
,
float
>
:
public
BaseColumnFilter
{
ColumnSum
(
int
_ksize
,
int
_anchor
,
double
_scale
)
:
BaseColumnFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
scale
=
_scale
;
sumCount
=
0
;
}
virtual
void
reset
()
CV_OVERRIDE
{
sumCount
=
0
;
}
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
if
(
width
!=
(
int
)
sum
.
size
()
)
{
sum
.
resize
(
width
);
sumCount
=
0
;
}
SUM
=
&
sum
[
0
];
if
(
sumCount
==
0
)
{
memset
((
void
*
)
SUM
,
0
,
width
*
sizeof
(
int
));
for
(
;
sumCount
<
ksize
-
1
;
sumCount
++
,
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_store
(
SUM
+
i
,
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_store
(
SUM
+
i
,
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
SUM
[
i
]
+=
Sp
[
i
];
}
}
else
{
CV_Assert
(
sumCount
==
ksize
-
1
);
src
+=
ksize
-
1
;
}
for
(
;
count
--
;
src
++
)
{
const
int
*
Sp
=
(
const
int
*
)
src
[
0
];
const
int
*
Sm
=
(
const
int
*
)
src
[
1
-
ksize
];
float
*
D
=
(
float
*
)
dst
;
if
(
haveScale
)
{
int
i
=
0
;
#if CV_SIMD
v_float32
_v_scale
=
vx_setall_f32
((
float
)
_scale
);
for
(;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_cvt_f32
(
v_s0
)
*
_v_scale
);
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
}
#if CV_SIMD_WIDTH > 16
v_float32x4
v_scale
=
v_setall_f32
((
float
)
_scale
);
for
(;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_cvt_f32
(
v_s0
)
*
v_scale
);
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
(
float
)(
s0
*
_scale
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
else
{
int
i
=
0
;
#if CV_SIMD
for
(
;
i
<=
width
-
v_int32
::
nlanes
;
i
+=
v_int32
::
nlanes
)
{
v_int32
v_s0
=
vx_load
(
SUM
+
i
)
+
vx_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_cvt_f32
(
v_s0
));
v_store
(
SUM
+
i
,
v_s0
-
vx_load
(
Sm
+
i
));
}
#if CV_SIMD_WIDTH > 16
for
(
;
i
<=
width
-
v_int32x4
::
nlanes
;
i
+=
v_int32x4
::
nlanes
)
{
v_int32x4
v_s0
=
v_load
(
SUM
+
i
)
+
v_load
(
Sp
+
i
);
v_store
(
D
+
i
,
v_cvt_f32
(
v_s0
));
v_store
(
SUM
+
i
,
v_s0
-
v_load
(
Sm
+
i
));
}
#endif
#endif
for
(
;
i
<
width
;
i
++
)
{
int
s0
=
SUM
[
i
]
+
Sp
[
i
];
D
[
i
]
=
(
float
)(
s0
);
SUM
[
i
]
=
s0
-
Sm
[
i
];
}
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
int
sumCount
;
std
::
vector
<
int
>
sum
;
};
namespace
cv
{
#ifdef HAVE_OPENCL
...
...
@@ -1396,109 +288,34 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
#endif
}
cv
::
Ptr
<
cv
::
BaseRowFilter
>
cv
::
getRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
Ptr
<
BaseRowFilter
>
getRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
sumType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
srcType
)
);
if
(
anchor
<
0
)
anchor
=
ksize
/
2
;
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_32S
)
return
makePtr
<
RowSum
<
uchar
,
int
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_16U
)
return
makePtr
<
RowSum
<
uchar
,
ushort
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_64F
)
return
makePtr
<
RowSum
<
uchar
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16U
&&
ddepth
==
CV_32S
)
return
makePtr
<
RowSum
<
ushort
,
int
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16U
&&
ddepth
==
CV_64F
)
return
makePtr
<
RowSum
<
ushort
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16S
&&
ddepth
==
CV_32S
)
return
makePtr
<
RowSum
<
short
,
int
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_32S
&&
ddepth
==
CV_32S
)
return
makePtr
<
RowSum
<
int
,
int
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16S
&&
ddepth
==
CV_64F
)
return
makePtr
<
RowSum
<
short
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_32F
&&
ddepth
==
CV_64F
)
return
makePtr
<
RowSum
<
float
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_64F
&&
ddepth
==
CV_64F
)
return
makePtr
<
RowSum
<
double
,
double
>
>
(
ksize
,
anchor
);
CV_Error_
(
CV_StsNotImplemented
,
(
"Unsupported combination of source format (=%d), and buffer format (=%d)"
,
srcType
,
sumType
));
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getRowSumFilter
,
(
srcType
,
sumType
,
ksize
,
anchor
),
CV_CPU_DISPATCH_MODES_ALL
);
}
cv
::
Ptr
<
cv
::
BaseColumnFilter
>
cv
::
getColumnSumFilter
(
int
sumType
,
int
dstType
,
int
ksize
,
int
anchor
,
double
scale
)
Ptr
<
BaseColumnFilter
>
getColumnSumFilter
(
int
sumType
,
int
dstType
,
int
ksize
,
int
anchor
,
double
scale
)
{
int
sdepth
=
CV_MAT_DEPTH
(
sumType
),
ddepth
=
CV_MAT_DEPTH
(
dstType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
dstType
)
);
if
(
anchor
<
0
)
anchor
=
ksize
/
2
;
if
(
ddepth
==
CV_8U
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
uchar
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_8U
&&
sdepth
==
CV_16U
)
return
makePtr
<
ColumnSum
<
ushort
,
uchar
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_8U
&&
sdepth
==
CV_64F
)
return
makePtr
<
ColumnSum
<
double
,
uchar
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_16U
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
ushort
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_16U
&&
sdepth
==
CV_64F
)
return
makePtr
<
ColumnSum
<
double
,
ushort
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_16S
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
short
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_16S
&&
sdepth
==
CV_64F
)
return
makePtr
<
ColumnSum
<
double
,
short
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_32S
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
int
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_32F
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
float
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_32F
&&
sdepth
==
CV_64F
)
return
makePtr
<
ColumnSum
<
double
,
float
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_64F
&&
sdepth
==
CV_32S
)
return
makePtr
<
ColumnSum
<
int
,
double
>
>
(
ksize
,
anchor
,
scale
);
if
(
ddepth
==
CV_64F
&&
sdepth
==
CV_64F
)
return
makePtr
<
ColumnSum
<
double
,
double
>
>
(
ksize
,
anchor
,
scale
);
CV_Error_
(
CV_StsNotImplemented
,
(
"Unsupported combination of sum format (=%d), and destination format (=%d)"
,
sumType
,
dstType
));
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getColumnSumFilter
,
(
sumType
,
dstType
,
ksize
,
anchor
,
scale
),
CV_CPU_DISPATCH_MODES_ALL
);
}
cv
::
Ptr
<
cv
::
FilterEngine
>
cv
::
createBoxFilter
(
int
srcType
,
int
dstType
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
Ptr
<
FilterEngine
>
createBoxFilter
(
int
srcType
,
int
dstType
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
);
int
cn
=
CV_MAT_CN
(
srcType
),
sumType
=
CV_64F
;
if
(
sdepth
==
CV_8U
&&
CV_MAT_DEPTH
(
dstType
)
==
CV_8U
&&
ksize
.
width
*
ksize
.
height
<=
256
)
sumType
=
CV_16U
;
else
if
(
sdepth
<=
CV_32S
&&
(
!
normalize
||
ksize
.
width
*
ksize
.
height
<=
(
sdepth
==
CV_8U
?
(
1
<<
23
)
:
sdepth
==
CV_16U
?
(
1
<<
15
)
:
(
1
<<
16
)))
)
sumType
=
CV_32S
;
sumType
=
CV_MAKETYPE
(
sumType
,
cn
);
Ptr
<
BaseRowFilter
>
rowFilter
=
getRowSumFilter
(
srcType
,
sumType
,
ksize
.
width
,
anchor
.
x
);
Ptr
<
BaseColumnFilter
>
columnFilter
=
getColumnSumFilter
(
sumType
,
dstType
,
ksize
.
height
,
anchor
.
y
,
normalize
?
1.
/
(
ksize
.
width
*
ksize
.
height
)
:
1
);
CV_INSTRUMENT_REGION
();
return
makePtr
<
FilterEngine
>
(
Ptr
<
BaseFilter
>
(),
rowFilter
,
columnFilter
,
srcType
,
dstType
,
sumType
,
borderType
);
CV_CPU_DISPATCH
(
createBoxFilter
,
(
srcType
,
dstType
,
ksize
,
anchor
,
normalize
,
borderType
)
,
CV_CPU_DISPATCH_MODES_ALL
);
}
#ifdef HAVE_OPENVX
namespace
cv
{
namespace
ovx
{
template
<>
inline
bool
skipSmallImages
<
VX_KERNEL_BOX_3x3
>
(
int
w
,
int
h
)
{
return
w
*
h
<
640
*
480
;
}
}
...
...
@@ -1570,12 +387,9 @@ namespace cv
return
true
;
}
}
#endif
#if defined(HAVE_IPP)
namespace
cv
{
static
bool
ipp_boxfilter
(
Mat
&
src
,
Mat
&
dst
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
#ifdef HAVE_IPP_IW
...
...
@@ -1620,13 +434,12 @@ static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool nor
return
false
;
#endif
}
}
#endif
void
cv
::
boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
void
boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
...
...
@@ -1674,8 +487,8 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
}
void
cv
::
blur
(
InputArray
src
,
OutputArray
dst
,
Size
ksize
,
Point
anchor
,
int
borderType
)
void
blur
(
InputArray
src
,
OutputArray
dst
,
Size
ksize
,
Point
anchor
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
...
...
@@ -1687,77 +500,17 @@ void cv::blur( InputArray src, OutputArray dst,
Squared Box Filter
\****************************************************************************************/
namespace
cv
{
template
<
typename
T
,
typename
ST
>
struct
SqrRowSum
:
public
BaseRowFilter
{
SqrRowSum
(
int
_ksize
,
int
_anchor
)
:
BaseRowFilter
()
{
ksize
=
_ksize
;
anchor
=
_anchor
;
}
virtual
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
width
,
int
cn
)
CV_OVERRIDE
{
const
T
*
S
=
(
const
T
*
)
src
;
ST
*
D
=
(
ST
*
)
dst
;
int
i
=
0
,
k
,
ksz_cn
=
ksize
*
cn
;
width
=
(
width
-
1
)
*
cn
;
for
(
k
=
0
;
k
<
cn
;
k
++
,
S
++
,
D
++
)
{
ST
s
=
0
;
for
(
i
=
0
;
i
<
ksz_cn
;
i
+=
cn
)
{
ST
val
=
(
ST
)
S
[
i
];
s
+=
val
*
val
;
}
D
[
0
]
=
s
;
for
(
i
=
0
;
i
<
width
;
i
+=
cn
)
{
ST
val0
=
(
ST
)
S
[
i
],
val1
=
(
ST
)
S
[
i
+
ksz_cn
];
s
+=
val1
*
val1
-
val0
*
val0
;
D
[
i
+
cn
]
=
s
;
}
}
}
};
static
Ptr
<
BaseRowFilter
>
getSqrRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
sumType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
srcType
)
);
if
(
anchor
<
0
)
anchor
=
ksize
/
2
;
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_32S
)
return
makePtr
<
SqrRowSum
<
uchar
,
int
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_8U
&&
ddepth
==
CV_64F
)
return
makePtr
<
SqrRowSum
<
uchar
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16U
&&
ddepth
==
CV_64F
)
return
makePtr
<
SqrRowSum
<
ushort
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_16S
&&
ddepth
==
CV_64F
)
return
makePtr
<
SqrRowSum
<
short
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_32F
&&
ddepth
==
CV_64F
)
return
makePtr
<
SqrRowSum
<
float
,
double
>
>
(
ksize
,
anchor
);
if
(
sdepth
==
CV_64F
&&
ddepth
==
CV_64F
)
return
makePtr
<
SqrRowSum
<
double
,
double
>
>
(
ksize
,
anchor
);
CV_Error_
(
CV_StsNotImplemented
,
(
"Unsupported combination of source format (=%d), and buffer format (=%d)"
,
srcType
,
sumType
));
}
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getSqrRowSumFilter
,
(
srcType
,
sumType
,
ksize
,
anchor
),
CV_CPU_DISPATCH_MODES_ALL
);
}
void
cv
::
sqrBoxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
void
sqrBoxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
...
...
@@ -1801,4 +554,4 @@ void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
f
->
apply
(
src
,
dst
,
wsz
,
ofs
);
}
/* End of file. */
}
// namespace
modules/imgproc/src/box_filter.simd.hpp
View file @
5a01227a
...
...
@@ -42,21 +42,25 @@
//M*/
#include "precomp.hpp"
#include <vector>
#include "opencv2/core/hal/intrin.hpp"
#include "opencl_kernels_imgproc.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
namespace
cv
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
Ptr
<
BaseRowFilter
>
getRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
);
Ptr
<
BaseColumnFilter
>
getColumnSumFilter
(
int
sumType
,
int
dstType
,
int
ksize
,
int
anchor
,
double
scale
);
Ptr
<
FilterEngine
>
createBoxFilter
(
int
srcType
,
int
dstType
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
);
namespace
cv
{
Ptr
<
BaseRowFilter
>
getSqrRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
/****************************************************************************************\
Box Filter
\****************************************************************************************/
namespace
{
template
<
typename
T
,
typename
ST
>
struct
RowSum
:
public
BaseRowFilter
...
...
@@ -70,6 +74,8 @@ struct RowSum :
virtual
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
width
,
int
cn
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
const
T
*
S
=
(
const
T
*
)
src
;
ST
*
D
=
(
ST
*
)
dst
;
int
i
=
0
,
k
,
ksz_cn
=
ksize
*
cn
;
...
...
@@ -183,6 +189,8 @@ struct ColumnSum :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
i
;
ST
*
SUM
;
bool
haveScale
=
scale
!=
1
;
...
...
@@ -281,6 +289,8 @@ struct ColumnSum<int, uchar> :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
...
...
@@ -408,9 +418,6 @@ struct ColumnSum<int, uchar> :
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -452,6 +459,8 @@ public BaseColumnFilter
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
const
int
ds
=
divScale
;
const
int
dd
=
divDelta
;
ushort
*
SUM
;
...
...
@@ -586,9 +595,6 @@ public BaseColumnFilter
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -616,6 +622,8 @@ struct ColumnSum<int, short> :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
i
;
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
...
...
@@ -739,9 +747,6 @@ struct ColumnSum<int, short> :
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -767,6 +772,8 @@ struct ColumnSum<int, ushort> :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
...
...
@@ -888,9 +895,6 @@ struct ColumnSum<int, ushort> :
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -915,6 +919,8 @@ struct ColumnSum<int, int> :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
...
...
@@ -1022,9 +1028,6 @@ struct ColumnSum<int, int> :
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -1050,6 +1053,8 @@ struct ColumnSum<int, float> :
virtual
void
operator
()(
const
uchar
**
src
,
uchar
*
dst
,
int
dststep
,
int
count
,
int
width
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
int
*
SUM
;
bool
haveScale
=
scale
!=
1
;
double
_scale
=
scale
;
...
...
@@ -1154,9 +1159,6 @@ struct ColumnSum<int, float> :
}
dst
+=
dststep
;
}
#if CV_SIMD
vx_cleanup
();
#endif
}
double
scale
;
...
...
@@ -1164,243 +1166,13 @@ struct ColumnSum<int, float> :
std
::
vector
<
int
>
sum
;
};
#ifdef HAVE_OPENCL
}
// namespace anon
static
bool
ocl_boxFilter3x3_8UC1
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
if
(
!
(
dev
.
isIntel
()
&&
(
type
==
CV_8UC1
)
&&
(
_src
.
offset
()
==
0
)
&&
(
_src
.
step
()
%
4
==
0
)
&&
(
_src
.
cols
()
%
16
==
0
)
&&
(
_src
.
rows
()
%
2
==
0
)
&&
(
anchor
.
x
==
1
)
&&
(
anchor
.
y
==
1
)
&&
(
ksize
.
width
==
3
)
&&
(
ksize
.
height
==
3
))
)
return
false
;
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
Size
size
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
0
,
0
};
size_t
localsize
[
2
]
=
{
0
,
0
};
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
globalsize
[
0
]
=
size
.
width
/
16
;
globalsize
[
1
]
=
size
.
height
/
2
;
char
build_opts
[
1024
];
sprintf
(
build_opts
,
"-D %s %s"
,
borderMap
[
borderType
],
normalize
?
"-D NORMALIZE"
:
""
);
ocl
::
Kernel
kernel
(
"boxFilter3x3_8UC1_cols16_rows2"
,
cv
::
ocl
::
imgproc
::
boxFilter3x3_oclsrc
,
build_opts
);
if
(
kernel
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
size
,
CV_MAKETYPE
(
ddepth
,
cn
));
if
(
!
(
_dst
.
offset
()
==
0
&&
_dst
.
step
()
%
4
==
0
))
return
false
;
UMat
dst
=
_dst
.
getUMat
();
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
rows
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
cols
);
if
(
normalize
)
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
return
kernel
.
run
(
2
,
globalsize
,
(
localsize
[
0
]
==
0
)
?
NULL
:
localsize
,
false
);
}
static
bool
ocl_boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
,
bool
sqr
=
false
)
Ptr
<
BaseRowFilter
>
getRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
type
);
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
cn
>
4
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
||
_src
.
offset
()
%
esz
!=
0
||
_src
.
step
()
%
esz
!=
0
)
return
false
;
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
int
computeUnits
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
Size
size
=
_src
.
size
(),
wholeSize
;
bool
isolated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
borderType
&=
~
BORDER_ISOLATED
;
int
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
ddepth
,
sdepth
)),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
size_t
globalsize
[
2
]
=
{
(
size_t
)
size
.
width
,
(
size_t
)
size
.
height
};
size_t
localsize_general
[
2
]
=
{
0
,
1
},
*
localsize
=
NULL
;
UMat
src
=
_src
.
getUMat
();
if
(
!
isolated
)
{
Point
ofs
;
src
.
locateROI
(
wholeSize
,
ofs
);
}
int
h
=
isolated
?
size
.
height
:
wholeSize
.
height
;
int
w
=
isolated
?
size
.
width
:
wholeSize
.
width
;
size_t
maxWorkItemSizes
[
32
];
ocl
::
Device
::
getDefault
().
maxWorkItemSizes
(
maxWorkItemSizes
);
int
tryWorkItems
=
(
int
)
maxWorkItemSizes
[
0
];
ocl
::
Kernel
kernel
;
if
(
dev
.
isIntel
()
&&
!
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_CPU
)
&&
((
ksize
.
width
<
5
&&
ksize
.
height
<
5
&&
esz
<=
4
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
&&
cn
==
1
)))
{
if
(
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
// Figure out what vector size to use for loading the pixels.
int
pxLoadNumPixels
=
cn
!=
1
||
size
.
width
%
4
?
1
:
4
;
int
pxLoadVecSize
=
cn
*
pxLoadNumPixels
;
// Figure out how many pixels per work item to compute in X and Y
// directions. Too many and we run out of registers.
int
pxPerWorkItemX
=
1
,
pxPerWorkItemY
=
1
;
if
(
cn
<=
2
&&
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
)
{
pxPerWorkItemX
=
size
.
width
%
8
?
size
.
width
%
4
?
size
.
width
%
2
?
1
:
2
:
4
:
8
;
pxPerWorkItemY
=
size
.
height
%
2
?
1
:
2
;
}
else
if
(
cn
<
4
||
(
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
))
{
pxPerWorkItemX
=
size
.
width
%
2
?
1
:
2
;
pxPerWorkItemY
=
size
.
height
%
2
?
1
:
2
;
}
globalsize
[
0
]
=
size
.
width
/
pxPerWorkItemX
;
globalsize
[
1
]
=
size
.
height
/
pxPerWorkItemY
;
// Need some padding in the private array for pixels
int
privDataWidth
=
roundUp
(
pxPerWorkItemX
+
ksize
.
width
-
1
,
pxLoadNumPixels
);
// Make the global size a nice round number so the runtime can pick
// from reasonable choices for the workgroup size
const
int
wgRound
=
256
;
globalsize
[
0
]
=
roundUp
(
globalsize
[
0
],
wgRound
);
char
build_options
[
1024
],
cvt
[
2
][
40
];
sprintf
(
build_options
,
"-D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
"-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
"-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s%s%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D OP_BOX_FILTER"
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
pxLoadVecSize
,
pxLoadNumPixels
,
pxPerWorkItemX
,
pxPerWorkItemY
,
privDataWidth
,
borderMap
[
borderType
],
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
privDataWidth
/
pxLoadNumPixels
,
pxPerWorkItemY
+
ksize
.
height
-
1
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
pxLoadVecSize
))
//PX_LOAD_FLOAT_VEC_CONV
);
if
(
!
kernel
.
create
(
"filterSmall"
,
cv
::
ocl
::
imgproc
::
filterSmall_oclsrc
,
build_options
))
return
false
;
}
else
{
localsize
=
localsize_general
;
for
(
;
;
)
{
int
BLOCK_SIZE_X
=
tryWorkItems
,
BLOCK_SIZE_Y
=
std
::
min
(
ksize
.
height
*
10
,
size
.
height
);
while
(
BLOCK_SIZE_X
>
32
&&
BLOCK_SIZE_X
>=
ksize
.
width
*
2
&&
BLOCK_SIZE_X
>
size
.
width
*
2
)
BLOCK_SIZE_X
/=
2
;
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE_X
/
8
&&
BLOCK_SIZE_Y
*
computeUnits
*
32
<
size
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
(
ksize
.
width
>
BLOCK_SIZE_X
||
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
char
cvt
[
2
][
50
];
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
" -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
" -D ST1=%s -D DT1=%s -D cn=%d"
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
cn
);
localsize
[
0
]
=
BLOCK_SIZE_X
;
globalsize
[
0
]
=
divUp
(
size
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
globalsize
[
1
]
=
divUp
(
size
.
height
,
BLOCK_SIZE_Y
);
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
opts
);
if
(
kernel
.
empty
())
return
false
;
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE_X
<
(
int
)
kernelWorkGroupSize
)
return
false
;
tryWorkItems
=
(
int
)
kernelWorkGroupSize
;
}
}
_dst
.
create
(
size
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcEndX
=
isolated
?
srcOffsetX
+
size
.
width
:
wholeSize
.
width
;
int
srcEndY
=
isolated
?
srcOffsetY
+
size
.
height
:
wholeSize
.
height
;
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
if
(
normalize
)
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
#endif
}
CV_INSTRUMENT_REGION
();
cv
::
Ptr
<
cv
::
BaseRowFilter
>
cv
::
getRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
sumType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
srcType
)
);
...
...
@@ -1434,9 +1206,10 @@ cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksi
}
cv
::
Ptr
<
cv
::
BaseColumnFilter
>
cv
::
getColumnSumFilter
(
int
sumType
,
int
dstType
,
int
ksize
,
int
anchor
,
double
scale
)
Ptr
<
BaseColumnFilter
>
getColumnSumFilter
(
int
sumType
,
int
dstType
,
int
ksize
,
int
anchor
,
double
scale
)
{
CV_INSTRUMENT_REGION
();
int
sdepth
=
CV_MAT_DEPTH
(
sumType
),
ddepth
=
CV_MAT_DEPTH
(
dstType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
dstType
)
);
...
...
@@ -1474,9 +1247,11 @@ cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, i
}
cv
::
Ptr
<
cv
::
FilterEngine
>
cv
::
createBoxFilter
(
int
srcType
,
int
dstType
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
Ptr
<
FilterEngine
>
createBoxFilter
(
int
srcType
,
int
dstType
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
int
sdepth
=
CV_MAT_DEPTH
(
srcType
);
int
cn
=
CV_MAT_CN
(
srcType
),
sumType
=
CV_64F
;
if
(
sdepth
==
CV_8U
&&
CV_MAT_DEPTH
(
dstType
)
==
CV_8U
&&
...
...
@@ -1496,199 +1271,12 @@ cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ks
srcType
,
dstType
,
sumType
,
borderType
);
}
#ifdef HAVE_OPENVX
namespace
cv
{
namespace
ovx
{
template
<>
inline
bool
skipSmallImages
<
VX_KERNEL_BOX_3x3
>
(
int
w
,
int
h
)
{
return
w
*
h
<
640
*
480
;
}
}
static
bool
openvx_boxfilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
if
(
ddepth
<
0
)
ddepth
=
CV_8UC1
;
if
(
_src
.
type
()
!=
CV_8UC1
||
ddepth
!=
CV_8U
||
!
normalize
||
_src
.
cols
()
<
3
||
_src
.
rows
()
<
3
||
ksize
.
width
!=
3
||
ksize
.
height
!=
3
||
(
anchor
.
x
>=
0
&&
anchor
.
x
!=
1
)
||
(
anchor
.
y
>=
0
&&
anchor
.
y
!=
1
)
||
ovx
::
skipSmallImages
<
VX_KERNEL_BOX_3x3
>
(
_src
.
cols
(),
_src
.
rows
()))
return
false
;
Mat
src
=
_src
.
getMat
();
if
((
borderType
&
BORDER_ISOLATED
)
==
0
&&
src
.
isSubmatrix
())
return
false
;
//Process isolated borders only
vx_enum
border
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
border
=
VX_BORDER_CONSTANT
;
break
;
case
BORDER_REPLICATE
:
border
=
VX_BORDER_REPLICATE
;
break
;
default
:
return
false
;
}
_dst
.
create
(
src
.
size
(),
CV_8UC1
);
Mat
dst
=
_dst
.
getMat
();
try
{
ivx
::
Context
ctx
=
ovx
::
getOpenVXContext
();
Mat
a
;
if
(
dst
.
data
!=
src
.
data
)
a
=
src
;
else
src
.
copyTo
(
a
);
ivx
::
Image
ia
=
ivx
::
Image
::
createFromHandle
(
ctx
,
VX_DF_IMAGE_U8
,
ivx
::
Image
::
createAddressing
(
a
.
cols
,
a
.
rows
,
1
,
(
vx_int32
)(
a
.
step
)),
a
.
data
),
ib
=
ivx
::
Image
::
createFromHandle
(
ctx
,
VX_DF_IMAGE_U8
,
ivx
::
Image
::
createAddressing
(
dst
.
cols
,
dst
.
rows
,
1
,
(
vx_int32
)(
dst
.
step
)),
dst
.
data
);
//ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
//since OpenVX standard says nothing about thread-safety for now
ivx
::
border_t
prevBorder
=
ctx
.
immediateBorder
();
ctx
.
setImmediateBorder
(
border
,
(
vx_uint8
)(
0
));
ivx
::
IVX_CHECK_STATUS
(
vxuBox3x3
(
ctx
,
ia
,
ib
));
ctx
.
setImmediateBorder
(
prevBorder
);
}
catch
(
const
ivx
::
RuntimeError
&
e
)
{
VX_DbgThrow
(
e
.
what
());
}
catch
(
const
ivx
::
WrapperError
&
e
)
{
VX_DbgThrow
(
e
.
what
());
}
return
true
;
}
}
#endif
#if defined(HAVE_IPP)
namespace
cv
{
static
bool
ipp_boxfilter
(
Mat
&
src
,
Mat
&
dst
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP
();
#if IPP_VERSION_X100 < 201801
// Problem with SSE42 optimization for 16s and some 8u modes
if
(
ipp
::
getIppTopFeatures
()
==
ippCPUID_SSE42
&&
(((
src
.
depth
()
==
CV_16S
||
src
.
depth
()
==
CV_16U
)
&&
(
src
.
channels
()
==
3
||
src
.
channels
()
==
4
))
||
(
src
.
depth
()
==
CV_8U
&&
src
.
channels
()
==
3
&&
(
ksize
.
width
>
5
||
ksize
.
height
>
5
))))
return
false
;
// Other optimizations has some degradations too
if
((((
src
.
depth
()
==
CV_16S
||
src
.
depth
()
==
CV_16U
)
&&
(
src
.
channels
()
==
4
))
||
(
src
.
depth
()
==
CV_8U
&&
src
.
channels
()
==
1
&&
(
ksize
.
width
>
5
||
ksize
.
height
>
5
))))
return
false
;
#endif
if
(
!
normalize
)
return
false
;
if
(
!
ippiCheckAnchor
(
anchor
,
ksize
))
return
false
;
try
{
::
ipp
::
IwiImage
iwSrc
=
ippiGetImage
(
src
);
::
ipp
::
IwiImage
iwDst
=
ippiGetImage
(
dst
);
::
ipp
::
IwiSize
iwKSize
=
ippiGetSize
(
ksize
);
::
ipp
::
IwiBorderSize
borderSize
(
iwKSize
);
::
ipp
::
IwiBorderType
ippBorder
(
ippiGetBorder
(
iwSrc
,
borderType
,
borderSize
));
if
(
!
ippBorder
)
return
false
;
CV_INSTRUMENT_FUN_IPP
(
::
ipp
::
iwiFilterBox
,
iwSrc
,
iwDst
,
iwKSize
,
::
ipp
::
IwDefault
(),
ippBorder
);
}
catch
(
const
::
ipp
::
IwException
&
)
{
return
false
;
}
return
true
;
#else
CV_UNUSED
(
src
);
CV_UNUSED
(
dst
);
CV_UNUSED
(
ksize
);
CV_UNUSED
(
anchor
);
CV_UNUSED
(
normalize
);
CV_UNUSED
(
borderType
);
return
false
;
#endif
}
}
#endif
void
cv
::
boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
(
borderType
==
BORDER_REPLICATE
||
borderType
==
BORDER_CONSTANT
||
borderType
==
BORDER_REFLECT
||
borderType
==
BORDER_REFLECT_101
),
ocl_boxFilter3x3_8UC1
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
Mat
src
=
_src
.
getMat
();
int
stype
=
src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
);
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
cn
)
);
Mat
dst
=
_dst
.
getMat
();
if
(
borderType
!=
BORDER_CONSTANT
&&
normalize
&&
(
borderType
&
BORDER_ISOLATED
)
!=
0
)
{
if
(
src
.
rows
==
1
)
ksize
.
height
=
1
;
if
(
src
.
cols
==
1
)
ksize
.
width
=
1
;
}
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
if
(
!
(
borderType
&
BORDER_ISOLATED
))
src
.
locateROI
(
wsz
,
ofs
);
CALL_HAL
(
boxFilter
,
cv_hal_boxFilter
,
src
.
ptr
(),
src
.
step
,
dst
.
ptr
(),
dst
.
step
,
src
.
cols
,
src
.
rows
,
sdepth
,
ddepth
,
cn
,
ofs
.
x
,
ofs
.
y
,
wsz
.
width
-
src
.
cols
-
ofs
.
x
,
wsz
.
height
-
src
.
rows
-
ofs
.
y
,
ksize
.
width
,
ksize
.
height
,
anchor
.
x
,
anchor
.
y
,
normalize
,
borderType
&~
BORDER_ISOLATED
);
CV_OVX_RUN
(
true
,
openvx_boxfilter
(
src
,
dst
,
ddepth
,
ksize
,
anchor
,
normalize
,
borderType
))
CV_IPP_RUN_FAST
(
ipp_boxfilter
(
src
,
dst
,
ksize
,
anchor
,
normalize
,
borderType
));
borderType
=
(
borderType
&~
BORDER_ISOLATED
);
Ptr
<
FilterEngine
>
f
=
createBoxFilter
(
src
.
type
(),
dst
.
type
(),
ksize
,
anchor
,
normalize
,
borderType
);
f
->
apply
(
src
,
dst
,
wsz
,
ofs
);
}
void
cv
::
blur
(
InputArray
src
,
OutputArray
dst
,
Size
ksize
,
Point
anchor
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
boxFilter
(
src
,
dst
,
-
1
,
ksize
,
anchor
,
true
,
borderType
);
}
/****************************************************************************************\
Squared Box Filter
\****************************************************************************************/
namespace
cv
{
namespace
{
template
<
typename
T
,
typename
ST
>
struct
SqrRowSum
:
...
...
@@ -1703,6 +1291,8 @@ struct SqrRowSum :
virtual
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
width
,
int
cn
)
CV_OVERRIDE
{
CV_INSTRUMENT_REGION
();
const
T
*
S
=
(
const
T
*
)
src
;
ST
*
D
=
(
ST
*
)
dst
;
int
i
=
0
,
k
,
ksz_cn
=
ksize
*
cn
;
...
...
@@ -1727,7 +1317,9 @@ struct SqrRowSum :
}
};
static
Ptr
<
BaseRowFilter
>
getSqrRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
}
// namespace anon
Ptr
<
BaseRowFilter
>
getSqrRowSumFilter
(
int
srcType
,
int
sumType
,
int
ksize
,
int
anchor
)
{
int
sdepth
=
CV_MAT_DEPTH
(
srcType
),
ddepth
=
CV_MAT_DEPTH
(
sumType
);
CV_Assert
(
CV_MAT_CN
(
sumType
)
==
CV_MAT_CN
(
srcType
)
);
...
...
@@ -1753,52 +1345,6 @@ static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize
srcType
,
sumType
));
}
}
void
cv
::
sqrBoxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
CV_INSTRUMENT_REGION
();
int
srcType
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
srcType
),
cn
=
CV_MAT_CN
(
srcType
);
Size
size
=
_src
.
size
();
if
(
ddepth
<
0
)
ddepth
=
sdepth
<
CV_32F
?
CV_32F
:
CV_64F
;
if
(
borderType
!=
BORDER_CONSTANT
&&
normalize
)
{
if
(
size
.
height
==
1
)
ksize
.
height
=
1
;
if
(
size
.
width
==
1
)
ksize
.
width
=
1
;
}
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
,
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
,
true
))
int
sumDepth
=
CV_64F
;
if
(
sdepth
==
CV_8U
)
sumDepth
=
CV_32S
;
int
sumType
=
CV_MAKETYPE
(
sumDepth
,
cn
),
dstType
=
CV_MAKETYPE
(
ddepth
,
cn
);
Mat
src
=
_src
.
getMat
();
_dst
.
create
(
size
,
dstType
);
Mat
dst
=
_dst
.
getMat
();
Ptr
<
BaseRowFilter
>
rowFilter
=
getSqrRowSumFilter
(
srcType
,
sumType
,
ksize
.
width
,
anchor
.
x
);
Ptr
<
BaseColumnFilter
>
columnFilter
=
getColumnSumFilter
(
sumType
,
dstType
,
ksize
.
height
,
anchor
.
y
,
normalize
?
1.
/
(
ksize
.
width
*
ksize
.
height
)
:
1
);
Ptr
<
FilterEngine
>
f
=
makePtr
<
FilterEngine
>
(
Ptr
<
BaseFilter
>
(),
rowFilter
,
columnFilter
,
srcType
,
dstType
,
sumType
,
borderType
);
Point
ofs
;
Size
wsz
(
src
.
cols
,
src
.
rows
);
src
.
locateROI
(
wsz
,
ofs
);
f
->
apply
(
src
,
dst
,
wsz
,
ofs
);
}
/* End of file. */
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
// namespace
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment