Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
2b7866f2
Commit
2b7866f2
authored
Oct 29, 2016
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7503 from pengli:box_filter_v2
parents
b077d6a1
3607da9f
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
261 additions
and
0 deletions
+261
-0
boxFilter3x3.cl
modules/imgproc/src/opencl/boxFilter3x3.cl
+127
-0
smooth.cpp
modules/imgproc/src/smooth.cpp
+60
-0
test_boxfilter.cpp
modules/imgproc/test/ocl/test_boxfilter.cpp
+74
-0
No files found.
modules/imgproc/src/opencl/boxFilter3x3.cl
0 → 100644
View file @
2b7866f2
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
__kernel
void
boxFilter3x3_8UC1_cols16_rows2
(
__global
const
uint*
src,
int
src_step,
__global
uint*
dst,
int
dst_step,
int
rows,
int
cols
#
ifdef
NORMALIZE
,
float
alpha
#
endif
)
{
int
block_x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
*
2
;
int
ssx,
dsx
;
if
((
block_x
*
16
)
>=
cols
|
| y >= rows) return;
uint4 line[4];
uint4 line_out[2];
ushort a; ushort16 b; ushort c;
ushort d; ushort16 e; ushort f;
ushort g; ushort16 h; ushort i;
ushort j; ushort16 k; ushort l;
ssx = dsx = 1;
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
line[1] = vload4(0, src + src_index + (src_step / 4));
line[2] = vload4(0, src + src_index + 2 * (src_step / 4));
#ifdef BORDER_CONSTANT
line[0] = (y == 0) ? (uint4)0 : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? (uint4)0 : vload4(0, src + src_index + 3 * (src_step / 4));
#elif defined BORDER_REFLECT_101
line[0] = (y == 0) ? line[2] : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? line[1] : vload4(0, src + src_index + 3 * (src_step / 4));
#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
line[0] = (y == 0) ? line[1] : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? line[2] : vload4(0, src + src_index + 3 * (src_step / 4));
#endif
ushort16 sum, mid;
__global uchar *src_p = (__global uchar *)src;
src_index = block_x * 16 * ssx + (y - 1) * src_step;
bool line_end = ((block_x + 1) * 16 == cols);
b = convert_ushort16(as_uchar16(line[0]));
e = convert_ushort16(as_uchar16(line[1]));
h = convert_ushort16(as_uchar16(line[2]));
k = convert_ushort16(as_uchar16(line[3]));
#ifdef BORDER_CONSTANT
a = (block_x == 0 || y == 0) ? 0 : convert_ushort(src_p[src_index - 1]);
c = (line_end || y == 0) ? 0 : convert_ushort(src_p[src_index + 16]);
d = (block_x == 0) ? 0 : convert_ushort(src_p[src_index + src_step - 1]);
f = line_end ? 0 : convert_ushort(src_p[src_index + src_step + 16]);
g = (block_x == 0) ? 0 : convert_ushort(src_p[src_index + 2 * src_step - 1]);
i = line_end ? 0 : convert_ushort(src_p[src_index + 2 * src_step + 16]);
j = (block_x == 0 || y == (rows - 2)) ? 0 : convert_ushort(src_p[src_index + 3 * src_step - 1]);
l = (line_end || y == (rows - 2))? 0 : convert_ushort(src_p[src_index + 3 * src_step + 16]);
#elif defined BORDER_REFLECT_101
int offset;
offset = (y == 0) ? (2 * src_step) : 0;
a = (block_x == 0) ? convert_ushort(src_p[src_index + offset + 1]) : convert_ushort(src_p[src_index + offset - 1]);
c = line_end ? convert_ushort(src_p[src_index + offset + 14]) : convert_ushort(src_p[src_index + offset + 16]);
d = (block_x == 0) ? convert_ushort(src_p[src_index + src_step + 1]) : convert_ushort(src_p[src_index + src_step - 1]);
f = line_end ? convert_ushort(src_p[src_index + src_step + 14]) : convert_ushort(src_p[src_index + src_step + 16]);
g = (block_x == 0) ? convert_ushort(src_p[src_index + 2 * src_step + 1]) : convert_ushort(src_p[src_index + 2 * src_step - 1]);
i = line_end ? convert_ushort(src_p[src_index + 2 * src_step + 14]) : convert_ushort(src_p[src_index + 2 * src_step + 16]);
offset = (y == (rows - 2)) ? (1 * src_step) : (3 * src_step);
j = (block_x == 0) ? convert_ushort(src_p[src_index + offset + 1]) : convert_ushort(src_p[src_index + offset - 1]);
l = line_end ? convert_ushort(src_p[src_index + offset + 14]) : convert_ushort(src_p[src_index + offset + 16]);
#elif defined (BORDER_REPLICATE) |
|
defined
(
BORDER_REFLECT
)
int
offset
;
offset
=
(
y
==
0
)
?
(
1
*
src_step
)
:
0
;
a
=
(
block_x
==
0
)
?
convert_ushort
(
src_p[src_index
+
offset]
)
:
convert_ushort
(
src_p[src_index
+
offset
-
1]
)
;
c
=
line_end
?
convert_ushort
(
src_p[src_index
+
offset
+
15]
)
:
convert_ushort
(
src_p[src_index
+
offset
+
16]
)
;
d
=
(
block_x
==
0
)
?
convert_ushort
(
src_p[src_index
+
src_step]
)
:
convert_ushort
(
src_p[src_index
+
src_step
-
1]
)
;
f
=
line_end
?
convert_ushort
(
src_p[src_index
+
src_step
+
15]
)
:
convert_ushort
(
src_p[src_index
+
src_step
+
16]
)
;
g
=
(
block_x
==
0
)
?
convert_ushort
(
src_p[src_index
+
2
*
src_step]
)
:
convert_ushort
(
src_p[src_index
+
2
*
src_step
-
1]
)
;
i
=
line_end
?
convert_ushort
(
src_p[src_index
+
2
*
src_step
+
15]
)
:
convert_ushort
(
src_p[src_index
+
2
*
src_step
+
16]
)
;
offset
=
(
y
==
(
rows
-
2
))
?
(
2
*
src_step
)
:
(
3
*
src_step
)
;
j
=
(
block_x
==
0
)
?
convert_ushort
(
src_p[src_index
+
offset]
)
:
convert_ushort
(
src_p[src_index
+
offset
-
1]
)
;
l
=
line_end
?
convert_ushort
(
src_p[src_index
+
offset
+
15]
)
:
convert_ushort
(
src_p[src_index
+
offset
+
16]
)
;
#
endif
mid
=
(
ushort16
)(
d,
e.s0123,
e.s456789ab,
e.scde
)
+
e
+
(
ushort16
)(
e.s123,
e.s4567,
e.s89abcdef,
f
)
+
(
ushort16
)(
g,
h.s0123,
h.s456789ab,
h.scde
)
+
h
+
(
ushort16
)(
h.s123,
h.s4567,
h.s89abcdef,
i
)
;
sum
=
(
ushort16
)(
a,
b.s0123,
b.s456789ab,
b.scde
)
+
b
+
(
ushort16
)(
b.s123,
b.s4567,
b.s89abcdef,
c
)
+
mid
;
#
ifdef
NORMALIZE
line_out[0]
=
as_uint4
(
convert_uchar16_sat_rte
((
convert_float16
(
sum
)
*
alpha
)))
;
#
else
line_out[0]
=
as_uint4
(
convert_uchar16_sat_rte
(
sum
))
;
#
endif
sum
=
mid
+
(
ushort16
)(
j,
k.s0123,
k.s456789ab,
k.scde
)
+
k
+
(
ushort16
)(
k.s123,
k.s4567,
k.s89abcdef,
l
)
;
#
ifdef
NORMALIZE
line_out[1]
=
as_uint4
(
convert_uchar16_sat_rte
((
convert_float16
(
sum
)
*
alpha
)))
;
#
else
line_out[1]
=
as_uint4
(
convert_uchar16_sat_rte
(
sum
))
;
#
endif
int
dst_index
=
block_x
*
4
*
dsx
+
y
*
(
dst_step
/
4
)
;
vstore4
(
line_out[0],
0
,
dst
+
dst_index
)
;
vstore4
(
line_out[1],
0
,
dst
+
dst_index
+
(
dst_step
/
4
))
;
}
modules/imgproc/src/smooth.cpp
View file @
2b7866f2
...
...
@@ -1295,6 +1295,61 @@ struct ColumnSum<int, float> :
#ifdef HAVE_OPENCL
static
bool
ocl_boxFilter3x3_8UC1
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
if
(
!
(
dev
.
isIntel
()
&&
(
type
==
CV_8UC1
)
&&
(
_src
.
offset
()
==
0
)
&&
(
_src
.
step
()
%
4
==
0
)
&&
(
_src
.
cols
()
%
16
==
0
)
&&
(
_src
.
rows
()
%
2
==
0
)
&&
(
anchor
.
x
==
1
)
&&
(
anchor
.
y
==
1
)
&&
(
ksize
.
width
==
3
)
&&
(
ksize
.
height
==
3
))
)
return
false
;
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
Size
size
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
0
,
0
};
size_t
localsize
[
2
]
=
{
0
,
0
};
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
globalsize
[
0
]
=
size
.
width
/
16
;
globalsize
[
1
]
=
size
.
height
/
2
;
char
build_opts
[
1024
];
sprintf
(
build_opts
,
"-D %s %s"
,
borderMap
[
borderType
],
normalize
?
"-D NORMALIZE"
:
""
);
ocl
::
Kernel
kernel
(
"boxFilter3x3_8UC1_cols16_rows2"
,
cv
::
ocl
::
imgproc
::
boxFilter3x3_oclsrc
,
build_opts
);
if
(
kernel
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
size
,
CV_MAKETYPE
(
ddepth
,
cn
));
if
(
!
(
_dst
.
offset
()
==
0
&&
_dst
.
step
()
%
4
==
0
))
return
false
;
UMat
dst
=
_dst
.
getUMat
();
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
rows
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
cols
);
if
(
normalize
)
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
return
kernel
.
run
(
2
,
globalsize
,
(
localsize
[
0
]
==
0
)
?
NULL
:
localsize
,
false
);
}
#define DIVUP(total, grain) ((total + grain - 1) / (grain))
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
...
...
@@ -1683,6 +1738,11 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
{
CV_INSTRUMENT_REGION
()
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
(
borderType
==
BORDER_REPLICATE
||
borderType
==
BORDER_CONSTANT
||
borderType
==
BORDER_REFLECT
||
borderType
==
BORDER_REFLECT_101
),
ocl_boxFilter3x3_8UC1
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
Mat
src
=
_src
.
getMat
();
...
...
modules/imgproc/test/ocl/test_boxfilter.cpp
View file @
2b7866f2
...
...
@@ -157,6 +157,80 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SqrBoxFilter,
);
PARAM_TEST_CASE
(
BoxFilter3x3_cols16_rows2_Base
,
MatDepth
,
Channels
,
BorderType
,
bool
,
bool
)
{
int
depth
,
cn
,
borderType
;
Size
ksize
,
dsize
;
Point
anchor
;
bool
normalize
,
useRoi
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
virtual
void
SetUp
()
{
depth
=
GET_PARAM
(
0
);
cn
=
GET_PARAM
(
1
);
borderType
=
GET_PARAM
(
2
);
// only not isolated border tested, because CPU module doesn't support isolated border case.
normalize
=
GET_PARAM
(
3
);
useRoi
=
GET_PARAM
(
4
);
}
void
random_roi
()
{
int
type
=
CV_MAKE_TYPE
(
depth
,
cn
);
ksize
=
Size
(
3
,
3
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
roiSize
.
width
=
std
::
max
(
ksize
.
width
+
13
,
roiSize
.
width
&
(
~
0xf
));
roiSize
.
height
=
std
::
max
(
ksize
.
height
+
1
,
roiSize
.
height
&
(
~
0x1
));
Border
srcBorder
=
{
0
,
0
,
0
,
0
};
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
Border
dstBorder
=
{
0
,
0
,
0
,
0
};
randomSubMat
(
dst
,
dst_roi
,
roiSize
,
dstBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
anchor
.
x
=
-
1
;
anchor
.
y
=
-
1
;
UMAT_UPLOAD_INPUT_PARAMETER
(
src
);
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
);
}
void
Near
(
double
threshold
=
0.0
)
{
OCL_EXPECT_MATS_NEAR
(
dst
,
threshold
);
}
};
typedef
BoxFilter3x3_cols16_rows2_Base
BoxFilter3x3_cols16_rows2
;
OCL_TEST_P
(
BoxFilter3x3_cols16_rows2
,
Mat
)
{
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
random_roi
();
OCL_OFF
(
cv
::
boxFilter
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
anchor
,
normalize
,
borderType
));
OCL_ON
(
cv
::
boxFilter
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
anchor
,
normalize
,
borderType
));
Near
(
depth
<=
CV_32S
?
1
:
3e-3
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
BoxFilter3x3_cols16_rows2
,
Combine
(
Values
((
MatDepth
)
CV_8U
),
Values
((
Channels
)
1
),
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
(
BorderType
)
BORDER_REFLECT_101
),
Bool
(),
Values
(
false
)
// ROI
)
);
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment