Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f1d93cb2
Commit
f1d93cb2
authored
Nov 09, 2016
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7624 from pengli:gaussian_blur
parents
b2fa82ff
8f63f51e
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
289 additions
and
2 deletions
+289
-2
gaussianBlur3x3.cl
modules/imgproc/src/opencl/gaussianBlur3x3.cl
+133
-0
smooth.cpp
modules/imgproc/src/smooth.cpp
+67
-2
test_filters.cpp
modules/imgproc/test/ocl/test_filters.cpp
+89
-0
No files found.
modules/imgproc/src/opencl/gaussianBlur3x3.cl
0 → 100644
View file @
f1d93cb2
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
#
define
DIG
(
a
)
a,
__constant
float
kx[]
=
{
KERNEL_MATRIX_X
}
;
__constant
float
ky[]
=
{
KERNEL_MATRIX_Y
}
;
#
define
OP
(
delta,
y,
x
)
(
convert_float16
(
arr[
(
y
+
delta
)
*
3
+
x]
)
*
ky[y]
*
kx[x]
)
__kernel
void
gaussianBlur3x3_8UC1_cols16_rows2
(
__global
const
uint*
src,
int
src_step,
__global
uint*
dst,
int
dst_step,
int
rows,
int
cols
)
{
int
block_x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
*
2
;
int
ssx,
dsx
;
if
((
block_x
*
16
)
>=
cols
|
| y >= rows) return;
uint4 line[4];
uint4 line_out[2];
uchar a; uchar16 b; uchar c;
uchar d; uchar16 e; uchar f;
uchar g; uchar16 h; uchar i;
uchar j; uchar16 k; uchar l;
ssx = dsx = 1;
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
line[1] = vload4(0, src + src_index + (src_step / 4));
line[2] = vload4(0, src + src_index + 2 * (src_step / 4));
#ifdef BORDER_CONSTANT
line[0] = (y == 0) ? (uint4)0 : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? (uint4)0 : vload4(0, src + src_index + 3 * (src_step / 4));
#elif defined BORDER_REFLECT_101
line[0] = (y == 0) ? line[2] : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? line[1] : vload4(0, src + src_index + 3 * (src_step / 4));
#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
line[0] = (y == 0) ? line[1] : vload4(0, src + src_index);
line[3] = (y == (rows - 2)) ? line[2] : vload4(0, src + src_index + 3 * (src_step / 4));
#endif
__global uchar *src_p = (__global uchar *)src;
src_index = block_x * 16 * ssx + (y - 1) * src_step;
bool line_end = ((block_x + 1) * 16 == cols);
b = as_uchar16(line[0]);
e = as_uchar16(line[1]);
h = as_uchar16(line[2]);
k = as_uchar16(line[3]);
#ifdef BORDER_CONSTANT
a = (block_x == 0 || y == 0) ? 0 : src_p[src_index - 1];
c = (line_end || y == 0) ? 0 : src_p[src_index + 16];
d = (block_x == 0) ? 0 : src_p[src_index + src_step - 1];
f = line_end ? 0 : src_p[src_index + src_step + 16];
g = (block_x == 0) ? 0 : src_p[src_index + 2 * src_step - 1];
i = line_end ? 0 : src_p[src_index + 2 * src_step + 16];
j = (block_x == 0 || y == (rows - 2)) ? 0 : src_p[src_index + 3 * src_step - 1];
l = (line_end || y == (rows - 2))? 0 : src_p[src_index + 3 * src_step + 16];
#elif defined BORDER_REFLECT_101
int offset;
offset = (y == 0) ? (2 * src_step) : 0;
a = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
c = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
d = (block_x == 0) ? src_p[src_index + src_step + 1] : src_p[src_index + src_step - 1];
f = line_end ? src_p[src_index + src_step + 14] : src_p[src_index + src_step + 16];
g = (block_x == 0) ? src_p[src_index + 2 * src_step + 1] : src_p[src_index + 2 * src_step - 1];
i = line_end ? src_p[src_index + 2 * src_step + 14] : src_p[src_index + 2 * src_step + 16];
offset = (y == (rows - 2)) ? (1 * src_step) : (3 * src_step);
j = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
l = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
#elif defined (BORDER_REPLICATE) |
|
defined
(
BORDER_REFLECT
)
int
offset
;
offset
=
(
y
==
0
)
?
(
1
*
src_step
)
:
0
;
a
=
(
block_x
==
0
)
?
src_p[src_index
+
offset]
:
src_p[src_index
+
offset
-
1]
;
c
=
line_end
?
src_p[src_index
+
offset
+
15]
:
src_p[src_index
+
offset
+
16]
;
d
=
(
block_x
==
0
)
?
src_p[src_index
+
src_step]
:
src_p[src_index
+
src_step
-
1]
;
f
=
line_end
?
src_p[src_index
+
src_step
+
15]
:
src_p[src_index
+
src_step
+
16]
;
g
=
(
block_x
==
0
)
?
src_p[src_index
+
2
*
src_step]
:
src_p[src_index
+
2
*
src_step
-
1]
;
i
=
line_end
?
src_p[src_index
+
2
*
src_step
+
15]
:
src_p[src_index
+
2
*
src_step
+
16]
;
offset
=
(
y
==
(
rows
-
2
))
?
(
2
*
src_step
)
:
(
3
*
src_step
)
;
j
=
(
block_x
==
0
)
?
src_p[src_index
+
offset]
:
src_p[src_index
+
offset
-
1]
;
l
=
line_end
?
src_p[src_index
+
offset
+
15]
:
src_p[src_index
+
offset
+
16]
;
#
endif
uchar16
arr[12]
;
float16
sum[2]
;
arr[0]
=
(
uchar16
)(
a,
b.s0123,
b.s456789ab,
b.scde
)
;
arr[1]
=
b
;
arr[2]
=
(
uchar16
)(
b.s123,
b.s4567,
b.s89abcdef,
c
)
;
arr[3]
=
(
uchar16
)(
d,
e.s0123,
e.s456789ab,
e.scde
)
;
arr[4]
=
e
;
arr[5]
=
(
uchar16
)(
e.s123,
e.s4567,
e.s89abcdef,
f
)
;
arr[6]
=
(
uchar16
)(
g,
h.s0123,
h.s456789ab,
h.scde
)
;
arr[7]
=
h
;
arr[8]
=
(
uchar16
)(
h.s123,
h.s4567,
h.s89abcdef,
i
)
;
arr[9]
=
(
uchar16
)(
j,
k.s0123,
k.s456789ab,
k.scde
)
;
arr[10]
=
k
;
arr[11]
=
(
uchar16
)(
k.s123,
k.s4567,
k.s89abcdef,
l
)
;
sum[0]
=
OP
(
0
,
0
,
0
)
+
OP
(
0
,
0
,
1
)
+
OP
(
0
,
0
,
2
)
+
OP
(
0
,
1
,
0
)
+
OP
(
0
,
1
,
1
)
+
OP
(
0
,
1
,
2
)
+
OP
(
0
,
2
,
0
)
+
OP
(
0
,
2
,
1
)
+
OP
(
0
,
2
,
2
)
;
sum[1]
=
OP
(
1
,
0
,
0
)
+
OP
(
1
,
0
,
1
)
+
OP
(
1
,
0
,
2
)
+
OP
(
1
,
1
,
0
)
+
OP
(
1
,
1
,
1
)
+
OP
(
1
,
1
,
2
)
+
OP
(
1
,
2
,
0
)
+
OP
(
1
,
2
,
1
)
+
OP
(
1
,
2
,
2
)
;
line_out[0]
=
as_uint4
(
convert_uchar16_sat_rte
(
sum[0]
))
;
line_out[1]
=
as_uint4
(
convert_uchar16_sat_rte
(
sum[1]
))
;
int
dst_index
=
block_x
*
4
*
dsx
+
y
*
(
dst_step
/
4
)
;
vstore4
(
line_out[0],
0
,
dst
+
dst_index
)
;
vstore4
(
line_out[1],
0
,
dst
+
dst_index
+
(
dst_step
/
4
))
;
}
modules/imgproc/src/smooth.cpp
View file @
f1d93cb2
...
...
@@ -2016,9 +2016,68 @@ cv::Ptr<cv::FilterEngine> cv::createGaussianFilter( int type, Size ksize,
return
createSeparableLinearFilter
(
type
,
type
,
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
);
}
#ifdef HAVE_IPP
namespace
cv
{
#ifdef HAVE_OPENCL
static
bool
ocl_GaussianBlur3x3_8UC1
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
InputArray
_kernelX
,
InputArray
_kernelY
,
int
borderType
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
!
(
dev
.
isIntel
()
&&
(
type
==
CV_8UC1
)
&&
(
_src
.
offset
()
==
0
)
&&
(
_src
.
step
()
%
4
==
0
)
&&
(
_src
.
cols
()
%
16
==
0
)
&&
(
_src
.
rows
()
%
2
==
0
))
)
return
false
;
Mat
kernelX
=
_kernelX
.
getMat
().
reshape
(
1
,
1
);
if
(
kernelX
.
cols
%
2
!=
1
)
return
false
;
Mat
kernelY
=
_kernelY
.
getMat
().
reshape
(
1
,
1
);
if
(
kernelY
.
cols
%
2
!=
1
)
return
false
;
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
Size
size
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
0
,
0
};
size_t
localsize
[
2
]
=
{
0
,
0
};
globalsize
[
0
]
=
size
.
width
/
16
;
globalsize
[
1
]
=
size
.
height
/
2
;
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
char
build_opts
[
1024
];
sprintf
(
build_opts
,
"-D %s %s%s"
,
borderMap
[
borderType
],
ocl
::
kernelToStr
(
kernelX
,
CV_32F
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
kernelY
,
CV_32F
,
"KERNEL_MATRIX_Y"
).
c_str
());
ocl
::
Kernel
kernel
(
"gaussianBlur3x3_8UC1_cols16_rows2"
,
cv
::
ocl
::
imgproc
::
gaussianBlur3x3_oclsrc
,
build_opts
);
if
(
kernel
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
size
,
CV_MAKETYPE
(
ddepth
,
cn
));
if
(
!
(
_dst
.
offset
()
==
0
&&
_dst
.
step
()
%
4
==
0
))
return
false
;
UMat
dst
=
_dst
.
getUMat
();
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrWriteOnly
(
dst
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
rows
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
dst
.
cols
);
return
kernel
.
run
(
2
,
globalsize
,
(
localsize
[
0
]
==
0
)
?
NULL
:
localsize
,
false
);
}
#endif
#ifdef HAVE_IPP
static
bool
ipp_GaussianBlur
(
InputArray
_src
,
OutputArray
_dst
,
Size
ksize
,
double
sigma1
,
double
sigma2
,
int
borderType
)
...
...
@@ -2109,8 +2168,8 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
#endif
return
false
;
}
}
#endif
}
void
cv
::
GaussianBlur
(
InputArray
_src
,
OutputArray
_dst
,
Size
ksize
,
...
...
@@ -2148,6 +2207,12 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
Mat
kx
,
ky
;
createGaussianKernels
(
kx
,
ky
,
type
,
ksize
,
sigma1
,
sigma2
);
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
&&
ksize
.
width
==
3
&&
ksize
.
height
==
3
&&
(
size_t
)
_src
.
rows
()
>
ky
.
total
()
&&
(
size_t
)
_src
.
cols
()
>
kx
.
total
(),
ocl_GaussianBlur3x3_8UC1
(
_src
,
_dst
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
borderType
));
sepFilter2D
(
_src
,
_dst
,
CV_MAT_DEPTH
(
type
),
kx
,
ky
,
Point
(
-
1
,
-
1
),
0
,
borderType
);
}
...
...
modules/imgproc/test/ocl/test_filters.cpp
View file @
f1d93cb2
...
...
@@ -229,6 +229,86 @@ OCL_TEST_P(GaussianBlurTest, Mat)
}
}
PARAM_TEST_CASE
(
GaussianBlur3x3_cols16_rows2_Base
,
MatType
,
int
,
// kernel size
Size
,
// dx, dy
BorderType
,
// border type
double
,
// optional parameter
bool
,
// roi or not
int
)
// width multiplier
{
int
type
,
borderType
,
ksize
;
Size
size
;
double
param
;
bool
useRoi
;
int
widthMultiple
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
ksize
=
GET_PARAM
(
1
);
size
=
GET_PARAM
(
2
);
borderType
=
GET_PARAM
(
3
);
param
=
GET_PARAM
(
4
);
useRoi
=
GET_PARAM
(
5
);
widthMultiple
=
GET_PARAM
(
6
);
}
void
random_roi
()
{
size
=
Size
(
3
,
3
);
Size
roiSize
=
randomSize
(
size
.
width
,
MAX_VALUE
,
size
.
height
,
MAX_VALUE
);
roiSize
.
width
=
std
::
max
(
size
.
width
+
13
,
roiSize
.
width
&
(
~
0xf
));
roiSize
.
height
=
std
::
max
(
size
.
height
+
1
,
roiSize
.
height
&
(
~
0x1
));
Border
srcBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
5
,
256
);
Border
dstBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
dst
,
dst_roi
,
roiSize
,
dstBorder
,
type
,
-
60
,
70
);
UMAT_UPLOAD_INPUT_PARAMETER
(
src
);
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
);
}
void
Near
()
{
Near
(
1
,
false
);
}
void
Near
(
double
threshold
,
bool
relative
)
{
if
(
relative
)
OCL_EXPECT_MATS_NEAR_RELATIVE
(
dst
,
threshold
);
else
OCL_EXPECT_MATS_NEAR
(
dst
,
threshold
);
}
};
typedef
GaussianBlur3x3_cols16_rows2_Base
GaussianBlur3x3_cols16_rows2
;
OCL_TEST_P
(
GaussianBlur3x3_cols16_rows2
,
Mat
)
{
Size
kernelSize
(
ksize
,
ksize
);
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
random_roi
();
double
sigma1
=
rng
.
uniform
(
0.1
,
1.0
);
double
sigma2
=
j
%
2
==
0
?
sigma1
:
rng
.
uniform
(
0.1
,
1.0
);
OCL_OFF
(
cv
::
GaussianBlur
(
src_roi
,
dst_roi
,
Size
(
ksize
,
ksize
),
sigma1
,
sigma2
,
borderType
));
OCL_ON
(
cv
::
GaussianBlur
(
usrc_roi
,
udst_roi
,
Size
(
ksize
,
ksize
),
sigma1
,
sigma2
,
borderType
));
Near
(
CV_MAT_DEPTH
(
type
)
>=
CV_32F
?
1e-3
:
4
,
CV_MAT_DEPTH
(
type
)
>=
CV_32F
);
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Erode
...
...
@@ -490,6 +570,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
Bool
(),
Values
(
1
)));
// not used
OCL_INSTANTIATE_TEST_CASE_P
(
Filter
,
GaussianBlur3x3_cols16_rows2
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
Values
(
3
),
// kernel size
Values
(
Size
(
0
,
0
)),
// not used
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED
,
Values
(
0.0
),
// not used
Bool
(),
Values
(
1
)));
// not used
OCL_INSTANTIATE_TEST_CASE_P
(
Filter
,
Erode
,
Combine
(
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
,
CV_32FC3
,
CV_32FC4
,
CV_64FC1
,
CV_64FC4
),
Values
(
0
,
3
,
5
,
7
),
// kernel size, 0 means kernel = Mat()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment