Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
03b1d133
Commit
03b1d133
authored
May 06, 2014
by
Alexander Alekhin
Committed by
OpenCV Buildbot
May 06, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2660 from arkunze:pullreq/140423-filter2D
parents
e9be4865
1f8b41f3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
105 additions
and
18 deletions
+105
-18
ocl.cpp
modules/core/src/ocl.cpp
+1
-1
filter.cpp
modules/imgproc/src/filter.cpp
+88
-7
filter2DSmall.cl
modules/imgproc/src/opencl/filter2DSmall.cl
+0
-0
test_filter2d.cpp
modules/imgproc/test/ocl/test_filter2d.cpp
+16
-10
No files found.
modules/core/src/ocl.cpp
View file @
03b1d133
...
...
@@ -4379,7 +4379,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
typedef
std
::
string
(
*
func_t
)(
const
Mat
&
);
static
const
func_t
funcs
[]
=
{
kerToStr
<
uchar
>
,
kerToStr
<
char
>
,
kerToStr
<
ushort
>
,
kerToStr
<
short
>
,
kerToStr
<
int
>
,
kerToStr
<
float
>
,
kerToStr
<
double
>
,
0
};
const
func_t
func
=
funcs
[
depth
];
const
func_t
func
=
funcs
[
d
d
epth
];
CV_Assert
(
func
!=
0
);
return
cv
::
format
(
" -D %s=%s"
,
name
?
name
:
"COEFF"
,
func
(
kernel
).
c_str
());
...
...
modules/imgproc/src/filter.cpp
View file @
03b1d133
...
...
@@ -3191,11 +3191,10 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
cv
::
Mat
kernelMat
=
_kernel
.
getMat
();
std
::
vector
<
float
>
kernelMatDataFloat
;
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
cv
::
Size
sz
=
_src
.
size
(),
wholeSize
;
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
},
localsize
[
2
]
=
{
0
,
1
};
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
size_t
localsize_general
[
2
]
=
{
0
,
1
};
size_t
*
localsize
=
NULL
;
ocl
::
Kernel
k
;
UMat
src
=
_src
.
getUMat
();
...
...
@@ -3210,6 +3209,76 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
char
cvt
[
2
][
40
];
// For smaller filter kernels, there is a special kernel that is more
// efficient than the general one.
UMat
kernalDataUMat
;
if
(
device
.
isIntel
()
&&
(
device
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
&&
((
ksize
.
width
<
5
&&
ksize
.
height
<
5
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
&&
cn
==
1
)))
{
kernelMat
.
reshape
(
0
,
1
);
String
kerStr
=
ocl
::
kernelToStr
(
kernelMat
,
CV_32F
);
int
h
=
isolated
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
isolated
?
sz
.
width
:
wholeSize
.
width
;
if
((
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
))
return
false
;
// Figure out what vector size to use for loading the pixels.
int
pxLoadNumPixels
=
((
cn
!=
1
)
||
sz
.
width
%
4
)
?
1
:
4
;
int
pxLoadVecSize
=
cn
*
pxLoadNumPixels
;
// Figure out how many pixels per work item to compute in X and Y
// directions. Too many and we run out of registers.
int
pxPerWorkItemX
=
1
;
int
pxPerWorkItemY
=
1
;
if
(
cn
<=
2
&&
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
)
{
pxPerWorkItemX
=
sz
.
width
%
8
?
sz
.
width
%
4
?
sz
.
width
%
2
?
1
:
2
:
4
:
8
;
pxPerWorkItemY
=
sz
.
width
%
2
?
1
:
2
;
}
else
if
(
cn
<
4
||
(
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
))
{
pxPerWorkItemX
=
sz
.
width
%
2
?
1
:
2
;
pxPerWorkItemY
=
sz
.
width
%
2
?
1
:
2
;
}
globalsize
[
0
]
=
sz
.
width
/
pxPerWorkItemX
;
globalsize
[
1
]
=
sz
.
height
/
pxPerWorkItemY
;
// Need some padding in the private array for pixels
int
privDataWidth
=
ROUNDUP
(
pxPerWorkItemX
+
ksize
.
width
-
1
,
pxLoadNumPixels
);
// Make the global size a nice round number so the runtime can pick
// from reasonable choices for the workgroup size
const
int
wgRound
=
256
;
globalsize
[
0
]
=
ROUNDUP
(
globalsize
[
0
],
wgRound
);
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
"-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
"-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s %s"
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
pxLoadVecSize
,
pxLoadNumPixels
,
pxPerWorkItemX
,
pxPerWorkItemY
,
privDataWidth
,
borderMap
[
borderType
],
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
privDataWidth
/
pxLoadNumPixels
,
pxPerWorkItemY
+
ksize
.
height
-
1
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
kerStr
.
c_str
());
cv
::
String
errmsg
;
if
(
!
k
.
create
(
"filter2DSmall"
,
cv
::
ocl
::
imgproc
::
filter2DSmall_oclsrc
,
build_options
,
&
errmsg
))
return
false
;
}
else
{
localsize
=
localsize_general
;
std
::
vector
<
float
>
kernelMatDataFloat
;
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
String
kerStr
=
ocl
::
kernelToStr
(
kernelMatDataFloat
,
CV_32F
);
for
(
;
;
)
...
...
@@ -3217,13 +3286,13 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
BLOCK_SIZE
/=
2
;
#if 1 // TODO The mode with several blocks requires many more VGPRs, so this optimization is not relevant for current devices
#if 1 // TODO The mode with several blocks requires many more VGPRs, so this optimization is not relevant for current devices
size_t
BLOCK_SIZE_Y
=
1
;
#else
#else
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
#endif
#endif
if
((
size_t
)
ksize
.
width
>
BLOCK_SIZE
)
return
false
;
...
...
@@ -3268,6 +3337,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
return
false
;
tryWorkItems
=
kernelWorkGroupSize
;
}
}
_dst
.
create
(
sz
,
dtype
);
UMat
dst
=
_dst
.
getUMat
();
...
...
@@ -3688,9 +3758,20 @@ void cv::filter2D( InputArray _src, OutputArray _dst, int ddepth,
temp
=
dst
;
else
temp
.
create
(
dst
.
size
(),
dst
.
type
());
// crossCorr doesn't accept non-zero delta with multiple channels
if
(
src
.
channels
()
!=
1
&&
delta
!=
0
)
{
crossCorr
(
src
,
kernel
,
temp
,
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()),
anchor
,
0
,
borderType
);
add
(
temp
,
delta
,
temp
);
}
else
{
crossCorr
(
src
,
kernel
,
temp
,
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()),
anchor
,
delta
,
borderType
);
}
if
(
temp
.
data
!=
dst
.
data
)
temp
.
copyTo
(
dst
);
return
;
...
...
modules/imgproc/src/opencl/filter2DSmall.cl
0 → 100755
View file @
03b1d133
This diff is collapsed.
Click to expand it.
modules/imgproc/test/ocl/test_filter2d.cpp
View file @
03b1d133
...
...
@@ -51,7 +51,7 @@ namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
PARAM_TEST_CASE
(
Filter2D
,
MatDepth
,
Channels
,
BorderType
,
bool
,
bool
)
PARAM_TEST_CASE
(
Filter2D
,
MatDepth
,
Channels
,
int
,
int
,
BorderType
,
bool
,
bool
)
{
static
const
int
kernelMinSize
=
2
;
static
const
int
kernelMaxSize
=
10
;
...
...
@@ -60,6 +60,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
Size
dsize
;
Point
anchor
;
int
borderType
;
int
widthMultiple
;
bool
useRoi
;
Mat
kernel
;
double
delta
;
...
...
@@ -70,27 +71,30 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
virtual
void
SetUp
()
{
type
=
CV_MAKE_TYPE
(
GET_PARAM
(
0
),
GET_PARAM
(
1
));
borderType
=
GET_PARAM
(
2
)
|
(
GET_PARAM
(
3
)
?
BORDER_ISOLATED
:
0
);
useRoi
=
GET_PARAM
(
4
);
Size
ksize
(
GET_PARAM
(
2
),
GET_PARAM
(
2
));
widthMultiple
=
GET_PARAM
(
3
);
borderType
=
GET_PARAM
(
4
)
|
(
GET_PARAM
(
5
)
?
BORDER_ISOLATED
:
0
);
useRoi
=
GET_PARAM
(
6
);
Mat
temp
=
randomMat
(
ksize
,
CV_MAKE_TYPE
(((
CV_64F
==
CV_MAT_DEPTH
(
type
))
?
CV_64F
:
CV_32F
),
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernel
,
1.0
,
0.0
,
NORM_L1
);
}
void
random_roi
()
{
dsize
=
randomSize
(
1
,
MAX_VALUE
);
// Make sure the width is a multiple of the requested value, and no more.
dsize
.
width
&=
~
((
widthMultiple
*
2
)
-
1
);
dsize
.
width
+=
widthMultiple
;
Size
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
Mat
temp
=
randomMat
(
ksize
,
CV_MAKE_TYPE
(((
CV_64F
==
CV_MAT_DEPTH
(
type
))
?
CV_64F
:
CV_32F
),
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernel
,
1.0
,
0.0
,
NORM_L1
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
Size
roiSize
=
randomSize
(
kernel
.
size
[
0
],
MAX_VALUE
,
kernel
.
size
[
1
],
MAX_VALUE
);
Border
srcBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
Border
dstBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
dst
,
dst_roi
,
dsize
,
dstBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
anchor
.
x
=
randomInt
(
-
1
,
k
size
.
width
);
anchor
.
y
=
randomInt
(
-
1
,
k
size
.
height
);
anchor
.
x
=
randomInt
(
-
1
,
k
ernel
.
size
[
0
]
);
anchor
.
y
=
randomInt
(
-
1
,
k
ernel
.
size
[
1
]
);
delta
=
randomDouble
(
-
100
,
100
);
...
...
@@ -122,6 +126,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, Filter2D,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_32F
),
OCL_ALL_CHANNELS
,
Values
(
3
,
5
,
9
),
// Kernel size
Values
(
1
,
4
,
8
),
// Width multiple
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment