Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
03b1d133
Commit
03b1d133
authored
May 06, 2014
by
Alexander Alekhin
Committed by
OpenCV Buildbot
May 06, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2660 from arkunze:pullreq/140423-filter2D
parents
e9be4865
1f8b41f3
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
488 additions
and
66 deletions
+488
-66
ocl.cpp
modules/core/src/ocl.cpp
+1
-1
filter.cpp
modules/imgproc/src/filter.cpp
+136
-55
filter2DSmall.cl
modules/imgproc/src/opencl/filter2DSmall.cl
+335
-0
test_filter2d.cpp
modules/imgproc/test/ocl/test_filter2d.cpp
+16
-10
No files found.
modules/core/src/ocl.cpp
View file @
03b1d133
...
...
@@ -4379,7 +4379,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
typedef
std
::
string
(
*
func_t
)(
const
Mat
&
);
static
const
func_t
funcs
[]
=
{
kerToStr
<
uchar
>
,
kerToStr
<
char
>
,
kerToStr
<
ushort
>
,
kerToStr
<
short
>
,
kerToStr
<
int
>
,
kerToStr
<
float
>
,
kerToStr
<
double
>
,
0
};
const
func_t
func
=
funcs
[
depth
];
const
func_t
func
=
funcs
[
d
d
epth
];
CV_Assert
(
func
!=
0
);
return
cv
::
format
(
" -D %s=%s"
,
name
?
name
:
"COEFF"
,
func
(
kernel
).
c_str
());
...
...
modules/imgproc/src/filter.cpp
View file @
03b1d133
...
...
@@ -3191,11 +3191,10 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
cv
::
Mat
kernelMat
=
_kernel
.
getMat
();
std
::
vector
<
float
>
kernelMatDataFloat
;
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
cv
::
Size
sz
=
_src
.
size
(),
wholeSize
;
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
},
localsize
[
2
]
=
{
0
,
1
};
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
size_t
localsize_general
[
2
]
=
{
0
,
1
};
size_t
*
localsize
=
NULL
;
ocl
::
Kernel
k
;
UMat
src
=
_src
.
getUMat
();
...
...
@@ -3210,63 +3209,134 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
char
cvt
[
2
][
40
];
String
kerStr
=
ocl
::
kernelToStr
(
kernelMatDataFloat
,
CV_32F
);
for
(
;
;
)
// For smaller filter kernels, there is a special kernel that is more
// efficient than the general one.
UMat
kernalDataUMat
;
if
(
device
.
isIntel
()
&&
(
device
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
&&
((
ksize
.
width
<
5
&&
ksize
.
height
<
5
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
&&
cn
==
1
)))
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
BLOCK_SIZE
/=
2
;
#if 1 // TODO Mode with several blocks requires a much more VGPRs, so this optimization is not actual for the current devices
size_t
BLOCK_SIZE_Y
=
1
;
#else
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
#endif
if
((
size_t
)
ksize
.
width
>
BLOCK_SIZE
)
return
false
;
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
kernelMat
.
reshape
(
0
,
1
);
String
kerStr
=
ocl
::
kernelToStr
(
kernelMat
,
CV_32F
);
int
h
=
isolated
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
isolated
?
sz
.
width
:
wholeSize
.
width
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
if
((
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
))
return
false
;
String
opts
=
format
(
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
borderMap
[
borderType
],
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
kerStr
.
c_str
(),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]));
localsize
[
0
]
=
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
BLOCK_SIZE_Y
);
if
(
!
k
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
opts
))
return
false
;
// Figure out what vector size to use for loading the pixels.
int
pxLoadNumPixels
=
((
cn
!=
1
)
||
sz
.
width
%
4
)
?
1
:
4
;
int
pxLoadVecSize
=
cn
*
pxLoadNumPixels
;
size_t
kernelWorkGroupSize
=
k
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
// Figure out how many pixels per work item to compute in X and Y
// directions. Too many and we run out of registers.
int
pxPerWorkItemX
=
1
;
int
pxPerWorkItemY
=
1
;
if
(
cn
<=
2
&&
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
)
{
pxPerWorkItemX
=
sz
.
width
%
8
?
sz
.
width
%
4
?
sz
.
width
%
2
?
1
:
2
:
4
:
8
;
pxPerWorkItemY
=
sz
.
width
%
2
?
1
:
2
;
}
else
if
(
cn
<
4
||
(
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
))
{
pxPerWorkItemX
=
sz
.
width
%
2
?
1
:
2
;
pxPerWorkItemY
=
sz
.
width
%
2
?
1
:
2
;
}
globalsize
[
0
]
=
sz
.
width
/
pxPerWorkItemX
;
globalsize
[
1
]
=
sz
.
height
/
pxPerWorkItemY
;
// Need some padding in the private array for pixels
int
privDataWidth
=
ROUNDUP
(
pxPerWorkItemX
+
ksize
.
width
-
1
,
pxLoadNumPixels
);
// Make the global size a nice round number so the runtime can pick
// from reasonable choices for the workgroup size
const
int
wgRound
=
256
;
globalsize
[
0
]
=
ROUNDUP
(
globalsize
[
0
],
wgRound
);
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
"-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
"-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s %s"
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
pxLoadVecSize
,
pxLoadNumPixels
,
pxPerWorkItemX
,
pxPerWorkItemY
,
privDataWidth
,
borderMap
[
borderType
],
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
privDataWidth
/
pxLoadNumPixels
,
pxPerWorkItemY
+
ksize
.
height
-
1
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
kerStr
.
c_str
());
cv
::
String
errmsg
;
if
(
!
k
.
create
(
"filter2DSmall"
,
cv
::
ocl
::
imgproc
::
filter2DSmall_oclsrc
,
build_options
,
&
errmsg
))
return
false
;
tryWorkItems
=
kernelWorkGroupSize
;
}
else
{
localsize
=
localsize_general
;
std
::
vector
<
float
>
kernelMatDataFloat
;
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
String
kerStr
=
ocl
::
kernelToStr
(
kernelMatDataFloat
,
CV_32F
);
for
(
;
;
)
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
BLOCK_SIZE
/=
2
;
#if 1 // TODO Mode with several blocks requires a much more VGPRs, so this optimization is not actual for the current devices
size_t
BLOCK_SIZE_Y
=
1
;
#else
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
#endif
if
((
size_t
)
ksize
.
width
>
BLOCK_SIZE
)
return
false
;
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isolated
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
isolated
?
sz
.
width
:
wholeSize
.
width
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
if
((
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
))
return
false
;
String
opts
=
format
(
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
borderMap
[
borderType
],
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
kerStr
.
c_str
(),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]));
localsize
[
0
]
=
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
BLOCK_SIZE_Y
);
if
(
!
k
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
opts
))
return
false
;
size_t
kernelWorkGroupSize
=
k
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
return
false
;
tryWorkItems
=
kernelWorkGroupSize
;
}
}
_dst
.
create
(
sz
,
dtype
);
...
...
@@ -3688,9 +3758,20 @@ void cv::filter2D( InputArray _src, OutputArray _dst, int ddepth,
temp
=
dst
;
else
temp
.
create
(
dst
.
size
(),
dst
.
type
());
crossCorr
(
src
,
kernel
,
temp
,
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()),
anchor
,
delta
,
borderType
);
// crossCorr doesn't accept non-zero delta with multiple channels
if
(
src
.
channels
()
!=
1
&&
delta
!=
0
)
{
crossCorr
(
src
,
kernel
,
temp
,
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()),
anchor
,
0
,
borderType
);
add
(
temp
,
delta
,
temp
);
}
else
{
crossCorr
(
src
,
kernel
,
temp
,
src
.
size
(),
CV_MAKETYPE
(
ddepth
,
src
.
channels
()),
anchor
,
delta
,
borderType
);
}
if
(
temp
.
data
!=
dst
.
data
)
temp
.
copyTo
(
dst
);
return
;
...
...
modules/imgproc/src/opencl/filter2DSmall.cl
0 → 100755
View file @
03b1d133
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2013,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2014
,
Intel
Corporation,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT_101
//BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
//blur
function
does
not
support
BORDER_WRAP
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
endif
#
ifdef
BORDER_ISOLATED
#
define
ISOLATED_MIN
(
VAL
)
(
VAL
)
#
else
#
define
ISOLATED_MIN
(
VAL
)
0
#
endif
#
ifdef
EXTRA_EXTRAPOLATION
//
border
>
src
image
size
#
ifdef
BORDER_CONSTANT
//
None
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
x
=
max
(
min
(
x,
maxX
-
1
)
,
minX
)
; \
y
=
max
(
min
(
y,
maxY
-
1
)
,
minY
)
; \
}
#
elif
defined
BORDER_WRAP
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
if
(
x
<
minX
)
\
x
-=
((
x
-
maxX
+
1
)
/
maxX
)
*
maxX
; \
if
(
x
>=
maxX
)
\
x
%=
maxX
; \
if
(
y
<
minY
)
\
y
-=
((
y
-
maxY
+
1
)
/
maxY
)
*
maxY
; \
if
(
y
>=
maxY
)
\
y
%=
maxY
; \
}
#
elif
defined
(
BORDER_REFLECT
)
|
| defined(BORDER_REFLECT_101)
#define EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, delta) \
{ \
if (maxX - minX == 1) \
x = minX; \
else \
do \
{ \
if (x < minX) \
x = minX - (x - minX) - 1 + delta; \
else \
x = maxX - 1 - (x - maxX) - delta; \
} \
while (x >= maxX || x < minX); \
\
if (maxY - minY == 1) \
y = minY; \
else \
do \
{ \
if (y < minY) \
y = minY - (y - minY) - 1 + delta; \
else \
y = maxY - 1 - (y - maxY) - delta; \
} \
while (y >= maxY || y < minY); \
}
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
#elif defined(BORDER_REFLECT_101) || defined(BORDER_REFLECT101)
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 1)
#endif
#else
#error No extrapolation method
#endif
#else
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
{ \
int _row = y - ISOLATED_MIN(minY), _col = x - ISOLATED_MIN(minX); \
_row = ADDR_H(_row, 0, maxY - ISOLATED_MIN(minY)); \
_row = ADDR_B(_row, maxY - ISOLATED_MIN(minY), _row); \
y = _row + ISOLATED_MIN(minY); \
\
_col = ADDR_L(_col, 0, maxX - ISOLATED_MIN(minX)); \
_col = ADDR_R(_col, maxX - ISOLATED_MIN(minX), _col); \
x = _col + ISOLATED_MIN(minX); \
}
#endif
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#if cn != 3
#define loadpix(addr) *(__global const srcT *)(addr)
#define storepix(val, addr) *(__global dstT *)(addr) = val
#define SRCSIZE (int)sizeof(srcT)
#define DSTSIZE (int)sizeof(dstT)
#else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
#define SRCSIZE (int)sizeof(srcT1) * cn
#define DSTSIZE (int)sizeof(dstT1) * cn
#endif
#define noconvert
struct RectCoords
{
int x1, y1, x2, y2;
};
#ifdef BORDER_ISOLATED
inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels)
{
return (coord.x < bounds.x1 || coord.y < bounds.y1 || coord.x + numPixels > bounds.x2 || coord.y >= bounds.y2);
}
#else
inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels)
{
return (coord.x < 0 || coord.y < 0 || coord.x + numPixels > bounds.x2 || coord.y >= bounds.y2);
}
#endif
WT getBorderPixel(const struct RectCoords bounds, int2 coord,
__global const uchar* srcptr, int srcstep)
{
#ifdef BORDER_CONSTANT
return (WT)(0);
#else
int selected_col = coord.x;
int selected_row = coord.y;
EXTRAPOLATE(selected_col, selected_row,
bounds.x1, bounds.y1,
bounds.x2, bounds.y2
);
coord = (int2)(selected_col, selected_row);
__global const uchar* ptr = srcptr + mul24(coord.y, srcstep) +
coord.x * SRCSIZE;
return convertToWT(loadpix(ptr));
#endif
}
inline WT readSrcPixelSingle(int2 pos, __global const uchar* srcptr,
int srcstep, const struct RectCoords srcCoords)
{
if (!isBorder(srcCoords, pos, 1))
{
__global const uchar* ptr = srcptr + mul24(pos.y, srcstep) +
pos.x * SRCSIZE;
return convertToWT(loadpix(ptr));
}
else
{
return getBorderPixel(srcCoords, pos, srcptr, srcstep);
}
}
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define vload1(OFFSET, PTR) (*(PTR + OFFSET))
#define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)
#define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE)
#define float1 float
inline PX_LOAD_FLOAT_VEC_TYPE readSrcPixelGroup(int2 pos, __global const uchar* srcptr,
int srcstep, const struct RectCoords srcCoords)
{
__global const srcT1* ptr = (__global const srcT1*)
(srcptr + mul24(pos.y, srcstep) +
pos.x * SRCSIZE);
return PX_LOAD_FLOAT_VEC_CONV(PX_LOAD(0, ptr));
}
// Macros to ensure unrolled loops
#define LOOP1(VAR, STMT) (STMT); (VAR)++;
#define LOOP2(VAR, STMT) LOOP1(VAR, STMT); (STMT); (VAR)++;
#define LOOP3(VAR, STMT) LOOP2(VAR, STMT); (STMT); (VAR)++;
#define LOOP4(VAR, STMT) LOOP3(VAR, STMT); (STMT); (VAR)++;
#define LOOP5(VAR, STMT) LOOP4(VAR, STMT); (STMT); (VAR)++;
#define LOOP6(VAR, STMT) LOOP5(VAR, STMT); (STMT); (VAR)++;
#define LOOP7(VAR, STMT) LOOP6(VAR, STMT); (STMT); (VAR)++;
#define LOOP8(VAR, STMT) LOOP7(VAR, STMT); (STMT); (VAR)++;
#define LOOP9(VAR, STMT) LOOP8(VAR, STMT); (STMT); (VAR)++;
#define LOOP10(VAR, STMT) LOOP9(VAR, STMT); (STMT); (VAR)++;
#define LOOP11(VAR, STMT) LOOP10(VAR, STMT); (STMT); (VAR)++;
#define LOOP12(VAR, STMT) LOOP11(VAR, STMT); (STMT); (VAR)++;
#define LOOP13(VAR, STMT) LOOP12(VAR, STMT); (STMT); (VAR)++;
#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
#define DIG(a) a,
__constant WT1 kernelData[] = { COEFF };
__kernel void filter2DSmall(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
__global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, float delta)
{
const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const int startX = get_global_id(0) * PX_PER_WI_X;
const int startY = get_global_id(1) * PX_PER_WI_Y;
if ((startX >= cols) |
|
(
startY
>=
rows
))
{
return
;
}
WT
privateData[PX_PER_WI_Y
+
KERNEL_SIZE_Y
-
1][PRIV_DATA_WIDTH]
;
//
Load
all
of
the
pixels
needed
for
the
calculation
int
py
=
0
;
LOOP
(
PX_LOAD_Y_ITERATIONS,
py,
{
int
y
=
startY
+
py
;
int
px
=
0
;
LOOP
(
PX_LOAD_X_ITERATIONS,
px,
{
int
x
=
startX
+
(
px
*
PX_LOAD_NUM_PX
)
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x
-
ANCHOR_X,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
if
(
!isBorder
(
srcCoords,
srcPos,
PX_LOAD_NUM_PX
))
{
PX_LOAD_FLOAT_VEC_TYPE
p
=
readSrcPixelGroup
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
*
((
PX_LOAD_FLOAT_VEC_TYPE*
)
&privateData[py][px
*
PX_LOAD_NUM_PX]
)
=
p
;
}
else
{
int
lx
=
0
;
LOOP
(
PX_LOAD_NUM_PX,
lx,
{
WT
p
=
readSrcPixelSingle
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
*
((
WT*
)
&privateData[py][px
*
PX_LOAD_NUM_PX
+
lx]
)
=
p
;
srcPos.x++
;
}
)
;
}
}
)
;
}
)
;
//
Use
the
stored
pixels
to
compute
the
results
py
=
0
;
LOOP
(
PX_PER_WI_Y,
py,
{
int
y
=
startY
+
py
;
int
px
=
0
;
LOOP
(
PX_PER_WI_X,
px,
{
int
x
=
startX
+
px
;
WT
total_sum
=
0
;
int
sy
=
0
;
int
kernelIndex
=
0
;
LOOP
(
KERNEL_SIZE_Y,
sy,
{
int
sx
=
0
;
LOOP
(
KERNEL_SIZE_X,
sx,
{
total_sum
=
mad
(
kernelData[kernelIndex++],
privateData[py
+
sy][px
+
sx],
total_sum
)
;
}
)
;
}
)
;
__global
dstT*
dstPtr
=
(
__global
dstT*
)(
dstptr
+
y
*
dst_step
+
dst_offset
+
x
*
DSTSIZE
)
; // Pointer can be out of bounds!
storepix
(
convertToDstT
(
total_sum
+
(
WT
)(
delta
))
,
dstPtr
)
;
}
)
;
}
)
;
}
modules/imgproc/test/ocl/test_filter2d.cpp
View file @
03b1d133
...
...
@@ -51,7 +51,7 @@ namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
PARAM_TEST_CASE
(
Filter2D
,
MatDepth
,
Channels
,
BorderType
,
bool
,
bool
)
PARAM_TEST_CASE
(
Filter2D
,
MatDepth
,
Channels
,
int
,
int
,
BorderType
,
bool
,
bool
)
{
static
const
int
kernelMinSize
=
2
;
static
const
int
kernelMaxSize
=
10
;
...
...
@@ -60,6 +60,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
Size
dsize
;
Point
anchor
;
int
borderType
;
int
widthMultiple
;
bool
useRoi
;
Mat
kernel
;
double
delta
;
...
...
@@ -70,27 +71,30 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
virtual
void
SetUp
()
{
type
=
CV_MAKE_TYPE
(
GET_PARAM
(
0
),
GET_PARAM
(
1
));
borderType
=
GET_PARAM
(
2
)
|
(
GET_PARAM
(
3
)
?
BORDER_ISOLATED
:
0
);
useRoi
=
GET_PARAM
(
4
);
Size
ksize
(
GET_PARAM
(
2
),
GET_PARAM
(
2
));
widthMultiple
=
GET_PARAM
(
3
);
borderType
=
GET_PARAM
(
4
)
|
(
GET_PARAM
(
5
)
?
BORDER_ISOLATED
:
0
);
useRoi
=
GET_PARAM
(
6
);
Mat
temp
=
randomMat
(
ksize
,
CV_MAKE_TYPE
(((
CV_64F
==
CV_MAT_DEPTH
(
type
))
?
CV_64F
:
CV_32F
),
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernel
,
1.0
,
0.0
,
NORM_L1
);
}
void
random_roi
()
{
dsize
=
randomSize
(
1
,
MAX_VALUE
);
// Make sure the width is a multiple of the requested value, and no more.
dsize
.
width
&=
~
((
widthMultiple
*
2
)
-
1
);
dsize
.
width
+=
widthMultiple
;
Size
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
Mat
temp
=
randomMat
(
ksize
,
CV_MAKE_TYPE
(((
CV_64F
==
CV_MAT_DEPTH
(
type
))
?
CV_64F
:
CV_32F
),
1
),
-
MAX_VALUE
,
MAX_VALUE
);
cv
::
normalize
(
temp
,
kernel
,
1.0
,
0.0
,
NORM_L1
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
Size
roiSize
=
randomSize
(
kernel
.
size
[
0
],
MAX_VALUE
,
kernel
.
size
[
1
],
MAX_VALUE
);
Border
srcBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
roiSize
,
srcBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
Border
dstBorder
=
randomBorder
(
0
,
useRoi
?
MAX_VALUE
:
0
);
randomSubMat
(
dst
,
dst_roi
,
dsize
,
dstBorder
,
type
,
-
MAX_VALUE
,
MAX_VALUE
);
anchor
.
x
=
randomInt
(
-
1
,
k
size
.
width
);
anchor
.
y
=
randomInt
(
-
1
,
k
size
.
height
);
anchor
.
x
=
randomInt
(
-
1
,
k
ernel
.
size
[
0
]
);
anchor
.
y
=
randomInt
(
-
1
,
k
ernel
.
size
[
1
]
);
delta
=
randomDouble
(
-
100
,
100
);
...
...
@@ -122,6 +126,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, Filter2D,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_32F
),
OCL_ALL_CHANNELS
,
Values
(
3
,
5
,
9
),
// Kernel size
Values
(
1
,
4
,
8
),
// Width mutiple
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment