Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e2c6ab01
Commit
e2c6ab01
authored
Mar 21, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactored filter2D; eliminated restrictions sdepth == ddepth, delta == 0
parent
b6833fdd
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
98 additions
and
256 deletions
+98
-256
filter.cpp
modules/imgproc/src/filter.cpp
+52
-99
filter2D.cl
modules/imgproc/src/opencl/filter2D.cl
+39
-153
test_filter2d.cpp
modules/imgproc/test/ocl/test_filter2d.cpp
+5
-2
test_filters.cpp
modules/imgproc/test/ocl/test_filters.cpp
+2
-2
No files found.
modules/imgproc/src/filter.cpp
View file @
e2c6ab01
...
...
@@ -3154,74 +3154,50 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray
_kernel
,
Point
anchor
,
double
delta
,
int
borderType
)
{
i
f
(
abs
(
delta
)
>
FLT_MIN
)
return
false
;
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
i
nt
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ddepth
=
ddepth
<
0
?
sdepth
:
ddepth
;
int
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
),
wdepth
=
std
::
max
(
std
::
max
(
sdepth
,
ddepth
),
CV_32F
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
if
(
cn
>
4
)
return
false
;
int
sdepth
=
CV_MAT_DEPTH
(
type
);
Size
ksize
=
_kernel
.
size
();
if
(
anchor
.
x
<
0
)
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
else
if
(
ddepth
!=
sdepth
)
return
false
;
bool
is
IsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bo
ol
useDouble
=
(
CV_64F
==
sdepth
)
;
bool
is
olated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bo
rderType
&=
~
BORDER_ISOLATED
;
const
cv
::
ocl
::
Device
&
device
=
cv
::
ocl
::
Device
::
getDefault
();
int
doubleFPConfig
=
device
.
doubleFPConfig
()
;
if
(
useDouble
&&
(
0
==
doubleFPConfig
)
)
bool
doubleSupport
=
device
.
doubleFPConfig
()
>
0
;
if
(
wdepth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
return
false
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
cv
::
Mat
kernelMat
=
_kernel
.
getMat
();
std
::
vector
<
float
>
kernelMatDataFloat
;
std
::
vector
<
double
>
kernelMatDataDouble
;
int
kernel_size_y2_aligned
=
useDouble
?
_prepareKernelFilter2D
<
double
>
(
kernelMatDataDouble
,
kernelMat
)
:
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
cv
::
Size
sz
=
_src
.
size
(),
wholeSize
;
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
},
localsize
[
2
]
=
{
0
,
1
};
cv
::
Size
sz
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
};
ocl
::
Kernel
kernel
;
UMat
src
;
Size
wholeSize
;
if
(
!
isIsolatedBorder
)
ocl
::
Kernel
k
;
UMat
src
=
_src
.
getUMat
();
if
(
!
isolated
)
{
src
=
_src
.
getUMat
();
Point
ofs
;
src
.
locateROI
(
wholeSize
,
ofs
);
}
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
for
(;;)
char
cvt
[
2
][
40
];
for
(
;
;
)
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
...
...
@@ -3241,32 +3217,36 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
is
IsolatedBorder
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
is
IsolatedBorder
?
sz
.
width
:
wholeSize
.
width
;
int
h
=
is
olated
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
is
olated
?
sz
.
width
:
wholeSize
.
width
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
if
((
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
))
return
false
;
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
"-D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
sdepth
,
cn
,
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
String
opts
=
format
(
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
borderMap
[
borderType
],
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]));
localsize
[
0
]
=
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
BLOCK_SIZE_Y
);
cv
::
String
errmsg
;
if
(
!
kernel
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
build_options
))
if
(
!
k
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
opts
))
return
false
;
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
size_t
kernelWorkGroupSize
=
k
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
...
...
@@ -3274,46 +3254,19 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
tryWorkItems
=
kernelWorkGroupSize
;
}
_dst
.
create
(
sz
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
if
(
src
.
empty
())
src
=
_src
.
getUMat
();
int
idxArg
=
0
;
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
_dst
.
create
(
sz
,
dtype
);
UMat
dst
=
_dst
.
getUMat
(),
kernalDataUMat
(
kernelMatDataFloat
,
true
);
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcEndX
=
(
isIsolatedBorder
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
);
int
srcEndY
=
(
isIsolatedBorder
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
int
cnocl
=
3
==
cn
?
4
:
cn
;
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValueDouble
[
0
],
sizeof
(
double
)
*
cnocl
);
else
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValue
[
0
],
sizeof
(
float
)
*
cnocl
);
}
if
(
useDouble
)
{
UMat
kernalDataUMat
(
kernelMatDataDouble
,
true
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
));
}
else
{
UMat
kernalDataUMat
(
kernelMatDataFloat
,
true
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
));
}
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
int
srcEndX
=
(
isolated
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
);
int
srcEndY
=
(
isolated
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src
.
step
,
srcOffsetX
,
srcOffsetY
,
srcEndX
,
srcEndY
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
),
(
float
)
delta
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_sepRowFilter2D
(
const
UMat
&
src
,
UMat
&
buf
,
const
Mat
&
kernelX
,
int
anchor
,
...
...
modules/imgproc/src/opencl/filter2D.cl
View file @
e2c6ab01
...
...
@@ -122,7 +122,7 @@
}
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
#elif defined(BORDER_REFLECT_101)
#elif defined(BORDER_REFLECT_101)
|
|
defined
(
BORDER_REFLECT101
)
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
EXTRAPOLATE_
(
x,
y,
minX,
minY,
maxX,
maxY,
1
)
#
endif
#
else
...
...
@@ -142,127 +142,49 @@
}
#
endif
#if
USE_DOUBLE
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#define FPTYPE float
#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
#endif
#if DATA_DEPTH == 0
#define BASE_TYPE uchar
#elif DATA_DEPTH == 1
#define BASE_TYPE char
#elif DATA_DEPTH == 2
#define BASE_TYPE ushort
#elif DATA_DEPTH == 3
#define BASE_TYPE short
#elif DATA_DEPTH == 4
#define BASE_TYPE int
#elif DATA_DEPTH == 5
#define BASE_TYPE float
#elif DATA_DEPTH == 6
#define BASE_TYPE double
#else
#error data_depth
#endif
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define uchar1 uchar
#define char1 char
#define ushort1 ushort
#define short1 short
#define int1 int
#define float1 float
#define double1 double
#define convert_uchar1_sat_rte convert_uchar_sat_rte
#define convert_char1_sat_rte convert_char_sat_rte
#define convert_ushort1_sat_rte convert_ushort_sat_rte
#define convert_short1_sat_rte convert_short_sat_rte
#define convert_int1_sat_rte convert_int_sat_rte
#define convert_float1
#define convert_double1
#if DATA_DEPTH == 5 |
|
DATA_DEPTH
==
6
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
#
else
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
,
_sat_rte
)
#
endif
#
define
VEC_SIZE
DATA_CHAN
#
define
VEC_TYPE
CAT
(
BASE_TYPE,
VEC_SIZE
)
#
define
TYPE
VEC_TYPE
#
if
VEC_SIZE
==
3
#
define
SCALAR_TYPE
CAT
(
FPTYPE,
4
)
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
SRCSIZE
(
int
)
sizeof
(
srcT
)
#
define
DSTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
SCALAR_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
SRCSIZE
(
int
)
sizeof
(
srcT1
)
*
cn
#
define
DSTSIZE
(
int
)
sizeof
(
dstT1
)
*
cn
#
endif
#
define
INTERMEDIATE_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
if
DATA_CHAN
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
TYPE
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
TYPE
*
)(
addr
)
=
val
#
define
TSIZE
(
int
)
sizeof
(
TYPE
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
BASE_TYPE
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
BASE_TYPE
*
)(
addr
))
#
define
TSIZE
(
int
)
sizeof
(
BASE_TYPE
)
*DATA_CHAN
#
endif
#
define
noconvert
struct
RectCoords
{
int
x1,
y1,
x2,
y2
;
}
;
//#define
DEBUG
#
ifdef
DEBUG
#
define
DEBUG_ONLY
(
x
)
x
#
define
ASSERT
(
condition
)
do
{
if
(
!
(
condition
))
{
printf
(
"BUG in boxFilter kernel (global=%d,%d): "
#
condition
"\n"
,
get_global_id
(
0
)
,
get_global_id
(
1
))
; } } while (0)
#
else
#
define
DEBUG_ONLY
(
x
)
(
void
)
0
#
define
ASSERT
(
condition
)
(
void
)
0
#
endif
inline
INTERMEDIATE_TYPE
readSrcPixel
(
int2
pos,
__global
const
uchar*
srcptr,
int
srcstep,
const
struct
RectCoords
srcCoords
#
ifdef
BORDER_CONSTANT
,
SCALAR_TYPE
borderValue
#
endif
)
inline
WT
readSrcPixel
(
int2
pos,
__global
const
uchar
*
srcptr,
int
src_step,
const
struct
RectCoords
srcCoords
)
{
#
ifdef
BORDER_ISOLATED
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
else
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
endif
{
//__global
TYPE*
ptr
=
(
__global
TYPE*
)((
__global
char*
)
src
+
pos.x
*
sizeof
(
TYPE
)
+
pos.y
*
srcStepBytes
)
;
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
TSIZE
)
;
return
CONVERT_TO_FPTYPE
(
loadpix
(
ptr
))
;
return
convertToWT
(
loadpix
(
srcptr
+
mad24
(
pos.y,
src_step,
pos.x
*
SRCSIZE
)))
;
}
else
{
#
ifdef
BORDER_CONSTANT
#
if
VEC_SIZE
==
3
return
(
INTERMEDIATE_TYPE
)(
borderValue.x,
borderValue.y,
borderValue.z
)
;
#
else
return
borderValue
;
#
endif
return
(
WT
)(
0
)
;
#
else
int
selected_col
=
pos.x
;
int
selected_row
=
pos.y
;
int
selected_col
=
pos.x,
selected_row
=
pos.y
;
EXTRAPOLATE
(
selected_col,
selected_row,
#
ifdef
BORDER_ISOLATED
...
...
@@ -273,68 +195,40 @@ inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, in
srcCoords.x2,
srcCoords.y2
)
;
//
debug
border
mapping
//printf
(
"pos=%d,%d --> %d, %d\n"
,
pos.x,
pos.y,
selected_col,
selected_row
)
;
pos
=
(
int2
)(
selected_col,
selected_row
)
;
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
{
//__global
TYPE*
ptr
=
(
__global
TYPE*
)((
__global
char*
)
src
+
pos.x
*
sizeof
(
TYPE
)
+
pos.y
*
srcStepBytes
)
;
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
TSIZE
)
;
return
CONVERT_TO_FPTYPE
(
loadpix
(
ptr
))
;
}
else
{
//
for
debug
only
DEBUG_ONLY
(
printf
(
"BUG in boxFilter kernel\n"
))
;
return
(
FPTYPE
)(
0.0f
)
;
}
return
convertToWT
(
loadpix
(
srcptr
+
mad24
(
selected_row,
src_step,
selected_col
*
SRCSIZE
)))
;
#
endif
}
}
//
INPUT
PARAMETER:
BLOCK_SIZE_Y
(
via
defines
)
__kernel
__attribute__
((
reqd_work_group_size
(
LOCAL_SIZE,
1
,
1
)))
void
filter2D
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
#
ifdef
BORDER_CONSTANT
SCALAR_TYPE
borderValue,
#
endif
__constant
FPTYPE*
kernelData
//
transposed:
[KERNEL_SIZE_X][KERNEL_SIZE_Y2_ALIGNED]
)
__kernel
void
filter2D
(
__global
const
uchar
*
srcptr,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
__constant
WT1
*
kernelData,
float
delta
)
{
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
int
local_id
=
get_local_id
(
0
)
;
const
int
x
=
local_id
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
const
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
int
local_id
=
get_local_id
(
0
)
;
int
x
=
local_id
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
INTERMEDIATE_TYPE
data[KERNEL_SIZE_Y]
;
__local
INTERMEDIATE_TYPE
sumOfCols[LOCAL_SIZE]
;
WT
data[KERNEL_SIZE_Y]
;
__local
WT
sumOfCols[LOCAL_SIZE]
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
int2
pos
=
(
int2
)(
x,
y
)
;
__global
TYPE*
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstptr
+
pos.y
*
dststep
+
dstoffset
+
pos.x
*
TSIZE
)
; // Pointer can be out of bounds!
bool
writeResult
=
((
local_id
>=
ANCHOR_X
)
&&
(
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
)
&&
(
pos.x
>=
0
)
&&
(
pos.x
<
cols
))
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
mad24
(
pos.y,
dst_step,
mad24
(
pos.x,
DSTSIZE,
dst_offset
))
)
; // Pointer can be out of bounds!
bool
writeResult
=
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
pos.x
>=
0
&&
pos.x
<
cols
;
#
if
BLOCK_SIZE_Y
>
1
bool
readAllpixels
=
true
;
int
sy_index
=
0
; // current index in data[] array
dstRowsMax
=
min
(
rows,
pos.y
+
BLOCK_SIZE_Y
)
;
for
(
;
pos.y
<
dstRowsMax
;
pos.y++,
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstptr
+
dststep
))
for
(
;
pos.y
<
dstRowsMax
;
pos.y++,
dst
=
(
__global
dstT
*
)((
__global
uchar
*
)
dst
+
dst_step
))
#
endif
{
ASSERT
(
pos.y
<
dstRowsMax
)
;
for
(
#
if
BLOCK_SIZE_Y
>
1
int
sy
=
readAllpixels
?
0
:
-1
; sy < (readAllpixels ? KERNEL_SIZE_Y : 0);
...
...
@@ -343,27 +237,21 @@ void filter2D(__global const uchar* srcptr, int srcstep, int srcOffsetX, int src
#
endif
sy++,
srcPos.y++
)
{
data[sy
+
sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
data[sy
+
sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
}
INTERMEDIATE_TYPE
total_sum
=
0
;
WT
total_sum
=
0
;
for
(
int
sx
=
0
; sx < KERNEL_SIZE_X; sx++)
{
{
__constant
FPTYPE
*
k
=
&kernelData[KERNEL_SIZE_Y2_ALIGNED
*
sx
__constant
WT1
*
k
=
&kernelData[KERNEL_SIZE_Y2_ALIGNED
*
sx
#
if
BLOCK_SIZE_Y
>
1
+
KERNEL_SIZE_Y
-
sy_index
#
endif
]
;
INTERMEDIATE_TYPE
tmp_sum
=
0
;
WT
tmp_sum
=
0
;
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
{
tmp_sum
+=
data[sy]
*
k[sy]
;
}
sumOfCols[local_id]
=
tmp_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -377,14 +265,12 @@ void filter2D(__global const uchar* srcptr, int srcstep, int srcOffsetX, int src
}
if
(
writeResult
)
{
storepix
(
CONVERT_TO_TYPE
(
total_sum
)
,
dstPtr
)
;
}
storepix
(
convertToDstT
(
total_sum
+
(
WT
)(
delta
))
,
dst
)
;
#
if
BLOCK_SIZE_Y
>
1
readAllpixels
=
false
;
#
if
BLOCK_SIZE_Y
>
KERNEL_SIZE_Y
sy_index
=
(
sy_index
+
1
<=
KERNEL_SIZE_Y
)
?
sy_index
+
1
:
1
;
sy_index
=
sy_index
+
1
<=
KERNEL_SIZE_Y
?
sy_index
+
1
:
1
;
#
else
sy_index++
;
#
endif
...
...
modules/imgproc/test/ocl/test_filter2d.cpp
View file @
e2c6ab01
...
...
@@ -62,6 +62,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
int
borderType
;
bool
useRoi
;
Mat
kernel
;
double
delta
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
...
...
@@ -91,6 +92,8 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
anchor
.
x
=
randomInt
(
-
1
,
ksize
.
width
);
anchor
.
y
=
randomInt
(
-
1
,
ksize
.
height
);
delta
=
randomDouble
(
-
100
,
100
);
UMAT_UPLOAD_INPUT_PARAMETER
(
src
);
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
);
}
...
...
@@ -108,8 +111,8 @@ OCL_TEST_P(Filter2D, Mat)
{
random_roi
();
OCL_OFF
(
cv
::
filter2D
(
src_roi
,
dst_roi
,
-
1
,
kernel
,
anchor
,
0.0
,
borderType
));
OCL_ON
(
cv
::
filter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernel
,
anchor
,
0.0
,
borderType
));
OCL_OFF
(
cv
::
filter2D
(
src_roi
,
dst_roi
,
-
1
,
kernel
,
anchor
,
delta
,
borderType
));
OCL_ON
(
cv
::
filter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernel
,
anchor
,
delta
,
borderType
));
Near
(
1.0
);
}
...
...
modules/imgproc/test/ocl/test_filters.cpp
View file @
e2c6ab01
...
...
@@ -152,8 +152,8 @@ OCL_TEST_P(LaplacianTest, Accuracy)
{
random_roi
();
OCL_OFF
(
cv
::
Laplacian
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
scale
,
0
,
borderType
));
OCL_ON
(
cv
::
Laplacian
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
scale
,
0
,
borderType
));
OCL_OFF
(
cv
::
Laplacian
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
scale
,
1
0
,
borderType
));
OCL_ON
(
cv
::
Laplacian
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
scale
,
1
0
,
borderType
));
Near
();
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment