Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
82bab2b0
Commit
82bab2b0
authored
Jan 31, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Jan 31, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2236 from ilya-lavrenov:tapi_boxfilter
parents
0b57d371
68f5dd41
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
129 additions
and
338 deletions
+129
-338
boxFilter.cl
modules/imgproc/src/opencl/boxFilter.cl
+53
-221
smooth.cpp
modules/imgproc/src/smooth.cpp
+63
-106
test_boxfilter.cpp
modules/imgproc/test/ocl/test_boxfilter.cpp
+13
-11
No files found.
modules/imgproc/src/opencl/boxFilter.cl
View file @
82bab2b0
...
@@ -39,45 +39,15 @@
...
@@ -39,45 +39,15 @@
//
//
//M*/
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
DOUBLE_SUPPORT
/////////////////////////////////Macro
for
border
type////////////////////////////////////////////
#
ifdef
cl_amd_fp64
/////////////////////////////////////////////////////////////////////////////////////////////////
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
ifdef
BORDER_REPLICATE
#
elif
defined
(
cl_khr_fp64
)
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
endif
#
ifdef
BORDER_REFLECT_101
//BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
endif
//blur
function
does
not
support
BORDER_WRAP
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
endif
#
ifdef
EXTRA_EXTRAPOLATION
//
border
>
src
image
size
#
ifdef
BORDER_CONSTANT
#
ifdef
BORDER_CONSTANT
//
None
#
elif
defined
BORDER_REPLICATE
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
{
\
...
@@ -131,248 +101,110 @@
...
@@ -131,248 +101,110 @@
#
else
#
else
#
error
No
extrapolation
method
#
error
No
extrapolation
method
#
endif
#
endif
#else
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
{ \
int _row = y - minY, _col = x - minX; \
_row = ADDR_H(_row, 0, maxY - minY); \
_row = ADDR_B(_row, maxY - minY, _row); \
y = _row + minY; \
\
_col = ADDR_L(_col, 0, maxX - minX); \
_col = ADDR_R(_col, maxX - minX, _col); \
x = _col + minX; \
}
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#define FPTYPE float
#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
#endif
#if DATA_DEPTH == 0
#define BASE_TYPE uchar
#elif DATA_DEPTH == 1
#define BASE_TYPE char
#elif DATA_DEPTH == 2
#define BASE_TYPE ushort
#elif DATA_DEPTH == 3
#define BASE_TYPE short
#elif DATA_DEPTH == 4
#define BASE_TYPE int
#elif DATA_DEPTH == 5
#define BASE_TYPE float
#elif DATA_DEPTH == 6
#define BASE_TYPE double
#else
#error data_depth
#endif
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define uchar1 uchar
#define char1 char
#define ushort1 ushort
#define short1 short
#define int1 int
#define float1 float
#define double1 double
#define convert_uchar1_sat_rte convert_uchar_sat_rte
#define convert_char1_sat_rte convert_char_sat_rte
#define convert_ushort1_sat_rte convert_ushort_sat_rte
#define convert_short1_sat_rte convert_short_sat_rte
#define convert_int1_sat_rte convert_int_sat_rte
#define convert_float1
#define convert_double1
#if DATA_DEPTH == 5 |
|
DATA_DEPTH
==
6
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
#
else
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
,
_sat_rte
)
#
endif
#
define
VEC_SIZE
DATA_CHAN
#
define
VEC_TYPE
CAT
(
BASE_TYPE,
VEC_SIZE
)
#
define
noconvert
#
define
TYPE
VEC_TYPE
#
define
SCALAR_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
INTERMEDIATE_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
TYPE_SIZE
(
VEC_SIZE*sizeof
(
BASE_TYPE
))
struct
RectCoords
struct
RectCoords
{
{
int
x1,
y1,
x2,
y2
;
int
x1,
y1,
x2,
y2
;
}
;
}
;
//#define
DEBUG
inline
WT
readSrcPixel
(
int2
pos,
__global
const
uchar
*
srcptr,
int
src_step,
const
struct
RectCoords
srcCoords
)
#
ifdef
DEBUG
#
define
DEBUG_ONLY
(
x
)
x
#
define
ASSERT
(
condition
)
do
{
if
(
!
(
condition
))
{
printf
(
"BUG in boxFilter kernel (global=%d,%d): "
#
condition
"\n"
,
get_global_id
(
0
)
,
get_global_id
(
1
))
; } } while (0)
#
else
#
define
DEBUG_ONLY
(
x
)
#
define
ASSERT
(
condition
)
#
endif
inline
INTERMEDIATE_TYPE
readSrcPixel
(
int2
pos,
__global
const
uchar*
srcptr,
int
srcstep,
const
struct
RectCoords
srcCoords
#
ifdef
BORDER_CONSTANT
,
SCALAR_TYPE
borderValue
#
endif
)
{
{
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
else
#
else
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
endif
#
endif
{
{
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
int
src_index
=
mad24
(
pos.y,
src_step,
pos.x
*
(
int
)
sizeof
(
ST
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
return
convertToWT
(
*
(
__global
const
ST
*
)(
srcptr
+
src_index
)
)
;
}
}
else
else
{
{
#
ifdef
BORDER_CONSTANT
#
ifdef
BORDER_CONSTANT
return
borderValue
;
return
(
WT
)(
0
)
;
#
else
#
else
int
selected_col
=
pos.x
;
int
selected_col
=
pos.x,
selected_row
=
pos.y
;
int
selected_row
=
pos.y
;
EXTRAPOLATE
(
selected_col,
selected_row,
EXTRAPOLATE
(
selected_col,
selected_row,
#
ifdef
BORDER_ISOLATED
#
ifdef
BORDER_ISOLATED
srcCoords.x1,
srcCoords.y1,
srcCoords.x1,
srcCoords.y1,
#
else
#
else
0
,
0
,
0
,
0
,
#
endif
#
endif
srcCoords.x2,
srcCoords.y2
srcCoords.x2,
srcCoords.y2
)
;
)
;
//
debug
border
mapping
int
src_index
=
mad24
(
selected_row,
src_step,
selected_col
*
(
int
)
sizeof
(
ST
))
;
//printf
(
"pos=%d,%d --> %d, %d\n"
,
pos.x,
pos.y,
selected_col,
selected_row
)
;
return
convertToWT
(
*
(
__global
const
ST
*
)(
srcptr
+
src_index
))
;
pos
=
(
int2
)(
selected_col,
selected_row
)
;
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
{
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
}
else
{
//
for
debug
only
DEBUG_ONLY
(
printf
(
"BUG in boxFilter kernel\n"
))
;
return
(
FPTYPE
)(
0.0f
)
;
}
#
endif
#
endif
}
}
}
}
//
INPUT
PARAMETER:
BLOCK_SIZE_Y
(
via
defines
)
__kernel
void
boxFilter
(
__global
const
uchar
*
srcptr,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols
__kernel
#
ifdef
NORMALIZE
__attribute__
((
reqd_work_group_size
(
LOCAL_SIZE,
1
,
1
)))
,
float
alpha
void
boxFilter
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
#
ifdef
BORDER_CONSTANT
SCALAR_TYPE
borderValue,
#
endif
#
endif
FPTYPE
alpha
)
)
{
{
const
struct
RectCoords
srcCoords
=
{srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
int
x
=
get_local_id
(
0
)
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
const
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
const
int
local_id
=
get_local_id
(
0
)
;
int
x
=
get_local_id
(
0
)
+
(
LOCAL_SIZE_X
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
INTERMEDIATE_TYPE
data[KERNEL_SIZE_Y]
;
int
local_id
=
get_local_id
(
0
)
;
__local
INTERMEDIATE_TYPE
sumOfCols[LOCAL_SIZE]
;
WT
data[KERNEL_SIZE_Y]
;
__local
WT
sumOfCols[LOCAL_SIZE_X]
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
{
data[sy]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
}
INTERMEDIATE_TYPE
tmp_sum
=
0
;
#
pragma
unroll
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
{
data[sy]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
tmp_sum
+=
(
data[sy]
)
;
}
WT
tmp_sum
=
(
WT
)(
0
)
;
#
pragma
unroll
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
tmp_sum
+=
data[sy]
;
sumOfCols[local_id]
=
tmp_sum
;
sumOfCols[local_id]
=
tmp_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
2
pos
=
(
int2
)(
x,
y
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
DT
)
+
dst_offset
)
;
__global
TYPE*
dstPtr
=
(
__global
TYPE*
)(
dstptr
+
pos.y
*
dststep
+
dstoffset
+
pos.x
*
TYPE_SIZE/*sizeof
(
TYPE
)
*/
)
; // Pointer can be out of bounds!
__global
DT
*
dst
=
(
__global
DT
*
)(
dstptr
+
dst_index
)
;
int
sy_index
=
0
; // current index in data[] array
int
sy_index
=
0
; // current index in data[] array
int
stepsY
=
min
(
rows
-
pos.y,
BLOCK_SIZE_Y
)
;
for
(
int
i
=
0
,
stepY
=
min
(
rows
-
y,
BLOCK_SIZE_Y
)
; i < stepY; ++i)
ASSERT
(
stepsY
>
0
)
;
for
(
; ;)
{
{
ASSERT
(
pos.y
<
rows
)
;
if
(
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE_X
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
x
>=
0
&&
x
<
cols
)
if
(
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
pos.x
>=
0
&&
pos.x
<
cols
)
{
{
ASSERT
(
pos.y
>=
0
&&
pos.y
<
rows
)
;
WT
total_sum
=
(
WT
)(
0
)
;
INTERMEDIATE_TYPE
total_sum
=
0
;
#
pragma
unroll
#
pragma
unroll
for
(
int
sx
=
0
; sx < KERNEL_SIZE_X; sx++)
for
(
int
sx
=
0
; sx < KERNEL_SIZE_X; sx++)
{
total_sum
+=
sumOfCols[local_id
+
sx
-
ANCHOR_X]
;
total_sum
+=
sumOfCols[local_id
+
sx
-
ANCHOR_X]
;
}
*dstPtr
=
CONVERT_TO_TYPE
(((
INTERMEDIATE_TYPE
)
alpha
)
*
total_sum
)
;
}
#
if
BLOCK_SIZE_Y
==
1
#
if
def
NORMALIZE
break
;
dst[0]
=
convertToDT
((
WT
)(
alpha
)
*
total_sum
)
;
#
else
#
else
if
(
--stepsY
==
0
)
dst[0]
=
convertToDT
(
total_sum
)
;
break
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
tmp_sum
=
sumOfCols[local_id]
; // TODO FIX IT: workaround for BUG in OpenCL compiler
tmp_sum
=
sumOfCols[local_id]
;
//
only
works
with
scalars:
ASSERT
(
fabs
(
tmp_sum
-
sumOfCols[local_id]
)
<
(
INTERMEDIATE_TYPE
)
1e-6
)
;
tmp_sum
-=
data[sy_index]
;
tmp_sum
-=
data[sy_index]
;
data[sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
data[sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
srcPos.y++
;
srcPos.y++
;
tmp_sum
+=
data[sy_index]
;
tmp_sum
+=
data[sy_index]
;
sumOfCols[local_id]
=
tmp_sum
;
sumOfCols[local_id]
=
tmp_sum
;
sy_index
=
(
sy_index
+
1
<
KERNEL_SIZE_Y
)
?
sy_index
+
1
:
0
;
sy_index
=
sy_index
+
1
<
KERNEL_SIZE_Y
?
sy_index
+
1
:
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
next
line
dst
=
(
__global
DT
*
)((
__global
uchar
*
)
dst
+
dst_step
)
;
DEBUG_ONLY
(
pos.y++
)
;
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstPtr
+
dststep
)
; // Pointer can be out of bounds!
#
endif
//
BLOCK_SIZE_Y
==
1
}
}
}
}
modules/imgproc/src/smooth.cpp
View file @
82bab2b0
...
@@ -611,155 +611,114 @@ template<> struct ColumnSum<int, ushort> : public BaseColumnFilter
...
@@ -611,155 +611,114 @@ template<> struct ColumnSum<int, ushort> : public BaseColumnFilter
std
::
vector
<
int
>
sum
;
std
::
vector
<
int
>
sum
;
};
};
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) ((total + grain - 1) / (grain))
#define DIVUP(total, grain) ((total + grain - 1) / (grain))
static
bool
ocl_boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
static
bool
ocl_boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
)
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
)
{
{
int
type
=
_src
.
type
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
type
);
int
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
((
1
!=
cn
)
&&
(
2
!=
cn
)
&&
(
4
!=
cn
))
return
false
;
//TODO
int
sdepth
=
CV_MAT_DEPTH
(
type
);
if
(
ddepth
<
0
)
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
ddepth
=
sdepth
;
else
if
(
ddepth
!=
sdepth
)
if
(
!
(
cn
==
1
||
cn
==
2
||
cn
==
4
)
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
||
_src
.
offset
()
%
esz
!=
0
||
_src
.
step
()
%
esz
!=
0
)
return
false
;
return
false
;
if
(
anchor
.
x
<
0
)
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
anchor
.
y
=
ksize
.
height
/
2
;
ocl
::
Kernel
kernel
;
int
computeUnits
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
//Normalize the result by default
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
Size
size
=
_src
.
size
(),
wholeSize
;
bool
useDouble
=
(
CV_64F
==
sdepth
);
bool
isolated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
const
cv
::
ocl
::
Device
&
device
=
cv
::
ocl
::
Device
::
getDefault
();
borderType
&=
~
BORDER_ISOLATED
;
int
doubleFPConfig
=
device
.
doubleFPConfig
();
int
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
ddepth
,
sdepth
));
if
(
useDouble
&&
(
0
==
doubleFPConfig
))
return
false
;
// may be we have to check is (0 != (CL_FP_SOFT_FLOAT & doubleFPConfig)) ?
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
//CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
return
false
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
cv
::
Size
sz
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
size_t
localsize
[
2
]
=
{
0
,
1
};
size_t
globalsize
[
2
]
=
{
size
.
width
,
size
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
};
UMat
src
;
Size
wholeSize
;
UMat
src
=
_src
.
getUMat
()
;
if
(
!
is
IsolatedBorder
)
if
(
!
is
olated
)
{
{
src
=
_src
.
getUMat
();
Point
ofs
;
Point
ofs
;
src
.
locateROI
(
wholeSize
,
ofs
);
src
.
locateROI
(
wholeSize
,
ofs
);
}
}
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
int
h
=
isolated
?
size
.
height
:
wholeSize
.
height
;
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
int
w
=
isolated
?
size
.
width
:
wholeSize
.
width
;
for
(;;)
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
BLOCK_SIZE
/=
2
;
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
device
.
maxComputeUnits
()
*
32
<
(
size_t
)
sz
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
((
size_t
)
ksize
.
width
>
BLOCK_SIZE
)
size_t
maxWorkItemSizes
[
32
];
return
false
;
ocl
::
Device
::
getDefault
().
maxWorkItemSizes
(
maxWorkItemSizes
);
int
tryWorkItems
=
(
int
)
maxWorkItemSizes
[
0
];
int
requiredTop
=
anchor
.
y
;
ocl
::
Kernel
kernel
;
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
for
(
;
;
)
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
{
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
BLOCK_SIZE_X
=
tryWorkItems
,
BLOCK_SIZE_Y
=
8
;
int
h
=
isIsolatedBorder
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
isIsolatedBorder
?
sz
.
width
:
wholeSize
.
width
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
while
(
BLOCK_SIZE_X
>
32
&&
BLOCK_SIZE_X
>=
ksize
.
width
*
2
&&
BLOCK_SIZE_X
>
size
.
width
*
2
)
BLOCK_SIZE_X
/=
2
;
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE_X
/
8
&&
BLOCK_SIZE_Y
*
computeUnits
*
32
<
size
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
(
(
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
)
)
if
(
ksize
.
width
>
BLOCK_SIZE_X
||
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
return
false
;
char
build_options
[
1024
];
char
cvt
[
2
][
50
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s"
,
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s "
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s"
,
sdepth
,
cn
,
useDouble
?
1
:
0
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
btype
,
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
);
localsize
[
0
]
=
BLOCK_SIZE
;
localsize
[
0
]
=
BLOCK_SIZE
_X
;
globalsize
[
0
]
=
DIVUP
(
s
z
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
s
ize
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
globalsize
[
1
]
=
DIVUP
(
s
z
.
height
,
BLOCK_SIZE_Y
);
globalsize
[
1
]
=
DIVUP
(
s
ize
.
height
,
BLOCK_SIZE_Y
);
cv
::
String
errmsg
;
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
opts
);
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
build_options
);
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
break
;
if
(
BLOCK_SIZE_X
<
(
int
)
kernelWorkGroupSize
)
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
return
false
;
return
false
;
tryWorkItems
=
kernelWorkGroupSize
;
tryWorkItems
=
(
int
)
kernelWorkGroupSize
;
}
}
_dst
.
create
(
s
z
,
CV_MAKETYPE
(
ddepth
,
cn
));
_dst
.
create
(
s
ize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
if
(
src
.
empty
())
src
=
_src
.
getUMat
();
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
int
idxArg
=
0
;
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcEndX
=
(
isIsolatedBorder
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
)
;
int
srcEndX
=
isolated
?
srcOffsetX
+
size
.
width
:
wholeSize
.
width
;
int
srcEndY
=
(
isIsolatedBorder
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
)
;
int
srcEndY
=
isolated
?
srcOffsetY
+
size
.
height
:
wholeSize
.
height
;
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
if
(
normalize
)
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
int
cnocl
=
(
3
==
cn
)
?
4
:
cn
;
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValueDouble
[
0
],
sizeof
(
double
)
*
cnocl
);
else
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValue
[
0
],
sizeof
(
float
)
*
cnocl
);
}
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
double
)
alpha
);
else
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
tru
e
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
fals
e
);
}
}
#endif
}
}
...
@@ -862,9 +821,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
...
@@ -862,9 +821,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size
ksize
,
Point
anchor
,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
bool
normalize
,
int
borderType
)
{
{
bool
use_opencl
=
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
normalize
;
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
if
(
use_opencl
&&
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
)
)
return
;
Mat
src
=
_src
.
getMat
();
Mat
src
=
_src
.
getMat
();
int
sdepth
=
src
.
depth
(),
cn
=
src
.
channels
();
int
sdepth
=
src
.
depth
(),
cn
=
src
.
channels
();
...
...
modules/imgproc/test/ocl/test_boxfilter.cpp
View file @
82bab2b0
...
@@ -56,32 +56,34 @@ enum
...
@@ -56,32 +56,34 @@ enum
/////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
// boxFilter
// boxFilter
PARAM_TEST_CASE
(
BoxFilter
,
MatDepth
,
Channels
,
BorderType
,
bool
)
PARAM_TEST_CASE
(
BoxFilter
,
MatDepth
,
Channels
,
BorderType
,
bool
,
bool
)
{
{
static
const
int
kernelMinSize
=
2
;
static
const
int
kernelMinSize
=
2
;
static
const
int
kernelMaxSize
=
10
;
static
const
int
kernelMaxSize
=
10
;
int
type
;
int
depth
,
cn
;
Size
ksize
;
Size
ksize
;
Size
dsize
;
Size
dsize
;
Point
anchor
;
Point
anchor
;
int
borderType
;
int
borderType
;
bool
useRoi
;
bool
normalize
,
useRoi
;
TEST_DECLARE_INPUT_PARAMETER
(
src
)
TEST_DECLARE_INPUT_PARAMETER
(
src
)
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
)
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
)
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
type
=
CV_MAKE_TYPE
(
GET_PARAM
(
0
),
GET_PARAM
(
1
));
depth
=
GET_PARAM
(
0
);
cn
=
GET_PARAM
(
1
);
borderType
=
GET_PARAM
(
2
);
// only not isolated border tested, because CPU module doesn't support isolated border case.
borderType
=
GET_PARAM
(
2
);
// only not isolated border tested, because CPU module doesn't support isolated border case.
useRoi
=
GET_PARAM
(
3
);
normalize
=
GET_PARAM
(
3
);
useRoi
=
GET_PARAM
(
4
);
}
}
void
random_roi
()
void
random_roi
()
{
{
int
type
=
CV_MAKE_TYPE
(
depth
,
cn
);
dsize
=
randomSize
(
1
,
MAX_VALUE
);
dsize
=
randomSize
(
1
,
MAX_VALUE
);
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
...
@@ -100,8 +102,7 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
...
@@ -100,8 +102,7 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
void
Near
(
double
threshold
=
0.0
)
void
Near
(
double
threshold
=
0.0
)
{
{
EXPECT_MAT_NEAR
(
dst
,
udst
,
threshold
);
OCL_EXPECT_MATS_NEAR
(
dst
,
threshold
)
EXPECT_MAT_NEAR
(
dst_roi
,
udst_roi
,
threshold
);
}
}
};
};
...
@@ -111,10 +112,10 @@ OCL_TEST_P(BoxFilter, Mat)
...
@@ -111,10 +112,10 @@ OCL_TEST_P(BoxFilter, Mat)
{
{
random_roi
();
random_roi
();
OCL_OFF
(
cv
::
boxFilter
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
anchor
,
tru
e
,
borderType
));
OCL_OFF
(
cv
::
boxFilter
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
anchor
,
normaliz
e
,
borderType
));
OCL_ON
(
cv
::
boxFilter
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
anchor
,
tru
e
,
borderType
));
OCL_ON
(
cv
::
boxFilter
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
anchor
,
normaliz
e
,
borderType
));
Near
(
1.0
);
Near
(
depth
<=
CV_32S
?
1
:
1e-3
);
}
}
}
}
...
@@ -127,6 +128,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
...
@@ -127,6 +128,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
(
BorderType
)
BORDER_REFLECT
,
(
BorderType
)
BORDER_REFLECT_101
),
(
BorderType
)
BORDER_REFLECT_101
),
Bool
(),
Bool
()
// ROI
Bool
()
// ROI
)
)
);
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment