Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d63a8ba0
Commit
d63a8ba0
authored
Mar 25, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Mar 25, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2523 from ilya-lavrenov:tapi_filters
parents
da7a6052
e2c6ab01
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
250 additions
and
364 deletions
+250
-364
filter.cpp
modules/imgproc/src/filter.cpp
+53
-101
bilateral.cl
modules/imgproc/src/opencl/bilateral.cl
+34
-8
boxFilter.cl
modules/imgproc/src/opencl/boxFilter.cl
+20
-7
filter2D.cl
modules/imgproc/src/opencl/filter2D.cl
+39
-135
medianFilter.cl
modules/imgproc/src/opencl/medianFilter.cl
+59
-69
smooth.cpp
modules/imgproc/src/smooth.cpp
+31
-29
test_boxfilter.cpp
modules/imgproc/test/ocl/test_boxfilter.cpp
+2
-2
test_filter2d.cpp
modules/imgproc/test/ocl/test_filter2d.cpp
+7
-5
test_filters.cpp
modules/imgproc/test/ocl/test_filters.cpp
+4
-7
test_medianfilter.cpp
modules/imgproc/test/ocl/test_medianfilter.cpp
+1
-1
No files found.
modules/imgproc/src/filter.cpp
View file @
d63a8ba0
...
...
@@ -3154,75 +3154,50 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray
_kernel
,
Point
anchor
,
double
delta
,
int
borderType
)
{
if
(
abs
(
delta
)
>
FLT_MIN
)
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
ddepth
=
ddepth
<
0
?
sdepth
:
ddepth
;
int
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
),
wdepth
=
std
::
max
(
std
::
max
(
sdepth
,
ddepth
),
CV_32F
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
if
(
cn
>
4
)
return
false
;
int
type
=
_src
.
type
();
int
cn
=
CV_MAT_CN
(
type
);
if
((
1
!=
cn
)
&&
(
2
!=
cn
)
&&
(
4
!=
cn
))
return
false
;
//TODO
int
sdepth
=
CV_MAT_DEPTH
(
type
);
Size
ksize
=
_kernel
.
size
();
if
(
anchor
.
x
<
0
)
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
else
if
(
ddepth
!=
sdepth
)
return
false
;
bool
is
IsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bo
ol
useDouble
=
(
CV_64F
==
sdepth
)
;
bool
is
olated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bo
rderType
&=
~
BORDER_ISOLATED
;
const
cv
::
ocl
::
Device
&
device
=
cv
::
ocl
::
Device
::
getDefault
();
int
doubleFPConfig
=
device
.
doubleFPConfig
()
;
if
(
useDouble
&&
(
0
==
doubleFPConfig
)
)
bool
doubleSupport
=
device
.
doubleFPConfig
()
>
0
;
if
(
wdepth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
return
false
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
cv
::
Mat
kernelMat
=
_kernel
.
getMat
();
std
::
vector
<
float
>
kernelMatDataFloat
;
std
::
vector
<
double
>
kernelMatDataDouble
;
int
kernel_size_y2_aligned
=
useDouble
?
_prepareKernelFilter2D
<
double
>
(
kernelMatDataDouble
,
kernelMat
)
:
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
int
kernel_size_y2_aligned
=
_prepareKernelFilter2D
<
float
>
(
kernelMatDataFloat
,
kernelMat
);
cv
::
Size
sz
=
_src
.
size
(),
wholeSize
;
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
},
localsize
[
2
]
=
{
0
,
1
};
cv
::
Size
sz
=
_src
.
size
();
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
};
ocl
::
Kernel
kernel
;
UMat
src
;
Size
wholeSize
;
if
(
!
isIsolatedBorder
)
ocl
::
Kernel
k
;
UMat
src
=
_src
.
getUMat
();
if
(
!
isolated
)
{
src
=
_src
.
getUMat
();
Point
ofs
;
src
.
locateROI
(
wholeSize
,
ofs
);
}
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
for
(;;)
char
cvt
[
2
][
40
];
for
(
;
;
)
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
...
...
@@ -3242,32 +3217,36 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
is
IsolatedBorder
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
is
IsolatedBorder
?
sz
.
width
:
wholeSize
.
width
;
int
h
=
is
olated
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
is
olated
?
sz
.
width
:
wholeSize
.
width
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
if
((
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
))
return
false
;
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
"-D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
sdepth
,
cn
,
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
String
opts
=
format
(
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
borderMap
[
borderType
],
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]));
localsize
[
0
]
=
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
sz
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
1
]
=
DIVUP
(
sz
.
height
,
BLOCK_SIZE_Y
);
cv
::
String
errmsg
;
if
(
!
kernel
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
build_options
))
if
(
!
k
.
create
(
"filter2D"
,
cv
::
ocl
::
imgproc
::
filter2D_oclsrc
,
opts
))
return
false
;
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
size_t
kernelWorkGroupSize
=
k
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
...
...
@@ -3275,46 +3254,19 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
tryWorkItems
=
kernelWorkGroupSize
;
}
_dst
.
create
(
sz
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
if
(
src
.
empty
())
src
=
_src
.
getUMat
();
int
idxArg
=
0
;
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
_dst
.
create
(
sz
,
dtype
);
UMat
dst
=
_dst
.
getUMat
(),
kernalDataUMat
(
kernelMatDataFloat
,
true
);
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcEndX
=
(
isIsolatedBorder
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
);
int
srcEndY
=
(
isIsolatedBorder
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
int
cnocl
=
(
3
==
cn
)
?
4
:
cn
;
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValueDouble
[
0
],
sizeof
(
double
)
*
cnocl
);
else
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValue
[
0
],
sizeof
(
float
)
*
cnocl
);
}
if
(
useDouble
)
{
UMat
kernalDataUMat
(
kernelMatDataDouble
,
true
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
));
}
else
{
UMat
kernalDataUMat
(
kernelMatDataFloat
,
true
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
));
}
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
true
);
int
srcEndX
=
(
isolated
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
);
int
srcEndY
=
(
isolated
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src
.
step
,
srcOffsetX
,
srcOffsetY
,
srcEndX
,
srcEndY
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
kernalDataUMat
),
(
float
)
delta
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_sepRowFilter2D
(
const
UMat
&
src
,
UMat
&
buf
,
const
Mat
&
kernelX
,
int
anchor
,
...
...
modules/imgproc/src/opencl/bilateral.cl
View file @
d63a8ba0
...
...
@@ -32,6 +32,28 @@
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
uchar_t
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
uchar_t
*
)(
addr
)
=
val
#
define
TSIZE
cn
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
uchar
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
uchar
*
)(
addr
))
#
define
TSIZE
3
#
endif
#
if
cn
==
1
#
define
SUM
(
a
)
a
#
elif
cn
==
2
#
define
SUM
(
a
)
a.x
+
a.y
#
elif
cn
==
3
#
define
SUM
(
a
)
a.x
+
a.y
+
a.z
#
elif
cn
==
4
#
define
SUM
(
a
)
a.x
+
a.y
+
a.z
+
a.w
#
else
#
error
"cn should be <= 4"
#
endif
__kernel
void
bilateral
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
float
*
color_weight,
__constant
float
*
space_weight,
__constant
int
*
space_ofs
)
...
...
@@ -41,19 +63,23 @@ __kernel void bilateral(__global const uchar * src, int src_step, int src_offset
if
(
y
<
dst_rows
&&
x
<
dst_cols
)
{
int
src_index
=
mad24
(
y
+
radius,
src_step,
x
+
radius
+
src_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
float
sum
=
0.f,
wsum
=
0.f
;
int
val0
=
convert_int
(
src[src_index]
)
;
int
src_index
=
mad24
(
y
+
radius,
src_step,
mad24
(
x
+
radius,
TSIZE,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
TSIZE,
dst_offset
))
;
float_t
sum
=
(
float_t
)(
0.0f
)
;
float
wsum
=
0.0f
;
int_t
val0
=
convert_int_t
(
loadpix
(
src
+
src_index
))
;
#
pragma
unroll
for
(
int
k
=
0
; k < maxk; k++ )
{
int
val
=
convert_int
(
src[src_index
+
space_ofs[k]]
)
;
float
w
=
space_weight[k]
*
color_weight[abs
(
val
-
val0
)
]
;
sum
+=
(
float
)(
val
)
*
w
;
int_t
val
=
convert_int_t
(
loadpix
(
src
+
src_index
+
space_ofs[k]
))
;
uint_t
diff
=
abs
(
val
-
val0
)
;
float
w
=
space_weight[k]
*
color_weight[SUM
(
diff
)
]
;
sum
+=
convert_float_t
(
val
)
*
(
float_t
)(
w
)
;
wsum
+=
w
;
}
dst[dst_index]
=
convert_uchar_rtz
(
sum
/
wsum
+
0.5f
)
;
storepix
(
convert_uchar_t
(
sum
/
(
float_t
)(
wsum
))
,
dst
+
dst_index
)
;
}
}
modules/imgproc/src/opencl/boxFilter.cl
View file @
d63a8ba0
...
...
@@ -47,6 +47,18 @@
#
endif
#
endif
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
ST
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
DT
*
)(
addr
)
=
val
#
define
SRCSIZE
(
int
)
sizeof
(
ST
)
#
define
DSTSIZE
(
int
)
sizeof
(
DT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
ST1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
DT1
*
)(
addr
))
#
define
SRCSIZE
(
int
)
sizeof
(
ST1
)
*cn
#
define
DSTSIZE
(
int
)
sizeof
(
DT1
)
*cn
#
endif
#
ifdef
BORDER_CONSTANT
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
...
...
@@ -123,8 +135,8 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
endif
{
int
src_index
=
mad24
(
pos.y,
src_step,
pos.x
*
(
int
)
sizeof
(
ST
)
)
;
WT
value
=
convertToWT
(
*
(
__global
const
ST
*
)
(
srcptr
+
src_index
))
;
int
src_index
=
mad24
(
pos.y,
src_step,
pos.x
*
SRCSIZE
)
;
WT
value
=
convertToWT
(
loadpix
(
srcptr
+
src_index
))
;
return
PROCESS_ELEM
(
value
)
;
}
...
...
@@ -143,8 +155,8 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
#
endif
srcCoords.x2,
srcCoords.y2
)
;
int
src_index
=
mad24
(
selected_row,
src_step,
selected_col
*
(
int
)
sizeof
(
ST
)
)
;
WT
value
=
convertToWT
(
*
(
__global
const
ST
*
)
(
srcptr
+
src_index
))
;
int
src_index
=
mad24
(
selected_row,
src_step,
selected_col
*
SRCSIZE
)
;
WT
value
=
convertToWT
(
loadpix
(
srcptr
+
src_index
))
;
return
PROCESS_ELEM
(
value
)
;
#
endif
...
...
@@ -180,7 +192,7 @@ __kernel void boxFilter(__global const uchar * srcptr, int src_step, int srcOffs
sumOfCols[local_id]
=
tmp_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
DT
)
+
dst_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)
)
;
__global
DT
*
dst
=
(
__global
DT
*
)(
dstptr
+
dst_index
)
;
int
sy_index
=
0
; // current index in data[] array
...
...
@@ -196,10 +208,11 @@ __kernel void boxFilter(__global const uchar * srcptr, int src_step, int srcOffs
total_sum
+=
sumOfCols[local_id
+
sx
-
ANCHOR_X]
;
#
ifdef
NORMALIZE
dst[0]
=
convertToDT
((
WT
)(
alpha
)
*
total_sum
)
;
DT
dstval
=
convertToDT
((
WT
)(
alpha
)
*
total_sum
)
;
#
else
dst[0]
=
convertToDT
(
total_sum
)
;
DT
dstval
=
convertToDT
(
total_sum
)
;
#
endif
storepix
(
dstval,
dst
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
modules/imgproc/src/opencl/filter2D.cl
View file @
d63a8ba0
...
...
@@ -122,7 +122,7 @@
}
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
#elif defined(BORDER_REFLECT_101)
#elif defined(BORDER_REFLECT_101)
|
|
defined
(
BORDER_REFLECT101
)
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
EXTRAPOLATE_
(
x,
y,
minX,
minY,
maxX,
maxY,
1
)
#
endif
#
else
...
...
@@ -142,109 +142,49 @@
}
#
endif
#if
USE_DOUBLE
#
if
def
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#define FPTYPE float
#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
#endif
#if DATA_DEPTH == 0
#define BASE_TYPE uchar
#elif DATA_DEPTH == 1
#define BASE_TYPE char
#elif DATA_DEPTH == 2
#define BASE_TYPE ushort
#elif DATA_DEPTH == 3
#define BASE_TYPE short
#elif DATA_DEPTH == 4
#define BASE_TYPE int
#elif DATA_DEPTH == 5
#define BASE_TYPE float
#elif DATA_DEPTH == 6
#define BASE_TYPE double
#else
#error data_depth
#
endif
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define uchar1 uchar
#define char1 char
#define ushort1 ushort
#define short1 short
#define int1 int
#define float1 float
#define double1 double
#define convert_uchar1_sat_rte convert_uchar_sat_rte
#define convert_char1_sat_rte convert_char_sat_rte
#define convert_ushort1_sat_rte convert_ushort_sat_rte
#define convert_short1_sat_rte convert_short_sat_rte
#define convert_int1_sat_rte convert_int_sat_rte
#define convert_float1
#define convert_double1
#if DATA_DEPTH == 5 |
|
DATA_DEPTH
==
6
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
SRCSIZE
(
int
)
sizeof
(
srcT
)
#
define
DSTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
,
_sat_rte
)
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
SRCSIZE
(
int
)
sizeof
(
srcT1
)
*
cn
#
define
DSTSIZE
(
int
)
sizeof
(
dstT1
)
*
cn
#
endif
#
define
VEC_SIZE
DATA_CHAN
#
define
VEC_TYPE
CAT
(
BASE_TYPE,
VEC_SIZE
)
#
define
TYPE
VEC_TYPE
#
define
SCALAR_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
INTERMEDIATE_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
noconvert
struct
RectCoords
{
int
x1,
y1,
x2,
y2
;
}
;
//#define
DEBUG
#
ifdef
DEBUG
#
define
DEBUG_ONLY
(
x
)
x
#
define
ASSERT
(
condition
)
do
{
if
(
!
(
condition
))
{
printf
(
"BUG in boxFilter kernel (global=%d,%d): "
#
condition
"\n"
,
get_global_id
(
0
)
,
get_global_id
(
1
))
; } } while (0)
#
else
#
define
DEBUG_ONLY
(
x
)
(
void
)
0
#
define
ASSERT
(
condition
)
(
void
)
0
#
endif
inline
INTERMEDIATE_TYPE
readSrcPixel
(
int2
pos,
__global
const
uchar*
srcptr,
int
srcstep,
const
struct
RectCoords
srcCoords
#
ifdef
BORDER_CONSTANT
,
SCALAR_TYPE
borderValue
#
endif
)
inline
WT
readSrcPixel
(
int2
pos,
__global
const
uchar
*
srcptr,
int
src_step,
const
struct
RectCoords
srcCoords
)
{
#
ifdef
BORDER_ISOLATED
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
else
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
endif
{
//__global
TYPE*
ptr
=
(
__global
TYPE*
)((
__global
char*
)
src
+
pos.x
*
sizeof
(
TYPE
)
+
pos.y
*
srcStepBytes
)
;
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
return
convertToWT
(
loadpix
(
srcptr
+
mad24
(
pos.y,
src_step,
pos.x
*
SRCSIZE
)))
;
}
else
{
#
ifdef
BORDER_CONSTANT
return
borderValue
;
return
(
WT
)(
0
)
;
#
else
int
selected_col
=
pos.x
;
int
selected_row
=
pos.y
;
int
selected_col
=
pos.x,
selected_row
=
pos.y
;
EXTRAPOLATE
(
selected_col,
selected_row,
#
ifdef
BORDER_ISOLATED
...
...
@@ -255,68 +195,40 @@ inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, in
srcCoords.x2,
srcCoords.y2
)
;
//
debug
border
mapping
//printf
(
"pos=%d,%d --> %d, %d\n"
,
pos.x,
pos.y,
selected_col,
selected_row
)
;
pos
=
(
int2
)(
selected_col,
selected_row
)
;
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
{
//__global
TYPE*
ptr
=
(
__global
TYPE*
)((
__global
char*
)
src
+
pos.x
*
sizeof
(
TYPE
)
+
pos.y
*
srcStepBytes
)
;
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
}
else
{
//
for
debug
only
DEBUG_ONLY
(
printf
(
"BUG in boxFilter kernel\n"
))
;
return
(
FPTYPE
)(
0.0f
)
;
}
return
convertToWT
(
loadpix
(
srcptr
+
mad24
(
selected_row,
src_step,
selected_col
*
SRCSIZE
)))
;
#
endif
}
}
//
INPUT
PARAMETER:
BLOCK_SIZE_Y
(
via
defines
)
__kernel
__attribute__
((
reqd_work_group_size
(
LOCAL_SIZE,
1
,
1
)))
void
filter2D
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
#
ifdef
BORDER_CONSTANT
SCALAR_TYPE
borderValue,
#
endif
__constant
FPTYPE*
kernelData
//
transposed:
[KERNEL_SIZE_X][KERNEL_SIZE_Y2_ALIGNED]
)
__kernel
void
filter2D
(
__global
const
uchar
*
srcptr,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
__constant
WT1
*
kernelData,
float
delta
)
{
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
int
local_id
=
get_local_id
(
0
)
;
const
int
x
=
local_id
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
const
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
int
local_id
=
get_local_id
(
0
)
;
int
x
=
local_id
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
INTERMEDIATE_TYPE
data[KERNEL_SIZE_Y]
;
__local
INTERMEDIATE_TYPE
sumOfCols[LOCAL_SIZE]
;
WT
data[KERNEL_SIZE_Y]
;
__local
WT
sumOfCols[LOCAL_SIZE]
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
int2
pos
=
(
int2
)(
x,
y
)
;
__global
TYPE*
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstptr
+
pos.y
*
dststep
+
dstoffset
+
pos.x
*
sizeof
(
TYPE
))
; // Pointer can be out of bounds!
bool
writeResult
=
((
local_id
>=
ANCHOR_X
)
&&
(
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
)
&&
(
pos.x
>=
0
)
&&
(
pos.x
<
cols
))
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
mad24
(
pos.y,
dst_step,
mad24
(
pos.x,
DSTSIZE,
dst_offset
)
))
; // Pointer can be out of bounds!
bool
writeResult
=
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
pos.x
>=
0
&&
pos.x
<
cols
;
#
if
BLOCK_SIZE_Y
>
1
bool
readAllpixels
=
true
;
int
sy_index
=
0
; // current index in data[] array
dstRowsMax
=
min
(
rows,
pos.y
+
BLOCK_SIZE_Y
)
;
for
(
;
pos.y
<
dstRowsMax
;
pos.y++,
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstptr
+
dststep
))
for
(
;
pos.y
<
dstRowsMax
;
pos.y++,
dst
=
(
__global
dstT
*
)((
__global
uchar
*
)
dst
+
dst_step
))
#
endif
{
ASSERT
(
pos.y
<
dstRowsMax
)
;
for
(
#
if
BLOCK_SIZE_Y
>
1
int
sy
=
readAllpixels
?
0
:
-1
; sy < (readAllpixels ? KERNEL_SIZE_Y : 0);
...
...
@@ -325,27 +237,21 @@ void filter2D(__global const uchar* srcptr, int srcstep, int srcOffsetX, int src
#
endif
sy++,
srcPos.y++
)
{
data[sy
+
sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
data[sy
+
sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
}
INTERMEDIATE_TYPE
total_sum
=
0
;
WT
total_sum
=
0
;
for
(
int
sx
=
0
; sx < KERNEL_SIZE_X; sx++)
{
{
__constant
FPTYPE
*
k
=
&kernelData[KERNEL_SIZE_Y2_ALIGNED
*
sx
__constant
WT1
*
k
=
&kernelData[KERNEL_SIZE_Y2_ALIGNED
*
sx
#
if
BLOCK_SIZE_Y
>
1
+
KERNEL_SIZE_Y
-
sy_index
#
endif
]
;
INTERMEDIATE_TYPE
tmp_sum
=
0
;
WT
tmp_sum
=
0
;
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
{
tmp_sum
+=
data[sy]
*
k[sy]
;
}
sumOfCols[local_id]
=
tmp_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -359,14 +265,12 @@ void filter2D(__global const uchar* srcptr, int srcstep, int srcOffsetX, int src
}
if
(
writeResult
)
{
*dstPtr
=
CONVERT_TO_TYPE
(
total_sum
)
;
}
storepix
(
convertToDstT
(
total_sum
+
(
WT
)(
delta
))
,
dst
)
;
#
if
BLOCK_SIZE_Y
>
1
readAllpixels
=
false
;
#
if
BLOCK_SIZE_Y
>
KERNEL_SIZE_Y
sy_index
=
(
sy_index
+
1
<=
KERNEL_SIZE_Y
)
?
sy_index
+
1
:
1
;
sy_index
=
sy_index
+
1
<=
KERNEL_SIZE_Y
?
sy_index
+
1
:
1
;
#
else
sy_index++
;
#
endif
...
...
modules/imgproc/src/opencl/medianFilter.cl
View file @
d63a8ba0
...
...
@@ -29,52 +29,52 @@
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
#
define
DATA_TYPE
type
#
define
scnbytes
((
int
)
sizeof
(
type
))
#
define
op
(
a,b
)
{
mid=a
; a=min(a,b); b=max(mid,b);}
__kernel
void
medianFilter3
(
__global
const
uchar*
srcptr,
int
srcStep,
int
srcOffset,
__global
uchar*
dstptr,
int
dstStep,
int
dstOffset,
int
rows,
int
cols
)
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
T
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
T
*
)(
addr
)
=
val
#
define
TSIZE
(
int
)
sizeof
(
T
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
T1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
T1
*
)(
addr
))
#
define
TSIZE
(
int
)
sizeof
(
T1
)
*
cn
#
endif
#
define
op
(
a,
b
)
{
mid
=
a
; a = min(a, b); b = max(mid, b); }
__kernel
void
medianFilter3
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
__local
DATA_TYPE
data[18][18]
;
__local
T
data[18][18]
;
int
x
=
get_local_id
(
0
)
;
int
y
=
get_local_id
(
1
)
;
int
gx=
get_global_id
(
0
)
;
int
gy=
get_global_id
(
1
)
;
int
gx
=
get_global_id
(
0
)
;
int
gy
=
get_global_id
(
1
)
;
int
dx
=
gx
-
x
-
1
;
int
dy
=
gy
-
y
-
1
;
const
int
id
=
min
((
int
)(
x*16+
y
)
,
9*18-1
)
;
int
id
=
min
(
mad24
(
x,
16
,
y
)
,
9*18-1
)
;
int
dr
=
id
/
18
;
int
dc
=
id
%
18
;
int
c
=
clamp
(
dx+dc,
0
,
cols-1
)
;
int
r
=
clamp
(
dy+dr,
0
,
rows-1
)
;
int
index1
=
mad24
(
r,
srcStep,
srcOffset
+
c*scnbytes
)
;
r
=
clamp
(
dy+dr+9,
0
,
rows-1
)
;
int
index9
=
mad24
(
r,
srcStep,
srcOffset
+
c*scnbytes
)
;
int
c
=
clamp
(
dx
+
dc,
0
,
dst_cols
-
1
)
;
__global
DATA_TYPE
*
src
=
(
__global
DATA_TYPE
*
)(
srcptr
+
index1
)
;
data[dr][dc]
=
src[0]
;
src
=
(
__global
DATA_TYPE
*
)(
srcptr
+
index9
)
;
data[dr+9][dc]
=
src[0]
;
int
r
=
clamp
(
dy
+
dr,
0
,
dst_rows
-
1
)
;
int
index1
=
mad24
(
r,
src_step,
mad24
(
c,
TSIZE,
src_offset
))
;
r
=
clamp
(
dy
+
dr
+
9
,
0
,
dst_rows
-
1
)
;
int
index9
=
mad24
(
r,
src_step,
mad24
(
c,
TSIZE,
src_offset
))
;
data[dr][dc]
=
loadpix
(
srcptr
+
index1
)
;
data[dr+9][dc]
=
loadpix
(
srcptr
+
index9
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
DATA_TYPE
p0=data[y][x],
p1=data[y][
(
x+1
)
],
p2=
data[y][
(
x+2
)
]
;
DATA_TYPE
p3=data[y+1][x],
p4=data[y+1][
(
x+1
)
],
p5=
data[y+1][
(
x+2
)
]
;
DATA_TYPE
p6=data[y+2][x],
p7=data[y+2][
(
x+1
)
],
p8=
data[y+2][
(
x+2
)
]
;
DATA_TYPE
mid
;
T
p0
=
data[y][x],
p1
=
data[y][
(
x+1
)
],
p2
=
data[y][
(
x+2
)
]
;
T
p3
=
data[y+1][x],
p4
=
data[y+1][
(
x+1
)
],
p5
=
data[y+1][
(
x+2
)
]
;
T
p6
=
data[y+2][x],
p7
=
data[y+2][
(
x+1
)
],
p8
=
data[y+2][
(
x+2
)
]
;
T
mid
;
op
(
p1,
p2
)
; op(p4, p5); op(p7, p8); op(p0, p1);
op
(
p3,
p4
)
; op(p6, p7); op(p1, p2); op(p4, p5);
...
...
@@ -82,56 +82,48 @@ __kernel void medianFilter3(__global const uchar* srcptr, int srcStep, int srcOf
op
(
p3,
p6
)
; op(p1, p4); op(p2, p5); op(p4, p7);
op
(
p4,
p2
)
; op(p6, p4); op(p4, p2);
int
dst_index
=
mad24
(
gy,
dst
Step,
dstOffset
+
gx
*
scnbytes
)
;
int
dst_index
=
mad24
(
gy,
dst
_step,
mad24
(
gx,
TSIZE,
dst_offset
)
)
;
if
(
gy
<
rows
&&
gx
<
cols
)
{
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_index
)
;
dst[0]
=
p4
;
}
if
(
gy
<
dst_rows
&&
gx
<
dst_cols
)
storepix
(
p4,
dstptr
+
dst_index
)
;
}
__kernel
void
medianFilter5
(
__global
const
uchar*
srcptr,
int
srcStep,
int
srcOffset,
__global
uchar*
dstptr,
int
dstStep,
int
dstOffset,
int
rows,
int
cols
)
__kernel
void
medianFilter5
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
__local
DATA_TYPE
data[20][20]
;
__local
T
data[20][20]
;
int
x
=get_local_id
(
0
)
;
int
y
=get_local_id
(
1
)
;
int
x
=
get_local_id
(
0
)
;
int
y
=
get_local_id
(
1
)
;
int
gx
=
get_global_id
(
0
)
;
int
gy
=
get_global_id
(
1
)
;
int
gx
=
get_global_id
(
0
)
;
int
gy
=
get_global_id
(
1
)
;
int
dx
=
gx
-
x
-
2
;
int
dy
=
gy
-
y
-
2
;
const
int
id
=
min
((
int
)(
x*16+y
)
,
10*20-1
)
;
int
dr=id/20
;
int
dc=id%20
;
int
c=clamp
(
dx+dc,
0
,
cols-1
)
;
int
id
=
min
(
mad24
(
x,
16
,
y
)
,
10*20-1
)
;
int
r
=
clamp
(
dy+dr,
0
,
rows-1
)
;
int
index1
=
mad24
(
r,
srcStep,
srcOffset
+
c*scnbytes
)
;
int
dr
=
id
/
20
;
int
dc
=
id
%
20
;
r
=
clamp
(
dy+dr+10,
0
,
rows-1
)
;
int
index10
=
mad24
(
r,
srcStep,
srcOffset
+
c*scnbytes
)
;
int
c
=
clamp
(
dx
+
dc,
0
,
dst_cols
-
1
)
;
int
r
=
clamp
(
dy
+
dr,
0
,
dst_rows
-
1
)
;
int
index1
=
mad24
(
r,
src_step,
mad24
(
c,
TSIZE,
src_offset
))
;
__global
DATA_TYPE
*
src
=
(
__global
DATA_TYPE
*
)(
srcptr
+
index1
)
;
data[dr][dc]
=
src[0]
;
src
=
(
__global
DATA_TYPE
*
)(
srcptr
+
index10
)
;
data[dr+10][dc]
=
src[0]
;
r
=
clamp
(
dy
+
dr
+
10
,
0
,
dst_rows
-
1
)
;
int
index10
=
mad24
(
r,
src_step,
mad24
(
c,
TSIZE,
src_offset
))
;
data[dr][dc]
=
loadpix
(
srcptr
+
index1
)
;
data[dr+10][dc]
=
loadpix
(
srcptr
+
index10
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
DATA_TYPE
p0=data[y][x],
p1=data[y][x+1],
p2=data[y][x+2],
p3=data[y][x+3],
p4=
data[y][x+4]
;
DATA_TYPE
p5=data[y+1][x],
p6=data[y+1][x+1],
p7=data[y+1][x+2],
p8=data[y+1][x+3],
p9=
data[y+1][x+4]
;
DATA_TYPE
p10=data[y+2][x],
p11=data[y+2][x+1],
p12=data[y+2][x+2],
p13=data[y+2][x+3],
p14=
data[y+2][x+4]
;
DATA_TYPE
p15=data[y+3][x],
p16=data[y+3][x+1],
p17=data[y+3][x+2],
p18=data[y+3][x+3],
p19=
data[y+3][x+4]
;
DATA_TYPE
p20=data[y+4][x],
p21=data[y+4][x+1],
p22=data[y+4][x+2],
p23=data[y+4][x+3],
p24=
data[y+4][x+4]
;
DATA_TYPE
mid
;
T
p0
=
data[y][x],
p1
=
data[y][x+1],
p2
=
data[y][x+2],
p3
=
data[y][x+3],
p4
=
data[y][x+4]
;
T
p5
=
data[y+1][x],
p6
=
data[y+1][x+1],
p7
=
data[y+1][x+2],
p8
=
data[y+1][x+3],
p9
=
data[y+1][x+4]
;
T
p10
=
data[y+2][x],
p11
=
data[y+2][x+1],
p12
=
data[y+2][x+2],
p13
=
data[y+2][x+3],
p14
=
data[y+2][x+4]
;
T
p15
=
data[y+3][x],
p16
=
data[y+3][x+1],
p17
=
data[y+3][x+2],
p18
=
data[y+3][x+3],
p19
=
data[y+3][x+4]
;
T
p20
=
data[y+4][x],
p21
=
data[y+4][x+1],
p22
=
data[y+4][x+2],
p23
=
data[y+4][x+3],
p24
=
data[y+4][x+4]
;
T
mid
;
op
(
p1,
p2
)
; op(p0, p1); op(p1, p2); op(p4, p5); op(p3, p4);
op
(
p4,
p5
)
; op(p0, p3); op(p2, p5); op(p2, p3); op(p1, p4);
...
...
@@ -157,11 +149,8 @@ __kernel void medianFilter5(__global const uchar* srcptr, int srcStep, int srcOf
op
(
p13,
p17
)
; op(p3, p15); op(p11, p23); op(p11, p15); op(p7, p19);
op
(
p7,
p11
)
; op(p11, p13); op(p11, p12);
int
dst_index
=
mad24
(
gy,
dstStep,
dstOffset
+
gx
*
scnbytes
)
;
int
dst_index
=
mad24
(
gy,
dst_step,
mad24
(
gx,
TSIZE,
dst_offset
)
)
;
if
(
gy
<
rows
&&
gx
<
cols
)
{
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_index
)
;
dst[0]
=
p12
;
}
if
(
gy
<
dst_rows
&&
gx
<
dst_cols
)
storepix
(
p12,
dstptr
+
dst_index
)
;
}
\ No newline at end of file
modules/imgproc/src/smooth.cpp
View file @
d63a8ba0
...
...
@@ -639,7 +639,7 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
if
(
!
(
cn
==
1
||
cn
==
2
||
cn
==
4
)
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
||
if
(
cn
>
4
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
||
_src
.
offset
()
%
esz
!=
0
||
_src
.
step
()
%
esz
!=
0
)
return
false
;
...
...
@@ -687,15 +687,17 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
return
false
;
char
cvt
[
2
][
50
];
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
,
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
" -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
" -D ST1=%s -D DT1=%s -D cn=%d"
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
);
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
cn
);
localsize
[
0
]
=
BLOCK_SIZE_X
;
globalsize
[
0
]
=
DIVUP
(
size
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
...
...
@@ -1902,35 +1904,27 @@ medianBlur_SortNet( const Mat& _src, Mat& _dst, int m )
#ifdef HAVE_OPENCL
static
bool
ocl_medianFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
m
)
static
bool
ocl_medianFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
m
)
{
int
type
=
_src
.
type
();
int
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
if
(
!
((
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_16S
||
depth
==
CV_32F
)
&&
(
cn
!=
3
&&
cn
<=
4
)))
return
false
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
const
char
*
kernelName
;
if
(
m
==
3
)
kernelName
=
"medianFilter3"
;
else
if
(
m
==
5
)
kernelName
=
"medianFilter5"
;
else
if
(
!
((
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_16S
||
depth
==
CV_32F
)
&&
cn
<=
4
&&
(
m
==
3
||
m
==
5
))
)
return
false
;
ocl
::
Kernel
k
(
kernelName
,
ocl
::
imgproc
::
medianFilter_oclsrc
,
format
(
"-D type=%s"
,
ocl
::
typeToStr
(
type
)));
ocl
::
Kernel
k
(
format
(
"medianFilter%d"
,
m
).
c_str
(),
ocl
::
imgproc
::
medianFilter_oclsrc
,
format
(
"-D T=%s -D T1=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
_src
.
size
(),
type
);
_dst
.
create
(
src
.
size
(),
type
);
UMat
dst
=
_dst
.
getUMat
();
size_t
globalsize
[
2
]
=
{(
src
.
cols
+
18
)
/
16
*
16
,
(
src
.
rows
+
15
)
/
16
*
16
};
size_t
localsize
[
2
]
=
{
16
,
16
};
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
)).
run
(
2
,
globalsize
,
localsize
,
false
);
size_t
globalsize
[
2
]
=
{
(
src
.
cols
+
18
)
/
16
*
16
,
(
src
.
rows
+
15
)
/
16
*
16
},
localsize
[
2
]
=
{
16
,
16
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
#endif
...
...
@@ -2210,10 +2204,10 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
double
sigma_color
,
double
sigma_space
,
int
borderType
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
i
,
j
,
maxk
,
radius
;
if
(
type
!=
CV_8UC1
)
if
(
depth
!=
CV_8U
||
cn
>
4
)
return
false
;
if
(
sigma_color
<=
0
)
...
...
@@ -2240,9 +2234,9 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
std
::
vector
<
float
>
_color_weight
(
cn
*
256
);
std
::
vector
<
float
>
_space_weight
(
d
*
d
);
std
::
vector
<
int
>
_space_ofs
(
d
*
d
);
float
*
color_weight
=
&
_color_weight
[
0
];
float
*
space_weight
=
&
_space_weight
[
0
];
int
*
space_ofs
=
&
_space_ofs
[
0
];
float
*
const
color_weight
=
&
_color_weight
[
0
];
float
*
const
space_weight
=
&
_space_weight
[
0
];
int
*
const
space_ofs
=
&
_space_ofs
[
0
];
// initialize color-related bilateral filter coefficients
for
(
i
=
0
;
i
<
256
*
cn
;
i
++
)
...
...
@@ -2256,11 +2250,19 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
if
(
r
>
radius
)
continue
;
space_weight
[
maxk
]
=
(
float
)
std
::
exp
(
r
*
r
*
gauss_space_coeff
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
temp
.
step
+
j
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
temp
.
step
+
j
*
cn
);
}
char
cvt
[
3
][
40
];
String
cnstr
=
cn
>
1
?
format
(
"%d"
,
cn
)
:
""
;
ocl
::
Kernel
k
(
"bilateral"
,
ocl
::
imgproc
::
bilateral_oclsrc
,
format
(
"-D radius=%d -D maxk=%d"
,
radius
,
maxk
));
format
(
"-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s"
" -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s"
,
radius
,
maxk
,
cn
,
ocl
::
typeToStr
(
CV_32SC
(
cn
)),
cnstr
.
c_str
(),
ocl
::
convertTypeStr
(
CV_8U
,
CV_32S
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_32FC
(
cn
)),
ocl
::
convertTypeStr
(
CV_32S
,
CV_32F
,
cn
,
cvt
[
1
]),
ocl
::
convertTypeStr
(
CV_32F
,
CV_8U
,
cn
,
cvt
[
2
])));
if
(
k
.
empty
())
return
false
;
...
...
modules/imgproc/test/ocl/test_boxfilter.cpp
View file @
d63a8ba0
...
...
@@ -133,7 +133,7 @@ OCL_TEST_P(SqrBoxFilter, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
BoxFilter
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32S
,
CV_32F
),
Values
(
1
,
2
,
4
)
,
OCL_ALL_CHANNELS
,
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
...
...
@@ -146,7 +146,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
SqrBoxFilter
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
,
CV_64F
),
Values
(
1
,
2
,
4
)
,
OCL_ALL_CHANNELS
,
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
...
...
modules/imgproc/test/ocl/test_filter2d.cpp
View file @
d63a8ba0
...
...
@@ -62,6 +62,7 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
int
borderType
;
bool
useRoi
;
Mat
kernel
;
double
delta
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
...
...
@@ -91,6 +92,8 @@ PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
anchor
.
x
=
randomInt
(
-
1
,
ksize
.
width
);
anchor
.
y
=
randomInt
(
-
1
,
ksize
.
height
);
delta
=
randomDouble
(
-
100
,
100
);
UMAT_UPLOAD_INPUT_PARAMETER
(
src
);
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
);
}
...
...
@@ -108,18 +111,17 @@ OCL_TEST_P(Filter2D, Mat)
{
random_roi
();
OCL_OFF
(
cv
::
filter2D
(
src_roi
,
dst_roi
,
-
1
,
kernel
,
anchor
,
0.0
,
borderType
));
OCL_ON
(
cv
::
filter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernel
,
anchor
,
0.0
,
borderType
));
OCL_OFF
(
cv
::
filter2D
(
src_roi
,
dst_roi
,
-
1
,
kernel
,
anchor
,
delta
,
borderType
));
OCL_ON
(
cv
::
filter2D
(
usrc_roi
,
udst_roi
,
-
1
,
kernel
,
anchor
,
delta
,
borderType
));
Near
(
1.0
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
Filter2D
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_
16S
,
CV_32F
,
CV_64
F
),
Values
(
1
,
2
,
4
)
,
Values
(
CV_8U
,
CV_16U
,
CV_
32
F
),
OCL_ALL_CHANNELS
,
Values
((
BorderType
)
BORDER_CONSTANT
,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
...
...
modules/imgproc/test/ocl/test_filters.cpp
View file @
d63a8ba0
...
...
@@ -152,8 +152,8 @@ OCL_TEST_P(LaplacianTest, Accuracy)
{
random_roi
();
OCL_OFF
(
cv
::
Laplacian
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
scale
,
0
,
borderType
));
OCL_ON
(
cv
::
Laplacian
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
scale
,
0
,
borderType
));
OCL_OFF
(
cv
::
Laplacian
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
scale
,
1
0
,
borderType
));
OCL_ON
(
cv
::
Laplacian
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
scale
,
1
0
,
borderType
));
Near
();
}
...
...
@@ -290,8 +290,6 @@ OCL_TEST_P(MorphologyEx, Mat)
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define FILTER_BORDER_SET_NO_ISOLATED \
...
...
@@ -306,10 +304,10 @@ OCL_TEST_P(MorphologyEx, Mat)
(int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
(int)BORDER_REFLECT_101|BORDER_ISOLATED*/
) // WRAP and ISOLATED are not supported by cv:: version
#define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)
#define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_
16UC1, CV_16UC3, CV_16UC4, CV_
32FC1, CV_32FC3, CV_32FC4)
OCL_INSTANTIATE_TEST_CASE_P
(
Filter
,
Bilateral
,
Combine
(
Values
(
(
MatType
)
CV_8UC1
),
Values
(
CV_8UC1
,
CV_8UC3
),
Values
(
5
,
9
),
// kernel size
Values
(
Size
(
0
,
0
)),
// not used
FILTER_BORDER_SET_NO_ISOLATED
,
...
...
@@ -372,7 +370,6 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values
(
1.0
,
2.0
,
3.0
),
Bool
()));
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
modules/imgproc/test/ocl/test_medianfilter.cpp
View file @
d63a8ba0
...
...
@@ -102,7 +102,7 @@ OCL_TEST_P(MedianFilter, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
MedianFilter
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
1
,
2
,
4
)
,
OCL_ALL_CHANNELS
,
Values
(
3
,
5
),
Bool
())
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment