Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
89605fac
Commit
89605fac
authored
Aug 26, 2014
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3155 from vbystricky:ocloptLaplacian
parents
6d253a20
9ee07891
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
265 additions
and
4 deletions
+265
-4
deriv.cpp
modules/imgproc/src/deriv.cpp
+92
-4
laplacian5.cl
modules/imgproc/src/opencl/laplacian5.cl
+173
-0
No files found.
modules/imgproc/src/deriv.cpp
View file @
89605fac
...
@@ -643,21 +643,109 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
...
@@ -643,21 +643,109 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
namespace
cv
{
namespace
cv
{
#define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * elsize)
static
bool
ocl_Laplacian5
(
InputArray
_src
,
OutputArray
_dst
,
static
bool
ocl_Laplacian5
(
InputArray
_src
,
OutputArray
_dst
,
const
Mat
&
kd
,
const
Mat
&
ks
,
double
scale
,
double
delta
,
const
Mat
&
kd
,
const
Mat
&
ks
,
double
scale
,
double
delta
,
int
borderType
,
int
depth
,
int
ddepth
)
int
borderType
,
int
depth
,
int
ddepth
)
{
{
const
size_t
tileSizeX
=
16
;
const
size_t
tileSizeYmin
=
8
;
const
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
stype
=
_src
.
type
();
int
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
esz
=
CV_ELEM_SIZE
(
stype
);
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
return
false
;
Mat
kernelX
=
kd
.
reshape
(
1
,
1
);
if
(
kernelX
.
cols
%
2
!=
1
)
return
false
;
Mat
kernelY
=
ks
.
reshape
(
1
,
1
);
if
(
kernelY
.
cols
%
2
!=
1
)
return
false
;
CV_Assert
(
kernelX
.
cols
==
kernelY
.
cols
);
size_t
wgs
=
dev
.
maxWorkGroupSize
();
size_t
lmsz
=
dev
.
localMemSize
();
size_t
src_step
=
_src
.
step
(),
src_offset
=
_src
.
offset
();
if
(((
src_offset
%
src_step
)
%
esz
==
0
)
&&
(
(
borderType
==
BORDER_CONSTANT
||
borderType
==
BORDER_REPLICATE
)
||
((
borderType
==
BORDER_REFLECT
||
borderType
==
BORDER_WRAP
||
borderType
==
BORDER_REFLECT_101
)
&&
(
_src
.
cols
()
>=
kernelX
.
cols
&&
_src
.
rows
()
>=
kernelY
.
cols
))
)
&&
(
tileSizeX
*
tileSizeYmin
<=
wgs
)
&&
(
LAPLACIAN_LOCAL_MEM
(
tileSizeX
,
tileSizeYmin
,
kernelX
.
cols
,
cn
*
4
)
<=
lmsz
)
)
{
Size
size
=
_src
.
size
(),
wholeSize
;
Point
origin
;
int
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
int
wdepth
=
CV_32F
;
size_t
tileSizeY
=
wgs
/
tileSizeX
;
while
((
tileSizeX
*
tileSizeY
>
wgs
)
||
(
LAPLACIAN_LOCAL_MEM
(
tileSizeX
,
tileSizeY
,
kernelX
.
cols
,
cn
*
4
)
>
lmsz
))
{
tileSizeY
/=
2
;
}
size_t
lt2
[
2
]
=
{
tileSizeX
,
tileSizeY
};
size_t
gt2
[
2
]
=
{
lt2
[
0
]
*
(
1
+
(
size
.
width
-
1
)
/
lt2
[
0
]),
lt2
[
1
]
};
char
cvt
[
2
][
40
];
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
" -D convertToWT=%s -D convertToDT=%s"
" -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s"
" -D srcT=%s -D dstT=%s -D WT=%s"
" -D CN=%d "
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
kernelX
.
cols
/
2
,
ocl
::
kernelToStr
(
kernelX
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
kernelY
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
],
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
sdepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
cn
)),
cn
);
ocl
::
Kernel
k
(
"laplacian"
,
ocl
::
imgproc
::
laplacian5_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
size
,
dtype
);
UMat
dst
=
_dst
.
getUMat
();
int
src_offset_x
=
static_cast
<
int
>
((
src_offset
%
src_step
)
/
esz
);
int
src_offset_y
=
static_cast
<
int
>
(
src_offset
/
src_step
);
src
.
locateROI
(
wholeSize
,
origin
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
src_offset_x
,
src_offset_y
,
wholeSize
.
height
,
wholeSize
.
width
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
scale
),
static_cast
<
float
>
(
delta
));
return
k
.
run
(
2
,
gt2
,
lt2
,
false
);
}
int
iscale
=
cvRound
(
scale
),
idelta
=
cvRound
(
delta
);
int
iscale
=
cvRound
(
scale
),
idelta
=
cvRound
(
delta
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
bool
floatCoeff
=
std
::
fabs
(
delta
-
idelta
)
>
DBL_EPSILON
||
std
::
fabs
(
scale
-
iscale
)
>
DBL_EPSILON
;
floatCoeff
=
std
::
fabs
(
delta
-
idelta
)
>
DBL_EPSILON
||
std
::
fabs
(
scale
-
iscale
)
>
DBL_EPSILON
;
int
wdepth
=
std
::
max
(
depth
,
floatCoeff
?
CV_32F
:
CV_32S
),
kercn
=
1
;
int
cn
=
_src
.
channels
(),
wdepth
=
std
::
max
(
depth
,
floatCoeff
?
CV_32F
:
CV_32S
),
kercn
=
1
;
if
(
!
doubleSupport
&&
wdepth
==
CV_64F
)
if
(
!
doubleSupport
&&
wdepth
==
CV_64F
)
return
false
;
return
false
;
char
cvt
[
2
][
40
];
char
cvt
[
2
][
40
];
ocl
::
Kernel
k
(
"sumConvert"
,
ocl
::
imgproc
::
laplacian5_oclsrc
,
ocl
::
Kernel
k
(
"sumConvert"
,
ocl
::
imgproc
::
laplacian5_oclsrc
,
format
(
"-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
format
(
"-D ONLY_SUM_CONVERT "
"-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
"-D convertToWT=%s -D convertToDT=%s%s"
,
"-D convertToWT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
...
...
modules/imgproc/src/opencl/laplacian5.cl
View file @
89605fac
...
@@ -5,8 +5,11 @@
...
@@ -5,8 +5,11 @@
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
define
noconvert
#
define
noconvert
#
ifdef
ONLY_SUM_CONVERT
__kernel
void
sumConvert
(
__global
const
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__kernel
void
sumConvert
(
__global
const
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
...
@@ -32,3 +35,172 @@ __kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1
...
@@ -32,3 +35,172 @@ __kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1
#
endif
#
endif
}
}
}
}
#
else
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro
for
border
type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_CONSTANT
//
CCCCCC|abcdefgh|CCCCCCC
#
define
EXTRAPOLATE
(
x,
maxV
)
#
elif
defined
BORDER_REPLICATE
//
aaaaaa|abcdefgh|hhhhhhh
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
max
(
min
((
x
)
,
(
maxV
)
-
1
)
,
0
)
; \
}
#
elif
defined
BORDER_WRAP
//
cdefgh|abcdefgh|abcdefg
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
(
(
x
)
+
(
maxV
)
)
%
(
maxV
)
; \
}
#
elif
defined
BORDER_REFLECT
//
fedcba|abcdefgh|hgfedcb
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(((
maxV
)
-1
)
*2-
(
x
)
+1
,
max
((
x
)
,
-
(
x
)
-1
)
)
; \
}
#
elif
defined
BORDER_REFLECT_101
//
gfedcb|abcdefgh|gfedcba
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(((
maxV
)
-1
)
*2-
(
x
)
,
max
((
x
)
,
-
(
x
))
)
; \
}
#
else
#
error
No
extrapolation
method
#
endif
#
if
CN
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
SRCSIZE
(
int
)
sizeof
(
srcT
)
#
define
DSTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
SRCSIZE
(
int
)
sizeof
(
srcT1
)
*3
#
define
DSTSIZE
(
int
)
sizeof
(
dstT1
)
*3
#
endif
#
define
SRC
(
_x,_y
)
convertToWT
(
loadpix
(
Src
+
mad24
(
_y,
src_step,
SRCSIZE
*
_x
)))
#
ifdef
BORDER_CONSTANT
//
CCCCCC|abcdefgh|CCCCCCC
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
(
_x
)
<0
| (_x) >= (r_edge) |
(
_y
)
<0
|
(
_y
)
>=
(
t_edge
)
?
(
const_v
)
:
SRC
((
_x
)
,
(
_y
))
#
else
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
SRC
((
_x
)
,
(
_y
))
#
endif
//
horizontal
and
vertical
filter
kernels
//
should
be
defined
on
host
during
compile
time
to
avoid
overhead
#
define
DIG
(
a
)
a,
__constant
WT1
mat_kernelX[]
=
{
KERNEL_MATRIX_X
}
;
__constant
WT1
mat_kernelY[]
=
{
KERNEL_MATRIX_Y
}
;
__kernel
void
laplacian
(
__global
uchar*
Src,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
height,
int
width,
__global
uchar*
Dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
WT1
scale,
WT1
delta
)
{
__local
WT
lsmem[BLK_Y
+
2
*
RADIUS][BLK_X
+
2
*
RADIUS]
;
__local
WT
lsmemDy1[BLK_Y][BLK_X
+
2
*
RADIUS]
;
__local
WT
lsmemDy2[BLK_Y][BLK_X
+
2
*
RADIUS]
;
int
lix
=
get_local_id
(
0
)
;
int
liy
=
get_local_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
srcX
=
x
+
srcOffsetX
-
RADIUS
;
int
clocY
=
liy
;
do
{
int
yb
=
clocY
+
srcOffsetY
-
RADIUS
;
EXTRAPOLATE
(
yb,
(
height
))
;
int
clocX
=
lix
;
int
cSrcX
=
srcX
;
do
{
int
xb
=
cSrcX
;
EXTRAPOLATE
(
xb,
(
width
))
;
lsmem[clocY][clocX]
=
ELEM
(
xb,
yb,
(
width
)
,
(
height
)
,
0
)
;
clocX
+=
BLK_X
;
cSrcX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
clocY
+=
BLK_Y
;
}
while
(
clocY
<
BLK_Y+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
WT
scale_v
=
(
WT
)
scale
;
WT
delta_v
=
(
WT
)
delta
;
for
(
int
y
=
0
; y < dst_rows; y+=BLK_Y)
{
int
i,
clocX
=
lix
;
WT
sum1
=
(
WT
)
0
;
WT
sum2
=
(
WT
)
0
;
do
{
sum1
=
(
WT
)
0
;
sum2
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUS; i++)
{
sum1
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum1
)
;
sum2
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelX[i],
sum2
)
;
}
lsmemDy1[liy][clocX]
=
sum1
;
lsmemDy2[liy][clocX]
=
sum2
;
clocX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
((
x
<
dst_cols
)
&&
(
y
+
liy
<
dst_rows
))
{
sum1
=
(
WT
)
0
;
sum2
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUS; i++)
{
sum1
=
mad
(
lsmemDy1[liy][lix+i],
mat_kernelX[i],
sum1
)
;
sum2
=
mad
(
lsmemDy2[liy][lix+i],
mat_kernelY[i],
sum2
)
;
}
WT
sum
=
mad
(
scale_v,
(
sum1
+
sum2
)
,
delta_v
)
;
storepix
(
convertToDT
(
sum
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
}
for
(
int
i
=
liy
*
BLK_X
+
lix
; i < (RADIUS*2) * (BLK_X+(RADIUS*2)); i += BLK_X * BLK_Y)
{
int
clocX
=
i
%
(
BLK_X+
(
RADIUS*2
))
;
int
clocY
=
i
/
(
BLK_X+
(
RADIUS*2
))
;
lsmem[clocY][clocX]
=
lsmem[clocY
+
BLK_Y][clocX]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
yb
=
y
+
liy
+
BLK_Y
+
srcOffsetY
+
RADIUS
;
EXTRAPOLATE
(
yb,
(
height
))
;
clocX
=
lix
;
int
cSrcX
=
x
+
srcOffsetX
-
RADIUS
;
do
{
int
xb
=
cSrcX
;
EXTRAPOLATE
(
xb,
(
width
))
;
lsmem[liy
+
2*RADIUS][clocX]
=
ELEM
(
xb,
yb,
(
width
)
,
(
height
)
,
0
)
;
clocX
+=
BLK_X
;
cSrcX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#
endif
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment