Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8c91a1af
Commit
8c91a1af
authored
Dec 19, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 19, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2005 from krodyush:pullreq/2.4-opt-131114-extractCovData
parents
22a3cf0f
de431609
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
417 additions
and
129 deletions
+417
-129
imgproc.cpp
modules/ocl/src/imgproc.cpp
+103
-53
imgproc_sobel3.cl
modules/ocl/src/opencl/imgproc_sobel3.cl
+314
-76
No files found.
modules/ocl/src/imgproc.cpp
View file @
8c91a1af
...
...
@@ -1033,67 +1033,117 @@ namespace cv
else
scale
=
1.
/
scale
;
if
(
ksize
>
0
)
const
int
sobel_lsz
=
16
;
if
((
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_32FC1
)
&&
(
ksize
==
3
||
ksize
==
5
||
ksize
==
7
||
ksize
==-
1
)
&&
src
.
wholerows
>
sobel_lsz
+
(
ksize
>>
1
)
&&
src
.
wholecols
>
sobel_lsz
+
(
ksize
>>
1
))
{
Context
*
clCxt
=
Context
::
getContext
();
if
(
clCxt
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
)
&&
src
.
type
()
==
CV_8UC1
&&
src
.
cols
%
8
==
0
&&
src
.
rows
%
8
==
0
&&
ksize
==
3
&&
(
borderType
==
cv
::
BORDER_REFLECT
||
borderType
==
cv
::
BORDER_REPLICATE
||
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_WRAP
))
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
CV_Assert
(
Dx
.
rows
==
Dy
.
rows
&&
Dx
.
cols
==
Dy
.
cols
);
size_t
lt2
[
3
]
=
{
sobel_lsz
,
sobel_lsz
,
1
};
size_t
gt2
[
3
]
=
{
lt2
[
0
]
*
(
1
+
(
src
.
cols
-
1
)
/
lt2
[
0
]),
lt2
[
1
]
*
(
1
+
(
src
.
rows
-
1
)
/
lt2
[
1
]),
1
};
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
Dx_pitch
=
Dx
.
step
;
unsigned
int
Dy_pitch
=
Dy
.
step
;
int
src_offset_x
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_offset_y
=
src
.
offset
/
src
.
step
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_x
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_y
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dx_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dy
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dy_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholecols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholerows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
_scale
));
string
option
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d"
,(
int
)
lt2
[
0
],(
int
)
lt2
[
1
]);
switch
(
src
.
type
())
{
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
const
unsigned
int
block_x
=
8
;
const
unsigned
int
block_y
=
8
;
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
dst_pitch
=
Dx
.
cols
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
dst_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
_scale
));
size_t
gt2
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
},
lt2
[
3
]
=
{
block_x
,
block_y
,
1
};
string
option
=
"-D BLK_X=8 -D BLK_Y=8"
;
switch
(
borderType
)
{
case
cv
:
:
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
:
:
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
:
:
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT101"
;
break
;
case
cv
:
:
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
"sobel3"
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
case
CV_8UC1
:
option
+=
" -D SRCTYPE=uchar"
;
break
;
case
CV_32FC1
:
option
+=
" -D SRCTYPE=float"
;
break
;
}
else
switch
(
borderType
)
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
case
cv
:
:
BORDER_CONSTANT
:
option
+=
" -D BORDER_CONSTANT"
;
break
;
case
cv
:
:
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
:
:
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
:
:
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT_101"
;
break
;
case
cv
:
:
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
default
:
CV_Error
(
CV_StsBadFlag
,
"BORDER type is not supported!"
);
break
;
}
string
kernel_name
;
switch
(
ksize
)
{
case
-
1
:
option
+=
" -D SCHARR"
;
kernel_name
=
"sobel3"
;
break
;
case
3
:
kernel_name
=
"sobel3"
;
break
;
case
5
:
kernel_name
=
"sobel5"
;
break
;
case
7
:
kernel_name
=
"sobel7"
;
break
;
default
:
CV_Error
(
CV_StsBadFlag
,
"Kernel size is not supported!"
);
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
kernel_name
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
}
else
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
if
(
ksize
>
0
)
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
}
else
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
}
}
CV_Assert
(
Dx
.
offset
==
0
&&
Dy
.
offset
==
0
);
}
...
...
modules/ocl/src/opencl/imgproc_sobel3.cl
View file @
8c91a1af
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#
ifdef
BORDER_REFLECT101
//BORDER_REFLECT101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
int srcX = x + srcOffsetX - (kernel_border); \
int srcY = y + srcOffsetY - (kernel_border); \
int xb = srcX; \
int yb = srcY; \
\
EXTRAPOLATE(xb, (width)); \
EXTRAPOLATE(yb, (height)); \
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
\
if(lix < ((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
EXTRAPOLATE(xb,(width)); \
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(liy< ((kernel_border)*2)) \
{ \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(yb, (height)); \
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(xb,(width)); \
EXTRAPOLATE(yb,(height)); \
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
}
__kernel void sobel3(
__global uchar* Src,
__global
float*
DstX,
__global
float*
DstY,
int
width,
int
height,
uint
srcStride,
uint
dstStride,
float
scale
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+2][BLK_X+2];
...
...
@@ -47,62 +99,249 @@ __kernel void sobel3(
int lix = get_local_id(0);
int liy = get_local_id(1);
int
gix
=
get_group_id
(
0
)
;
int
giy
=
get_group_id
(
1
)
;
int
id_x
=
get_global_id
(
0
)
;
int
id_y
=
get_global_id
(
1
)
;
lsmem[liy+1][lix+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x
]
)
;
int
id_y_h
=
ADDR_H
(
id_y-1,
0
,
height
)
;
int
id_y_b
=
ADDR_B
(
id_y+1,
height,id_y+1
)
;
int
id_x_l
=
ADDR_L
(
id_x-1,
0
,
width
)
;
int
id_x_r
=
ADDR_R
(
id_x+1,
width,id_x+1
)
;
if
(
liy==0
)
{
lsmem[0][lix+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[0][0]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[0][BLK_X+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_r
]
)
;
}
else
if
(
liy==BLK_Y-1
)
{
lsmem[BLK_Y+1][lix+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[BLK_Y+1][0]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[BLK_Y+1][BLK_X+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_r
]
)
;
}
if
(
lix==0
)
lsmem[liy+1][0]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[liy+1][BLK_X+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_r
]
)
;
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float u1 = lsmem[liy][lix];
float u2 = lsmem[liy][lix+1];
float u3 = lsmem[liy][lix+2];
float m1 = lsmem[liy+1][lix];
float
m2
=
lsmem[liy+1][lix+1]
;
float m3 = lsmem[liy+1][lix+2];
float b1 = lsmem[liy+2][lix];
float b2 = lsmem[liy+2][lix+1];
float b3 = lsmem[liy+2][lix+2];
//m2
*
scale
;//
float
dx
=
mad
(
2.0f,
m3
-
m1,
u3
-
u1
+
b3
-
b1
)
;
DstX[
id_y
*
dstStride
+
id_x
]
=
dx
*
scale
;
//calc and store dx and dy;//
#ifdef SCHARR
DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
#else
DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}
__kernel void sobel5(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+4][BLK_X+4];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float t1 = lsmem[liy][lix];
float t2 = lsmem[liy][lix+1];
float t3 = lsmem[liy][lix+2];
float t4 = lsmem[liy][lix+3];
float t5 = lsmem[liy][lix+4];
float u1 = lsmem[liy+1][lix];
float u2 = lsmem[liy+1][lix+1];
float u3 = lsmem[liy+1][lix+2];
float u4 = lsmem[liy+1][lix+3];
float u5 = lsmem[liy+1][lix+4];
float m1 = lsmem[liy+2][lix];
float m2 = lsmem[liy+2][lix+1];
float m4 = lsmem[liy+2][lix+3];
float m5 = lsmem[liy+2][lix+4];
float l1 = lsmem[liy+3][lix];
float l2 = lsmem[liy+3][lix+1];
float l3 = lsmem[liy+3][lix+2];
float l4 = lsmem[liy+3][lix+3];
float l5 = lsmem[liy+3][lix+4];
float b1 = lsmem[liy+4][lix];
float b2 = lsmem[liy+4][lix+1];
float b3 = lsmem[liy+4][lix+2];
float b4 = lsmem[liy+4][lix+3];
float b5 = lsmem[liy+4][lix+4];
//calc and store dx and dy;//
DSTX(x,y) = scale *
mad(12.0f, m4 - m2,
mad(6.0f, m5 - m1,
mad(8.0f, u4 - u2 + l4 - l2,
mad(4.0f, u5 - u1 + l5 - l1,
mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
)
)
)
);
DSTY(x,y) = scale *
mad(12.0f, l3 - u3,
mad(6.0f, b3 - t3,
mad(8.0f, l2 - u2 + l4 - u4,
mad(4.0f, b2 - t2 + b4 - t4,
mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
)
)
)
);
}
__kernel void sobel7(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+6][BLK_X+6];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float tt1 = lsmem[liy][lix];
float tt2 = lsmem[liy][lix+1];
float tt3 = lsmem[liy][lix+2];
float tt4 = lsmem[liy][lix+3];
float tt5 = lsmem[liy][lix+4];
float tt6 = lsmem[liy][lix+5];
float tt7 = lsmem[liy][lix+6];
float t1 = lsmem[liy+1][lix];
float t2 = lsmem[liy+1][lix+1];
float t3 = lsmem[liy+1][lix+2];
float t4 = lsmem[liy+1][lix+3];
float t5 = lsmem[liy+1][lix+4];
float t6 = lsmem[liy+1][lix+5];
float t7 = lsmem[liy+1][lix+6];
float u1 = lsmem[liy+2][lix];
float u2 = lsmem[liy+2][lix+1];
float u3 = lsmem[liy+2][lix+2];
float u4 = lsmem[liy+2][lix+3];
float u5 = lsmem[liy+2][lix+4];
float u6 = lsmem[liy+2][lix+5];
float u7 = lsmem[liy+2][lix+6];
float m1 = lsmem[liy+3][lix];
float m2 = lsmem[liy+3][lix+1];
float m3 = lsmem[liy+3][lix+2];
float m5 = lsmem[liy+3][lix+4];
float m6 = lsmem[liy+3][lix+5];
float m7 = lsmem[liy+3][lix+6];
float l1 = lsmem[liy+4][lix];
float l2 = lsmem[liy+4][lix+1];
float l3 = lsmem[liy+4][lix+2];
float l4 = lsmem[liy+4][lix+3];
float l5 = lsmem[liy+4][lix+4];
float l6 = lsmem[liy+4][lix+5];
float l7 = lsmem[liy+4][lix+6];
float b1 = lsmem[liy+5][lix];
float b2 = lsmem[liy+5][lix+1];
float b3 = lsmem[liy+5][lix+2];
float b4 = lsmem[liy+5][lix+3];
float b5 = lsmem[liy+5][lix+4];
float b6 = lsmem[liy+5][lix+5];
float b7 = lsmem[liy+5][lix+6];
float bb1 = lsmem[liy+6][lix];
float bb2 = lsmem[liy+6][lix+1];
float bb3 = lsmem[liy+6][lix+2];
float bb4 = lsmem[liy+6][lix+3];
float bb5 = lsmem[liy+6][lix+4];
float bb6 = lsmem[liy+6][lix+5];
float bb7 = lsmem[liy+6][lix+6];
//calc and store dx and dy
DSTX(x,y) = scale *
mad(100.0f, m5 - m3,
mad(80.0f, m6 - m2,
mad(20.0f, m7 - m1,
mad(75.0f, u5 - u3 + l5 - l3,
mad(60.0f, u6 - u2 + l6 - l2,
mad(15.0f, u7 - u1 + l7 - l1,
mad(30.0f, t5 - t3 + b5 - b3,
mad(24.0f, t6 - t2 + b6 - b2,
mad(6.0f, t7 - t1 + b7 - b1,
mad(5.0f, tt5 - tt3 + bb5 - bb3,
mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
)
)
)
)
)
)
)
)
)
);
float
dy
=
mad
(
2.0f,
b2
-
u2,
b1
-
u1
+
b3
-
u3
)
;
DstY[
id_y
*
dstStride
+
id_x
]
=
dy
*
scale
;
}
\ No newline at end of file
DSTY(x,y) = scale *
mad(100.0f, l4 - u4,
mad(80.0f, b4 - t4,
mad(20.0f, bb4 - tt4,
mad(75.0f, l5 - u5 + l3 - u3,
mad(60.0f, b5 - t5 + b3 - t3,
mad(15.0f, bb5 - tt5 + bb3 - tt3,
mad(30.0f, l6 - u6 + l2 - u2,
mad(24.0f, b6 - t6 + b2 - t2,
mad(6.0f, bb6 - tt6 + bb2 - tt2,
mad(5.0f, l7 - u7 + l1 - u1,
mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
)
)
)
)
)
)
)
)
)
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment