Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
8c91a1af
Commit
8c91a1af
authored
Dec 19, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 19, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2005 from krodyush:pullreq/2.4-opt-131114-extractCovData
parents
22a3cf0f
de431609
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
417 additions
and
129 deletions
+417
-129
imgproc.cpp
modules/ocl/src/imgproc.cpp
+103
-53
imgproc_sobel3.cl
modules/ocl/src/opencl/imgproc_sobel3.cl
+314
-76
No files found.
modules/ocl/src/imgproc.cpp
View file @
8c91a1af
...
@@ -1033,67 +1033,117 @@ namespace cv
...
@@ -1033,67 +1033,117 @@ namespace cv
else
else
scale
=
1.
/
scale
;
scale
=
1.
/
scale
;
if
(
ksize
>
0
)
const
int
sobel_lsz
=
16
;
if
((
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_32FC1
)
&&
(
ksize
==
3
||
ksize
==
5
||
ksize
==
7
||
ksize
==-
1
)
&&
src
.
wholerows
>
sobel_lsz
+
(
ksize
>>
1
)
&&
src
.
wholecols
>
sobel_lsz
+
(
ksize
>>
1
))
{
{
Context
*
clCxt
=
Context
::
getContext
();
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
if
(
clCxt
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
)
&&
src
.
type
()
==
CV_8UC1
&&
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
src
.
cols
%
8
==
0
&&
src
.
rows
%
8
==
0
&&
ksize
==
3
&&
CV_Assert
(
Dx
.
rows
==
Dy
.
rows
&&
Dx
.
cols
==
Dy
.
cols
);
(
borderType
==
cv
::
BORDER_REFLECT
||
borderType
==
cv
::
BORDER_REPLICATE
||
size_t
lt2
[
3
]
=
{
sobel_lsz
,
sobel_lsz
,
1
};
borderType
==
cv
::
BORDER_REFLECT101
||
size_t
gt2
[
3
]
=
{
lt2
[
0
]
*
(
1
+
(
src
.
cols
-
1
)
/
lt2
[
0
]),
lt2
[
1
]
*
(
1
+
(
src
.
rows
-
1
)
/
lt2
[
1
]),
1
};
borderType
==
cv
::
BORDER_WRAP
))
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
Dx_pitch
=
Dx
.
step
;
unsigned
int
Dy_pitch
=
Dy
.
step
;
int
src_offset_x
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_offset_y
=
src
.
offset
/
src
.
step
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_x
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_y
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dx_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dy
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dy_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholecols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholerows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
_scale
));
string
option
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d"
,(
int
)
lt2
[
0
],(
int
)
lt2
[
1
]);
switch
(
src
.
type
())
{
{
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
case
CV_8UC1
:
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
option
+=
" -D SRCTYPE=uchar"
;
break
;
const
unsigned
int
block_x
=
8
;
case
CV_32FC1
:
const
unsigned
int
block_y
=
8
;
option
+=
" -D SRCTYPE=float"
;
break
;
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
dst_pitch
=
Dx
.
cols
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
dst_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
_scale
));
size_t
gt2
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
},
lt2
[
3
]
=
{
block_x
,
block_y
,
1
};
string
option
=
"-D BLK_X=8 -D BLK_Y=8"
;
switch
(
borderType
)
{
case
cv
:
:
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
:
:
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
:
:
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT101"
;
break
;
case
cv
:
:
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
"sobel3"
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
}
}
else
switch
(
borderType
)
{
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
case
cv
:
:
BORDER_CONSTANT
:
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
option
+=
" -D BORDER_CONSTANT"
;
break
;
case
cv
:
:
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
:
:
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
:
:
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT_101"
;
break
;
case
cv
:
:
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
default
:
CV_Error
(
CV_StsBadFlag
,
"BORDER type is not supported!"
);
break
;
}
}
string
kernel_name
;
switch
(
ksize
)
{
case
-
1
:
option
+=
" -D SCHARR"
;
kernel_name
=
"sobel3"
;
break
;
case
3
:
kernel_name
=
"sobel3"
;
break
;
case
5
:
kernel_name
=
"sobel5"
;
break
;
case
7
:
kernel_name
=
"sobel7"
;
break
;
default
:
CV_Error
(
CV_StsBadFlag
,
"Kernel size is not supported!"
);
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
kernel_name
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
}
}
else
else
{
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
if
(
ksize
>
0
)
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
}
else
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
}
}
}
CV_Assert
(
Dx
.
offset
==
0
&&
Dy
.
offset
==
0
);
CV_Assert
(
Dx
.
offset
==
0
&&
Dy
.
offset
==
0
);
}
}
...
...
modules/ocl/src/opencl/imgproc_sobel3.cl
View file @
8c91a1af
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
#ifdef BORDER_CONSTANT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
//CCCCCC|abcdefgh|CCCCCCC
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#define EXTRAPOLATE(x, maxV)
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#elif defined BORDER_REPLICATE
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
//aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#endif
#
ifdef
BORDER_REFLECT101
#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
//BORDER_REFLECT101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
ifdef
BORDER_WRAP
#ifdef BORDER_CONSTANT
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
//CCCCCC|abcdefgh|CCCCCCC
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#else
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#endif
#endif
// DSTX / DSTY: lvalue accessors for one float element of the X- and
// Y-derivative outputs at column _x, row _y.
// The offset (DstXOffset / DstYOffset) and the row step (_y * pitch) are added
// to the base pointer BEFORE the cast to `global float*`; with the
// `__global uchar*` DstX/DstY parameters declared by the kernels in this file,
// that makes offset and pitch byte quantities, while _x indexes floats.
// The macros pick up DstX, DstXOffset, DstXPitch (resp. DstY, ...) from the
// scope in which they are expanded.
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
// INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border)
//
// Fills the `lsmem` tile for the current work-item, including the extra
// 2*kernel_border columns and rows needed by the filter window.
//
// Names that must already exist at the expansion site:
//   x, y, lix, liy, lsmem, srcOffsetX, srcOffsetY, BLK_X, BLK_Y,
//   and the macros EXTRAPOLATE(coord, max) and ELEM(x, y, w, h, const).
// Names this macro declares in the enclosing scope: srcX, srcY, xb, yb.
//
// What each part does:
//   1. srcX/srcY = work-item coordinate + source offset - kernel_border;
//      xb/yb are copies passed through EXTRAPOLATE and then read via ELEM
//      into lsmem[liy][lix].
//   2. Work-items with lix < 2*kernel_border also load the column BLK_X to
//      the right (lsmem[liy][lix+BLK_X]); the inner `int xb` shadows the
//      outer one only inside that brace pair.
//   3. Work-items with liy < 2*kernel_border also load the row BLK_Y below
//      (lsmem[liy+BLK_Y][lix]); this branch shadows yb and reuses the outer,
//      already extrapolated xb.
//   4. Work-items satisfying both conditions load the corner element
//      lsmem[liy+BLK_Y][lix+BLK_X].
//
// The macro contains no barrier; the kernels in this file call
// barrier(CLK_LOCAL_MEM_FENCE) immediately after expanding it.
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
int srcX = x + srcOffsetX - (kernel_border); \
int srcY = y + srcOffsetY - (kernel_border); \
int xb = srcX; \
int yb = srcY; \
\
EXTRAPOLATE(xb, (width)); \
EXTRAPOLATE(yb, (height)); \
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
\
if(lix < ((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
EXTRAPOLATE(xb,(width)); \
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(liy< ((kernel_border)*2)) \
{ \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(yb, (height)); \
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(xb,(width)); \
EXTRAPOLATE(yb,(height)); \
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
}
__kernel void sobel3(
__kernel void sobel3(
__global uchar* Src,
__global uchar* Src,
__global
float*
DstX,
const uint SrcPitch,
__global
float*
DstY,
const int srcOffsetX,
int
width,
int
height,
const int srcOffsetY,
uint
srcStride,
uint
dstStride,
__global uchar* DstX,
float
scale
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
)
{
{
__local float lsmem[BLK_Y+2][BLK_X+2];
__local float lsmem[BLK_Y+2][BLK_X+2];
...
@@ -47,62 +99,249 @@ __kernel void sobel3(
...
@@ -47,62 +99,249 @@ __kernel void sobel3(
int lix = get_local_id(0);
int lix = get_local_id(0);
int liy = get_local_id(1);
int liy = get_local_id(1);
int
gix
=
get_group_id
(
0
)
;
int x = (int)get_global_id(0);
int
giy
=
get_group_id
(
1
)
;
int y = (int)get_global_id(1);
int
id_x
=
get_global_id
(
0
)
;
int
id_y
=
get_global_id
(
1
)
;
lsmem[liy+1][lix+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x
]
)
;
int
id_y_h
=
ADDR_H
(
id_y-1,
0
,
height
)
;
int
id_y_b
=
ADDR_B
(
id_y+1,
height,id_y+1
)
;
int
id_x_l
=
ADDR_L
(
id_x-1,
0
,
width
)
;
int
id_x_r
=
ADDR_R
(
id_x+1,
width,id_x+1
)
;
if
(
liy==0
)
{
lsmem[0][lix+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[0][0]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[0][BLK_X+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_r
]
)
;
}
else
if
(
liy==BLK_Y-1
)
{
lsmem[BLK_Y+1][lix+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[BLK_Y+1][0]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[BLK_Y+1][BLK_X+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_r
]
)
;
}
if
(
lix==0
)
lsmem[liy+1][0]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[liy+1][BLK_X+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_r
]
)
;
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float u1 = lsmem[liy][lix];
float u1 = lsmem[liy][lix];
float u2 = lsmem[liy][lix+1];
float u2 = lsmem[liy][lix+1];
float u3 = lsmem[liy][lix+2];
float u3 = lsmem[liy][lix+2];
float m1 = lsmem[liy+1][lix];
float m1 = lsmem[liy+1][lix];
float
m2
=
lsmem[liy+1][lix+1]
;
float m3 = lsmem[liy+1][lix+2];
float m3 = lsmem[liy+1][lix+2];
float b1 = lsmem[liy+2][lix];
float b1 = lsmem[liy+2][lix];
float b2 = lsmem[liy+2][lix+1];
float b2 = lsmem[liy+2][lix+1];
float b3 = lsmem[liy+2][lix+2];
float b3 = lsmem[liy+2][lix+2];
//m2
*
scale
;//
//calc and store dx and dy;//
float
dx
=
mad
(
2.0f,
m3
-
m1,
u3
-
u1
+
b3
-
b1
)
;
#ifdef SCHARR
DstX[
id_y
*
dstStride
+
id_x
]
=
dx
*
scale
;
DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
#else
DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}
// sobel5: computes the X and Y derivative responses of a 5x5 window for one
// output pixel per work-item and stores them, multiplied by `scale`, through
// the DSTX / DSTY accessors.
//
// Flow:
//   * every work-item helps fill the (BLK_Y+4) x (BLK_X+4) __local tile via
//     INIT_AND_READ_LOCAL_SOURCE (kernel_border = 2, fill constant 0);
//   * after the local-memory barrier, work-items outside dstWidth x dstHeight
//     exit without writing;
//   * the remaining work-items read their window from the tile and accumulate
//     the weighted differences with mad().
//
// Window taps are named wRC (R = row 0..4 top to bottom, C = column 0..4 left
// to right); the centre tap w22 is not needed by either derivative.
__kernel void sobel5(
    __global uchar* Src,
    const uint SrcPitch,
    const int srcOffsetX,
    const int srcOffsetY,
    __global uchar* DstX,
    const int DstXOffset,
    const uint DstXPitch,
    __global uchar* DstY,
    const int DstYOffset,
    const uint DstYPitch,
    int width,
    int height,
    int dstWidth,
    int dstHeight,
    float scale
    )
{
    __local float lsmem[BLK_Y+4][BLK_X+4];

    // These four names are consumed by INIT_AND_READ_LOCAL_SOURCE below.
    int lix = get_local_id(0);
    int liy = get_local_id(1);
    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
    barrier(CLK_LOCAL_MEM_FENCE);

    // Must come after the barrier: out-of-range work-items still load tile data.
    if( x >= dstWidth || y >=dstHeight ) return;

    // Row 0 (top)
    float w00 = lsmem[liy][lix];
    float w01 = lsmem[liy][lix+1];
    float w02 = lsmem[liy][lix+2];
    float w03 = lsmem[liy][lix+3];
    float w04 = lsmem[liy][lix+4];

    // Row 1
    float w10 = lsmem[liy+1][lix];
    float w11 = lsmem[liy+1][lix+1];
    float w12 = lsmem[liy+1][lix+2];
    float w13 = lsmem[liy+1][lix+3];
    float w14 = lsmem[liy+1][lix+4];

    // Row 2 (centre row; centre column is unused)
    float w20 = lsmem[liy+2][lix];
    float w21 = lsmem[liy+2][lix+1];
    float w23 = lsmem[liy+2][lix+3];
    float w24 = lsmem[liy+2][lix+4];

    // Row 3
    float w30 = lsmem[liy+3][lix];
    float w31 = lsmem[liy+3][lix+1];
    float w32 = lsmem[liy+3][lix+2];
    float w33 = lsmem[liy+3][lix+3];
    float w34 = lsmem[liy+3][lix+4];

    // Row 4 (bottom)
    float w40 = lsmem[liy+4][lix];
    float w41 = lsmem[liy+4][lix+1];
    float w42 = lsmem[liy+4][lix+2];
    float w43 = lsmem[liy+4][lix+3];
    float w44 = lsmem[liy+4][lix+4];

    // X derivative: accumulate from the outermost rows towards the centre row,
    // innermost mad() first so the evaluation order matches a nested chain.
    float gx = w04 - w00 + w44 - w40;
    gx = mad(2.0f, w03 - w01 + w43 - w41, gx);
    gx = mad(4.0f, w14 - w10 + w34 - w30, gx);
    gx = mad(8.0f, w13 - w11 + w33 - w31, gx);
    gx = mad(6.0f, w24 - w20, gx);
    gx = mad(12.0f, w23 - w21, gx);
    DSTX(x,y) = scale * gx;

    // Y derivative: same scheme, outermost columns towards the centre column.
    float gy = w40 - w00 + w44 - w04;
    gy = mad(2.0f, w30 - w10 + w34 - w14, gy);
    gy = mad(4.0f, w41 - w01 + w43 - w03, gy);
    gy = mad(8.0f, w31 - w11 + w33 - w13, gy);
    gy = mad(6.0f, w42 - w02, gy);
    gy = mad(12.0f, w32 - w12, gy);
    DSTY(x,y) = scale * gy;
}
// sobel7: computes the X and Y derivative responses of a 7x7 window for one
// output pixel per work-item and stores them, multiplied by `scale`, through
// the DSTX / DSTY accessors.
//
// Flow:
//   * every work-item helps fill the (BLK_Y+6) x (BLK_X+6) __local tile via
//     INIT_AND_READ_LOCAL_SOURCE (kernel_border = 3, fill constant 0);
//   * after the local-memory barrier, work-items with x >= dstWidth or
//     y >= dstHeight return without writing;
//   * the window is read row by row as tt*, t*, u*, m*, l*, b*, bb* (top to
//     bottom, suffix 1..7 = left to right; the centre tap m4 is not read) and
//     combined with nested mad() calls.
//
// NOTE(review): the lines between the DSTX store and the DSTY mad chain
// (`float dy = mad(...)`, `DstY[id_y * dstStride + id_x] = dy * scale;`, a
// stray `}` and `\ No newline at end of file`) reference names that are not
// declared in this kernel (id_x, id_y, dstStride) and look like removed-side
// diff lines interleaved by the page extraction -- confirm against the
// repository before using this text as source.
__kernel void sobel7(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+6][BLK_X+6];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float tt1 = lsmem[liy][lix];
float tt2 = lsmem[liy][lix+1];
float tt3 = lsmem[liy][lix+2];
float tt4 = lsmem[liy][lix+3];
float tt5 = lsmem[liy][lix+4];
float tt6 = lsmem[liy][lix+5];
float tt7 = lsmem[liy][lix+6];
float t1 = lsmem[liy+1][lix];
float t2 = lsmem[liy+1][lix+1];
float t3 = lsmem[liy+1][lix+2];
float t4 = lsmem[liy+1][lix+3];
float t5 = lsmem[liy+1][lix+4];
float t6 = lsmem[liy+1][lix+5];
float t7 = lsmem[liy+1][lix+6];
float u1 = lsmem[liy+2][lix];
float u2 = lsmem[liy+2][lix+1];
float u3 = lsmem[liy+2][lix+2];
float u4 = lsmem[liy+2][lix+3];
float u5 = lsmem[liy+2][lix+4];
float u6 = lsmem[liy+2][lix+5];
float u7 = lsmem[liy+2][lix+6];
float m1 = lsmem[liy+3][lix];
float m2 = lsmem[liy+3][lix+1];
float m3 = lsmem[liy+3][lix+2];
float m5 = lsmem[liy+3][lix+4];
float m6 = lsmem[liy+3][lix+5];
float m7 = lsmem[liy+3][lix+6];
float l1 = lsmem[liy+4][lix];
float l2 = lsmem[liy+4][lix+1];
float l3 = lsmem[liy+4][lix+2];
float l4 = lsmem[liy+4][lix+3];
float l5 = lsmem[liy+4][lix+4];
float l6 = lsmem[liy+4][lix+5];
float l7 = lsmem[liy+4][lix+6];
float b1 = lsmem[liy+5][lix];
float b2 = lsmem[liy+5][lix+1];
float b3 = lsmem[liy+5][lix+2];
float b4 = lsmem[liy+5][lix+3];
float b5 = lsmem[liy+5][lix+4];
float b6 = lsmem[liy+5][lix+5];
float b7 = lsmem[liy+5][lix+6];
float bb1 = lsmem[liy+6][lix];
float bb2 = lsmem[liy+6][lix+1];
float bb3 = lsmem[liy+6][lix+2];
float bb4 = lsmem[liy+6][lix+3];
float bb5 = lsmem[liy+6][lix+4];
float bb6 = lsmem[liy+6][lix+5];
float bb7 = lsmem[liy+6][lix+6];
//calc and store dx and dy
DSTX(x,y) = scale *
mad(100.0f, m5 - m3,
mad(80.0f, m6 - m2,
mad(20.0f, m7 - m1,
mad(75.0f, u5 - u3 + l5 - l3,
mad(60.0f, u6 - u2 + l6 - l2,
mad(15.0f, u7 - u1 + l7 - l1,
mad(30.0f, t5 - t3 + b5 - b3,
mad(24.0f, t6 - t2 + b6 - b2,
mad(6.0f, t7 - t1 + b7 - b1,
mad(5.0f, tt5 - tt3 + bb5 - bb3,
mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
)
)
)
)
)
)
)
)
)
);
float
dy
=
mad
(
2.0f,
b2
-
u2,
b1
-
u1
+
b3
-
u3
)
;
DSTY(x,y) = scale *
DstY[
id_y
*
dstStride
+
id_x
]
=
dy
*
scale
;
mad(100.0f, l4 - u4,
}
mad(80.0f, b4 - t4,
\ No newline at end of file
mad(20.0f, bb4 - tt4,
mad(75.0f, l5 - u5 + l3 - u3,
mad(60.0f, b5 - t5 + b3 - t3,
mad(15.0f, bb5 - tt5 + bb3 - tt3,
mad(30.0f, l6 - u6 + l2 - u2,
mad(24.0f, b6 - t6 + b2 - t2,
mad(6.0f, bb6 - tt6 + bb2 - tt2,
mad(5.0f, l7 - u7 + l1 - u1,
mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
)
)
)
)
)
)
)
)
)
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment