1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#if defined(BORDER_REPLICATE)
// BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
//
// Index remapping for the "replicate" border mode: an index that falls
// outside the valid range is replaced by the nearest valid index.
//
//   ADDR_L(idx, lo, hi)     -> lo      when idx <  lo, otherwise idx   (columns)
//   ADDR_R(idx, hi, other)  -> hi - 1  when idx >= hi, otherwise other (columns)
//   ADDR_H(idx, lo, hi)     -> lo      when idx <  lo, otherwise idx   (rows)
//   ADDR_B(idx, hi, other)  -> hi - 1  when idx >= hi, otherwise other (rows)
//
// The `hi` argument of ADDR_L / ADDR_H is accepted but not used in this mode.
// Arguments are expanded more than once, so pass side-effect-free expressions.
#define ADDR_L(idx, lo, hi)     (((idx) >= (lo)) ? (idx) : (lo))
#define ADDR_R(idx, hi, other)  (((idx) < (hi)) ? (other) : ((hi) - 1))
#define ADDR_H(idx, lo, hi)     (((idx) >= (lo)) ? (idx) : (lo))
#define ADDR_B(idx, hi, other)  (((idx) < (hi)) ? (other) : ((hi) - 1))
#endif
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
//
// Index remapping for the "reflect" border mode: an out-of-range index is
// mirrored about the edge it crossed, with the edge pixel itself repeated.
//
//   ADDR_L(i, l_edge, r_edge): i <  l_edge -> 2*l_edge - i - 1, otherwise i
//   ADDR_R(i, r_edge, addr)  : i >= r_edge -> 2*r_edge - i - 1, otherwise addr
//   ADDR_H(i, t_edge, b_edge): i <  t_edge -> 2*t_edge - i - 1, otherwise i
//   ADDR_B(i, b_edge, addr)  : i >= b_edge -> 2*b_edge - i - 1, otherwise addr
//
// ADDR_L / ADDR_H mirror about the low edge they are given. With a low edge of
// 0 this reduces to -(i) - 1, so existing callers that pass 0 see no change.
// The r_edge / b_edge argument of ADDR_L / ADDR_H is accepted but not used.
// Arguments are expanded more than once, so pass side-effect-free expressions.
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? 2 * (l_edge) - (i) - 1 : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? 2 * (r_edge) - (i) - 1 : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? 2 * (t_edge) - (i) - 1 : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? 2 * (b_edge) - (i) - 1 : (addr))
#endif
#ifdef BORDER_REFLECT101
//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba
//
// Index remapping for the "reflect 101" border mode: an out-of-range index is
// mirrored about the edge pixel it crossed, without repeating that pixel.
//
//   ADDR_L(i, l_edge, r_edge): i <  l_edge -> 2*l_edge - i,     otherwise i
//   ADDR_R(i, r_edge, addr)  : i >= r_edge -> 2*r_edge - i - 2, otherwise addr
//   ADDR_H(i, t_edge, b_edge): i <  t_edge -> 2*t_edge - i,     otherwise i
//   ADDR_B(i, b_edge, addr)  : i >= b_edge -> 2*b_edge - i - 2, otherwise addr
//
// ADDR_L / ADDR_H mirror about the low edge they are given. With a low edge of
// 0 this reduces to -(i), so existing callers that pass 0 see no change.
// The r_edge / b_edge argument of ADDR_L / ADDR_H is accepted but not used.
// Arguments are expanded more than once, so pass side-effect-free expressions.
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? 2 * (l_edge) - (i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? 2 * (r_edge) - (i) - 2 : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? 2 * (t_edge) - (i) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? 2 * (b_edge) - (i) - 2 : (addr))
#endif
#if defined(BORDER_WRAP)
// BORDER_WRAP: cdefgh|abcdefgh|abcdefg
//
// Index remapping for the "wrap" border mode: an out-of-range index re-enters
// the range from the opposite side.
//
//   ADDR_L(idx, lo, hi)     -> idx + hi  when idx <  lo, otherwise idx   (columns)
//   ADDR_R(idx, hi, other)  -> idx - hi  when idx >= hi, otherwise other (columns)
//   ADDR_H(idx, lo, hi)     -> idx + hi  when idx <  lo, otherwise idx   (rows)
//   ADDR_B(idx, hi, other)  -> idx - hi  when idx >= hi, otherwise other (rows)
//
// NOTE: the shift is by `hi` alone (the low edge is only used in the test),
// so the result matches the diagram above only when the low edge is 0.
// Arguments are expanded more than once, so pass side-effect-free expressions.
#define ADDR_L(idx, lo, hi)     (((idx) >= (lo)) ? (idx) : ((idx) + (hi)))
#define ADDR_R(idx, hi, other)  (((idx) < (hi)) ? (other) : ((idx) - (hi)))
#define ADDR_H(idx, lo, hi)     (((idx) >= (lo)) ? (idx) : ((idx) + (hi)))
#define ADDR_B(idx, hi, other)  (((idx) < (hi)) ? (other) : ((idx) - (hi)))
#endif
// 3x3 Sobel derivatives of an 8-bit single-channel image.
//
// Each work-item handles one pixel (global id 0 = column, global id 1 = row).
// The work-group first stages its tile of pixels, plus a one-pixel halo on
// every side, into local memory as floats; each work-item then reads its
// 3x3 neighbourhood from that tile and writes
//
//   DstX = scale * ( (u3 - u1) + 2*(m3 - m1) + (b3 - b1) )   horizontal derivative
//   DstY = scale * ( (b1 - u1) + 2*(b2 - u2) + (b3 - u3) )   vertical derivative
//
// where u*, m*, b* are the upper, middle and bottom rows of the neighbourhood.
//
// Parameters:
//   Src        source pixels, indexed as Src[row * srcStride + col]
//   DstX/DstY  outputs, indexed as Dst[row * dstStride + col]
//   width      number of valid columns (must be >= 1)
//   height     number of valid rows (must be >= 1)
//   srcStride  distance between consecutive source rows, in uchar elements
//   dstStride  distance between consecutive output rows, in float elements
//   scale      factor applied to both derivatives
//
// Requirements visible from the code:
//   - BLK_X / BLK_Y and the ADDR_L/ADDR_R/ADDR_H/ADDR_B border macros must be
//     defined when the program is built.
//   - The local work size must be BLK_X x BLK_Y: the halo is loaded by the
//     work-items with local id 0 and BLK_X-1 / BLK_Y-1, and the local tile is
//     sized from these constants.
//
// The global size may be padded beyond width x height: work-items outside the
// image still help fill the local tile (and must reach the barrier) but never
// write to DstX / DstY.
__kernel void sobel3(
    __global const uchar* Src,
    __global float* DstX,
    __global float* DstY,
    int width, int height,
    uint srcStride, uint dstStride,
    float scale
    )
{
    // Work-group tile with a one-pixel border on each side.
    __local float lsmem[BLK_Y+2][BLK_X+2];

    int lix = get_local_id(0);
    int liy = get_local_id(1);
    int id_x = get_global_id(0);
    int id_y = get_global_id(1);

    int last_x = width - 1;
    int last_y = height - 1;

    // Border-mapped coordinates of the centre pixel and its four neighbours.
    // The centre is mapped as well: a work-item just past the right / bottom
    // edge supplies the border value that the last valid column / row needs.
    // The final clamp is a no-op whenever the border macro already produced a
    // valid index; it only keeps loads inside the image for padded work-items
    // and for images too small for the selected border mode.
    int id_x_c = clamp(ADDR_R(id_x, width, id_x), 0, last_x);
    int id_y_c = clamp(ADDR_B(id_y, height, id_y), 0, last_y);
    int id_y_h = clamp(ADDR_H(id_y-1, 0, height), 0, last_y);
    int id_y_b = clamp(ADDR_B(id_y+1, height, id_y+1), 0, last_y);
    int id_x_l = clamp(ADDR_L(id_x-1, 0, width), 0, last_x);
    int id_x_r = clamp(ADDR_R(id_x+1, width, id_x+1), 0, last_x);

    uint row_c = id_y_c * srcStride;
    uint row_h = id_y_h * srcStride;
    uint row_b = id_y_b * srcStride;

    // Every work-item stores its own pixel in the interior of the tile.
    lsmem[liy+1][lix+1] = convert_float(Src[ row_c + id_x_c ]);

    // Halo loads. The tests are independent (not else-if) so that a tile
    // dimension of 1 still gets both of its halo sides filled.
    if(liy==0)
    {
        lsmem[0][lix+1] = convert_float(Src[ row_h + id_x_c ]);
        if(lix==0)
            lsmem[0][0] = convert_float(Src[ row_h + id_x_l ]);
        if(lix==BLK_X-1)
            lsmem[0][BLK_X+1] = convert_float(Src[ row_h + id_x_r ]);
    }
    if(liy==BLK_Y-1)
    {
        lsmem[BLK_Y+1][lix+1] = convert_float(Src[ row_b + id_x_c ]);
        if(lix==0)
            lsmem[BLK_Y+1][0] = convert_float(Src[ row_b + id_x_l ]);
        if(lix==BLK_X-1)
            lsmem[BLK_Y+1][BLK_X+1] = convert_float(Src[ row_b + id_x_r ]);
    }
    if(lix==0)
        lsmem[liy+1][0] = convert_float(Src[ row_c + id_x_l ]);
    if(lix==BLK_X-1)
        lsmem[liy+1][BLK_X+1] = convert_float(Src[ row_c + id_x_r ]);

    // All work-items, including those outside the image, must reach this
    // barrier before anyone reads the tile.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Padded work-items have no output pixel.
    if(id_x < width && id_y < height)
    {
        float u1 = lsmem[liy][lix];
        float u2 = lsmem[liy][lix+1];
        float u3 = lsmem[liy][lix+2];

        float m1 = lsmem[liy+1][lix];
        float m3 = lsmem[liy+1][lix+2];

        float b1 = lsmem[liy+2][lix];
        float b2 = lsmem[liy+2][lix+1];
        float b3 = lsmem[liy+2][lix+2];

        uint dst_idx = id_y * dstStride + id_x;

        float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 );
        DstX[ dst_idx ] = dx * scale;

        float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3);
        DstY[ dst_idx ] = dy * scale;
    }
}