1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifdef OP_ERODE
#define OP(m1, m2) min(m1, m2)
#define VAL UCHAR_MAX
#endif
#ifdef OP_DILATE
#define OP(m1, m2) max(m1, m2)
#define VAL 0
#endif
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
#define EXTRA_PARAMS , __global const uchar * matptr, int mat_step, int mat_offset
#else
#define EXTRA_PARAMS
#endif
#define PROCESS(_y, _x) \
line_out[0] = OP(line_out[0], arr[_x + 3 * _y]); \
line_out[1] = OP(line_out[1], arr[_x + 3 * (_y + 1)]);
#define PROCESS_ELEM \
line_out[0] = (uchar16)VAL; \
line_out[1] = (uchar16)VAL; \
PROCESS_ELEM_
__kernel void morph3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
__global uint* dst, int dst_step,
int rows, int cols
EXTRA_PARAMS)
{
int block_x = get_global_id(0);
int y = get_global_id(1) * 2;
int ssx = 1, dsx = 1;
if ((block_x * 16) >= cols || y >= rows) return;
uchar a; uchar16 b; uchar c;
uchar d; uchar16 e; uchar f;
uchar g; uchar16 h; uchar i;
uchar j; uchar16 k; uchar l;
uchar16 line[4];
uchar16 line_out[2];
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
line[0] = (y == 0) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index));
line[1] = as_uchar16(vload4(0, src + src_index + (src_step / 4)));
line[2] = as_uchar16(vload4(0, src + src_index + 2 * (src_step / 4)));
line[3] = (y == (rows - 2)) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index + 3 * (src_step / 4)));
__global uchar *src_p = (__global uchar *)src;
bool line_end = ((block_x + 1) * 16 == cols);
src_index = block_x * 16 * ssx + (y - 1) * src_step;
a = (block_x == 0 || y == 0) ? VAL : src_p[src_index - 1];
b = line[0];
c = (line_end || y == 0) ? VAL : src_p[src_index + 16];
d = (block_x == 0) ? VAL : src_p[src_index + src_step - 1];
e = line[1];
f = line_end ? VAL : src_p[src_index + src_step + 16];
g = (block_x == 0) ? VAL : src_p[src_index + 2 * src_step - 1];
h = line[2];
i = line_end ? VAL : src_p[src_index + 2 * src_step + 16];
j = (block_x == 0 || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step - 1];
k = line[3];
l = (line_end || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step + 16];
uchar16 arr[12];
arr[0] = (uchar16)(a, b.s01234567, b.s89ab, b.scde);
arr[1] = b;
arr[2] = (uchar16)(b.s12345678, b.s9abc, b.sdef, c);
arr[3] = (uchar16)(d, e.s01234567, e.s89ab, e.scde);
arr[4] = e;
arr[5] = (uchar16)(e.s12345678, e.s9abc, e.sdef, f);
arr[6] = (uchar16)(g, h.s01234567, h.s89ab, h.scde);
arr[7] = h;
arr[8] = (uchar16)(h.s12345678, h.s9abc, h.sdef, i);
arr[9] = (uchar16)(j, k.s01234567, k.s89ab, k.scde);
arr[10] = k;
arr[11] = (uchar16)(k.s12345678, k.s9abc, k.sdef, l);
PROCESS_ELEM;
int dst_index = block_x * 4 * dsx + y * (dst_step / 4);
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
int mat_index = y * mat_step + block_x * 16 * ssx + mat_offset;
uchar16 val0 = vload16(0, matptr + mat_index);
uchar16 val1 = vload16(0, matptr + mat_index + mat_step);
#ifdef OP_GRADIENT
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#elif defined OP_TOPHAT
line_out[0] = convert_uchar16_sat(convert_int16(val0) - convert_int16(line_out[0]));
line_out[1] = convert_uchar16_sat(convert_int16(val1) - convert_int16(line_out[1]));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#elif defined OP_BLACKHAT
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#endif
#else
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
#endif
}