Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b33a62be
Commit
b33a62be
authored
Oct 25, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed separable filter extrapolation
parent
f177e658
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
198 additions
and
273 deletions
+198
-273
filtering.cpp
modules/ocl/src/filtering.cpp
+17
-51
filter_sep_col.cl
modules/ocl/src/opencl/filter_sep_col.cl
+7
-36
filter_sep_row.cl
modules/ocl/src/opencl/filter_sep_row.cl
+173
-185
test_filters.cpp
modules/ocl/test/test_filters.cpp
+1
-1
No files found.
modules/ocl/src/filtering.cpp
View file @
b33a62be
...
...
@@ -1058,74 +1058,39 @@ template <> struct index_and_sizeof<float>
template
<
typename
T
>
void
linearRowFilter_gpu
(
const
oclMat
&
src
,
const
oclMat
&
dst
,
oclMat
mat_kernel
,
int
ksize
,
int
anchor
,
int
bordertype
)
{
Context
*
clCxt
=
src
.
clCxt
;
CV_Assert
(
bordertype
<=
BORDER_REFLECT_101
);
CV_Assert
(
ksize
==
(
anchor
<<
1
)
+
1
);
int
channels
=
src
.
oclchannels
();
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
string
kernelName
=
"row_filter"
;
char
btype
[
30
];
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
switch
(
bordertype
)
{
case
0
:
sprintf
(
btype
,
"BORDER_CONSTANT"
);
break
;
case
1
:
sprintf
(
btype
,
"BORDER_REPLICATE"
);
break
;
case
2
:
sprintf
(
btype
,
"BORDER_REFLECT"
);
break
;
case
3
:
sprintf
(
btype
,
"BORDER_WRAP"
);
break
;
case
4
:
sprintf
(
btype
,
"BORDER_REFLECT_101"
);
break
;
}
char
compile_option
[
128
];
sprintf
(
compile_option
,
"-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s"
,
anchor
,
(
int
)
localThreads
[
0
],
(
int
)
localThreads
[
1
],
channels
,
btype
);
size_t
globalThreads
[
3
];
globalThreads
[
1
]
=
(
dst
.
rows
+
localThreads
[
1
]
-
1
)
/
localThreads
[
1
]
*
localThreads
[
1
];
globalThreads
[
2
]
=
(
1
+
localThreads
[
2
]
-
1
)
/
localThreads
[
2
]
*
localThreads
[
2
];
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
std
::
string
buildOptions
=
format
(
"-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s"
,
anchor
,
(
int
)
localThreads
[
0
],
(
int
)
localThreads
[
1
],
channels
,
borderMap
[
bordertype
]);
if
(
src
.
depth
()
==
CV_8U
)
{
switch
(
channels
)
{
case
1
:
case
3
:
globalThreads
[
0
]
=
((
dst
.
cols
+
4
)
/
4
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
globalThreads
[
0
]
=
(
dst
.
cols
+
3
)
>>
2
;
break
;
case
2
:
globalThreads
[
0
]
=
(
(
dst
.
cols
+
1
)
/
2
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
]
;
globalThreads
[
0
]
=
(
dst
.
cols
+
1
)
>>
1
;
break
;
case
4
:
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
]
;
globalThreads
[
0
]
=
dst
.
cols
;
break
;
}
}
else
{
globalThreads
[
0
]
=
(
dst
.
cols
+
localThreads
[
0
]
-
1
)
/
localThreads
[
0
]
*
localThreads
[
0
];
}
//sanity checks
CV_Assert
(
clCxt
==
dst
.
clCxt
);
CV_Assert
(
src
.
cols
==
dst
.
cols
);
CV_Assert
(
src
.
oclchannels
()
==
dst
.
oclchannels
());
CV_Assert
(
ksize
==
(
anchor
<<
1
)
+
1
);
int
src_pix_per_row
,
dst_pix_per_row
;
int
src_offset_x
,
src_offset_y
;
//, dst_offset_in_pixel;
src_pix_per_row
=
src
.
step
/
src
.
elemSize
();
src_offset_x
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
src_offset_y
=
src
.
offset
/
src
.
step
;
dst_pix_per_row
=
dst
.
step
/
dst
.
elemSize
();
//dst_offset_in_pixel = dst.offset / dst.elemSize();
int
src_pix_per_row
=
src
.
step
/
src
.
elemSize
();
int
src_offset_x
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_offset_y
=
src
.
offset
/
src
.
step
;
int
dst_pix_per_row
=
dst
.
step
/
dst
.
elemSize
();
int
ridusy
=
(
dst
.
rows
-
src
.
rows
)
>>
1
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
&
dst
.
data
));
...
...
@@ -1140,7 +1105,8 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
ridusy
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
mat_kernel
.
data
));
openCLExecuteKernel
(
clCxt
,
&
filter_sep_row
,
kernelName
,
globalThreads
,
localThreads
,
args
,
channels
,
src
.
depth
(),
compile_option
);
openCLExecuteKernel
(
src
.
clCxt
,
&
filter_sep_row
,
"row_filter"
,
globalThreads
,
localThreads
,
args
,
channels
,
src
.
depth
(),
buildOptions
.
c_str
());
}
Ptr
<
BaseRowFilter_GPU
>
cv
::
ocl
::
getLinearRowFilter_GPU
(
int
srcType
,
int
/*bufType*/
,
const
Mat
&
rowKernel
,
int
anchor
,
int
bordertype
)
...
...
modules/ocl/src/opencl/filter_sep_col.cl
View file @
b33a62be
...
...
@@ -47,36 +47,6 @@
#
define
READ_TIMES_ROW
((
2*
(
RADIUS+LSIZE0
)
-1
)
/LSIZE0
)
#
endif
#
ifdef
BORDER_CONSTANT
//BORDER_CONSTANT:
iiiiii|abcdefgh|iiiiiii
#
define
ELEM
(
i,l_edge,r_edge,elem1,elem2
)
(
i
)
<
(
l_edge
)
| (i) >= (r_edge) ? (elem1) : (elem2)
#endif
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i,l_edge,r_edge) (i) < (l_edge) ? (l_edge) : (i)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (r_edge)-1 : (addr)
#endif
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i,l_edge,r_edge) (i) < (l_edge) ? -(i)-1 : (i)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)
#endif
#ifdef BORDER_REFLECT_101
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i,l_edge,r_edge) (i) < (l_edge) ? -(i) : (i)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)
#endif
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh
|
abcdefg
#
define
ADDR_L
(
i,l_edge,r_edge
)
(
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
)
#
define
ADDR_R
(
i,r_edge,addr
)
(
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
)
#
endif
/**********************************************************************************
These
kernels
are
written
for
separable
filters
such
as
Sobel,
Scharr,
GaussianBlur.
Now
(
6/29/2011
)
the
kernels
only
support
8U
data
type
and
the
anchor
of
the
convovle
...
...
@@ -107,15 +77,16 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
l_x
=
get_local_id
(
0
)
;
int
l_y
=
get_local_id
(
1
)
;
int
start_addr
=
mad24
(
y,src_step_in_pixel,x
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
int
i
;
GENTYPE_SRC
sum
;
GENTYPE_SRC
temp[READ_TIMES_COL]
;
__local
GENTYPE_SRC
LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1]
;
int
start_addr
=
mad24
(
y,
src_step_in_pixel,
x
)
;
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,
src_whole_cols
)
;
int
i
;
GENTYPE_SRC
sum,
temp[READ_TIMES_COL]
;
__local
GENTYPE_SRC
LDS_DAT[LSIZE1
*
READ_TIMES_COL][LSIZE0
+
1]
;
//read
pixels
from
src
for
(
i
=
0
;i<READ_TIMES_COL;i++)
...
...
modules/ocl/src/opencl/filter_sep_row.cl
View file @
b33a62be
...
...
@@ -48,34 +48,43 @@
#
define
ALIGN
(
RADIUS
)
#
endif
#
ifdef
BORDER_CONSTANT
//BORDER_CONSTANT:
iiiiii|abcdefgh|iiiiiii
#
define
ELEM
(
i,l_edge,r_edge,elem1,elem2
)
(
i
)
<
(
l_edge
)
| (i) >= (r_edge) ? (elem1) : (elem2)
#endif
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (l_edge) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (r_edge)-1 : (addr)
#endif
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(x, maxV) \
{ \
x = max(min(x, maxV - 1), 0); \
}
#elif defined BORDER_WRAP
#define EXTRAPOLATE(x, maxV) \
{ \
if (x < 0) \
x -= ((x - maxV + 1) / maxV) * maxV; \
if (x >= maxV) \
x %= maxV; \
}
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
#define EXTRAPOLATE_(x, maxV, delta) \
{ \
if (maxV == 1) \
x = 0; \
else \
do \
{ \
if ( x < 0 ) \
x = -x - 1 + delta; \
else \
x = maxV - 1 - (x - maxV) - delta; \
} \
while (x >= maxV || x < 0); \
}
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i)-1 : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)
#endif
#ifdef BORDER_REFLECT_101
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? -(i) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)
#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 0)
#else
#define EXTRAPOLATE(x, maxV) EXTRAPOLATE_(x, maxV, 1)
#endif
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i,l_edge,r_edge,addr) (i) < (l_edge) ? (i)+(r_edge) : (addr)
#define ADDR_R(i,r_edge,addr) (i) >= (r_edge) ? (i)-(r_edge) : (addr)
#else
#error No extrapolation method
#endif
/**********************************************************************************
...
...
@@ -96,73 +105,71 @@ The info above maybe obsolete.
***********************************************************************************/
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
(__global const uchar * restrict src,
__global float * dst,
const int dst_cols,
const int dst_rows,
const int src_whole_cols,
const int src_whole_rows,
const int src_step_in_pixel,
const int src_offset_x,
const int src_offset_y,
const int dst_step_in_pixel,
const int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
(__global uchar * restrict src,
__global float * dst,
int dst_cols, int dst_rows,
int src_whole_cols, int src_whole_rows,
int src_step_in_pixel,
int src_offset_x, int src_offset_y,
int dst_step_in_pixel, int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0)<<2;
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_x = x+src_offset_x-RADIUSX & 0xfffffffc;
int offset = src_offset_x-RADIUSX & 3;
int start_y = y+src_offset_y-radiusy;
int start_addr = mad24(start_y,src_step_in_pixel,start_x);
int start_x = x+src_offset_x - RADIUSX & 0xfffffffc;
int offset = src_offset_x - RADIUSX & 3;
int start_y = y + src_offset_y - radiusy;
int start_addr = mad24(start_y, src_step_in_pixel, start_x);
int i;
float4 sum;
uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
//read pixels from src
for(i = 0; i<READ_TIMES_ROW; i++)
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
// read pixels from src
for (i = 0; i < READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0*4;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = *(__global uchar4*)&src[current_addr];
}
//judge if read out of boundary
for(i = 0; i<READ_TIMES_ROW; i++)
// judge if read out of boundary
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i].x= ELEM(start_x+i*LSIZE0*4,0,src_whole_cols,0,temp[i].x);
temp[i].y= ELEM(start_x+i*LSIZE0*4+1,0,src_whole_cols,0,temp[i].y);
temp[i].z= ELEM(start_x+i*LSIZE0*4+2,0,src_whole_cols,0,temp[i].z);
temp[i].w= ELEM(start_x+i*LSIZE0*4+3,0,src_whole_cols,0,temp[i].w);
temp[i]= ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
temp[i].x
= ELEM(start_x+i*LSIZE0*4,0,src_whole_cols,0,temp[i].x);
temp[i].y
= ELEM(start_x+i*LSIZE0*4+1,0,src_whole_cols,0,temp[i].y);
temp[i].z
= ELEM(start_x+i*LSIZE0*4+2,0,src_whole_cols,0,temp[i].z);
temp[i].w
= ELEM(start_x+i*LSIZE0*4+3,0,src_whole_cols,0,temp[i].w);
temp[i]
= ELEM(start_y,0,src_whole_rows,(uchar4)0,temp[i]);
}
#else
int not_all_in_range = (start_x<0) |
(
start_x
+
READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols
)
| (start_y<0) |
(
start_y
>=
src_whole_rows
)
;
int4
index[READ_TIMES_ROW]
;
int4
addr
;
int
s_y
;
if
(
not_all_in_range
)
if
(
not_all_in_range
)
{
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<
READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i <
READ_TIMES_ROW; i++)
{
index[i].x=
ADDR_L
(
start_x+i*LSIZE0*4,0,src_whole_cols,start_x+i*LSIZE0*4
)
;
index[i].x=
ADDR_R
(
start_x+i*LSIZE0*4,src_whole_cols,index[i].x
)
;
index[i].y=
ADDR_L
(
start_x+i*LSIZE0*4+1,0,src_whole_cols,start_x+i*LSIZE0*4+1
)
;
index[i].y=
ADDR_R
(
start_x+i*LSIZE0*4+1,src_whole_cols,index[i].y
)
;
index[i].z=
ADDR_L
(
start_x+i*LSIZE0*4+2,0,src_whole_cols,start_x+i*LSIZE0*4+2
)
;
index[i].z=
ADDR_R
(
start_x+i*LSIZE0*4+2,src_whole_cols,index[i].z
)
;
index[i].w=
ADDR_L
(
start_x+i*LSIZE0*4+3,0,src_whole_cols,start_x+i*LSIZE0*4+3
)
;
index[i].w=
ADDR_R
(
start_x+i*LSIZE0*4+3,src_whole_cols,index[i].w
)
;
index[i]
=
(
int4
)(
start_x+i*LSIZE0*4
)
+
(
int4
)(
0
,
1
,
2
,
3
)
;
EXTRAPOLATE
(
index[i].x,
src_whole_cols
)
;
EXTRAPOLATE
(
index[i].y,
src_whole_cols
)
;
EXTRAPOLATE
(
index[i].z,
src_whole_cols
)
;
EXTRAPOLATE
(
index[i].w,
src_whole_cols
)
;
}
s_y=
ADDR_L
(
start_y,0,src_whole_rows,start_y
)
;
s_y=
ADDR_R
(
start_y,src_whole_rows,s_y
)
;
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
s_y
=
start_y
;
EXTRAPOLATE
(
s_y,
src_whole_rows
)
;
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
addr
=
mad24
((
int4
)
s_y,
(
int4
)
src_step_in_pixel,index[i]
)
;
temp[i].x
=
src[addr.x]
;
...
...
@@ -173,64 +180,55 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
}
else
{
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
*
(
__global
uchar4*
)
&src[start_addr+i*LSIZE0*4]
;
}
}
#
endif
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
//
save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=convert_float4
(
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset
))
*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]
=vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset-
i
)
;
temp[1]
=vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]+RADIUSX+offset+
i
)
;
sum
+=
convert_float4
(
temp[0]
)
*mat_kernel[RADIUSX-i]+convert_float4
(
temp[1]
)
*
mat_kernel[RADIUSX+i]
;
temp[0]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
-
i
)
;
temp[1]
=
vload4
(
0
,
(
__local
uchar*
)
&LDS_DAT[l_y][l_x]
+
RADIUSX
+
offset
+
i
)
;
sum
+=
convert_float4
(
temp[0]
)
*
mat_kernel[RADIUSX-i]
+
convert_float4
(
temp[1]
)
*
mat_kernel[RADIUSX+i]
;
}
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
//write
the
result
to
dst
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
{
//
write
the
result
to
dst
if
((
x+3<dst_cols
)
&
(
y<dst_rows
))
*
(
__global
float4*
)
&dst[start_addr]
=
sum
;
}
else
if
((
x+2<dst_cols
)
&
(
y<dst_rows
))
else
if
((
x+2<dst_cols
)
&&
(
y<dst_rows
))
{
dst[start_addr]
=
sum.x
;
dst[start_addr+1]
=
sum.y
;
dst[start_addr+2]
=
sum.z
;
}
else
if
((
x+1<dst_cols
)
&
(
y<dst_rows
))
else
if
((
x+1<dst_cols
)
&
&
(
y<dst_rows
))
{
dst[start_addr]
=
sum.x
;
dst[start_addr+1]
=
sum.y
;
}
else
if
((
x<dst_cols
)
&
(
y<dst_rows
))
{
else
if
(
x<dst_cols
&&
y<dst_rows
)
dst[start_addr]
=
sum.x
;
}
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C4_D0
(
__global
const
uchar4
*
restrict
src,
__global
float4
*
dst,
const
int
dst_cols,
const
int
dst_rows,
const
int
src_whole_cols,
const
int
src_whole_rows,
const
int
src_step_in_pixel,
const
int
src_offset_x,
const
int
src_offset_y,
const
int
dst_step_in_pixel,
const
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
(
__global
uchar4
*
restrict
src,
__global
float4
*
dst,
int
dst_cols,
int
dst_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
dst_step_in_pixel,
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -246,15 +244,17 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__local
uchar4
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1]
;
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
int
current_addr
=
start_addr+i*LSIZE0
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
}
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,0,src_whole_cols,
(
uchar4
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,0,src_whole_rows,
(
uchar4
)
0
,
temp[i]
)
;
...
...
@@ -262,39 +262,37 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
#
else
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
s_x
=
ADDR_L
(
start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0
)
;
s_x=
ADDR_R
(
start_x+i*LSIZE0,src_whole_cols,s_x
)
;
s_y
=
ADDR_L
(
start_y,0,src_whole_rows,start_y
)
;
s_y=
ADDR_R
(
start_y,src_whole_rows,s_y
)
;
s_x
=
start_x+i*LSIZE0
;
EXTRAPOLATE
(
s_x,
src_whole_cols
)
;
s_y
=
start_y
;
EXTRAPOLATE
(
s_y,
src_whole_rows
)
;
index[i]=mad24
(
s_y,src_step_in_pixel,s_x
)
;
}
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
src[index[i]]
;
}
#
endif
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
sum
=convert_float4
(
LDS_DAT[l_y][l_x+RADIUSX]
)
*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i]
;
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i]
;
sum
+=
convert_float4
(
temp[0]
)
*mat_kernel[RADIUSX-i]+convert_float4
(
temp[1]
)
*mat_kernel[RADIUSX+i]
;
}
//write
the
result
to
dst
if
((
x<dst_cols
)
&
(
y<dst_rows
)
)
if
(
x<dst_cols
&&
y<dst_rows
)
{
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
dst[start_addr]
=
sum
;
...
...
@@ -302,18 +300,14 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C1_D5
(
__global
const
float
*
restrict
src,
__global
float
*
dst,
const
int
dst_cols,
const
int
dst_rows,
const
int
src_whole_cols,
const
int
src_whole_rows,
const
int
src_step_in_pixel,
const
int
src_offset_x,
const
int
src_offset_y,
const
int
dst_step_in_pixel,
const
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
(
__global
float
*
restrict
src,
__global
float
*
dst,
int
dst_cols,
int
dst_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
dst_step_in_pixel,
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -329,15 +323,17 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__local
float
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1]
;
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
int
current_addr
=
start_addr+i*LSIZE0
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
}
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,0,src_whole_cols,
(
float
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,0,src_whole_rows,
(
float
)
0
,
temp[i]
)
;
...
...
@@ -345,39 +341,36 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
#
else
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
s_x
=
ADDR_L
(
start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0
)
;
s_x=
ADDR_R
(
start_x+i*LSIZE0,src_whole_cols,s_x
)
;
s_y=
ADDR_L
(
start_y,0,src_whole_rows,start_y
)
;
s_y=
ADDR_R
(
start_y,src_whole_rows,s_y
)
;
index[i]=mad24
(
s_y,
src_step_in_pixel,
s_x
)
;
s_x
=
start_x
+
i*LSIZE0,
s_y
=
start_y
;
EXTRAPOLATE
(
s_x,
src_whole_cols
)
;
EXTRAPOLATE
(
s_y,
src_whole_rows
)
;
index[i]=mad24
(
s_y,
src_step_in_pixel,
s_x
)
;
}
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
src[index[i]]
;
}
#
endif
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i]
;
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i]
;
sum
+=
temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i]
;
}
//write
the
result
to
dst
if
((
x<dst_cols
)
&
(
y<dst_rows
))
//
write
the
result
to
dst
if
(
x<dst_cols
&&
y<dst_rows
)
{
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
dst[start_addr]
=
sum
;
...
...
@@ -385,18 +378,14 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
}
__kernel
__attribute__
((
reqd_work_group_size
(
LSIZE0,LSIZE1,1
)))
void
row_filter_C4_D5
(
__global
const
float4
*
restrict
src,
__global
float4
*
dst,
const
int
dst_cols,
const
int
dst_rows,
const
int
src_whole_cols,
const
int
src_whole_rows,
const
int
src_step_in_pixel,
const
int
src_offset_x,
const
int
src_offset_y,
const
int
dst_step_in_pixel,
const
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
(
__global
float4
*
restrict
src,
__global
float4
*
dst,
int
dst_cols,
int
dst_rows,
int
src_whole_cols,
int
src_whole_rows,
int
src_step_in_pixel,
int
src_offset_x,
int
src_offset_y,
int
dst_step_in_pixel,
int
radiusy,
__constant
float
*
mat_kernel
__attribute__
((
max_constant_size
(
4*
(
2*RADIUSX+1
)))))
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -412,15 +401,17 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
__local
float4
LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1]
;
#
ifdef
BORDER_CONSTANT
int
end_addr
=
mad24
(
src_whole_rows
-
1
,
src_step_in_pixel,src_whole_cols
)
;
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
int
current_addr
=
start_addr+i*LSIZE0
;
current_addr
=
((
current_addr
<
end_addr
)
&&
(
current_addr
>
0
))
?
current_addr
:
0
;
temp[i]
=
src[current_addr]
;
}
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
temp[i]=
ELEM
(
start_x+i*LSIZE0,0,src_whole_cols,
(
float4
)
0
,
temp[i]
)
;
temp[i]=
ELEM
(
start_y,0,src_whole_rows,
(
float4
)
0
,
temp[i]
)
;
...
...
@@ -428,42 +419,39 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
#
else
int
index[READ_TIMES_ROW]
;
int
s_x,s_y
;
//judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
//
judge
if
read
out
of
boundary
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
s_x
=
ADDR_L
(
start_x+i*LSIZE0,0,src_whole_cols,start_x+i*LSIZE0
)
;
s_x=
ADDR_R
(
start_x+i*LSIZE0,src_whole_cols,s_x
)
;
s_y=
ADDR_L
(
start_y,0,src_whole_rows,start_y
)
;
s_y=
ADDR_R
(
start_y,src_whole_rows,s_y
)
;
s_x
=
start_x
+
i*LSIZE0,
s_y
=
start_y
;
EXTRAPOLATE
(
s_x,
src_whole_cols
)
;
EXTRAPOLATE
(
s_y,
src_whole_rows
)
;
index[i]=mad24
(
s_y,src_step_in_pixel,s_x
)
;
}
//read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
//
read
pixels
from
src
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
temp[i]
=
src[index[i]]
;
}
#
endif
//save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
{
//
save
pixels
to
lds
for
(
i
=
0
; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//read
pixels
from
lds
and
calculate
the
result
//
read
pixels
from
lds
and
calculate
the
result
sum
=LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX]
;
for
(
i=1
; i<=RADIUSX; i++)
for
(
i=1
; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i]
;
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i]
;
sum
+=
temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i]
;
}
//write
the
result
to
dst
if
((
x<dst_cols
)
&
(
y<dst_rows
))
//
write
the
result
to
dst
if
(
x<dst_cols
&&
y<dst_rows
)
{
start_addr
=
mad24
(
y,dst_step_in_pixel,x
)
;
dst[start_addr]
=
sum
;
}
}
modules/ocl/test/test_filters.cpp
View file @
b33a62be
...
...
@@ -403,7 +403,7 @@ INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
Bool
()));
INSTANTIATE_TEST_CASE_P
(
Filter
,
ScharrTest
,
Combine
(
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
,
CV_32FC4
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
,
CV_32FC
3
,
CV_32FC
4
),
Values
(
0
),
// not used
Values
(
Size
(
0
,
1
),
Size
(
1
,
0
)),
Values
((
int
)
BORDER_CONSTANT
,
(
int
)
BORDER_REFLECT101
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment