Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
25e6902a
Commit
25e6902a
authored
May 14, 2013
by
Vadim Pisarevsky
Committed by
OpenCV Buildbot
May 14, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #818 from bitwangyaoyao:2.4_optBlur
parents
bd1d7cd2
dec6a3b0
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
97 additions
and
96 deletions
+97
-96
filtering_boxFilter.cl
modules/ocl/src/opencl/filtering_boxFilter.cl
+97
-96
No files found.
modules/ocl/src/opencl/filtering_boxFilter.cl
View file @
25e6902a
...
...
@@ -79,11 +79,69 @@
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
endif
#
define
THREADS
256
#
define
ELEM
(
i,
l_edge,
r_edge,
elem1,
elem2
)
(
i
)
>=
(
l_edge
)
&&
(
i
)
<
(
r_edge
)
?
(
elem1
)
:
(
elem2
)
inline
void
update_dst_C1_D0
(
__global
uchar
*dst,
__local
uint*
temp,
int
dst_rows,
int
dst_cols,
int
dst_startX,
int
dst_x_off,
float
alpha
)
{
if
(
get_local_id
(
0
)
<
anX
|
| get_local_id(0) >= (THREADS-ksX+anX+1))
{
return;
}
uint4 tmp_sum = 0;
int posX = dst_startX - dst_x_off + (get_local_id(0)-anX)*4;
int posY = (get_group_id(1) << 1);
for(int i=-anX; i<=anX; i++)
{
tmp_sum += vload4(get_local_id(0), temp+i);
}
if(posY < dst_rows && posX < dst_cols)
{
tmp_sum /= (uint4) alpha;
if(posX >= 0 && posX < dst_cols)
*(dst) = tmp_sum.x;
if(posX+1 >= 0 && posX+1 < dst_cols)
*(dst + 1) = tmp_sum.y;
if(posX+2 >= 0 && posX+2 < dst_cols)
*(dst + 2) = tmp_sum.z;
if(posX+3 >= 0 && posX+3 < dst_cols)
*(dst + 3) = tmp_sum.w;
}
}
inline void update_dst_C4_D0(__global uchar4 *dst, __local uint4* temp,
int dst_rows, int dst_cols,
int dst_startX, int dst_x_off,
float alpha)
{
if(get_local_id(0) >= (THREADS-ksX+1))
{
return;
}
int posX = dst_startX - dst_x_off + get_local_id(0);
int posY = (get_group_id(1) << 1);
uint4 temp_sum = 0;
for(int i=-anX; i<=anX; i++)
{
temp_sum += temp[get_local_id(0) + anX + i];
}
if(posX >= 0 && posX < dst_cols && posY >= 0 && posY < dst_rows)
*dst = convert_uchar4(convert_float4(temp_sum)/alpha);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////8uC1////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
#
define
THREADS
256
#
define
ELEM
(
i,
l_edge,
r_edge,
elem1,
elem2
)
(
i
)
>=
(
l_edge
)
&&
(
i
)
<
(
r_edge
)
?
(
elem1
)
:
(
elem2
)
__kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global uchar *dst, float alpha,
int src_offset, int src_whole_rows, int src_whole_cols, int src_step,
int dst_offset, int dst_rows, int dst_cols, int dst_step
...
...
@@ -105,14 +163,19 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
int dst_startY = (gY << 1) + dst_y_off;
uint4 data[ksY+1];
__local
uint4
temp[
(
THREADS<<1
)
]
;
__local uint4 temp[
2][THREADS
];
#ifdef BORDER_CONSTANT
for(int i=0; i < ksY+1; i++)
{
if(startY+i >=0 && startY+i < src_whole_rows && startX+col*4 >=0 && startX+col*4+3<src_whole_cols)
data[i]
=
convert_uint4
(
vload4
(
col,
(
__global
uchar*
)(
src+
(
startY+i
)
*src_step
+
startX
)))
;
{
data[i].x = *(src+(startY+i)*src_step + startX + col * 4);
data[i].y = *(src+(startY+i)*src_step + startX + col * 4 + 1);
data[i].z = *(src+(startY+i)*src_step + startX + col * 4 + 2);
data[i].w = *(src+(startY+i)*src_step + startX + col * 4 + 3);
}
else
{
data[i]=0;
...
...
@@ -163,57 +226,21 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
}
}
#
endif
uint4
sum0
=
0
,
sum1
=
0
,
sum2
=
0
;
uint4
tmp_sum
=
0
;
for
(
int
i=1
; i < ksY; i++)
{
sum0
+=
(
data[i]
)
;
}
sum1
=
sum0
+
(
data[0]
)
;
sum2
=
sum0
+
(
data[ksY]
)
;
temp[col]
=
sum1
;
temp[col+THREADS]
=
sum2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
col
>=
anX
&&
col
<
(
THREADS-ksX+anX+1
))
{
int
posX
=
dst_startX
-
dst_x_off
+
(
col-anX
)
*4
;
int
posY
=
(
gY
<<
1
)
;
uint4
tmp_sum1=0,
tmp_sum2=0
;
for
(
int
i=-anX
; i<=anX; i++)
{
tmp_sum1
+=
vload4
(
col,
(
__local
uint*
)
temp+i
)
;
tmp_sum
+=
(
data[i]
)
;
}
for
(
int
i=-anX
; i<=anX; i++)
{
tmp_sum2
+=
vload4
(
col,
(
__local
uint*
)(
temp+THREADS
)
+i
)
;
}
int
index
=
dst_startY
*
dst_step
+
dst_startX
+
(
col-anX
)
*4
;
if
(
posY
<
dst_rows
&&
posX
<
dst_cols
)
{
if
(
posX
>=
0
&&
posX
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX
+
(
col-anX
)
*4
)
=
tmp_sum1.x/alpha
;
if
(
posX+1
>=
0
&&
posX+1
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+1
+
(
col-anX
)
*4
)
=
tmp_sum1.y/alpha
;
if
(
posX+2
>=
0
&&
posX+2
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+2
+
(
col-anX
)
*4
)
=
tmp_sum1.z/alpha
;
if
(
posX+3
>=
0
&&
posX+3
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+3
+
(
col-anX
)
*4
)
=
tmp_sum1.w/alpha
;
}
if
(
posY+1
<
dst_rows
&&
posX
<
dst_cols
)
{
dst_startY+=1
;
if
(
posX
>=
0
&&
posX
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX
+
(
col-anX
)
*4
)
=
tmp_sum2.x/alpha
;
if
(
posX+1
>=
0
&&
posX+1
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+1
+
(
col-anX
)
*4
)
=
tmp_sum2.y/alpha
;
if
(
posX+2
>=
0
&&
posX+2
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+2
+
(
col-anX
)
*4
)
=
tmp_sum2.z/alpha
;
if
(
posX+3
>=
0
&&
posX+3
<
dst_cols
)
*
(
dst+dst_startY
*
dst_step
+
dst_startX+3
+
(
col-anX
)
*4
)
=
tmp_sum2.w/alpha
;
}
}
temp[0][col]
=
tmp_sum
+
(
data[0]
)
;
temp[1][col]
=
tmp_sum
+
(
data[ksY]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
update_dst_C1_D0
(
dst+index,
(
__local
uint
*
)(
temp[0]
)
,
dst_rows,
dst_cols,
dst_startX,
dst_x_off,
alpha
)
;
update_dst_C1_D0
(
dst+index+dst_step,
(
__local
uint
*
)(
temp[1]
)
,
dst_rows,
dst_cols,
dst_startX,
dst_x_off,
alpha
)
;
}
...
...
@@ -238,26 +265,21 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
int
startY
=
(
gY
<<
1
)
-
anY
+
src_y_off
;
int
dst_startX
=
gX
*
(
THREADS-ksX+1
)
+
dst_x_off
;
int
dst_startY
=
(
gY
<<
1
)
+
dst_y_off
;
//int
end_addr
=
(
src_whole_rows-1
)
*
(
src_step>>2
)
+
src_whole_cols-4
;
int
end_addr
=
src_whole_cols-4
;
uint4
data[ksY+1]
;
__local
uint4
temp[2][THREADS]
;
#
ifdef
BORDER_CONSTANT
bool
con
;
uint4
ss
;
for
(
int
i=0
; i < ksY+1; i++)
{
con
=
startX+col
>=
0
&&
startX+col
<
src_whole_cols
&&
startY+i
>=
0
&&
startY+i
<
src_whole_rows
;
//int
cur_addr
=
clamp
((
startY+i
)
*
(
src_step>>2
)
+
(
startX+col
)
,
0
,
end_addr
)
;
//ss
=
convert_uint4
(
src[cur_addr]
)
;
int
cur_col
=
clamp
(
startX
+
col,
0
,
src_whole_cols
)
;
if
(
con
)
ss
=
convert_uint4
(
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col]
)
;
data[i]
=
con
?
ss
:
0
;
data[i].x
=
con
?
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col].x
:
0
;
data[i].y
=
con
?
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col].y
:
0
;
data[i].z
=
con
?
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col].z
:
0
;
data[i].w
=
con
?
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col].w
:
0
;
}
#
else
for
(
int
i=0
; i < ksY+1; i++)
...
...
@@ -275,35 +297,22 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch
}
#
endif
uint4
sum0
=
0
,
sum1
=
0
,
sum2
=
0
;
uint4
tmp_sum
=
0
;
for
(
int
i=1
; i < ksY; i++)
{
sum0
+=
(
data[i]
)
;
tmp_sum
+=
(
data[i]
)
;
}
sum1
=
sum0
+
(
data[0]
)
;
sum2
=
sum0
+
(
data[ksY]
)
;
temp[0][col]
=
sum1
;
temp[1][col]
=
sum2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
col
<
(
THREADS-
(
ksX-1
)))
{
col
+=
anX
;
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posY
=
(
gY
<<
1
)
;
uint4
tmp_sum[2]={
(
uint4
)(
0
,
0
,
0
,
0
)
,
(
uint4
)(
0
,
0
,
0
,
0
)
}
;
for
(
int
k=0
; k<2; k++)
for
(
int
i=-anX
; i<=anX; i++)
{
tmp_sum[k]
+=
temp[k][col+i]
;
}
for
(
int
i=0
; i<2; i++)
{
if
(
posX
>=
0
&&
posX
<
dst_cols
&&
(
posY+i
)
>=
0
&&
(
posY+i
)
<
dst_rows
)
dst[
(
dst_startY+i
)
*
(
dst_step>>2
)
+
dst_startX
+
col
-
anX]
=
convert_uchar4
(
convert_float4
(
tmp_sum[i]
)
/alpha
)
;
}
int
index
=
dst_startY
*
(
dst_step>>2
)
+
dst_startX
+
col
;
temp[0][col]
=
tmp_sum
+
(
data[0]
)
;
temp[1][col]
=
tmp_sum
+
(
data[ksY]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
update_dst_C4_D0
(
dst+index,
(
__local
uint4
*
)(
temp[0]
)
,
dst_rows,
dst_cols,
dst_startX,
dst_x_off,
alpha
)
;
update_dst_C4_D0
(
dst+index+
(
dst_step>>2
)
,
(
__local
uint4
*
)(
temp[1]
)
,
dst_rows,
dst_cols,
dst_startX,
dst_x_off,
alpha
)
;
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -327,7 +336,6 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
int
startY
=
(
gY
<<
1
)
-
anY
+
src_y_off
;
int
dst_startX
=
gX
*
(
THREADS-ksX+1
)
+
dst_x_off
;
int
dst_startY
=
(
gY
<<
1
)
+
dst_y_off
;
int
end_addr
=
(
src_whole_rows-1
)
*
(
src_step>>2
)
+
src_whole_cols-4
;
float
data[ksY+1]
;
__local
float
temp[2][THREADS]
;
#
ifdef
BORDER_CONSTANT
...
...
@@ -336,12 +344,9 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
for
(
int
i=0
; i < ksY+1; i++)
{
con
=
startX+col
>=
0
&&
startX+col
<
src_whole_cols
&&
startY+i
>=
0
&&
startY+i
<
src_whole_rows
;
//int
cur_addr
=
clamp
((
startY+i
)
*
(
src_step>>2
)
+
(
startX+col
)
,
0
,
end_addr
)
;
//ss
=
src[cur_addr]
;
int
cur_col
=
clamp
(
startX
+
col,
0
,
src_whole_cols
)
;
//ss
=
src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col]
;
ss
=
(
startY+i
)
<src_whole_rows&&
(
startY+i
)
>=0&&cur_col>=0&&cur_col<src_whole_cols?src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col]:0
;
ss
=
(
startY+i
)
<src_whole_rows&&
(
startY+i
)
>=0&&cur_col>=0&&cur_col<src_whole_cols?src[
(
startY+i
)
*
(
src_step>>2
)
+
cur_col]:
(
float
)
0
;
data[i]
=
con
?
ss
:
0.f
;
}
...
...
@@ -376,7 +381,7 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posY
=
(
gY
<<
1
)
;
float
tmp_sum[2]={0.0,
0.0}
;
float
tmp_sum[2]=
{0.0,
0.0}
;
for
(
int
k=0
; k<2; k++)
for
(
int
i=-anX
; i<=anX; i++)
{
...
...
@@ -412,7 +417,6 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
int
startY
=
(
gY
<<
1
)
-
anY
+
src_y_off
;
int
dst_startX
=
gX
*
(
THREADS-ksX+1
)
+
dst_x_off
;
int
dst_startY
=
(
gY
<<
1
)
+
dst_y_off
;
int
end_addr
=
(
src_whole_rows-1
)
*
(
src_step>>4
)
+
src_whole_cols-16
;
float4
data[ksY+1]
;
__local
float4
temp[2][THREADS]
;
#
ifdef
BORDER_CONSTANT
...
...
@@ -421,12 +425,9 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
for
(
int
i=0
; i < ksY+1; i++)
{
con
=
startX+col
>=
0
&&
startX+col
<
src_whole_cols
&&
startY+i
>=
0
&&
startY+i
<
src_whole_rows
;
//int
cur_addr
=
clamp
((
startY+i
)
*
(
src_step>>4
)
+
(
startX+col
)
,
0
,
end_addr
)
;
//ss
=
src[cur_addr]
;
int
cur_col
=
clamp
(
startX
+
col,
0
,
src_whole_cols
)
;
//ss
=
src[
(
startY+i
)
*
(
src_step>>4
)
+
cur_col]
;
ss
=
(
startY+i
)
<src_whole_rows&&
(
startY+i
)
>=0&&cur_col>=0&&cur_col<src_whole_cols?src[
(
startY+i
)
*
(
src_step>>4
)
+
cur_col]:0
;
ss
=
(
startY+i
)
<src_whole_rows&&
(
startY+i
)
>=0&&cur_col>=0&&cur_col<src_whole_cols?src[
(
startY+i
)
*
(
src_step>>4
)
+
cur_col]:
(
float4
)
0
;
data[i]
=
con
?
ss
:
(
float4
)(
0.0
,
0.0
,
0.0
,
0.0
)
;
}
...
...
@@ -461,7 +462,7 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa
int
posX
=
dst_startX
-
dst_x_off
+
col
-
anX
;
int
posY
=
(
gY
<<
1
)
;
float4
tmp_sum[2]={
(
float4
)(
0.0
,
0.0
,
0.0
,
0.0
)
,
(
float4
)(
0.0
,
0.0
,
0.0
,
0.0
)
}
;
float4
tmp_sum[2]=
{
(
float4
)(
0.0
,
0.0
,
0.0
,
0.0
)
,
(
float4
)(
0.0
,
0.0
,
0.0
,
0.0
)
}
;
for
(
int
k=0
; k<2; k++)
for
(
int
i=-anX
; i<=anX; i++)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment