Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6b9c4519
Commit
6b9c4519
authored
Sep 10, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added CV_16UC(1, 3, 4), CV_16SC(1, 3, 4) data types support in ocl::pyrUp and ocl::pyrDown
parent
747f7178
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
791 additions
and
138 deletions
+791
-138
pyr_down.cl
modules/ocl/src/opencl/pyr_down.cl
+534
-81
pyr_up.cl
modules/ocl/src/opencl/pyr_up.cl
+231
-28
pyrdown.cpp
modules/ocl/src/pyrdown.cpp
+3
-14
pyrup.cpp
modules/ocl/src/pyrup.cpp
+5
-0
test_pyramids.cpp
modules/ocl/test/test_pyramids.cpp
+18
-15
No files found.
modules/ocl/src/opencl/pyr_down.cl
View file @
6b9c4519
...
...
@@ -43,37 +43,6 @@
//
//M*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
uchar
round_uchar_int
(
int
v
)
{
return
(
uchar
)((
uint
)
v
<=
255
?
v
:
v
>
0
?
255
:
0
)
;
}
/*
 * Convert a float to uchar: first to int with convert_int_sat_rte
 * (saturating, round-to-nearest-even conversion), then clamp the
 * integer to [0, 255] via round_uchar_int.
 */
uchar round_uchar_float(float v)
{
    const int nearest = convert_int_sat_rte(v);
    return round_uchar_int(nearest);
}
/*
 * Saturate each lane of an int4 to the uchar range and pack the result
 * into a uchar4: lanes below 0 become 0, lanes above 255 become 255,
 * everything else is kept as is.
 *
 * The previous per-lane form compared with a signed `v.x <= 255`, so a
 * negative lane passed the test and wrapped around in the uchar cast
 * (e.g. -1 -> 255) instead of clamping to 0, unlike the scalar helper
 * round_uchar_int. The built-in saturating conversion clamps both ends.
 */
uchar4 round_uchar4_int4(int4 v)
{
    return convert_uchar4_sat(v);
}
/*
 * Vector counterpart of round_uchar_float: converts every lane of a
 * float4 to int with convert_int4_sat_rte (saturating, round-to-nearest-
 * even), then narrows the int4 to uchar4 through round_uchar4_int4.
 */
uchar4 round_uchar4_float4(float4 v)
{
    const int4 nearest = convert_int4_sat_rte(v);
    return round_uchar4_int4(nearest);
}
int
idx_row_low
(
int
y,
int
last_row
)
{
return
abs
(
y
)
%
(
last_row
+
1
)
;
...
...
@@ -104,6 +73,10 @@ int idx_col(int x, int last_col)
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC1
///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D0
(
__global
uchar
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
uchar
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -211,10 +184,14 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcRows,
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
+
dst_x]
=
round_uchar_float
(
sum
)
;
dst[y
*
dstStep
+
dst_x]
=
convert_uchar_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC4
///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D0
(
__global
uchar4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
uchar4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -228,16 +205,16 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
0.375f
;
//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4
co2
=
0.25f
;
//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4
co3
=
0.0625f
;
//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
))
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -247,9 +224,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
))
;
smem[get_local_id
(
0
)
]
=
sum
;
...
...
@@ -260,9 +237,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convert_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
))
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -273,9 +250,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -287,9 +264,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[get_local_id
(
0
)
]
=
sum
;
...
...
@@ -302,9 +279,9 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co1
*
convert_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co2
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
sum
=
sum
+
co3
*
convert_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
))
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -318,18 +295,490 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
4
+
dst_x]
=
round_uchar4_float4
(
sum
)
;
dst[y
*
dstStep
/
4
+
dst_x]
=
convert_uchar4_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
/*
 * pyrDown for single-channel 16-bit unsigned data (ushort).
 *
 * Each work-item filters one source column vertically with the 5-tap
 * weights 0.0625, 0.25, 0.375, 0.25, 0.0625 around source row 2*y and
 * stores the result in local memory. After the barrier, the first 128
 * work-items apply the same weights horizontally to every second column
 * and write one destination pixel each.
 *
 * srcStep and dstStep are used as byte strides (rows are addressed through
 * a char pointer; the destination index divides dstStep by 2).
 * NOTE(review): the halo handling hard-codes local ids 0..1, 254..255 and
 * a 256 + 4 element buffer, so a work-group width of 256 along dimension 0
 * is presumably required - confirm against the host launch code.
 */
__kernel void pyrDown_C1_D2(__global ushort * srcData, int srcStep, int srcRows, int srcCols, __global ushort *dst, int dstStep, int dstCols)
{
    const int lid = get_local_id(0);
    const int x = get_global_id(0);
    const int y = get_group_id(1);

    __local float smem[256 + 4];
    float sum;

    const int src_y = 2 * y;
    const int last_row = srcRows - 1;
    const int last_col = srcCols - 1;

    /* Interior pixels address the source directly; everything else is
       remapped through idx_row / idx_col. */
    const int interior = src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2;

    const int r0 = interior ? src_y - 2 : idx_row(src_y - 2, last_row);
    const int r1 = interior ? src_y - 1 : idx_row(src_y - 1, last_row);
    const int r2 = interior ? src_y     : idx_row(src_y, last_row);
    const int r3 = interior ? src_y + 1 : idx_row(src_y + 1, last_row);
    const int r4 = interior ? src_y + 2 : idx_row(src_y + 2, last_row);

    /* Row starts are located by byte offset. */
    __global char *base = (__global char *)srcData;
    __global ushort *row0 = (__global ushort *)(base + r0 * srcStep);
    __global ushort *row1 = (__global ushort *)(base + r1 * srcStep);
    __global ushort *row2 = (__global ushort *)(base + r2 * srcStep);
    __global ushort *row3 = (__global ushort *)(base + r3 * srcStep);
    __global ushort *row4 = (__global ushort *)(base + r4 * srcStep);

    /* Vertical pass for this work-item's own column. */
    int col = interior ? x : idx_col(x, last_col);

    sum = 0.0625f * row0[col];
    sum = sum + 0.25f   * row1[col];
    sum = sum + 0.375f  * row2[col];
    sum = sum + 0.25f   * row3[col];
    sum = sum + 0.0625f * row4[col];

    smem[2 + lid] = sum;

    /* The first two work-items also produce the two-column left halo. */
    if (lid < 2)
    {
        const int left_x = x - 2;
        col = interior ? left_x : idx_col(left_x, last_col);

        sum = 0.0625f * row0[col];
        sum = sum + 0.25f   * row1[col];
        sum = sum + 0.375f  * row2[col];
        sum = sum + 0.25f   * row3[col];
        sum = sum + 0.0625f * row4[col];

        smem[lid] = sum;
    }

    /* The last two work-items also produce the two-column right halo. */
    if (lid > 253)
    {
        const int right_x = x + 2;
        col = interior ? right_x : idx_col(right_x, last_col);

        sum = 0.0625f * row0[col];
        sum = sum + 0.25f   * row1[col];
        sum = sum + 0.375f  * row2[col];
        sum = sum + 0.25f   * row3[col];
        sum = sum + 0.0625f * row4[col];

        smem[4 + lid] = sum;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Horizontal pass on every second column. */
    if (lid < 128)
    {
        const int tid2 = lid * 2;

        sum = 0.0625f * smem[tid2];
        sum = sum + 0.25f   * smem[tid2 + 1];
        sum = sum + 0.375f  * smem[tid2 + 2];
        sum = sum + 0.25f   * smem[tid2 + 3];
        sum = sum + 0.0625f * smem[tid2 + 4];

        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;

        if (dst_x < dstCols)
            dst[y * dstStep / 2 + dst_x] = convert_ushort_sat_rte(sum);
    }
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
/*
 * pyrDown for four-channel 16-bit unsigned data (ushort4).
 *
 * Each work-item filters one source column vertically with the 5-tap
 * weights 0.0625, 0.25, 0.375, 0.25, 0.0625 around source row 2*y
 * (all four channels at once, in float4) and stores the result in local
 * memory. After the barrier, the first 128 work-items apply the same
 * weights horizontally to every second column and write one destination
 * pixel each.
 *
 * Rows are located by stepping a char4 pointer by `row * srcStep / 4`
 * elements; the destination index divides dstStep by 8.
 * NOTE(review): the halo handling hard-codes local ids 0..1, 254..255 and
 * a 256 + 4 element buffer, so a work-group width of 256 along dimension 0
 * is presumably required - confirm against the host launch code.
 */
__kernel void pyrDown_C4_D2(__global ushort4 * srcData, int srcStep, int srcRows, int srcCols, __global ushort4 *dst, int dstStep, int dstCols)
{
    const int lid = get_local_id(0);
    const int x = get_global_id(0);
    const int y = get_group_id(1);

    __local float4 smem[256 + 4];
    float4 sum;

    const int src_y = 2 * y;
    const int last_row = srcRows - 1;
    const int last_col = srcCols - 1;

    /* Filter weights, replicated across the four lanes. */
    float4 co1 = 0.375f;
    float4 co2 = 0.25f;
    float4 co3 = 0.0625f;

    /* Interior pixels address the source directly; everything else is
       remapped through idx_row / idx_col. */
    const int interior = src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2;

    const int r0 = interior ? src_y - 2 : idx_row(src_y - 2, last_row);
    const int r1 = interior ? src_y - 1 : idx_row(src_y - 1, last_row);
    const int r2 = interior ? src_y     : idx_row(src_y, last_row);
    const int r3 = interior ? src_y + 1 : idx_row(src_y + 1, last_row);
    const int r4 = interior ? src_y + 2 : idx_row(src_y + 2, last_row);

    /* Row starts: char4 elements are 4 bytes wide, hence the "/ 4". */
    __global char4 *base = (__global char4 *)srcData;
    __global ushort4 *row0 = (__global ushort4 *)(base + r0 * srcStep / 4);
    __global ushort4 *row1 = (__global ushort4 *)(base + r1 * srcStep / 4);
    __global ushort4 *row2 = (__global ushort4 *)(base + r2 * srcStep / 4);
    __global ushort4 *row3 = (__global ushort4 *)(base + r3 * srcStep / 4);
    __global ushort4 *row4 = (__global ushort4 *)(base + r4 * srcStep / 4);

    /* Vertical pass for this work-item's own column. */
    int col = interior ? x : idx_col(x, last_col);

    sum = co3 * convert_float4(row0[col]);
    sum = sum + co2 * convert_float4(row1[col]);
    sum = sum + co1 * convert_float4(row2[col]);
    sum = sum + co2 * convert_float4(row3[col]);
    sum = sum + co3 * convert_float4(row4[col]);

    smem[2 + lid] = sum;

    /* The first two work-items also produce the two-column left halo. */
    if (lid < 2)
    {
        const int left_x = x - 2;
        col = interior ? left_x : idx_col(left_x, last_col);

        sum = co3 * convert_float4(row0[col]);
        sum = sum + co2 * convert_float4(row1[col]);
        sum = sum + co1 * convert_float4(row2[col]);
        sum = sum + co2 * convert_float4(row3[col]);
        sum = sum + co3 * convert_float4(row4[col]);

        smem[lid] = sum;
    }

    /* The last two work-items also produce the two-column right halo. */
    if (lid > 253)
    {
        const int right_x = x + 2;
        col = interior ? right_x : idx_col(right_x, last_col);

        sum = co3 * convert_float4(row0[col]);
        sum = sum + co2 * convert_float4(row1[col]);
        sum = sum + co1 * convert_float4(row2[col]);
        sum = sum + co2 * convert_float4(row3[col]);
        sum = sum + co3 * convert_float4(row4[col]);

        smem[4 + lid] = sum;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Horizontal pass on every second column. */
    if (lid < 128)
    {
        const int tid2 = lid * 2;

        sum = co3 * smem[tid2];
        sum = sum + co2 * smem[tid2 + 1];
        sum = sum + co1 * smem[tid2 + 2];
        sum = sum + co2 * smem[tid2 + 3];
        sum = sum + co3 * smem[tid2 + 4];

        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;

        if (dst_x < dstCols)
            dst[y * dstStep / 8 + dst_x] = convert_ushort4_sat_rte(sum);
    }
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
/*
 * pyrDown for single-channel 16-bit signed data (short).
 *
 * Each work-item filters one source column vertically with the 5-tap
 * weights 0.0625, 0.25, 0.375, 0.25, 0.0625 around source row 2*y and
 * stores the result in local memory. After the barrier, the first 128
 * work-items apply the same weights horizontally to every second column
 * and write one destination pixel each.
 *
 * srcStep and dstStep are used as byte strides (rows are addressed through
 * a char pointer; the destination index divides dstStep by 2).
 * NOTE(review): the halo handling hard-codes local ids 0..1, 254..255 and
 * a 256 + 4 element buffer, so a work-group width of 256 along dimension 0
 * is presumably required - confirm against the host launch code.
 */
__kernel void pyrDown_C1_D3(__global short * srcData, int srcStep, int srcRows, int srcCols, __global short *dst, int dstStep, int dstCols)
{
    const int lid = get_local_id(0);
    const int x = get_global_id(0);
    const int y = get_group_id(1);

    __local float smem[256 + 4];
    float sum;

    const int src_y = 2 * y;
    const int last_row = srcRows - 1;
    const int last_col = srcCols - 1;

    /* Interior pixels address the source directly; everything else is
       remapped through idx_row / idx_col. */
    const int interior = src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2;

    const int r0 = interior ? src_y - 2 : idx_row(src_y - 2, last_row);
    const int r1 = interior ? src_y - 1 : idx_row(src_y - 1, last_row);
    const int r2 = interior ? src_y     : idx_row(src_y, last_row);
    const int r3 = interior ? src_y + 1 : idx_row(src_y + 1, last_row);
    const int r4 = interior ? src_y + 2 : idx_row(src_y + 2, last_row);

    /* Row starts are located by byte offset. */
    __global char *base = (__global char *)srcData;
    __global short *row0 = (__global short *)(base + r0 * srcStep);
    __global short *row1 = (__global short *)(base + r1 * srcStep);
    __global short *row2 = (__global short *)(base + r2 * srcStep);
    __global short *row3 = (__global short *)(base + r3 * srcStep);
    __global short *row4 = (__global short *)(base + r4 * srcStep);

    /* Vertical pass for this work-item's own column. */
    int col = interior ? x : idx_col(x, last_col);

    sum = 0.0625f * row0[col];
    sum = sum + 0.25f   * row1[col];
    sum = sum + 0.375f  * row2[col];
    sum = sum + 0.25f   * row3[col];
    sum = sum + 0.0625f * row4[col];

    smem[2 + lid] = sum;

    /* The first two work-items also produce the two-column left halo. */
    if (lid < 2)
    {
        const int left_x = x - 2;
        col = interior ? left_x : idx_col(left_x, last_col);

        sum = 0.0625f * row0[col];
        sum = sum + 0.25f   * row1[col];
        sum = sum + 0.375f  * row2[col];
        sum = sum + 0.25f   * row3[col];
        sum = sum + 0.0625f * row4[col];

        smem[lid] = sum;
    }

    /* The last two work-items also produce the two-column right halo. */
    if (lid > 253)
    {
        const int right_x = x + 2;
        col = interior ? right_x : idx_col(right_x, last_col);

        sum = 0.0625f * row0[col];
        sum = sum + 0.25f   * row1[col];
        sum = sum + 0.375f  * row2[col];
        sum = sum + 0.25f   * row3[col];
        sum = sum + 0.0625f * row4[col];

        smem[4 + lid] = sum;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Horizontal pass on every second column. */
    if (lid < 128)
    {
        const int tid2 = lid * 2;

        sum = 0.0625f * smem[tid2];
        sum = sum + 0.25f   * smem[tid2 + 1];
        sum = sum + 0.375f  * smem[tid2 + 2];
        sum = sum + 0.25f   * smem[tid2 + 3];
        sum = sum + 0.0625f * smem[tid2 + 4];

        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;

        if (dst_x < dstCols)
            dst[y * dstStep / 2 + dst_x] = convert_short_sat_rte(sum);
    }
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
/*
 * pyrDown for four-channel 16-bit signed data (short4).
 *
 * Each work-item filters one source column vertically with the 5-tap
 * weights 0.0625, 0.25, 0.375, 0.25, 0.0625 around source row 2*y
 * (all four channels at once, in float4) and stores the result in local
 * memory. After the barrier, the first 128 work-items apply the same
 * weights horizontally to every second column and write one destination
 * pixel each.
 *
 * Rows are located by stepping a char4 pointer by `row * srcStep / 4`
 * elements; the destination index divides dstStep by 8.
 * NOTE(review): the halo handling hard-codes local ids 0..1, 254..255 and
 * a 256 + 4 element buffer, so a work-group width of 256 along dimension 0
 * is presumably required - confirm against the host launch code.
 */
__kernel void pyrDown_C4_D3(__global short4 * srcData, int srcStep, int srcRows, int srcCols, __global short4 *dst, int dstStep, int dstCols)
{
    const int lid = get_local_id(0);
    const int x = get_global_id(0);
    const int y = get_group_id(1);

    __local float4 smem[256 + 4];
    float4 sum;

    const int src_y = 2 * y;
    const int last_row = srcRows - 1;
    const int last_col = srcCols - 1;

    /* Filter weights, replicated across the four lanes. */
    float4 co1 = 0.375f;
    float4 co2 = 0.25f;
    float4 co3 = 0.0625f;

    /* Interior pixels address the source directly; everything else is
       remapped through idx_row / idx_col. */
    const int interior = src_y >= 2 && src_y < srcRows - 2 && x >= 2 && x < srcCols - 2;

    const int r0 = interior ? src_y - 2 : idx_row(src_y - 2, last_row);
    const int r1 = interior ? src_y - 1 : idx_row(src_y - 1, last_row);
    const int r2 = interior ? src_y     : idx_row(src_y, last_row);
    const int r3 = interior ? src_y + 1 : idx_row(src_y + 1, last_row);
    const int r4 = interior ? src_y + 2 : idx_row(src_y + 2, last_row);

    /* Row starts: char4 elements are 4 bytes wide, hence the "/ 4". */
    __global char4 *base = (__global char4 *)srcData;
    __global short4 *row0 = (__global short4 *)(base + r0 * srcStep / 4);
    __global short4 *row1 = (__global short4 *)(base + r1 * srcStep / 4);
    __global short4 *row2 = (__global short4 *)(base + r2 * srcStep / 4);
    __global short4 *row3 = (__global short4 *)(base + r3 * srcStep / 4);
    __global short4 *row4 = (__global short4 *)(base + r4 * srcStep / 4);

    /* Vertical pass for this work-item's own column. */
    int col = interior ? x : idx_col(x, last_col);

    sum = co3 * convert_float4(row0[col]);
    sum = sum + co2 * convert_float4(row1[col]);
    sum = sum + co1 * convert_float4(row2[col]);
    sum = sum + co2 * convert_float4(row3[col]);
    sum = sum + co3 * convert_float4(row4[col]);

    smem[2 + lid] = sum;

    /* The first two work-items also produce the two-column left halo. */
    if (lid < 2)
    {
        const int left_x = x - 2;
        col = interior ? left_x : idx_col(left_x, last_col);

        sum = co3 * convert_float4(row0[col]);
        sum = sum + co2 * convert_float4(row1[col]);
        sum = sum + co1 * convert_float4(row2[col]);
        sum = sum + co2 * convert_float4(row3[col]);
        sum = sum + co3 * convert_float4(row4[col]);

        smem[lid] = sum;
    }

    /* The last two work-items also produce the two-column right halo. */
    if (lid > 253)
    {
        const int right_x = x + 2;
        col = interior ? right_x : idx_col(right_x, last_col);

        sum = co3 * convert_float4(row0[col]);
        sum = sum + co2 * convert_float4(row1[col]);
        sum = sum + co1 * convert_float4(row2[col]);
        sum = sum + co2 * convert_float4(row3[col]);
        sum = sum + co3 * convert_float4(row4[col]);

        smem[4 + lid] = sum;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Horizontal pass on every second column. */
    if (lid < 128)
    {
        const int tid2 = lid * 2;

        sum = co3 * smem[tid2];
        sum = sum + co2 * smem[tid2 + 1];
        sum = sum + co1 * smem[tid2 + 2];
        sum = sum + co2 * smem[tid2 + 3];
        sum = sum + co3 * smem[tid2 + 4];

        const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;

        if (dst_x < dstCols)
            dst[y * dstStep / 8 + dst_x] = convert_short4_sat_rte(sum);
    }
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D5
(
__global
float
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
float
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -441,6 +890,10 @@ __kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcRows,
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D5
(
__global
float4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
float4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -454,16 +907,16 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
0.375f
;
//(float4)(0.375f, 0.375f, 0.375f, 0.375f);
float4
co2
=
0.25f
;
//(float4)(0.25f, 0.25f, 0.25f, 0.25f);
float4
co3
=
0.0625f
;
//(float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f);
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -473,9 +926,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
const
int
left_x
=
x
-
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
...
...
@@ -486,9 +939,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
const
int
right_x
=
x
+
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -499,9 +952,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -513,9 +966,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
...
...
@@ -528,9 +981,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -544,9 +997,9 @@ __kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcRows,
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
...
...
modules/ocl/src/opencl/pyr_up.cl
View file @
6b9c4519
...
...
@@ -46,18 +46,18 @@
//
//M*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
// Converts a float sample to uchar, clamping it to the representable range.
//   data <= 0 or NaN -> 0
//   data >= 255      -> 255
//   otherwise        -> data truncated toward zero by the cast
// The previous ternary handed every value <= 255 (negatives included) to the
// cast unchanged, so negative inputs were never clamped to 0.
uchar get_valid_uchar(float data)
{
    // Negated comparison on purpose: every ordered comparison with NaN is
    // false, so NaN lands here together with zero and negative values.
    if (!(data > 0.0f))
        return 0;

    if (data >= 255.0f)
        return 255;

    return (uchar)data;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D0
(
__global
uchar*
src,__global
uchar*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C1_D0
(
__global
uchar*
src,
__global
uchar*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -144,15 +144,15 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst,
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_uchar_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC1
/////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D2
(
__global
ushort*
src,__global
ushort*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C1_D2
(
__global
ushort*
src,
__global
ushort*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -245,16 +245,116 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst,
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_short_sat_rte
(
4.0f
*
sum
)
;
dst[x
+
y
*
dstStep]
=
convert_ushort_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC1
/////////////////////////////////
///////////////////////////////////////////////////////////////////////
// pyrUp for single-channel signed 16-bit data.
// Each work-item produces one dst pixel:
//   1. a 10x10 patch of src is staged in local memory as float, with source
//      coordinates mirrored through abs() and clamped to the last row/column;
//   2. a horizontal 5-tap [1 4 6 4 1]/16 pass fills s_dstPatch (only even
//      local rows receive data, odd rows stay zero);
//   3. a vertical pass with the same taps is applied, scaled by 4 and stored
//      with saturation to short.
// NOTE(review): the indexing (rows 14/15 writing s_dstPatch rows 18/19)
// suggests a 16x16 work-group - confirm against the host launch code.
// NOTE(review): srcOffset/dstOffset are rescaled below but never added to
// any src/dst index in this kernel.
__kernel void pyrUp_C1_D3(__global short* src, __global short* dst,
                          int srcRows, int dstRows, int srcCols, int dstCols,
                          int srcOffset, int dstOffset, int srcStep, int dstStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);

    __local float s_srcPatch[10][10];
    __local float s_dstPatch[20][16];

    // Halve steps/offsets (presumably bytes -> short elements; confirm with caller).
    srcStep >>= 1;
    dstStep >>= 1;
    srcOffset >>= 1;
    dstOffset >>= 1;

    // Stage the source patch; only the first 10x10 work-items take part.
    if (lx < 10 && ly < 10)
    {
        const int rawX = (int)(get_group_id(0) * get_local_size(0) / 2 + lx) - 1;
        const int rawY = (int)(get_group_id(1) * get_local_size(1) / 2 + ly) - 1;

        // Mirror negative coordinates, then clamp to the last valid index.
        const int mirroredX = abs(rawX);
        const int mirroredY = abs(rawY);
        const int srcx = min(srcCols - 1, mirroredX);
        const int srcy = min(srcRows - 1, mirroredY);

        s_srcPatch[ly][lx] = (float)(src[srcx + srcy * srcStep]);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // 0/1 selectors: even columns use the outer/centre taps, odd columns the inner taps.
    const int evenFlag = (int)((lx & 1) == 0);
    const int oddFlag  = (int)((lx & 1) != 0);
    const bool rowIsEven = ((ly & 1) == 0);

    const float wOuter  = evenFlag * 0.0625f;
    const float wInner  = oddFlag  * 0.25f;
    const float wCenter = evenFlag * 0.375f;

    // Source-patch columns touched by the five horizontal taps.
    const int colM2 = 1 + ((lx - 2) >> 1);
    const int colM1 = 1 + ((lx - 1) >> 1);
    const int col0  = 1 + ((lx    ) >> 1);
    const int colP1 = 1 + ((lx + 1) >> 1);
    const int colP2 = 1 + ((lx + 2) >> 1);

    // Horizontal pass for the row owned by this work-item.
    float acc = 0;
    if (rowIsEven)
    {
        const int patchRow = 1 + (ly >> 1);

        acc += wOuter  * s_srcPatch[patchRow][colM2];
        acc += wInner  * s_srcPatch[patchRow][colM1];
        acc += wCenter * s_srcPatch[patchRow][col0];
        acc += wInner  * s_srcPatch[patchRow][colP1];
        acc += wOuter  * s_srcPatch[patchRow][colP2];
    }
    s_dstPatch[2 + ly][lx] = acc;

    // Top halo: local rows 0..1 additionally fill s_dstPatch rows 0..1 from patch row 0.
    if (ly < 2)
    {
        acc = 0;
        if (rowIsEven)
        {
            acc += wOuter  * s_srcPatch[0][colM2];
            acc += wInner  * s_srcPatch[0][colM1];
            acc += wCenter * s_srcPatch[0][col0];
            acc += wInner  * s_srcPatch[0][colP1];
            acc += wOuter  * s_srcPatch[0][colP2];
        }
        s_dstPatch[ly][lx] = acc;
    }

    // Bottom halo: local rows above 13 fill s_dstPatch rows 4 + ly from patch row 9.
    if (ly > 13)
    {
        acc = 0;
        if (rowIsEven)
        {
            acc += wOuter  * s_srcPatch[9][colM2];
            acc += wInner  * s_srcPatch[9][colM1];
            acc += wCenter * s_srcPatch[9][col0];
            acc += wInner  * s_srcPatch[9][colP1];
            acc += wOuter  * s_srcPatch[9][colP2];
        }
        s_dstPatch[4 + ly][lx] = acc;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Vertical pass over five consecutive s_dstPatch rows starting at ly.
    float result = 0;
    result += 0.0625f * s_dstPatch[ly    ][lx];
    result += 0.25f   * s_dstPatch[ly + 1][lx];
    result += 0.375f  * s_dstPatch[ly + 2][lx];
    result += 0.25f   * s_dstPatch[ly + 3][lx];
    result += 0.0625f * s_dstPatch[ly + 4][lx];

    if (x < dstCols && y < dstRows)
        dst[x + y * dstStep] = convert_short_sat_rte(4.0f * result);
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC1
/////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D5
(
__global
float*
src,__global
float*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C1_D5
(
__global
float*
src,
__global
float*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -346,15 +446,15 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst,
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
(
float
)(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D0
(
__global
uchar4*
src,__global
uchar4*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C4_D0
(
__global
uchar4*
src,
__global
uchar4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -451,17 +551,16 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst,
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
{
dst[x
+
y
*
dstStep]
=
convert_uchar4_sat_rte
(
4.0f
*
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D2
(
__global
ushort4*
src,__global
ushort4*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C4_D2
(
__global
ushort4*
src,
__global
ushort4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -560,17 +659,123 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst,
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
{
dst[x
+
y
*
dstStep]
=
convert_ushort4_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
// pyrUp for four-channel signed 16-bit data (short4 pixels).
// Each work-item produces one dst pixel:
//   1. a 10x10 patch of src is staged in local memory as float4, with source
//      coordinates mirrored through abs() and clamped to the last row/column;
//   2. a horizontal 5-tap [1 4 6 4 1]/16 pass fills s_dstPatch (only even
//      local rows receive data, odd rows stay zero);
//   3. a vertical pass with the same taps is applied, scaled by 4 and stored
//      with saturation to short4.
// NOTE(review): the indexing (rows 14/15 writing s_dstPatch rows 18/19)
// suggests a 16x16 work-group - confirm against the host launch code.
// NOTE(review): srcOffset/dstOffset are rescaled below but never added to
// any src/dst index in this kernel.
__kernel void pyrUp_C4_D3(__global short4* src, __global short4* dst,
                          int srcRows, int dstRows, int srcCols, int dstCols,
                          int srcOffset, int dstOffset, int srcStep, int dstStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);

    __local float4 s_srcPatch[10][10];
    __local float4 s_dstPatch[20][16];

    // Divide steps/offsets by 8 (presumably bytes -> short4 elements; confirm with caller).
    srcOffset >>= 3;
    dstOffset >>= 3;
    srcStep >>= 3;
    dstStep >>= 3;

    // Stage the source patch; only the first 10x10 work-items take part.
    if (lx < 10 && ly < 10)
    {
        const int rawX = (int)(get_group_id(0) * get_local_size(0) / 2 + lx) - 1;
        const int rawY = (int)(get_group_id(1) * get_local_size(1) / 2 + ly) - 1;

        // Mirror negative coordinates, then clamp to the last valid index.
        const int mirroredX = abs(rawX);
        const int mirroredY = abs(rawY);
        const int srcx = min(srcCols - 1, mirroredX);
        const int srcy = min(srcRows - 1, mirroredY);

        s_srcPatch[ly][lx] = convert_float4(src[srcx + srcy * srcStep]);
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // 0/1 selectors broadcast to all four lanes.
    const float4 evenFlag = (float4)((lx & 1) == 0);
    const float4 oddFlag  = (float4)((lx & 1) != 0);
    const bool rowIsEven = ((ly & 1) == 0);

    const float4 co1 = (float4)(0.375f);
    const float4 co2 = (float4)(0.25f);
    const float4 co3 = (float4)(0.0625f);

    // Even columns use the outer/centre taps, odd columns the inner taps.
    const float4 wOuter  = evenFlag * co3;
    const float4 wInner  = oddFlag  * co2;
    const float4 wCenter = evenFlag * co1;

    // Source-patch columns touched by the five horizontal taps.
    const int colM2 = 1 + ((lx - 2) >> 1);
    const int colM1 = 1 + ((lx - 1) >> 1);
    const int col0  = 1 + ((lx    ) >> 1);
    const int colP1 = 1 + ((lx + 1) >> 1);
    const int colP2 = 1 + ((lx + 2) >> 1);

    // Horizontal pass for the row owned by this work-item.
    float4 acc = (float4)(0.0f);
    if (rowIsEven)
    {
        const int patchRow = 1 + (ly >> 1);

        acc += wOuter  * s_srcPatch[patchRow][colM2];
        acc += wInner  * s_srcPatch[patchRow][colM1];
        acc += wCenter * s_srcPatch[patchRow][col0];
        acc += wInner  * s_srcPatch[patchRow][colP1];
        acc += wOuter  * s_srcPatch[patchRow][colP2];
    }
    s_dstPatch[2 + ly][lx] = acc;

    // Top halo: local rows 0..1 additionally fill s_dstPatch rows 0..1 from patch row 0.
    if (ly < 2)
    {
        acc = (float4)(0.0f);
        if (rowIsEven)
        {
            acc += wOuter  * s_srcPatch[0][colM2];
            acc += wInner  * s_srcPatch[0][colM1];
            acc += wCenter * s_srcPatch[0][col0];
            acc += wInner  * s_srcPatch[0][colP1];
            acc += wOuter  * s_srcPatch[0][colP2];
        }
        s_dstPatch[ly][lx] = acc;
    }

    // Bottom halo: local rows above 13 fill s_dstPatch rows 4 + ly from patch row 9.
    if (ly > 13)
    {
        acc = (float4)(0.0f);
        if (rowIsEven)
        {
            acc += wOuter  * s_srcPatch[9][colM2];
            acc += wInner  * s_srcPatch[9][colM1];
            acc += wCenter * s_srcPatch[9][col0];
            acc += wInner  * s_srcPatch[9][colP1];
            acc += wOuter  * s_srcPatch[9][colP2];
        }
        s_dstPatch[4 + ly][lx] = acc;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    // Vertical pass over five consecutive s_dstPatch rows starting at ly.
    float4 result = (float4)(0.0f);
    result += co3 * s_dstPatch[ly    ][lx];
    result += co2 * s_dstPatch[ly + 1][lx];
    result += co1 * s_dstPatch[ly + 2][lx];
    result += co2 * s_dstPatch[ly + 3][lx];
    result += co3 * s_dstPatch[ly + 4][lx];

    if (x < dstCols && y < dstRows)
        dst[x + y * dstStep] = convert_short4_sat_rte(4.0f * result);
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D5
(
__global
float4*
src,__global
float4*
dst,
int
srcRows,int
dstRows,int
srcCols,int
dstCols,
int
srcOffset,int
dstOffset,int
srcStep,int
dstStep
)
__kernel
void
pyrUp_C4_D5
(
__global
float4*
src,
__global
float4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -667,7 +872,5 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst,
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
{
dst[x
+
y
*
dstStep]
=
4.0f
*
sum
;
}
}
modules/ocl/src/pyrdown.cpp
View file @
6b9c4519
...
...
@@ -73,24 +73,11 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
CV_Assert
(
src
.
depth
()
!=
CV_8S
);
Context
*
clCxt
=
src
.
clCxt
;
//int channels = dst.channels();
//int depth = dst.depth();
string
kernelName
=
"pyrDown"
;
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1},
// {4, 0, 4, 4, 1, 1, 1}
//};
//size_t vector_length = vector_lengths[channels-1][depth];
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
src
.
cols
,
dst
.
rows
,
1
};
//int dst_step1 = dst.cols * dst.elemSize();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
...
...
@@ -107,7 +94,9 @@ static void pyrdown_run(const oclMat &src, const oclMat &dst)
void
cv
::
ocl
::
pyrDown
(
const
oclMat
&
src
,
oclMat
&
dst
)
{
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
int
depth
=
src
.
depth
(),
channels
=
src
.
channels
();
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_16S
||
depth
==
CV_32F
);
CV_Assert
(
channels
==
1
||
channels
==
3
||
channels
==
4
);
dst
.
create
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
,
src
.
type
());
...
...
modules/ocl/src/pyrup.cpp
View file @
6b9c4519
...
...
@@ -61,6 +61,11 @@ namespace cv
extern
const
char
*
pyr_up
;
void
pyrUp
(
const
cv
::
ocl
::
oclMat
&
src
,
cv
::
ocl
::
oclMat
&
dst
)
{
int
depth
=
src
.
depth
(),
channels
=
src
.
channels
();
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_16S
||
depth
==
CV_32F
);
CV_Assert
(
channels
==
1
||
channels
==
3
||
channels
==
4
);
dst
.
create
(
src
.
rows
*
2
,
src
.
cols
*
2
,
src
.
type
());
Context
*
clCxt
=
src
.
clCxt
;
...
...
modules/ocl/test/test_pyramids.cpp
View file @
6b9c4519
...
...
@@ -57,60 +57,63 @@ using namespace std;
PARAM_TEST_CASE
(
PyrBase
,
MatType
,
int
)
{
int
type
;
int
depth
;
int
channels
;
Mat
dst_cpu
;
oclMat
gdst
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
depth
=
GET_PARAM
(
0
);
channels
=
GET_PARAM
(
1
);
}
};
/////////////////////// PyrDown //////////////////////////
struct
PyrDown
:
PyrBase
{};
typedef
PyrBase
PyrDown
;
TEST_P
(
PyrDown
,
Mat
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
Size
size
(
MWIDTH
,
MHEIGHT
);
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
type
,
channels
));
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
depth
,
channels
));
oclMat
gsrc
(
src
);
pyrDown
(
src
,
dst_cpu
);
pyrDown
(
gsrc
,
gdst
);
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
type
==
CV_32F
?
1e-4
f
:
1.0
f
);
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
depth
==
CV_32F
?
1e-4
f
:
1.0
f
);
}
}
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
PyrDown
,
Combine
(
Values
(
CV_8U
,
CV_32F
),
Values
(
1
,
3
,
4
)));
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
1
,
3
,
4
)));
/////////////////////// PyrUp //////////////////////////
struct
PyrUp
:
PyrBase
{}
;
typedef
PyrBase
PyrUp
;
TEST_P
(
PyrUp
,
Accuracy
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
Size
size
(
MWIDTH
,
MHEIGHT
);
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
type
,
channels
));
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
depth
,
channels
));
oclMat
gsrc
(
src
);
pyrUp
(
src
,
dst_cpu
);
pyrUp
(
gsrc
,
gdst
);
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
(
type
==
CV_32F
?
1e-4
f
:
1.0
));
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
(
depth
==
CV_32F
?
1e-4
f
:
1.0
));
}
}
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
PyrUp
,
testing
::
Combine
(
Values
(
CV_8U
,
CV_32F
),
Values
(
1
,
3
,
4
)));
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
PyrUp
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
1
,
3
,
4
)));
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment