Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
febe5283
Commit
febe5283
authored
Dec 16, 2013
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added ocl::pyrUp to T-API
parent
ac230cd2
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
325 additions
and
1026 deletions
+325
-1026
pyr_down.cl
modules/imgproc/src/opencl/pyr_down.cl
+44
-977
pyr_up.cl
modules/imgproc/src/opencl/pyr_up.cl
+159
-0
pyramids.cpp
modules/imgproc/src/pyramids.cpp
+65
-17
test_pyramids.cpp
modules/imgproc/test/ocl/test_pyramids.cpp
+57
-32
No files found.
modules/imgproc/src/opencl/pyr_down.cl
View file @
febe5283
...
...
@@ -43,10 +43,12 @@
//
//M*/
#
if
cn
==
4
#
define
FT
float4
#
else
#
define
FT
float
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
noconvert
...
...
@@ -81,266 +83,32 @@ inline int idx_col(int x, int last_col)
return
idx_col_low
(
idx_col_high
(
x,
last_col
)
,
last_col
)
;
}
__kernel
void
pyrDown
(
__global
const
uchar
*
src
Data
,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
pyrDown
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
FT
smem[256
+
4]
;
__global
T
*
dstData
=
(
__global
T
*
)(
dst
+
dst_offset
)
;
__global
const
uchar
*
srcData
=
(
__global
const
uchar*
)(
src
+
src_offset
)
;
FT
sum
;
FT
co1
=
0.375f
;
FT
co2
=
0.25f
;
FT
co3
=
0.0625f
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
srcStep
/
cn
))
[x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
srcStep
/
cn
))
[x]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
srcStep
/
cn
))
[x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
srcStep
/
cn
))
[x]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
srcStep
/
cn
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
srcStep
/
cn
))
[left_x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
srcStep
/
cn
))
[left_x]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
srcStep
/
cn
))
[left_x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
srcStep
/
cn
))
[left_x]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
srcStep
/
cn
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
srcStep
/
cn
))
[right_x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
srcStep
/
cn
))
[right_x]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
srcStep
/
cn
))
[right_x]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
srcStep
/
cn
))
[right_x]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
srcStep
/
cn
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co1
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co2
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
sum
=
sum
+
co3
*
convertToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
cn
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
+
dst_x]
=
convertToT
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC1
///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D0
(
__global
uchar
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
uchar
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
float
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
(((
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
(((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.375f
*
(((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.25f
*
(((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
)
;
sum
=
sum
+
0.0625f
*
(((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
+
dst_x]
=
convert_uchar_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC4
///////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D0
(
__global
uchar4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
uchar4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
float4
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
const
int
last_row
=
src_rows
-
1
;
const
int
last_col
=
src_cols
-
1
;
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
if
(
src_y
>=
2
&&
src_y
<
src_rows
-
2
&&
x
>=
2
&&
x
<
src_cols
-
2
)
{
sum
=
co3
*
convert
_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
src_step
))
[x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
src_step
))
[x]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
src_step
))
[x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
src_step
))
[x]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
src_step
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -348,11 +116,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convert
_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
src_step
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
src_step
))
[left_x]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
src_step
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
src_step
))
[left_x]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
src_step
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
...
...
@@ -361,11 +129,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convert
_float4
((((
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
2
)
*
src_step
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
-
1
)
*
src_step
))
[right_x]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
)
*
src_step
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
1
)
*
src_step
))
[right_x]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
(
src_y
+
2
)
*
src_step
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
...
...
@@ -374,11 +142,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
src_step
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
...
...
@@ -388,11 +156,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
src_step
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
...
...
@@ -403,11 +171,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co1
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co2
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
sum
+
co3
*
convert
_float4
((((
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
)
;
sum
=
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co1
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co2
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
src_step
))
[col]
)
;
sum
=
sum
+
co3
*
convert
ToFT
(((
__global
T*
)(
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
src_step
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
...
...
@@ -427,709 +195,8 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcRows,
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
4
+
dst_x]
=
convert_uchar4_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D2
(
__global
ushort
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
ushort
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
float
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
ushort*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
2
+
dst_x]
=
convert_ushort_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D2
(
__global
ushort4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
ushort4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
float4
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
if
(
dst_x
<
dst_cols
)
dstData[y
*
dst_step
/
((
int
)
sizeof
(
T
))
+
dst_x]
=
convertToT
(
sum
)
;
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
ushort4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
8
+
dst_x]
=
convert_ushort4_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D3
(
__global
short
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
short
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
float
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
short*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
2
+
dst_x]
=
convert_short_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D3
(
__global
short4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
short4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
float4
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co1
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co2
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
sum
=
sum
+
co3
*
convert_float4
(((
__global
short4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
)
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
8
+
dst_x]
=
convert_short4_sat_rte
(
sum
)
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C1_D5
(
__global
float
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
float
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float
smem[256
+
4]
;
float
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[left_x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
2
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
-
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
1
)
*
srcStep
))
[right_x]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
(
src_y
+
2
)
*
srcStep
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.375f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.25f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
))
[col]
;
sum
=
sum
+
0.0625f
*
((
__global
float*
)((
__global
char*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
0.0625f
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
0.375f
*
smem[2
+
tid2
]
;
sum
=
sum
+
0.25f
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
0.0625f
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
4
+
dst_x]
=
sum
;
}
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrDown_C4_D5
(
__global
float4
*
srcData,
int
srcStep,
int
srcRows,
int
srcCols,
__global
float4
*dst,
int
dstStep,
int
dstCols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
__local
float4
smem[256
+
4]
;
float4
sum
;
const
int
src_y
=
2*y
;
const
int
last_row
=
srcRows
-
1
;
const
int
last_col
=
srcCols
-
1
;
float4
co1
=
0.375f
;
float4
co2
=
0.25f
;
float4
co3
=
0.0625f
;
if
(
src_y
>=
2
&&
src_y
<
srcRows
-
2
&&
x
>=
2
&&
x
<
srcCols
-
2
)
{
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[x]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[left_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[left_x]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
2
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
-
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
1
)
*
srcStep
/
4
))
[right_x]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
(
src_y
+
2
)
*
srcStep
/
4
))
[right_x]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
else
{
int
col
=
idx_col
(
x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[2
+
get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
0
)
<
2
)
{
const
int
left_x
=
x
-
2
;
col
=
idx_col
(
left_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
0
)
>
253
)
{
const
int
right_x
=
x
+
2
;
col
=
idx_col
(
right_x,
last_col
)
;
sum
=
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
-
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co1
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co2
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
1
,
last_row
)
*
srcStep
/
4
))
[col]
;
sum
=
sum
+
co3
*
((
__global
float4*
)((
__global
char4*
)
srcData
+
idx_row
(
src_y
+
2
,
last_row
)
*
srcStep
/
4
))
[col]
;
smem[4
+
get_local_id
(
0
)
]
=
sum
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
128
)
{
const
int
tid2
=
get_local_id
(
0
)
*
2
;
sum
=
co3
*
smem[2
+
tid2
-
2]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
-
1]
;
sum
=
sum
+
co1
*
smem[2
+
tid2
]
;
sum
=
sum
+
co2
*
smem[2
+
tid2
+
1]
;
sum
=
sum
+
co3
*
smem[2
+
tid2
+
2]
;
const
int
dst_x
=
(
get_group_id
(
0
)
*
get_local_size
(
0
)
+
tid2
)
/
2
;
if
(
dst_x
<
dstCols
)
dst[y
*
dstStep
/
16
+
dst_x]
=
sum
;
}
}
modules/imgproc/src/opencl/pyr_up.cl
0 → 100644
View file @
febe5283
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Zhang
Chunpeng
chunpeng@multicorewareinc.com
//
Dachuan
Zhao,
dachuan@multicorewareinc.com
//
Yao
Wang,
yao@multicorewareinc.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
///////////////////////////////////////////////////////////////////////
////////////////////////
Generic
PyrUp
//////////////////////////////
///////////////////////////////////////////////////////////////////////
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
noconvert
__kernel
void
pyrUp
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
__local
FT
s_srcPatch[10][10]
;
__local
FT
s_dstPatch[20][16]
;
__global
T
*
dstData
=
(
__global
T
*
)(
dst
+
dst_offset
)
;
__global
const
T
*
srcData
=
(
__global
const
T
*
)(
src
+
src_offset
)
;
if
(
tidx
<
10
&&
tidy
<
10
)
{
int
srcx
=
mad24
((
int
)
get_group_id
(
0
)
,
lsizex>>1,
tidx
)
-
1
;
int
srcy
=
mad24
((
int
)
get_group_id
(
1
)
,
lsizey>>1,
tidy
)
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
src_cols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
src_rows
-
1
,
srcy
)
;
s_srcPatch[tidy][tidx]
=
convertToFT
(
srcData[srcx
+
srcy
*
src_step
/
(
int
)
sizeof
(
T
)
]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
FT
sum
=
0.f
;
const
FT
evenFlag
=
(
FT
)((
tidx
&
1
)
==
0
)
;
const
FT
oddFlag
=
(
FT
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
const
FT
co1
=
0.375f
;
const
FT
co2
=
0.25f
;
const
FT
co3
=
0.0625f
;
if
(
eveny
)
{
sum
=
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co1
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
tidy][tidx]
=
sum
;
if
(
tidy
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[tidy][tidx]
=
sum
;
}
if
(
tidy
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
co3
*
s_dstPatch[2
+
tidy
-
2][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
-
1][tidx]
;
sum
=
sum
+
co1
*
s_dstPatch[2
+
tidy
][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
+
1][tidx]
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dst_cols
)
&&
(
y
<
dst_rows
))
dstData[x
+
y
*
dst_step
/
(
int
)
sizeof
(
T
)
]
=
convertToT
(
4.0f
*
sum
)
;
}
modules/imgproc/src/pyramids.cpp
View file @
febe5283
...
...
@@ -401,23 +401,25 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
typedef
void
(
*
PyrFunc
)(
const
Mat
&
,
Mat
&
,
int
);
}
namespace
cv
{
static
bool
ocl_pyrDown
(
InputArray
_src
,
OutputArray
_dst
,
const
Size
&
_dsz
,
int
borderType
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_16S
&&
depth
!=
CV_32F
)
if
(((
channels
!=
1
)
&&
(
channels
!=
2
)
&&
(
channels
!=
4
))
||
(
borderType
!=
BORDER_DEFAULT
))
return
false
;
if
(
channels
!=
1
&&
channels
!=
3
&&
channels
!=
4
)
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
((
depth
==
CV_64F
)
&&
!
(
doubleSupport
))
return
false
;
double
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
Size
ssize
=
_src
.
size
();
Size
dsize
=
_dsz
.
area
()
==
0
?
Size
((
ssize
.
width
+
1
)
/
2
,
(
ssize
.
height
+
1
)
/
2
)
:
_dsz
;
CV_Assert
(
ssize
.
width
>
0
&&
ssize
.
height
>
0
&&
std
::
abs
(
dsize
.
width
*
2
-
ssize
.
width
)
<=
2
&&
std
::
abs
(
dsize
.
height
*
2
-
ssize
.
height
)
<=
2
);
UMat
src
=
_src
.
getUMat
();
Size
dsize
=
cv
::
Size
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
);
_dst
.
create
(
dsize
,
src
.
type
()
);
UMat
dst
=
_dst
.
getUMat
();
...
...
@@ -425,31 +427,73 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
ocl
::
ProgramSource2
program
=
ocl
::
imgproc
::
pyr_down_oclsrc
;
ocl
::
Kernel
k
;
int
float_depth
=
depth
==
CV_64F
?
CV_64F
:
CV_32F
;
char
cvt
[
2
][
50
];
k
.
create
(
kernelName
,
program
,
format
(
"-D T=%s -D
cn=%d -D convertToT=%s%s -D convertToFT=%s%s"
,
ocl
::
typeToStr
(
type
),
channels
,
ocl
::
convertTypeStr
(
CV_32F
,
depth
,
channels
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
depth
,
CV_32F
,
channels
,
cvt
[
1
]),
format
(
"-D T=%s -D
FT=%s -D convertToT=%s -D convertToFT=%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
float_depth
,
channels
)),
ocl
::
convertTypeStr
(
float_depth
,
depth
,
channels
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
depth
,
float_depth
,
channels
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
size_t
localThreads
[
2
]
=
{
256
,
1
};
size_t
globalThreads
[
2
]
=
{
src
.
cols
,
dst
.
rows
};
return
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
static
bool
ocl_pyrUp
(
InputArray
_src
,
OutputArray
_dst
,
const
Size
&
_dsz
,
int
borderType
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
if
(((
channels
!=
1
)
&&
(
channels
!=
2
)
&&
(
channels
!=
4
))
||
(
borderType
!=
BORDER_DEFAULT
))
return
false
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
((
depth
==
CV_64F
)
&&
!
(
doubleSupport
))
return
false
;
Size
ssize
=
_src
.
size
();
if
((
_dsz
.
area
()
!=
0
)
&&
(
_dsz
!=
Size
(
ssize
.
width
*
2
,
ssize
.
height
*
2
)))
return
false
;
UMat
src
=
_src
.
getUMat
();
Size
dsize
=
Size
(
ssize
.
width
*
2
,
ssize
.
height
*
2
);
_dst
.
create
(
dsize
,
src
.
type
()
);
UMat
dst
=
_dst
.
getUMat
();
const
char
*
const
kernelName
=
"pyrUp"
;
ocl
::
ProgramSource2
program
=
ocl
::
imgproc
::
pyr_up_oclsrc
;
ocl
::
Kernel
k
;
int
float_depth
=
depth
==
CV_64F
?
CV_64F
:
CV_32F
;
char
cvt
[
2
][
50
];
k
.
create
(
kernelName
,
program
,
format
(
"-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
float_depth
,
channels
)),
ocl
::
convertTypeStr
(
float_depth
,
depth
,
channels
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
depth
,
float_depth
,
channels
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
size_t
globalThreads
[
2
]
=
{
dst
.
cols
,
dst
.
rows
};
size_t
localThreads
[
2
]
=
{
16
,
16
};
return
k
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
}
}
void
cv
::
pyrDown
(
InputArray
_src
,
OutputArray
_dst
,
const
Size
&
_dsz
,
int
borderType
)
{
if
(
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
ocl_pyrDown
(
_src
,
_dst
,
_dsz
,
borderType
))
return
;
return
;
Mat
src
=
_src
.
getMat
();
Size
dsz
=
_dsz
==
Size
()
?
Size
((
src
.
cols
+
1
)
/
2
,
(
src
.
rows
+
1
)
/
2
)
:
_dsz
;
Size
dsz
=
_dsz
.
area
()
==
0
?
Size
((
src
.
cols
+
1
)
/
2
,
(
src
.
rows
+
1
)
/
2
)
:
_dsz
;
_dst
.
create
(
dsz
,
src
.
type
()
);
Mat
dst
=
_dst
.
getMat
();
...
...
@@ -478,8 +522,12 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde
void
cv
::
pyrUp
(
InputArray
_src
,
OutputArray
_dst
,
const
Size
&
_dsz
,
int
borderType
)
{
if
(
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
ocl_pyrUp
(
_src
,
_dst
,
_dsz
,
borderType
))
return
;
Mat
src
=
_src
.
getMat
();
Size
dsz
=
_dsz
==
Size
()
?
Size
(
src
.
cols
*
2
,
src
.
rows
*
2
)
:
_dsz
;
Size
dsz
=
_dsz
.
area
()
==
0
?
Size
(
src
.
cols
*
2
,
src
.
rows
*
2
)
:
_dsz
;
_dst
.
create
(
dsz
,
src
.
type
()
);
Mat
dst
=
_dst
.
getMat
();
...
...
modules/imgproc/test/ocl/test_pyramids.cpp
View file @
febe5283
...
...
@@ -45,73 +45,98 @@
#include "test_precomp.hpp"
#include
<iomanip>
#include
"opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
using
namespace
cv
;
using
namespace
testing
;
using
namespace
std
;
namespace
cvtest
{
namespace
ocl
{
PARAM_TEST_CASE
(
Pyr
Base
,
MatDepth
,
Channels
)
PARAM_TEST_CASE
(
Pyr
TestBase
,
MatDepth
,
Channels
,
bool
)
{
int
depth
;
int
channels
;
int
depth
,
channels
;
bool
use_roi
;
Mat
dst_cpu
;
ocl
::
oclMat
gdst
;
TEST_DECLARE_INPUT_PARAMETER
(
src
)
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
)
virtual
void
SetUp
()
{
depth
=
GET_PARAM
(
0
);
channels
=
GET_PARAM
(
1
);
use_roi
=
GET_PARAM
(
2
);
}
void
generateTestData
(
Size
src_roiSize
,
Size
dst_roiSize
)
{
Border
srcBorder
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src
,
src_roi
,
src_roiSize
,
srcBorder
,
CV_MAKETYPE
(
depth
,
channels
),
-
MAX_VALUE
,
MAX_VALUE
);
Border
dstBorder
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
dst
,
dst_roi
,
dst_roiSize
,
dstBorder
,
CV_MAKETYPE
(
depth
,
channels
),
-
MAX_VALUE
,
MAX_VALUE
);
UMAT_UPLOAD_INPUT_PARAMETER
(
src
)
UMAT_UPLOAD_OUTPUT_PARAMETER
(
dst
)
}
void
Near
(
double
threshold
=
0.0
)
{
OCL_EXPECT_MATS_NEAR
(
dst
,
threshold
);
}
};
/////////////////////// PyrDown //////////////////////////
typedef
PyrBase
PyrDown
;
typedef
Pyr
Test
Base
PyrDown
;
OCL_TEST_P
(
PyrDown
,
Mat
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
Size
size
(
MWIDTH
,
MHEIGHT
);
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
depth
,
channels
),
0
,
255
);
ocl
::
oclMat
gsrc
(
src
);
Size
src_roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Size
dst_roiSize
=
Size
(
randomInt
((
src_roiSize
.
width
-
1
)
/
2
,
(
src_roiSize
.
width
+
3
)
/
2
),
randomInt
((
src_roiSize
.
height
-
1
)
/
2
,
(
src_roiSize
.
height
+
3
)
/
2
));
dst_roiSize
=
dst_roiSize
.
area
()
==
0
?
Size
((
src_roiSize
.
width
+
1
)
/
2
,
(
src_roiSize
.
height
+
1
)
/
2
)
:
dst_roiSize
;
generateTestData
(
src_roiSize
,
dst_roiSize
);
pyrDown
(
src
,
dst_cpu
);
ocl
::
pyrDown
(
gsrc
,
gdst
);
OCL_OFF
(
pyrDown
(
src_roi
,
dst_roi
,
dst_roiSize
)
);
OCL_ON
(
pyrDown
(
usrc_roi
,
udst_roi
,
dst_roiSize
)
);
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
depth
==
CV_32F
?
1e-4
f
:
1.0
f
);
Near
(
depth
==
CV_32F
?
1e-4
f
:
1.0
f
);
}
}
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
PyrDown
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
1
,
3
,
4
)));
OCL_INSTANTIATE_TEST_CASE_P
(
ImgprocPyr
,
PyrDown
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
,
CV_64F
),
Values
(
1
,
2
,
4
),
Bool
()
));
/////////////////////// PyrUp //////////////////////////
typedef
PyrBase
PyrUp
;
typedef
Pyr
Test
Base
PyrUp
;
OCL_TEST_P
(
PyrUp
,
Accuracy
)
OCL_TEST_P
(
PyrUp
,
Mat
)
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
test_loop_times
;
j
++
)
{
Size
s
ize
(
MWIDTH
,
MHEIGHT
);
Mat
src
=
randomMat
(
size
,
CV_MAKETYPE
(
depth
,
channels
),
0
,
255
);
ocl
::
oclMat
gsrc
(
src
);
Size
s
rc_roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Size
dst_roiSize
=
Size
(
2
*
src_roiSize
.
width
,
2
*
src_roiSize
.
height
);
generateTestData
(
src_roiSize
,
dst_roiSize
);
pyrUp
(
src
,
dst_cpu
);
ocl
::
pyrUp
(
gsrc
,
gdst
);
OCL_OFF
(
pyrUp
(
src_roi
,
dst_roi
,
dst_roiSize
)
);
OCL_ON
(
pyrUp
(
usrc_roi
,
udst_roi
,
dst_roiSize
)
);
EXPECT_MAT_NEAR
(
dst_cpu
,
Mat
(
gdst
),
(
depth
==
CV_32F
?
1e-4
f
:
1.0
)
);
Near
(
depth
==
CV_32F
?
1e-4
f
:
1.0
f
);
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
ImgprocPyr
,
PyrUp
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
,
CV_64F
),
Values
(
1
,
2
,
4
),
Bool
()
));
}
}
// namespace cvtest::ocl
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
PyrUp
,
Combine
(
Values
(
CV_8U
,
CV_16U
,
CV_16S
,
CV_32F
),
Values
(
1
,
3
,
4
)));
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment