Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a5c9d836
Commit
a5c9d836
authored
Sep 15, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed ocl::pyrUp for 2-byte types
parent
f9c61234
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
750 deletions
+32
-750
pyr_up.cl
modules/ocl/src/opencl/pyr_up.cl
+16
-746
pyrup.cpp
modules/ocl/src/pyrup.cpp
+16
-4
No files found.
modules/ocl/src/opencl/pyr_up.cl
View file @
a5c9d836
...
...
@@ -46,430 +46,25 @@
//
//M*/
uchar
get_valid_uchar
(
float
data
)
{
return
(
uchar
)(
data
<=
255
?
data
:
data
>
0
?
255
:
0
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC1
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D0
(
__global
uchar*
src,
__global
uchar*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
__local
float
s_srcPatch[10][10]
;
__local
float
s_dstPatch[20][16]
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
if
(
tidx
<
10
&&
tidy
<
10
)
{
int
srcx
=
mad24
((
int
)
get_group_id
(
0
)
,
(
lsizex>>1
)
,
tidx
)
-
1
;
int
srcy
=
mad24
((
int
)
get_group_id
(
1
)
,
(
lsizey>>1
)
,
tidy
)
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[tidy][tidx]
=
(
float
)(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
sum
=
0
;
const
int
evenFlag
=
(
int
)((
tidx
&
1
)
==
0
)
;
const
int
oddFlag
=
(
int
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
if
(
eveny
)
{
sum
=
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
1
)
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
1
)
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[9][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0
;
sum
=
0.0625f
*
s_dstPatch[2
+
tidy
-
2][tidx]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
-
1][tidx]
;
sum
=
sum
+
0.375f
*
s_dstPatch[2
+
tidy
][tidx]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
+
1][tidx]
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_uchar_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC1
/////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D2
(
__global
ushort*
src,
__global
ushort*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
__local
float
s_srcPatch[10][10]
;
__local
float
s_dstPatch[20][16]
;
srcStep
=
srcStep
>>
1
;
dstStep
=
dstStep
>>
1
;
srcOffset
=
srcOffset
>>
1
;
dstOffset
=
dstOffset
>>
1
;
if
(
get_local_id
(
0
)
<
10
&&
get_local_id
(
1
)
<
10
)
{
int
srcx
=
(
int
)(
get_group_id
(
0
)
*
get_local_size
(
0
)
/
2
+
get_local_id
(
0
))
-
1
;
int
srcy
=
(
int
)(
get_group_id
(
1
)
*
get_local_size
(
1
)
/
2
+
get_local_id
(
1
))
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
(
float
)(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
sum
=
0
;
const
int
evenFlag
=
(
int
)((
get_local_id
(
0
)
&
1
)
==
0
)
;
const
int
oddFlag
=
(
int
)((
get_local_id
(
0
)
&
1
)
!=
0
)
;
const
bool
eveny
=
((
get_local_id
(
1
)
&
1
)
==
0
)
;
const
int
tidx
=
get_local_id
(
0
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
1
)
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[0][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[0][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[0][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[0][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[0][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
1
)
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[9][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[9][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[9][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[9][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[9][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0
;
const
int
tidy
=
get_local_id
(
1
)
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
-
2][get_local_id
(
0
)
]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
-
1][get_local_id
(
0
)
]
;
sum
=
sum
+
0.375f
*
s_dstPatch[2
+
tidy
][get_local_id
(
0
)
]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
+
1][get_local_id
(
0
)
]
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_ushort_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
////////////////////////
//
CV_16SC1
///
//////////////////////////////
////////////////////////
Generic
PyrUp
//////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp
_C1_D3
(
__global
short*
src,
__global
short
*
dst,
__kernel
void
pyrUp
(
__global
Type*
src,
__global
Type
*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
__local
float
s_srcPatch[10][10]
;
__local
float
s_dstPatch[20][16]
;
srcStep
=
srcStep
>>
1
;
dstStep
=
dstStep
>>
1
;
srcOffset
=
srcOffset
>>
1
;
dstOffset
=
dstOffset
>>
1
;
if
(
get_local_id
(
0
)
<
10
&&
get_local_id
(
1
)
<
10
)
{
int
srcx
=
(
int
)(
get_group_id
(
0
)
*
get_local_size
(
0
)
/
2
+
get_local_id
(
0
))
-
1
;
int
srcy
=
(
int
)(
get_group_id
(
1
)
*
get_local_size
(
1
)
/
2
+
get_local_id
(
1
))
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
(
float
)(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
sum
=
0
;
const
int
evenFlag
=
(
int
)((
get_local_id
(
0
)
&
1
)
==
0
)
;
const
int
oddFlag
=
(
int
)((
get_local_id
(
0
)
&
1
)
!=
0
)
;
const
bool
eveny
=
((
get_local_id
(
1
)
&
1
)
==
0
)
;
const
int
tidx
=
get_local_id
(
0
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
1
)
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[0][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[0][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[0][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[0][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[0][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
1
)
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[9][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[9][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[9][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[9][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[9][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0
;
const
int
tidy
=
get_local_id
(
1
)
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
-
2][get_local_id
(
0
)
]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
-
1][get_local_id
(
0
)
]
;
sum
=
sum
+
0.375f
*
s_dstPatch[2
+
tidy
][get_local_id
(
0
)
]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
+
1][get_local_id
(
0
)
]
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_short_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC1
/////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C1_D5
(
__global
float*
src,
__global
float*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
__local
float
s_srcPatch[10][10]
;
__local
float
s_dstPatch[20][16]
;
srcOffset
=
srcOffset
>>
2
;
dstOffset
=
dstOffset
>>
2
;
srcStep
=
srcStep
>>
2
;
dstStep
=
dstStep
>>
2
;
if
(
tidx
<
10
&&
tidy
<
10
)
{
int
srcx
=
mad24
((
int
)
get_group_id
(
0
)
,
lsizex>>1,
tidx
)
-
1
;
int
srcy
=
mad24
((
int
)
get_group_id
(
1
)
,
lsizey>>1,
tidy
)
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[tidy][tidx]
=
(
float
)(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float
sum
=
0
;
const
int
evenFlag
=
(
int
)((
tidx
&
1
)
==
0
)
;
const
int
oddFlag
=
(
int
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
tidy][tidx]
=
sum
;
if
(
tidy
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
16][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[tidy][tidx]
=
sum
;
}
if
(
tidy
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.375f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
0.25f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
0.0625f
)
*
s_srcPatch[lsizey
-
7][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0.0625f
*
s_dstPatch[2
+
tidy
-
2][tidx]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
-
1][tidx]
;
sum
=
sum
+
0.375f
*
s_dstPatch[2
+
tidy
][tidx]
;
sum
=
sum
+
0.25f
*
s_dstPatch[2
+
tidy
+
1][tidx]
;
sum
=
sum
+
0.0625f
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
(
float
)(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_8UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D0
(
__global
uchar4*
src,
__global
uchar4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
__local
float4
s_srcPatch[10][10]
;
__local
float4
s_dstPatch[20][16]
;
srcOffset
>>=
2
;
dstOffset
>>=
2
;
srcStep
>>=
2
;
dstStep
>>=
2
;
__local
floatType
s_srcPatch[10][10]
;
__local
floatType
s_dstPatch[20][16]
;
if
(
tidx
<
10
&&
tidy
<
10
)
{
...
...
@@ -482,351 +77,27 @@ __kernel void pyrUp_C4_D0(__global uchar4* src, __global uchar4* dst,
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[tidy][tidx]
=
convert
_float4
(
src[srcx
+
srcy
*
srcStep]
)
;
s_srcPatch[tidy][tidx]
=
convert
ToFloat
(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float4
sum
=
(
float4
)(
0
,
0
,
0
,
0
)
;
const
float4
evenFlag
=
(
float4
)((
tidx
&
1
)
==
0
)
;
const
float4
oddFlag
=
(
float4
)((
tidx
&
1
)
!=
0
)
;
floatType
sum
=
(
floatType
)
0
;
const
floatType
evenFlag
=
(
floatType
)((
tidx
&
1
)
==
0
)
;
const
floatType
oddFlag
=
(
floatType
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
const
floatType
co1
=
(
floatType
)
0.375f
;
const
floatType
co2
=
(
floatType
)
0.25f
;
const
floatType
co3
=
(
floatType
)
0.0625f
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
tidy][tidx]
=
sum
;
if
(
tidy
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[tidy][tidx]
=
sum
;
}
if
(
tidy
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
co3
*
s_dstPatch[2
+
tidy
-
2][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
-
1][tidx]
;
sum
=
sum
+
co1
*
s_dstPatch[2
+
tidy
][tidx]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
+
1][tidx]
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_uchar4_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16UC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D2
(
__global
ushort4*
src,
__global
ushort4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
__local
float4
s_srcPatch[10][10]
;
__local
float4
s_dstPatch[20][16]
;
srcOffset
>>=
3
;
dstOffset
>>=
3
;
srcStep
>>=
3
;
dstStep
>>=
3
;
if
(
get_local_id
(
0
)
<
10
&&
get_local_id
(
1
)
<
10
)
{
int
srcx
=
(
int
)(
get_group_id
(
0
)
*
get_local_size
(
0
)
/
2
+
get_local_id
(
0
))
-
1
;
int
srcy
=
(
int
)(
get_group_id
(
1
)
*
get_local_size
(
1
)
/
2
+
get_local_id
(
1
))
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
convert_float4
(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float4
sum
=
(
float4
)(
0
,
0
,
0
,
0
)
;
const
float4
evenFlag
=
(
float4
)((
get_local_id
(
0
)
&
1
)
==
0
)
;
const
float4
oddFlag
=
(
float4
)((
get_local_id
(
0
)
&
1
)
!=
0
)
;
const
bool
eveny
=
((
get_local_id
(
1
)
&
1
)
==
0
)
;
const
int
tidx
=
get_local_id
(
0
)
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co1
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
1
)
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[0][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[0][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[0][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[0][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[0][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
1
)
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[9][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[9][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[9][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[9][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[9][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0
;
const
int
tidy
=
get_local_id
(
1
)
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
-
2][get_local_id
(
0
)
]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
-
1][get_local_id
(
0
)
]
;
sum
=
sum
+
co1
*
s_dstPatch[2
+
tidy
][get_local_id
(
0
)
]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
+
1][get_local_id
(
0
)
]
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_ushort4_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_16SC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D3
(
__global
short4*
src,
__global
short4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
__local
float4
s_srcPatch[10][10]
;
__local
float4
s_dstPatch[20][16]
;
srcOffset
>>=
3
;
dstOffset
>>=
3
;
srcStep
>>=
3
;
dstStep
>>=
3
;
if
(
get_local_id
(
0
)
<
10
&&
get_local_id
(
1
)
<
10
)
{
int
srcx
=
(
int
)(
get_group_id
(
0
)
*
get_local_size
(
0
)
/
2
+
get_local_id
(
0
))
-
1
;
int
srcy
=
(
int
)(
get_group_id
(
1
)
*
get_local_size
(
1
)
/
2
+
get_local_id
(
1
))
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
convert_float4
(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float4
sum
=
(
float4
)(
0
,
0
,
0
,
0
)
;
const
float4
evenFlag
=
(
float4
)((
get_local_id
(
0
)
&
1
)
==
0
)
;
const
float4
oddFlag
=
(
float4
)((
get_local_id
(
0
)
&
1
)
!=
0
)
;
const
bool
eveny
=
((
get_local_id
(
1
)
&
1
)
==
0
)
;
const
int
tidx
=
get_local_id
(
0
)
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co1
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
get_local_id
(
1
)
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
if
(
get_local_id
(
1
)
<
2
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[0][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[0][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[0][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[0][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[0][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
if
(
get_local_id
(
1
)
>
13
)
{
sum
=
0
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[9][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[9][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[9][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[9][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[9][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
get_local_id
(
1
)
][get_local_id
(
0
)
]
=
sum
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
sum
=
0
;
const
int
tidy
=
get_local_id
(
1
)
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
-
2][get_local_id
(
0
)
]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
-
1][get_local_id
(
0
)
]
;
sum
=
sum
+
co1
*
s_dstPatch[2
+
tidy
][get_local_id
(
0
)
]
;
sum
=
sum
+
co2
*
s_dstPatch[2
+
tidy
+
1][get_local_id
(
0
)
]
;
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][get_local_id
(
0
)
]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
convert_short4_sat_rte
(
4.0f
*
sum
)
;
}
///////////////////////////////////////////////////////////////////////
//////////////////////////
CV_32FC4
//////////////////////////////////
///////////////////////////////////////////////////////////////////////
__kernel
void
pyrUp_C4_D5
(
__global
float4*
src,
__global
float4*
dst,
int
srcRows,
int
dstRows,
int
srcCols,
int
dstCols,
int
srcOffset,
int
dstOffset,
int
srcStep,
int
dstStep
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
tidx
=
get_local_id
(
0
)
;
const
int
tidy
=
get_local_id
(
1
)
;
const
int
lsizex
=
get_local_size
(
0
)
;
const
int
lsizey
=
get_local_size
(
1
)
;
__local
float4
s_srcPatch[10][10]
;
__local
float4
s_dstPatch[20][16]
;
srcOffset
>>=
4
;
dstOffset
>>=
4
;
srcStep
>>=
4
;
dstStep
>>=
4
;
if
(
tidx
<
10
&&
tidy
<
10
)
{
int
srcx
=
(
int
)(
get_group_id
(
0
)
*
get_local_size
(
0
)
/
2
+
tidx
)
-
1
;
int
srcy
=
(
int
)(
get_group_id
(
1
)
*
get_local_size
(
1
)
/
2
+
tidy
)
-
1
;
srcx
=
abs
(
srcx
)
;
srcx
=
min
(
srcCols
-
1
,
srcx
)
;
srcy
=
abs
(
srcy
)
;
srcy
=
min
(
srcRows
-1
,
srcy
)
;
s_srcPatch[tidy][tidx]
=
(
float4
)(
src[srcx
+
srcy
*
srcStep]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
float4
sum
=
(
float4
)(
0
,
0
,
0
,
0
)
;
const
float4
evenFlag
=
(
float4
)((
tidx
&
1
)
==
0
)
;
const
float4
oddFlag
=
(
float4
)((
tidx
&
1
)
!=
0
)
;
const
bool
eveny
=
((
tidy
&
1
)
==
0
)
;
float4
co1
=
(
float4
)(
0.375f,
0.375f,
0.375f,
0.375f
)
;
float4
co2
=
(
float4
)(
0.25f,
0.25f,
0.25f,
0.25f
)
;
float4
co3
=
(
float4
)(
0.0625f,
0.0625f,
0.0625f,
0.0625f
)
;
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co1
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag*
co3
)
*
s_srcPatch[1
+
(
tidy
>>
1
)
][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[2
+
tidy][tidx]
=
sum
;
...
...
@@ -837,8 +108,8 @@ __kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst,
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-16][1
+
((
tidx
+
2
)
>>
1
)
]
;
...
...
@@ -853,12 +124,11 @@ __kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst,
if
(
eveny
)
{
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
2
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
-
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co1
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
)
>>
1
)
]
;
sum
=
sum
+
(
oddFlag
*
co2
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
1
)
>>
1
)
]
;
sum
=
sum
+
(
evenFlag
*
co3
)
*
s_srcPatch[lsizey-7][1
+
((
tidx
+
2
)
>>
1
)
]
;
}
s_dstPatch[4
+
tidy][tidx]
=
sum
;
}
...
...
@@ -872,5 +142,5 @@ __kernel void pyrUp_C4_D5(__global float4* src, __global float4* dst,
sum
=
sum
+
co3
*
s_dstPatch[2
+
tidy
+
2][tidx]
;
if
((
x
<
dstCols
)
&&
(
y
<
dstRows
))
dst[x
+
y
*
dstStep]
=
4.0f
*
sum
;
dst[x
+
y
*
dstStep]
=
convertToType
(
4.0f
*
sum
)
;
}
modules/ocl/src/pyrup.cpp
View file @
a5c9d836
...
...
@@ -59,9 +59,10 @@ namespace cv
namespace
ocl
{
extern
const
char
*
pyr_up
;
void
pyrUp
(
const
cv
::
ocl
::
oclMat
&
src
,
cv
::
ocl
::
oclMat
&
dst
)
{
int
depth
=
src
.
depth
(),
channels
=
src
.
channels
();
int
depth
=
src
.
depth
(),
channels
=
src
.
channels
()
,
oclChannels
=
src
.
oclchannels
()
;
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_16S
||
depth
==
CV_32F
);
CV_Assert
(
channels
==
1
||
channels
==
3
||
channels
==
4
);
...
...
@@ -70,7 +71,17 @@ namespace cv
Context
*
clCxt
=
src
.
clCxt
;
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
};
char
buildOptions
[
250
],
convertString
[
50
];
const
char
*
const
channelsString
=
oclChannels
==
1
?
""
:
"4"
;
sprintf
(
convertString
,
"convert_%s%s_sat_rte"
,
typeMap
[
depth
],
channelsString
);
sprintf
(
buildOptions
,
"-D Type=%s%s -D floatType=float%s -D convertToType=%s -D convertToFloat=%s"
,
typeMap
[
depth
],
channelsString
,
channelsString
,
depth
==
CV_32F
?
""
:
convertString
,
oclChannels
==
4
?
"convert_float4"
:
"(float)"
);
const
std
::
string
kernelName
=
"pyrUp"
;
int
dststep
=
dst
.
step
/
dst
.
elemSize
(),
srcstep
=
src
.
step
/
src
.
elemSize
();
std
::
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
...
...
@@ -81,14 +92,15 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcstep
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dststep
));
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
openCLExecuteKernel
(
clCxt
,
&
pyr_up
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
oclchannels
(),
src
.
depth
());
openCLExecuteKernel
(
clCxt
,
&
pyr_up
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment