Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
cc237b7a
Commit
cc237b7a
authored
Nov 15, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
generalized ocl::resize for all data types (INTER_NEAREST mode)
parent
2e79dde3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
81 additions
and
219 deletions
+81
-219
imgproc.cpp
modules/ocl/src/imgproc.cpp
+38
-77
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+43
-142
No files found.
modules/ocl/src/imgproc.cpp
View file @
cc237b7a
...
...
@@ -282,96 +282,63 @@ namespace cv
static
void
resize_gpu
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
fx
,
double
fy
,
int
interpolation
)
{
CV_Assert
(
(
src
.
channels
()
==
dst
.
channels
())
);
Context
*
clCxt
=
src
.
clCxt
;
float
ifx
=
1.
/
fx
;
float
ify
=
1.
/
fy
;
double
ifx_d
=
1.
/
fx
;
double
ify_d
=
1.
/
fy
;
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
oclchannels
();
int
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
oclchannels
();
int
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
string
kernelName
;
if
(
interpolation
==
INTER_LINEAR
)
kernelName
=
"resizeLN"
;
else
if
(
interpolation
==
INTER_NEAREST
)
kernelName
=
"resizeNN"
;
float
ifx
=
1.
f
/
fx
,
ify
=
1.
f
/
fy
;
int
src_step
=
src
.
step
/
src
.
elemSize
(),
src_offset
=
src
.
offset
/
src
.
elemSize
();
int
dst_step
=
dst
.
step
/
dst
.
elemSize
(),
dst_offset
=
dst
.
offset
/
dst
.
elemSize
();
int
ocn
=
interpolation
==
INTER_LINEAR
?
dst
.
oclchannels
()
:
-
1
;
int
depth
=
interpolation
==
INTER_LINEAR
?
dst
.
depth
()
:
-
1
;
const
char
*
const
interMap
[]
=
{
"NN"
,
"LN"
,
"CUBIC"
,
"AREA"
,
"LAN4"
};
std
::
string
kernelName
=
std
::
string
(
"resize"
)
+
interMap
[
interpolation
];
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"uchar"
,
"ushort"
,
"ushort"
,
"int"
,
"int"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
};
std
::
string
buildOption
=
format
(
"-D %s -D T=%s%s"
,
interMap
[
interpolation
],
typeMap
[
dst
.
depth
()],
channelMap
[
dst
.
oclchannels
()]);
//TODO: improve this kernel
size_t
blkSizeX
=
16
,
blkSizeY
=
16
;
size_t
glbSizeX
;
if
(
src
.
type
()
==
CV_8UC1
)
if
(
src
.
type
()
==
CV_8UC1
&&
interpolation
==
INTER_LINEAR
)
{
size_t
cols
=
(
dst
.
cols
+
dst
.
offset
%
4
+
3
)
/
4
;
glbSizeX
=
cols
%
blkSizeX
==
0
&&
cols
!=
0
?
cols
:
(
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
}
else
glbSizeX
=
dst
.
cols
%
blkSizeX
==
0
&&
dst
.
cols
!=
0
?
dst
.
cols
:
(
dst
.
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
glbSizeX
=
dst
.
cols
;
size_t
glbSizeY
=
dst
.
rows
%
blkSizeY
==
0
&&
dst
.
rows
!=
0
?
dst
.
rows
:
(
dst
.
rows
/
blkSizeY
+
1
)
*
blkSizeY
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
dst
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
interpolation
==
INTER_NEAREST
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
if
(
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
openCLExecuteKernel
(
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
oclchannels
(),
src
.
depth
());
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
ocn
,
depth
,
buildOption
.
c_str
());
}
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
)
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
||
src
.
type
()
==
CV_8UC4
||
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
||
src
.
type
()
==
CV_32FC4
);
CV_Assert
(
interpolation
==
INTER_LINEAR
||
interpolation
==
INTER_NEAREST
);
CV_Assert
(
src
.
size
().
area
()
>
0
);
CV_Assert
(
!
(
dsize
==
Size
())
||
(
fx
>
0
&&
fy
>
0
)
);
if
(
!
(
dsize
==
Size
())
&&
(
fx
>
0
&&
fy
>
0
))
if
(
dsize
.
width
!=
(
int
)(
src
.
cols
*
fx
)
||
dsize
.
height
!=
(
int
)(
src
.
rows
*
fy
))
CV_Error
(
CV_StsUnmatchedSizes
,
"invalid dsize and fx, fy!"
);
CV_Assert
(
dsize
.
area
()
>
0
||
(
fx
>
0
&&
fy
>
0
));
if
(
dsize
==
Size
()
)
if
(
dsize
.
area
()
==
0
)
{
dsize
=
Size
(
saturate_cast
<
int
>
(
src
.
cols
*
fx
),
saturate_cast
<
int
>
(
src
.
rows
*
fy
));
CV_Assert
(
dsize
.
area
()
>
0
);
}
else
{
fx
=
(
double
)
dsize
.
width
/
src
.
cols
;
...
...
@@ -380,13 +347,7 @@ namespace cv
dst
.
create
(
dsize
,
src
.
type
());
if
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
)
{
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
return
;
}
CV_Error
(
CV_StsUnsupportedFormat
,
"Non-supported interpolation method"
);
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
}
////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
cc237b7a
...
...
@@ -45,7 +45,7 @@
//
resize
kernel
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4
are
supported.
//
Currently,
CV_8UC1
,
CV_8UC4,
CV_32FC1
and
CV_32FC4
are
supported.
//
We
shall
support
other
types
later
if
necessary.
#
ifdef
DOUBLE_SUPPORT
...
...
@@ -54,20 +54,18 @@
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
define
F
double
#
else
#
define
F
float
#
endif
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
#
define
CAST_BITS
(
INTER_RESIZE_COEF_BITS
<<
1
)
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
#
ifdef
LN
__kernel
void
resizeLN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
const
*
restrict
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
gx
=
get_global_id
(
0
)
;
...
...
@@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
float4
sx,
u,
xf
;
int4
x,
DX
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
xf
=
floor
(
sx
)
;
...
...
@@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
int4
val1,
val2,
val
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s1
=
src[pos1.s1]
;
...
...
@@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
pos4
=
mad24
(
dy,
dst
step_in_pixel,
gx+dstoffset_in_pixel
)
;
pos4
=
mad24
(
dy,
dst
_step,
gx+dst_offset
)
;
pos4.y++
;
pos4.z+=2
;
pos4.w+=3
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
offset_in_pixel
&3
)
==0
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
_offset
&3
)
==0
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
...
...
@@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
}
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
val
=
mul24
((
int4
)
mul24
(
U1,
V1
)
,
data0
)
+
mul24
((
int4
)
mul24
(
U,
V1
)
,
data1
)
+mul24
((
int4
)
mul24
(
U1,
V
)
,
data2
)
+mul24
((
int4
)
mul24
(
U,
V
)
,
data3
)
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
uval
;
}
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float
data0
=
src[srcpos.x]
;
float
data1
=
src[srcpos.y]
;
float
data2
=
src[srcpos.z]
;
...
...
@@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
val2
=
u1
*
data2
+
u
*
data3
;
float
val
=
v1
*
val1
+
v
*
val2
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
}
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
s_data1
=
src[srcpos.x]
;
s_data2
=
src[srcpos.y]
;
...
...
@@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
s_data4
=
src[srcpos.w]
;
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
}
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
gx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
#
elif
defined
NN
gx
=
(
gx<<2
)
-
(
dstoffset_in_pixel&3
)
;
//int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
int4
sx
;
int
sy
;
F
ss1
=
gx*ifx
;
F
ss2
=
(
gx+1
)
*ifx
;
F
ss3
=
(
gx+2
)
*ifx
;
F
ss4
=
(
gx+3
)
*ifx
;
F
s5
=
dy
*
ify
;
sx.s0
=
min
((
int
)
floor
(
ss1
)
,
src_cols-1
)
;
sx.s1
=
min
((
int
)
floor
(
ss2
)
,
src_cols-1
)
;
sx.s2
=
min
((
int
)
floor
(
ss3
)
,
src_cols-1
)
;
sx.s3
=
min
((
int
)
floor
(
ss4
)
,
src_cols-1
)
;
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
uchar4
val
;
int4
pos
=
mad24
((
int4
)
sy,
(
int4
)
srcstep_in_pixel,
sx+
(
int4
)
srcoffset_in_pixel
)
;
val.s0
=
src[pos.s0]
;
val.s1
=
src[pos.s1]
;
val.s2
=
src[pos.s2]
;
val.s3
=
src[pos.s3]
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
//uchar4
dVal
=
*d
;
pos
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos.y++
;
pos.z+=2
;
pos.w+=3
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dstoffset_in_pixel&3
)
==0
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos.x
)
=val
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.x]=val.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.y]=val.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.z]=val.z
;
}
if
(
gx+3
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.w]=val.w
;
}
}
}
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
__kernel
void
resizeNN
(
__global
T
*
dst,
__global
T
*
src,
int
dst_offset,
int
src_offset,int
dst_step,
int
src_step,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
if
(
dx
<
dst_cols
&&
dy
<
dst_rows
)
{
float
s1
=
dx
*
ifx,
s2
=
dy
*
ify
;
int
sx
=
min
(
convert_int_sat_rtn
(
s1
)
,
src_cols
-
1
)
;
int
sy
=
min
(
convert_int_sat_rtn
(
s2
)
,
src_rows
-
1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
int
dst_index
=
mad24
(
dy,
dst_step,
dx
+
dst_offset
)
;
int
src_index
=
mad24
(
sy,
src_step,
sx
+
src_offset
)
;
dst[dst_index]
=
src[src_index]
;
}
}
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
s_col
=
floor
(
s1
)
;
int
s_row
=
floor
(
s2
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
#
endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment