Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
cc237b7a
Commit
cc237b7a
authored
Nov 15, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
generalized ocl::resize for all data types (INTER_NEAREST mode)
parent
2e79dde3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
81 additions
and
219 deletions
+81
-219
imgproc.cpp
modules/ocl/src/imgproc.cpp
+38
-77
imgproc_resize.cl
modules/ocl/src/opencl/imgproc_resize.cl
+43
-142
No files found.
modules/ocl/src/imgproc.cpp
View file @
cc237b7a
...
@@ -282,96 +282,63 @@ namespace cv
...
@@ -282,96 +282,63 @@ namespace cv
static
void
resize_gpu
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
fx
,
double
fy
,
int
interpolation
)
static
void
resize_gpu
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
fx
,
double
fy
,
int
interpolation
)
{
{
CV_Assert
(
(
src
.
channels
()
==
dst
.
channels
())
);
float
ifx
=
1.
f
/
fx
,
ify
=
1.
f
/
fy
;
Context
*
clCxt
=
src
.
clCxt
;
int
src_step
=
src
.
step
/
src
.
elemSize
(),
src_offset
=
src
.
offset
/
src
.
elemSize
();
float
ifx
=
1.
/
fx
;
int
dst_step
=
dst
.
step
/
dst
.
elemSize
(),
dst_offset
=
dst
.
offset
/
dst
.
elemSize
();
float
ify
=
1.
/
fy
;
int
ocn
=
interpolation
==
INTER_LINEAR
?
dst
.
oclchannels
()
:
-
1
;
double
ifx_d
=
1.
/
fx
;
int
depth
=
interpolation
==
INTER_LINEAR
?
dst
.
depth
()
:
-
1
;
double
ify_d
=
1.
/
fy
;
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
oclchannels
();
const
char
*
const
interMap
[]
=
{
"NN"
,
"LN"
,
"CUBIC"
,
"AREA"
,
"LAN4"
};
int
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
std
::
string
kernelName
=
std
::
string
(
"resize"
)
+
interMap
[
interpolation
];
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
oclchannels
();
int
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"uchar"
,
"ushort"
,
"ushort"
,
"int"
,
"int"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
};
string
kernelName
;
std
::
string
buildOption
=
format
(
"-D %s -D T=%s%s"
,
interMap
[
interpolation
],
typeMap
[
dst
.
depth
()],
channelMap
[
dst
.
oclchannels
()]);
if
(
interpolation
==
INTER_LINEAR
)
kernelName
=
"resizeLN"
;
else
if
(
interpolation
==
INTER_NEAREST
)
kernelName
=
"resizeNN"
;
//TODO: improve this kernel
//TODO: improve this kernel
size_t
blkSizeX
=
16
,
blkSizeY
=
16
;
size_t
blkSizeX
=
16
,
blkSizeY
=
16
;
size_t
glbSizeX
;
size_t
glbSizeX
;
if
(
src
.
type
()
==
CV_8UC1
)
if
(
src
.
type
()
==
CV_8UC1
&&
interpolation
==
INTER_LINEAR
)
{
{
size_t
cols
=
(
dst
.
cols
+
dst
.
offset
%
4
+
3
)
/
4
;
size_t
cols
=
(
dst
.
cols
+
dst
.
offset
%
4
+
3
)
/
4
;
glbSizeX
=
cols
%
blkSizeX
==
0
&&
cols
!=
0
?
cols
:
(
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
glbSizeX
=
cols
%
blkSizeX
==
0
&&
cols
!=
0
?
cols
:
(
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
}
}
else
else
glbSizeX
=
dst
.
cols
%
blkSizeX
==
0
&&
dst
.
cols
!=
0
?
dst
.
cols
:
(
dst
.
cols
/
blkSizeX
+
1
)
*
blkSizeX
;
glbSizeX
=
dst
.
cols
;
size_t
glbSizeY
=
dst
.
rows
%
blkSizeY
==
0
&&
dst
.
rows
!=
0
?
dst
.
rows
:
(
dst
.
rows
/
blkSizeY
+
1
)
*
blkSizeY
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
dst
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
if
(
interpolation
==
INTER_NEAREST
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
if
(
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcoffset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
openCLExecuteKernel
(
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
src
.
oclchannels
(),
src
.
depth
());
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_resize
,
kernelName
,
globalThreads
,
localThreads
,
args
,
ocn
,
depth
,
buildOption
.
c_str
());
}
}
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
void
resize
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
)
double
fx
,
double
fy
,
int
interpolation
)
{
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
||
src
.
type
()
==
CV_8UC4
CV_Assert
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
||
src
.
type
()
==
CV_8UC4
||
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
||
src
.
type
()
==
CV_32FC4
);
||
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
||
src
.
type
()
==
CV_32FC4
);
CV_Assert
(
interpolation
==
INTER_LINEAR
||
interpolation
==
INTER_NEAREST
);
CV_Assert
(
interpolation
==
INTER_LINEAR
||
interpolation
==
INTER_NEAREST
);
CV_Assert
(
src
.
size
().
area
()
>
0
);
CV_Assert
(
dsize
.
area
()
>
0
||
(
fx
>
0
&&
fy
>
0
));
CV_Assert
(
!
(
dsize
==
Size
())
||
(
fx
>
0
&&
fy
>
0
)
);
if
(
!
(
dsize
==
Size
())
&&
(
fx
>
0
&&
fy
>
0
))
if
(
dsize
.
width
!=
(
int
)(
src
.
cols
*
fx
)
||
dsize
.
height
!=
(
int
)(
src
.
rows
*
fy
))
CV_Error
(
CV_StsUnmatchedSizes
,
"invalid dsize and fx, fy!"
);
if
(
dsize
==
Size
()
)
if
(
dsize
.
area
()
==
0
)
{
dsize
=
Size
(
saturate_cast
<
int
>
(
src
.
cols
*
fx
),
saturate_cast
<
int
>
(
src
.
rows
*
fy
));
dsize
=
Size
(
saturate_cast
<
int
>
(
src
.
cols
*
fx
),
saturate_cast
<
int
>
(
src
.
rows
*
fy
));
CV_Assert
(
dsize
.
area
()
>
0
);
}
else
else
{
{
fx
=
(
double
)
dsize
.
width
/
src
.
cols
;
fx
=
(
double
)
dsize
.
width
/
src
.
cols
;
...
@@ -380,13 +347,7 @@ namespace cv
...
@@ -380,13 +347,7 @@ namespace cv
dst
.
create
(
dsize
,
src
.
type
());
dst
.
create
(
dsize
,
src
.
type
());
if
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
)
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
{
resize_gpu
(
src
,
dst
,
fx
,
fy
,
interpolation
);
return
;
}
CV_Error
(
CV_StsUnsupportedFormat
,
"Non-supported interpolation method"
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
...
modules/ocl/src/opencl/imgproc_resize.cl
View file @
cc237b7a
...
@@ -45,7 +45,7 @@
...
@@ -45,7 +45,7 @@
//
resize
kernel
//
resize
kernel
//
Currently,
CV_8UC1
CV_8UC4
CV_32FC1
and
CV_32FC4
are
supported.
//
Currently,
CV_8UC1
,
CV_8UC4,
CV_32FC1
and
CV_32FC4
are
supported.
//
We
shall
support
other
types
later
if
necessary.
//
We
shall
support
other
types
later
if
necessary.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
DOUBLE_SUPPORT
...
@@ -54,20 +54,18 @@
...
@@ -54,20 +54,18 @@
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
F
double
#
else
#
define
F
float
#
endif
#
endif
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
#
define
CAST_BITS
(
INTER_RESIZE_COEF_BITS
<<
1
)
#
define
CAST_BITS
(
INTER_RESIZE_COEF_BITS
<<
1
)
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
#
ifdef
LN
__kernel
void
resizeLN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
const
*
restrict
src,
__kernel
void
resizeLN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
const
*
restrict
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
gx
=
get_global_id
(
0
)
;
int
gx
=
get_global_id
(
0
)
;
...
@@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
...
@@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
float4
sx,
u,
xf
;
float4
sx,
u,
xf
;
int4
x,
DX
;
int4
x,
DX
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
xf
=
floor
(
sx
)
;
xf
=
floor
(
sx
)
;
...
@@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
...
@@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
int4
val1,
val2,
val
;
int4
val1,
val2,
val
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos1
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos2
=
mad24
((
int4
)
y,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos3
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x+
(
int4
)
src_offset
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
step_in_pixel,
x_+
(
int4
)
srcoffset_in_pixel
)
;
int4
pos4
=
mad24
((
int4
)
y_,
(
int4
)
src
_step,
x_+
(
int4
)
src_offset
)
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s1
=
src[pos1.s1]
;
sdata1.s1
=
src[pos1.s1]
;
...
@@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
...
@@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
pos4
=
mad24
(
dy,
dst
step_in_pixel,
gx+dstoffset_in_pixel
)
;
pos4
=
mad24
(
dy,
dst
_step,
gx+dst_offset
)
;
pos4.y++
;
pos4.y++
;
pos4.z+=2
;
pos4.z+=2
;
pos4.w+=3
;
pos4.w+=3
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
offset_in_pixel
&3
)
==0
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dst
_offset
&3
)
==0
)
;
if
(
con
)
if
(
con
)
{
{
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
...
@@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
...
@@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
}
}
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
...
@@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
int
x_
=
INC
(
x,src_cols
)
;
int4
srcpos
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
val
=
mul24
((
int4
)
mul24
(
U1,
V1
)
,
data0
)
+
mul24
((
int4
)
mul24
(
U,
V1
)
,
data1
)
int4
val
=
mul24
((
int4
)
mul24
(
U1,
V1
)
,
data0
)
+
mul24
((
int4
)
mul24
(
U,
V1
)
,
data1
)
+mul24
((
int4
)
mul24
(
U1,
V
)
,
data2
)
+mul24
((
int4
)
mul24
(
U,
V
)
,
data3
)
;
+mul24
((
int4
)
mul24
(
U1,
V
)
,
data2
)
+mul24
((
int4
)
mul24
(
U,
V
)
,
data3
)
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
uval
;
dst[dstpos]
=
uval
;
}
}
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
...
@@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
u1
=
1.f-u
;
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
float
v1
=
1.f-v
;
int4
srcpos
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float
data0
=
src[srcpos.x]
;
float
data0
=
src[srcpos.x]
;
float
data1
=
src[srcpos.y]
;
float
data1
=
src[srcpos.y]
;
float
data2
=
src[srcpos.z]
;
float
data2
=
src[srcpos.z]
;
...
@@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
...
@@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
float
val2
=
u1
*
data2
+
float
val2
=
u1
*
data2
+
u
*
data3
;
u
*
data3
;
float
val
=
v1
*
val1
+
v
*
val2
;
float
val
=
v1
*
val1
+
v
*
val2
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
dst[dstpos]
=
val
;
}
}
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
...
@@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
...
@@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
float
u1
=
1.f-u
;
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
float
v1
=
1.f-v
;
int4
srcpos
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.x
=
mad24
(
y,
src
_step,
x+src_offset
)
;
srcpos.y
=
mad24
(
y,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
src
_step,
x_+src_offset
)
;
srcpos.z
=
mad24
(
y_,
src
step_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
src
_step,
x+src_offset
)
;
srcpos.w
=
mad24
(
y_,
src
step_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
src
_step,
x_+src_offset
)
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
s_data1
=
src[srcpos.x]
;
s_data1
=
src[srcpos.x]
;
s_data2
=
src[srcpos.y]
;
s_data2
=
src[srcpos.y]
;
...
@@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
...
@@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
s_data4
=
src[srcpos.w]
;
s_data4
=
src[srcpos.w]
;
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
int
dstpos
=
mad24
(
dy,
dst
step_in_pixel,
dx+dstoffset_in_pixel
)
;
int
dstpos
=
mad24
(
dy,
dst
_step,
dx+dst_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dstpos]
=
val
;
dst[dstpos]
=
val
;
}
}
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
#
elif
defined
NN
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
gx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
gx
=
(
gx<<2
)
-
(
dstoffset_in_pixel&3
)
;
__kernel
void
resizeNN
(
__global
T
*
dst,
__global
T
*
src,
//int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
int
dst_offset,
int
src_offset,int
dst_step,
int
src_step,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
int4
sx
;
int
sy
;
F
ss1
=
gx*ifx
;
F
ss2
=
(
gx+1
)
*ifx
;
F
ss3
=
(
gx+2
)
*ifx
;
F
ss4
=
(
gx+3
)
*ifx
;
F
s5
=
dy
*
ify
;
sx.s0
=
min
((
int
)
floor
(
ss1
)
,
src_cols-1
)
;
sx.s1
=
min
((
int
)
floor
(
ss2
)
,
src_cols-1
)
;
sx.s2
=
min
((
int
)
floor
(
ss3
)
,
src_cols-1
)
;
sx.s3
=
min
((
int
)
floor
(
ss4
)
,
src_cols-1
)
;
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
uchar4
val
;
int4
pos
=
mad24
((
int4
)
sy,
(
int4
)
srcstep_in_pixel,
sx+
(
int4
)
srcoffset_in_pixel
)
;
val.s0
=
src[pos.s0]
;
val.s1
=
src[pos.s1]
;
val.s2
=
src[pos.s2]
;
val.s3
=
src[pos.s3]
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
//uchar4
dVal
=
*d
;
pos
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos.y++
;
pos.z+=2
;
pos.w+=3
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
&&
(
dstoffset_in_pixel&3
)
==0
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos.x
)
=val
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.x]=val.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.y]=val.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.z]=val.z
;
}
if
(
gx+3
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.w]=val.w
;
}
}
}
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
if
(
dx
<
dst_cols
&&
dy
<
dst_rows
)
F
s2
=
dy*ify
;
{
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
float
s1
=
dx
*
ifx,
s2
=
dy
*
ify
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
sx
=
min
(
convert_int_sat_rtn
(
s1
)
,
src_cols
-
1
)
;
int
sy
=
min
(
convert_int_sat_rtn
(
s2
)
,
src_rows
-
1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dx
+
dst_offset
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
int
src_index
=
mad24
(
sy,
src_step,
sx
+
src_offset
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
dst[dst_index]
=
src[src_index]
;
}
}
}
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
#
endif
int
dstoffset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
F
s1
=
dx*ifx
;
F
s2
=
dy*ify
;
int
s_col
=
floor
(
s1
)
;
int
s_row
=
floor
(
s2
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment