Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
46a333ed
Commit
46a333ed
authored
Nov 30, 2016
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7755 from pengli:warp_image
parents
5b85adaa
2ca5a7e8
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
199 additions
and
5 deletions
+199
-5
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+3
-3
warp_transform.cl
modules/imgproc/src/opencl/warp_transform.cl
+194
-0
test_warp.cpp
modules/imgproc/test/ocl/test_warp.cpp
+2
-2
No files found.
modules/imgproc/src/imgwarp.cpp
View file @
46a333ed
...
...
@@ -5662,15 +5662,15 @@ static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArra
if
(
!
dev
.
isIntel
()
||
!
(
type
==
CV_8UC1
)
||
!
(
dtype
==
CV_8UC1
)
||
!
(
_dst
.
cols
()
%
4
==
0
)
||
!
(
borderType
==
cv
::
BORDER_CONSTANT
&&
(
interpolation
==
cv
::
INTER_NEAREST
||
interpolation
==
cv
::
INTER_LINEAR
)))
(
interpolation
==
cv
::
INTER_NEAREST
||
interpolation
==
cv
::
INTER_LINEAR
||
interpolation
==
cv
::
INTER_CUBIC
)))
return
false
;
const
char
*
const
warp_op
[
2
]
=
{
"Affine"
,
"Perspective"
};
const
char
*
const
interpolationMap
[
3
]
=
{
"nearest"
,
"linear"
,
NULL
};
const
char
*
const
interpolationMap
[
3
]
=
{
"nearest"
,
"linear"
,
"cubic"
};
ocl
::
ProgramSource
program
=
ocl
::
imgproc
::
warp_transform_oclsrc
;
String
kernelName
=
format
(
"warp%s_%s_8u"
,
warp_op
[
op_type
],
interpolationMap
[
interpolation
]);
bool
is32f
=
(
interpolation
==
INTER_
LINEAR
)
;
bool
is32f
=
(
interpolation
==
INTER_
CUBIC
||
interpolation
==
INTER_LINEAR
)
&&
op_type
==
OCL_OP_AFFINE
;
int
wdepth
=
interpolation
==
INTER_NEAREST
?
depth
:
std
::
max
(
is32f
?
CV_32F
:
CV_32S
,
depth
);
int
sctype
=
CV_MAKETYPE
(
wdepth
,
cn
);
...
...
modules/imgproc/src/opencl/warp_transform.cl
View file @
46a333ed
...
...
@@ -117,6 +117,115 @@ __kernel void warpAffine_linear_8u(__global const uchar * src, int src_step, int
vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
}
__constant float coeffs[128] =
{ 0.000000f, 1.000000f, 0.000000f, 0.000000f, -0.021996f, 0.997841f, 0.024864f, -0.000710f, -0.041199f, 0.991516f, 0.052429f, -0.002747f,
-0.057747f, 0.981255f, 0.082466f, -0.005974f, -0.071777f, 0.967285f, 0.114746f, -0.010254f, -0.083427f, 0.949837f, 0.149040f, -0.015450f,
-0.092834f, 0.929138f, 0.185120f, -0.021423f, -0.100136f, 0.905418f, 0.222755f, -0.028038f, -0.105469f, 0.878906f, 0.261719f, -0.035156f,
-0.108971f, 0.849831f, 0.301781f, -0.042641f, -0.110779f, 0.818420f, 0.342712f, -0.050354f, -0.111031f, 0.784904f, 0.384285f, -0.058159f,
-0.109863f, 0.749512f, 0.426270f, -0.065918f, -0.107414f, 0.712471f, 0.468437f, -0.073494f, -0.103821f, 0.674011f, 0.510559f, -0.080750f,
-0.099220f, 0.634361f, 0.552406f, -0.087547f, -0.093750f, 0.593750f, 0.593750f, -0.093750f, -0.087547f, 0.552406f, 0.634361f, -0.099220f,
-0.080750f, 0.510559f, 0.674011f, -0.103821f, -0.073494f, 0.468437f, 0.712471f, -0.107414f, -0.065918f, 0.426270f, 0.749512f, -0.109863f,
-0.058159f, 0.384285f, 0.784904f, -0.111031f, -0.050354f, 0.342712f, 0.818420f, -0.110779f, -0.042641f, 0.301781f, 0.849831f, -0.108971f,
-0.035156f, 0.261719f, 0.878906f, -0.105469f, -0.028038f, 0.222755f, 0.905418f, -0.100136f, -0.021423f, 0.185120f, 0.929138f, -0.092834f,
-0.015450f, 0.149040f, 0.949837f, -0.083427f, -0.010254f, 0.114746f, 0.967285f, -0.071777f, -0.005974f, 0.082466f, 0.981255f, -0.057747f,
-0.002747f, 0.052429f, 0.991516f, -0.041199f, -0.000710f, 0.024864f, 0.997841f, -0.021996f };
uchar4 read_pixels_cubic(__global const uchar * src, int tx, int ty,
int src_offset, int src_step, int src_cols, int src_rows, uchar scalar)
{
uchar4 pix;
if (tx >= 0 && (tx + 3) < src_cols && ty >= 0 && ty < src_rows)
{
pix = vload4(0, src + src_offset + ty * src_step + tx);
}
else
{
pix.s0 = GET_VAL((tx + 0), ty);
pix.s1 = GET_VAL((tx + 1), ty);
pix.s2 = GET_VAL((tx + 2), ty);
pix.s3 = GET_VAL((tx + 3), ty);
}
return pix;
}
__kernel void warpAffine_cubic_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * M, ST scalar_)
{
int x = get_global_id(0) * 4;
int y = get_global_id(1);
uchar scalar = convert_uchar_sat_rte(scalar_);
if (x >= dst_cols || y >= dst_rows) return;
/* { M0, M1, M2 }
* { M3, M4, M5 }
*/
float4 nx, ny;
nx = M[0] * convert_float4((vec_offset + (short4)x)) + M[1] * convert_float4((short4)y) + M[2];
ny = M[3] * convert_float4((vec_offset + (short4)x)) + M[4] * convert_float4((short4)y) + M[5];
int4 ax, ay;
ax = convert_int4_sat_rte((nx - floor(nx)) * 32.0f) & 31;
ay = convert_int4_sat_rte((ny - floor(ny)) * 32.0f) & 31;
int4 tx, ty;
int4 delta_x, delta_y;
delta_x = select((int4)1, (int4)0, ((nx - floor(nx))) * 64 > 63);
delta_y = select((int4)1, (int4)0, ((ny - floor(ny))) * 64 > 63);
tx = convert_int4_sat_rtn(nx) - delta_x;
ty = convert_int4_sat_rtn(ny) - delta_y;
__constant float * coeffs_x, * coeffs_y;
float4 sum = (float4)0.0f;
uchar4 pix;
float xsum;
coeffs_x = coeffs + (ax.s0 << 2);
coeffs_y = coeffs + (ay.s0 << 2);
for (int i = 0; i < 4; i++)
{
pix = read_pixels_cubic(src, tx.s0, ty.s0 + i, src_offset, src_step, src_cols, src_rows, scalar);
xsum = dot(convert_float4(pix), (float4)(coeffs_x[0], coeffs_x[1], coeffs_x[2], coeffs_x[3]));
sum.s0 = fma(xsum, coeffs_y[i], sum.s0);
}
coeffs_x = coeffs + (ax.s1 << 2);
coeffs_y = coeffs + (ay.s1 << 2);
for (int i = 0; i < 4; i++)
{
pix = read_pixels_cubic(src, tx.s1, ty.s1 + i, src_offset, src_step, src_cols, src_rows, scalar);
xsum = dot(convert_float4(pix), (float4)(coeffs_x[0], coeffs_x[1], coeffs_x[2], coeffs_x[3]));
sum.s1 = fma(xsum, coeffs_y[i], sum.s1);
}
coeffs_x = coeffs + (ax.s2 << 2);
coeffs_y = coeffs + (ay.s2 << 2);
for (int i = 0; i < 4; i++)
{
pix = read_pixels_cubic(src, tx.s2, ty.s2 + i, src_offset, src_step, src_cols, src_rows, scalar);
xsum = dot(convert_float4(pix), (float4)(coeffs_x[0], coeffs_x[1], coeffs_x[2], coeffs_x[3]));
sum.s2 = fma(xsum, coeffs_y[i], sum.s2);
}
coeffs_x = coeffs + (ax.s3 << 2);
coeffs_y = coeffs + (ay.s3 << 2);
for (int i = 0; i < 4; i++)
{
pix = read_pixels_cubic(src, tx.s3, ty.s3 + i, src_offset, src_step, src_cols, src_rows, scalar);
xsum = dot(convert_float4(pix), (float4)(coeffs_x[0], coeffs_x[1], coeffs_x[2], coeffs_x[3]));
sum.s3 = fma(xsum, coeffs_y[i], sum.s3);
}
int dst_index = x + y * dst_step + dst_offset;
vstore4(convert_uchar4_sat_rte(sum), 0, dst + dst_index);
}
__kernel void warpPerspective_nearest_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * M, ST scalar_)
...
...
@@ -212,3 +321,88 @@ __kernel void warpPerspective_linear_8u(__global const uchar * src, int src_step
int dst_index = x + y * dst_step + dst_offset;
vstore4(convert_uchar4_sat_rte(pixel), 0, dst + dst_index);
}
__kernel void warpPerspective_cubic_8u(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__constant float * M, ST scalar_)
{
int x = get_global_id(0) * 4;
int y = get_global_id(1);
uchar scalar = convert_uchar_sat_rte(scalar_);
if (x >= dst_cols |
|
y
>=
dst_rows
)
return
;
/*
{
M0,
M1,
M2
}
*
{
M3,
M4,
M5
}
*
{
M6,
M7,
M8
}
*/
float4
nx,
ny,
nz
;
nx
=
M[0]
*
convert_float4
(
vec_offset
+
(
short4
)(
x
))
+
M[1]
*
convert_float4
((
short4
)
y
)
+
M[2]
;
ny
=
M[3]
*
convert_float4
(
vec_offset
+
(
short4
)(
x
))
+
M[4]
*
convert_float4
((
short4
)
y
)
+
M[5]
;
nz
=
M[6]
*
convert_float4
(
vec_offset
+
(
short4
)(
x
))
+
M[7]
*
convert_float4
((
short4
)
y
)
+
M[8]
;
float4
fz
=
select
((
float4
)(
0.0f
)
,
(
float4
)(
1.0f
/
nz
)
,
nz
!=
0.0f
)
;
nx
=
nx
*
fz
;
ny
=
ny
*
fz
;
int4
ax,
ay
;
ax
=
convert_int4_sat_rte
((
nx
-
floor
(
nx
))
*
32.0f
)
&
31
;
ay
=
convert_int4_sat_rte
((
ny
-
floor
(
ny
))
*
32.0f
)
&
31
;
int4
tx,
ty
;
int4
delta_x,
delta_y
;
delta_x
=
select
((
int4
)
1
,
(
int4
)
0
,
((
nx
-
floor
(
nx
)))
*
64
>
63
)
;
delta_y
=
select
((
int4
)
1
,
(
int4
)
0
,
((
ny
-
floor
(
ny
)))
*
64
>
63
)
;
tx
=
convert_int4_sat_rtn
(
nx
)
-
delta_x
;
ty
=
convert_int4_sat_rtn
(
ny
)
-
delta_y
;
__constant
float
*
coeffs_x,
*
coeffs_y
;
float4
sum
=
(
float4
)
0.0f
;
uchar4
pix
;
float
xsum
;
coeffs_x
=
coeffs
+
(
ax.s0
<<
2
)
;
coeffs_y
=
coeffs
+
(
ay.s0
<<
2
)
;
for
(
int
i
=
0
; i < 4; i++)
{
pix
=
read_pixels_cubic
(
src,
tx.s0,
ty.s0
+
i,
src_offset,
src_step,
src_cols,
src_rows,
scalar
)
;
xsum
=
dot
(
convert_float4
(
pix
)
,
(
float4
)(
coeffs_x[0],
coeffs_x[1],
coeffs_x[2],
coeffs_x[3]
))
;
sum.s0
=
fma
(
xsum,
coeffs_y[i],
sum.s0
)
;
}
coeffs_x
=
coeffs
+
(
ax.s1
<<
2
)
;
coeffs_y
=
coeffs
+
(
ay.s1
<<
2
)
;
for
(
int
i
=
0
; i < 4; i++)
{
pix
=
read_pixels_cubic
(
src,
tx.s1,
ty.s1
+
i,
src_offset,
src_step,
src_cols,
src_rows,
scalar
)
;
xsum
=
dot
(
convert_float4
(
pix
)
,
(
float4
)(
coeffs_x[0],
coeffs_x[1],
coeffs_x[2],
coeffs_x[3]
))
;
sum.s1
=
fma
(
xsum,
coeffs_y[i],
sum.s1
)
;
}
coeffs_x
=
coeffs
+
(
ax.s2
<<
2
)
;
coeffs_y
=
coeffs
+
(
ay.s2
<<
2
)
;
for
(
int
i
=
0
; i < 4; i++)
{
pix
=
read_pixels_cubic
(
src,
tx.s2,
ty.s2
+
i,
src_offset,
src_step,
src_cols,
src_rows,
scalar
)
;
xsum
=
dot
(
convert_float4
(
pix
)
,
(
float4
)(
coeffs_x[0],
coeffs_x[1],
coeffs_x[2],
coeffs_x[3]
))
;
sum.s2
=
fma
(
xsum,
coeffs_y[i],
sum.s2
)
;
}
coeffs_x
=
coeffs
+
(
ax.s3
<<
2
)
;
coeffs_y
=
coeffs
+
(
ay.s3
<<
2
)
;
for
(
int
i
=
0
; i < 4; i++)
{
pix
=
read_pixels_cubic
(
src,
tx.s3,
ty.s3
+
i,
src_offset,
src_step,
src_cols,
src_rows,
scalar
)
;
xsum
=
dot
(
convert_float4
(
pix
)
,
(
float4
)(
coeffs_x[0],
coeffs_x[1],
coeffs_x[2],
coeffs_x[3]
))
;
sum.s3
=
fma
(
xsum,
coeffs_y[i],
sum.s3
)
;
}
int
dst_index
=
x
+
y
*
dst_step
+
dst_offset
;
vstore4
(
convert_uchar4_sat_rte
(
sum
)
,
0
,
dst
+
dst_index
)
;
}
modules/imgproc/test/ocl/test_warp.cpp
View file @
46a333ed
...
...
@@ -437,7 +437,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpAffine, Combine(
OCL_INSTANTIATE_TEST_CASE_P
(
ImgprocWarp
,
WarpAffine_cols4
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
Values
((
Interpolation
)
INTER_NEAREST
,
(
Interpolation
)
INTER_LINEAR
),
Values
((
Interpolation
)
INTER_NEAREST
,
(
Interpolation
)
INTER_LINEAR
,
(
Interpolation
)
INTER_CUBIC
),
Bool
(),
Bool
()));
...
...
@@ -449,7 +449,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
OCL_INSTANTIATE_TEST_CASE_P
(
ImgprocWarp
,
WarpPerspective_cols4
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
Values
((
Interpolation
)
INTER_NEAREST
,
(
Interpolation
)
INTER_LINEAR
),
Values
((
Interpolation
)
INTER_NEAREST
,
(
Interpolation
)
INTER_LINEAR
,
(
Interpolation
)
INTER_CUBIC
),
Bool
(),
Bool
()));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment