Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
667a9328
Commit
667a9328
authored
Jun 30, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2829 from ilya-lavrenov:tapi_cvtcolor
parents
2d81595e
007593ca
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
524 additions
and
392 deletions
+524
-392
color.cpp
modules/imgproc/src/color.cpp
+5
-13
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+516
-376
test_color.cpp
modules/imgproc/test/ocl/test_color.cpp
+3
-3
No files found.
modules/imgproc/src/color.cpp
View file @
667a9328
...
@@ -2730,8 +2730,6 @@ struct mRGBA2RGBA
...
@@ -2730,8 +2730,6 @@ struct mRGBA2RGBA
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain))
static
bool
ocl_cvtColor
(
InputArray
_src
,
OutputArray
_dst
,
int
code
,
int
dcn
)
static
bool
ocl_cvtColor
(
InputArray
_src
,
OutputArray
_dst
,
int
code
,
int
dcn
)
{
{
bool
ok
=
false
;
bool
ok
=
false
;
...
@@ -2739,23 +2737,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2739,23 +2737,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
Size
sz
=
src
.
size
(),
dstSz
=
sz
;
Size
sz
=
src
.
size
(),
dstSz
=
sz
;
int
scn
=
src
.
channels
(),
depth
=
src
.
depth
(),
bidx
;
int
scn
=
src
.
channels
(),
depth
=
src
.
depth
(),
bidx
;
int
dims
=
2
,
stripeSize
=
1
;
int
dims
=
2
,
stripeSize
=
1
;
size_t
globalsize
[]
=
{
src
.
cols
,
src
.
rows
};
ocl
::
Kernel
k
;
ocl
::
Kernel
k
;
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
return
false
;
return
false
;
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d "
,
depth
,
scn
);
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
1
;
int
pxPerWIy
=
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
?
4
:
1
;
if
(
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
&&
!
(
code
==
CV_BGR2Luv
||
code
==
CV_RGB2Luv
||
code
==
CV_LBGR2Luv
||
code
==
CV_LRGB2Luv
||
size_t
globalsize
[]
=
{
src
.
cols
,
(
src
.
rows
+
pxPerWIy
-
1
)
/
pxPerWIy
};
code
==
CV_Luv2BGR
||
code
==
CV_Luv2RGB
||
code
==
CV_Luv2LBGR
||
code
==
CV_Luv2LRGB
))
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d "
,
pxPerWIy
=
4
;
depth
,
scn
,
pxPerWIy
);
globalsize
[
1
]
=
DIVUP
(
globalsize
[
1
],
pxPerWIy
);
opts
+=
format
(
"-D PIX_PER_WI_Y=%d "
,
pxPerWIy
);
switch
(
code
)
switch
(
code
)
{
{
...
...
modules/imgproc/src/opencl/cvtcolor.cl
View file @
667a9328
...
@@ -71,10 +71,6 @@
...
@@ -71,10 +71,6 @@
#
error
"invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#
error
"invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#
endif
#
endif
#
ifndef
STRIPE_SIZE
#
define
STRIPE_SIZE
1
#
endif
#
define
CV_DESCALE
(
x,n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
#
define
CV_DESCALE
(
x,n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
enum
enum
...
@@ -122,8 +118,8 @@ enum
...
@@ -122,8 +118,8 @@ enum
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
__kernel
void
RGB2Gray
(
__global
const
uchar
*
srcptr,
int
srcstep,
int
src
offset,
__kernel
void
RGB2Gray
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_
offset,
__global
uchar
*
dstptr,
int
dststep,
int
dst
offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_
offset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -131,27 +127,32 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -131,27 +127,32 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
src_index
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
dst_index
)
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
dst[0]
=
src_pix.B_COMP
*
0.114f
+
src_pix.G_COMP
*
0.587f
+
src_pix.R_COMP
*
0.299f
;
dst[0]
=
fma
(
src_pix.B_COMP,
0.114f,
fma
(
src_pix.G_COMP,
0.587f,
src_pix.R_COMP
*
0.299f
))
;
#
else
#
else
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
(
(
src_pix.B_COMP
*
B2Y
+
src_pix.G_COMP
*
G2Y
+
src_pix.R_COMP
*
R2Y
)
,
yuv_shift
)
;
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
(
mad24
(
src_pix.B_COMP,
B2Y,
mad24
(
src_pix.G_COMP,
G2Y,
src_pix.R_COMP
*
R2Y
)
)
,
yuv_shift
)
;
#
endif
#
endif
++y
;
src_index
+=
src_step
;
dst_index
+=
dst_step
;
}
}
++y
;
}
}
}
}
}
}
__kernel
void
Gray2RGB
(
__global
const
uchar
*
srcptr,
int
srcstep,
int
src
offset,
__kernel
void
Gray2RGB
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_
offset,
__global
uchar
*
dstptr,
int
dststep,
int
dst
offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_
offset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
...
@@ -159,20 +160,29 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -159,20 +160,29 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
src_index
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
dst_index
)
;
DATA_TYPE
val
=
src[0]
;
DATA_TYPE
val
=
src[0]
;
#
if
dcn
==
3
|
| defined DEPTH_5
dst[0] = dst[1] = dst[2] = val;
dst[0] = dst[1] = dst[2] = val;
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
#else
*(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(val, val, val, MAX_NUM);
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -182,8 +192,8 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -182,8 +192,8 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
__kernel
void
RGB2YUV
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__kernel void RGB2YUV(__global const uchar* srcptr, int src
_step, int src_
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -191,34 +201,40 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -191,34 +201,40 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE
b
=src_pix.B_COMP,
g=src_pix.G_COMP,
r=
src_pix.R_COMP
;
DATA_TYPE b
= src_pix.B_COMP, g = src_pix.G_COMP, r =
src_pix.R_COMP;
#ifdef DEPTH_5
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YUVCoeffs_f;
__constant float * coeffs = c_RGB2YUVCoeffs_f;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const DATA_TYPE Y =
fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]))
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const DATA_TYPE U =
fma(b - Y, coeffs[3], HALF_MAX)
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
const DATA_TYPE V =
fma(r - Y, coeffs[4], HALF_MAX)
;
#else
#else
__constant int * coeffs = c_RGB2YUVCoeffs_i;
__constant int * coeffs = c_RGB2YUVCoeffs_i;
const int delta = HALF_MAX * (1 << yuv_shift);
const int delta = HALF_MAX * (1 << yuv_shift);
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
,
yuv_shift
)
;
const int Y = CV_DESCALE(
mad24(b, coeffs[0], mad24(g, coeffs[1], r * coeffs[2]))
, yuv_shift);
const
int
U
=
CV_DESCALE
(
(
b
-
Y
)
*
coeffs[3]
+
delta
,
yuv_shift
)
;
const int U = CV_DESCALE(
mad24(b - Y, coeffs[3], delta)
, yuv_shift);
const
int
V
=
CV_DESCALE
(
(
r
-
Y
)
*
coeffs[4]
+
delta
,
yuv_shift
)
;
const int V = CV_DESCALE(
mad24(r - Y, coeffs[4], delta)
, yuv_shift);
#endif
#endif
dst[0] = SAT_CAST( Y );
dst[0] = SAT_CAST( Y );
dst[1] = SAT_CAST( U );
dst[1] = SAT_CAST( U );
dst[2] = SAT_CAST( V );
dst[2] = SAT_CAST( V );
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -226,8 +242,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -226,8 +242,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
__kernel
void
YUV2RGB
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__kernel void YUV2RGB(__global const uchar* srcptr, int src
_step, int src_
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -235,25 +251,28 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -235,25 +251,28 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z;
DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z;
#ifdef DEPTH_5
#ifdef DEPTH_5
__constant float * coeffs = c_YUV2RGBCoeffs_f;
__constant float * coeffs = c_YUV2RGBCoeffs_f;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
float r = fma(V - HALF_MAX, coeffs[3], Y)
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
float g = fma(V - HALF_MAX, coeffs[2], fma(U - HALF_MAX, coeffs[1], Y))
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
float b = fma(U - HALF_MAX, coeffs[0], Y)
;
#else
#else
__constant int * coeffs = c_YUV2RGBCoeffs_i;
__constant int * coeffs = c_YUV2RGBCoeffs_i;
const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
const
int
g
=
Y
+
CV_DESCALE
(
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
,
yuv_shift
)
;
const int g = Y + CV_DESCALE(
mad24(V - HALF_MAX, coeffs[2], (U - HALF_MAX) * coeffs[1])
, yuv_shift);
const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
#endif
#endif
...
@@ -263,8 +282,10 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -263,8 +282,10 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -276,8 +297,8 @@ __constant int ITUR_BT_601_CVG = 852492;
...
@@ -276,8 +297,8 @@ __constant int ITUR_BT_601_CVG = 852492;
__constant int ITUR_BT_601_CVR = 1673527;
__constant int ITUR_BT_601_CVR = 1673527;
__constant int ITUR_BT_601_SHIFT = 20;
__constant int ITUR_BT_601_SHIFT = 20;
__kernel
void
YUV2RGB_NV12
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__kernel void YUV2RGB_NV12(__global const uchar* srcptr, int src
_step, int src_
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -290,15 +311,15 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
...
@@ -290,15 +311,15 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
{
{
if (y < rows / 2 )
if (y < rows / 2 )
{
{
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
src
step,
(
x
<<
1
)
+
src
offset
)
;
__global const uchar* ysrc = srcptr + mad24(y << 1, src
_step, (x << 1) + src_
offset);
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
src
step,
(
x
<<
1
)
+
src
offset
)
;
__global const uchar* usrc = srcptr + mad24(rows + y, src
_step, (x << 1) + src_
offset);
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dst
step,
x
*
(
dcn<<1
)
+
dst
offset
)
;
__global uchar* dst1 = dstptr + mad24(y << 1, dst
_step, x * (dcn<<1) + dt_
offset);
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dst
step,
x
*
(
dcn<<1
)
+
dst
offset
)
;
__global uchar* dst2 = dstptr + mad24((y << 1) + 1, dst
_step, x * (dcn<<1) + dt_
offset);
int Y1 = ysrc[0];
int Y1 = ysrc[0];
int Y2 = ysrc[1];
int Y2 = ysrc[1];
int
Y3
=
ysrc[srcstep]
;
int Y3 = ysrc[src
_
step];
int
Y4
=
ysrc[srcstep
+
1]
;
int Y4 = ysrc[src
_
step + 1];
int U = usrc[0] - 128;
int U = usrc[0] - 128;
int V = usrc[1] - 128;
int V = usrc[1] - 128;
...
@@ -349,8 +370,8 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
...
@@ -349,8 +370,8 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
__kernel
void
RGB2YCrCb
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src
_step, int src_
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
...
@@ -358,34 +379,40 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
...
@@ -358,34 +379,40 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE
b
=src_pix.B_COMP,
g=src_pix.G_COMP,
r=
src_pix.R_COMP
;
DATA_TYPE b
= src_pix.B_COMP, g = src_pix.G_COMP, r =
src_pix.R_COMP;
#ifdef DEPTH_5
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE Y =
fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]))
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE Cr =
fma(r - Y, coeffs[3], HALF_MAX)
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
DATA_TYPE Cb =
fma(b - Y, coeffs[4], HALF_MAX)
;
#else
#else
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
int delta = HALF_MAX * (1 << yuv_shift);
int delta = HALF_MAX * (1 << yuv_shift);
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
,
yuv_shift
)
;
int Y = CV_DESCALE(
mad24(b, coeffs[2], mad24(g, coeffs[1], r * coeffs[0]))
, yuv_shift);
int
Cr
=
CV_DESCALE
(
(
r
-
Y
)
*
coeffs[3]
+
delta
,
yuv_shift
)
;
int Cr = CV_DESCALE(
mad24(r - Y, coeffs[3], delta)
, yuv_shift);
int
Cb
=
CV_DESCALE
(
(
b
-
Y
)
*
coeffs[4]
+
delta
,
yuv_shift
)
;
int Cb = CV_DESCALE(
mad24(b - Y, coeffs[4], delta)
, yuv_shift);
#endif
#endif
dst[0] = SAT_CAST( Y );
dst[0] = SAT_CAST( Y );
dst[1] = SAT_CAST( Cr );
dst[1] = SAT_CAST( Cr );
dst[2] = SAT_CAST( Cb );
dst[2] = SAT_CAST( Cb );
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -402,28 +429,29 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -402,28 +429,29 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const DATA_TYPE * srcptr = (__global const DATA_TYPE*)(src + src_index);
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global DATA_TYPE * dstptr = (__global DATA_TYPE*)(dst + dst_index);
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
DATA_TYPE_4 src_pix = vload4(0, srcptr);
DATA_TYPE_4 src_pix = vload4(0, srcptr);
DATA_TYPE y = src_pix.x, cr = src_pix.y, cb = src_pix.z;
DATA_TYPE y = src_pix.x, cr = src_pix.y, cb = src_pix.z;
#ifdef DEPTH_5
#ifdef DEPTH_5
__constant float * coeff = c_YCrCb2RGBCoeffs_f;
__constant float * coeff = c_YCrCb2RGBCoeffs_f;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float r =
fma(coeff[0], cr - HALF_MAX, y
);
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float g =
fma(coeff[1], cr - HALF_MAX, fma(coeff[2], cb - HALF_MAX, y)
);
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
float b =
fma(coeff[3], cb - HALF_MAX, y
);
#else
#else
__constant int * coeff = c_YCrCb2RGBCoeffs_i;
__constant int * coeff = c_YCrCb2RGBCoeffs_i;
int r = y + CV_DESCALE(coeff[0] * (cr - HALF_MAX), yuv_shift);
int r = y + CV_DESCALE(coeff[0] * (cr - HALF_MAX), yuv_shift);
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int g = y + CV_DESCALE(
mad24(coeff[1], cr - HALF_MAX, coeff[2] * (cb - HALF_MAX)
), yuv_shift);
int b = y + CV_DESCALE(coeff[3] * (cb - HALF_MAX), yuv_shift);
int b = y + CV_DESCALE(coeff[3] * (cb - HALF_MAX), yuv_shift);
#endif
#endif
...
@@ -433,8 +461,11 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -433,8 +461,11 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
#if dcn == 4
#if dcn == 4
dstptr[3] = MAX_NUM;
dstptr[3] = MAX_NUM;
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -450,34 +481,37 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -450,34 +481,37 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
if (dx < cols)
if (dx < cols)
{
{
int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (dy < rows)
if (dy < rows)
{
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z;
DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z;
#ifdef DEPTH_5
#ifdef DEPTH_5
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float x =
fma(r, coeffs[0], fma(g, coeffs[1], b * coeffs[2]))
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float y =
fma(r, coeffs[3], fma(g, coeffs[4], b * coeffs[5]))
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
float z =
fma(r, coeffs[6], fma(g, coeffs[7], b * coeffs[8]))
;
#else
#else
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
,
xyz_shift
)
;
int x = CV_DESCALE(
mad24(r, coeffs[0], mad24(g, coeffs[1], b * coeffs[2]))
, xyz_shift);
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
,
xyz_shift
)
;
int y = CV_DESCALE(
mad24(r, coeffs[3], mad24(g, coeffs[4], b * coeffs[5]))
, xyz_shift);
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
,
xyz_shift
)
;
int z = CV_DESCALE(
mad24(r, coeffs[6], mad24(g, coeffs[7], b * coeffs[8]))
, xyz_shift);
#endif
#endif
dst[0] = SAT_CAST(x);
dst[0] = SAT_CAST(x);
dst[1] = SAT_CAST(y);
dst[1] = SAT_CAST(y);
dst[2] = SAT_CAST(z);
dst[2] = SAT_CAST(z);
++dy;
dst_index += dst_step;
src_index += src_step;
}
}
++dy
;
}
}
}
}
}
}
...
@@ -491,37 +525,48 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -491,37 +525,48 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
if (dx < cols)
if (dx < cols)
{
{
int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (dy < rows)
if (dy < rows)
{
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z;
DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z;
#ifdef DEPTH_5
#ifdef DEPTH_5
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float b =
fma(x, coeffs[0], fma(y, coeffs[1], z * coeffs[2]))
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float g =
fma(x, coeffs[3], fma(y, coeffs[4], z * coeffs[5]))
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
float r =
fma(x, coeffs[6], fma(y, coeffs[7], z * coeffs[8]))
;
#else
#else
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
,
xyz_shift
)
;
int b = CV_DESCALE(
mad24(x, coeffs[0], mad24(y, coeffs[1], z * coeffs[2]))
, xyz_shift);
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
,
xyz_shift
)
;
int g = CV_DESCALE(
mad24(x, coeffs[3], mad24(y, coeffs[4], z * coeffs[5]))
, xyz_shift);
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
,
xyz_shift
)
;
int r = CV_DESCALE(
mad24(x, coeffs[6], mad24(y, coeffs[7], z * coeffs[8]))
, xyz_shift);
#endif
#endif
dst[0]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
DATA_TYPE dst0 = SAT_CAST(b);
dst[2]
=
SAT_CAST
(
r
)
;
DATA_TYPE dst1 = SAT_CAST(g);
DATA_TYPE dst2 = SAT_CAST(r);
#if dcn == 3 || defined DEPTH_5
dst[0] = dst0;
dst[1] = dst1;
dst[2] = dst2;
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
#else
*(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(dst0, dst1, dst2, MAX_NUM);
#endif
++dy;
dst_index += dst_step;
src_index += src_step;
}
}
++dy
;
}
}
}
}
}
}
...
@@ -537,16 +582,16 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
...
@@ -537,16 +582,16 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE_4 src_pix = vload4(0, src);
#ifdef REVERSE
#ifdef REVERSE
...
@@ -566,8 +611,11 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
...
@@ -566,8 +611,11 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
dst[3] = src[3];
dst[3] = src[3];
#endif
#endif
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -583,34 +631,38 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
...
@@ -583,34 +631,38 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
ushort t = *((__global const ushort*)(src + src_index));
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
#if greenbits == 6
#if greenbits == 6
dst[dst_i
d
x
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_i
nde
x + bidx] = (uchar)(t << 3);
dst[dst_i
d
x
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_i
nde
x + 1] = (uchar)((t >> 3) & ~3);
dst[dst_i
d
x
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
dst[dst_i
nde
x + (bidx^2)] = (uchar)((t >> 8) & ~7);
#else
#else
dst[dst_i
d
x
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_i
nde
x + bidx] = (uchar)(t << 3);
dst[dst_i
d
x
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_i
nde
x + 1] = (uchar)((t >> 2) & ~7);
dst[dst_i
d
x
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
dst[dst_i
nde
x + (bidx^2)] = (uchar)((t >> 7) & ~7);
#endif
#endif
#if dcn == 4
#if dcn == 4
#if greenbits == 6
#if greenbits == 6
dst[dst_i
d
x
+
3]
=
255
;
dst[dst_i
nde
x + 3] = 255;
#else
#else
dst[dst_i
d
x
+
3]
=
t
&
0x8000
?
255
:
0
;
dst[dst_i
nde
x + 3] = t & 0x8000 ? 255 : 0;
#endif
#endif
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -624,25 +676,29 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
...
@@ -624,25 +676,29 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
uchar4 src_pix = vload4(0, src + src_index);
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
uchar4
src_pix
=
vload4
(
0
,
src
+
src_idx
)
;
#if greenbits == 6
#if greenbits == 6
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~3) << 3)|
((
src_pix.R_COMP&~7
)
<<
8
))
;
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~3
)
<<
3
)
|((src_pix.R_COMP&~7) << 8));
#elif scn == 3
#elif scn == 3
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
((
src_pix.R_COMP&~7
)
<<
7
))
;
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~7
)
<<
2
)
|((src_pix.R_COMP&~7) << 7));
#else
#else
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~7
)
<<
2
)
|
((src_pix.R_COMP&~7) << 7)|
(
src_pix.w
?
0x8000
:
0
))
;
((src_pix.R_COMP&~7) << 7)|
(
src_pix.w
?
0x8000
:
0
))
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -658,26 +714,25 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
...
@@ -658,26 +714,25 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int
t
=
*
((
__global
const
ushort*
)(
src
+
src_index
))
;
int dst_idx = mad24(y, dst_step, dst_offset + x);
int t = *((__global const ushort*)(src + src_idx));
#
if
greenbits
==
6
#
if
greenbits
==
6
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
dst[dst_index]
=
(
uchar
)
CV_DESCALE
(
mad24
((
t
<<
3
)
&
0xf8,
B2Y,
mad24
((
t
>>
3
)
&
0xfc,
G2Y,
((
t
>>
8
)
&
0xf8
)
*
R2Y
))
,
yuv_shift
)
;
((t >> 3) & 0xfc)*G2Y +
((t >> 8) & 0xf8)*R2Y, yuv_shift);
#
else
#
else
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
dst[dst_index]
=
(
uchar
)
CV_DESCALE
(
mad24
((
t
<<
3
)
&
0xf8,
B2Y,
mad24
((
t
>>
2
)
&
0xf8,
G2Y,
((
t
>>
7
)
&
0xf8
)
*
R2Y
))
,
yuv_shift
)
;
((t >> 2) & 0xf8)*G2Y +
((t >> 7) & 0xf8)*R2Y, yuv_shift);
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -691,23 +746,26 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
...
@@ -691,23 +746,26 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x);
int
t
=
src[src_index]
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int t = src[src_idx];
#
if
greenbits
==
6
#
if
greenbits
==
6
*((__global ushort*)(dst + dst_i
d
x)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
*
((
__global
ushort*
)(
dst
+
dst_i
nde
x
))
=
(
ushort
)((
t
>>
3
)
| ((t & ~3) << 3) |
((
t
&
~7
)
<<
8
))
;
#
else
#
else
t
>>=
3
;
t
>>=
3
;
*((__global ushort*)(dst + dst_i
d
x)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
*
((
__global
ushort*
)(
dst
+
dst_i
nde
x
))
=
(
ushort
)(
t
|(t << 5)|
(
t
<<
10
))
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -733,40 +791,44 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
...
@@ -733,40 +791,44 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
int
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
int
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
int
h,
s,
v
=
b
;
int
h,
s,
v
=
b
;
int
vmin
=
b,
diff
;
int
vmin
=
b,
diff
;
int
vr,
vg
;
int
vr,
vg
;
v = max(
v, g
);
v
=
max
(
v,
g
)
;
v = max(
v, r
);
v
=
max
(
v,
r
)
;
vmin = min(
vmin, g
);
vmin
=
min
(
vmin,
g
)
;
vmin = min(
vmin, r
);
vmin
=
min
(
vmin,
r
)
;
diff
=
v
-
vmin
;
diff
=
v
-
vmin
;
vr
=
v
==
r
?
-1
:
0
;
vr
=
v
==
r
?
-1
:
0
;
vg
=
v
==
g
?
-1
:
0
;
vg
=
v
==
g
?
-1
:
0
;
s =
(diff * sdiv_table[v] +
(1 << (hsv_shift-1))) >> hsv_shift;
s
=
mad24
(
diff,
sdiv_table[v],
(
1
<<
(
hsv_shift-1
)))
>>
hsv_shift
;
h
=
(
vr
&
(
g
-
b
))
+
h
=
(
vr
&
(
g
-
b
))
+
(~vr & ((vg &
(b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff
))));
(
~vr
&
((
vg
&
mad24
(
diff,
2
,
b
-
r
))
+
((
~vg
)
&
mad24
(
4
,
diff,
r
-
g
))))
;
h =
(h * hdiv_table[diff] +
(1 << (hsv_shift-1))) >> hsv_shift;
h
=
mad24
(
h,
hdiv_table[diff],
(
1
<<
(
hsv_shift-1
)))
>>
hsv_shift
;
h
+=
h
<
0
?
hrange
:
0
;
h
+=
h
<
0
?
hrange
:
0
;
dst[dst_idx] = convert_uchar_sat_rte(h);
dst[dst_index]
=
convert_uchar_sat_rte
(
h
)
;
dst[dst_idx + 1] = (uchar)s;
dst[dst_index
+
1]
=
(
uchar
)
s
;
dst[dst_idx + 2] = (uchar)v;
dst[dst_index
+
2]
=
(
uchar
)
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -780,14 +842,15 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -780,14 +842,15 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float
h
=
src_pix.x,
s
=
src_pix.y*
(
1/255.f
)
,
v
=
src_pix.z*
(
1/255.f
)
;
float
h
=
src_pix.x,
s
=
src_pix.y*
(
1/255.f
)
,
v
=
src_pix.z*
(
1/255.f
)
;
float
b,
g,
r
;
float
b,
g,
r
;
...
@@ -821,14 +884,17 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -821,14 +884,17 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
else
else
b
=
g
=
r
=
v
;
b
=
g
=
r
=
v
;
dst[dst_i
d
x + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_i
nde
x
+
bidx]
=
convert_uchar_sat_rte
(
b*255.f
)
;
dst[dst_i
d
x + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_i
nde
x
+
1]
=
convert_uchar_sat_rte
(
g*255.f
)
;
dst[dst_i
d
x + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
dst[dst_i
nde
x
+
(
bidx^2
)
]
=
convert_uchar_sat_rte
(
r*255.f
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[dst_i
d
x + 3] = MAX_NUM;
dst[dst_i
nde
x
+
3]
=
MAX_NUM
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -844,16 +910,16 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -844,16 +910,16 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4
src_pix
=
vload4
(
0
,
src
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
...
@@ -873,17 +939,21 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -873,17 +939,21 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
if
(
v
==
r
)
if
(
v
==
r
)
h
=
(
g
-
b
)
*diff
;
h
=
(
g
-
b
)
*diff
;
else
if
(
v
==
g
)
else
if
(
v
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if( h < 0 ) h += 360.f;
if
(
h
<
0
)
h
+=
360.f
;
dst[0]
=
h*hscale
;
dst[0]
=
h*hscale
;
dst[1]
=
s
;
dst[1]
=
s
;
dst[2]
=
v
;
dst[2]
=
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -897,16 +967,17 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -897,16 +967,17 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_i
d
x);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_i
nde
x
)
;
__global float * dst = (__global float *)(dstptr + dst_i
d
x);
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_i
nde
x
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
h
=
src_pix.x,
s
=
src_pix.y,
v
=
src_pix.z
;
float
h
=
src_pix.x,
s
=
src_pix.y,
v
=
src_pix.z
;
...
@@ -947,8 +1018,11 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -947,8 +1018,11 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -968,14 +1042,15 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
...
@@ -968,14 +1042,15 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float
b
=
src_pix.B_COMP*
(
1/255.f
)
,
g
=
src_pix.G_COMP*
(
1/255.f
)
,
r
=
src_pix.R_COMP*
(
1/255.f
)
;
float
b
=
src_pix.B_COMP*
(
1/255.f
)
,
g
=
src_pix.G_COMP*
(
1/255.f
)
,
r
=
src_pix.R_COMP*
(
1/255.f
)
;
float
h
=
0.f,
s
=
0.f,
l
;
float
h
=
0.f,
s
=
0.f,
l
;
...
@@ -998,18 +1073,22 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
...
@@ -998,18 +1073,22 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
if
(
vmax
==
r
)
if
(
vmax
==
r
)
h
=
(
g
-
b
)
*diff
;
h
=
(
g
-
b
)
*diff
;
else
if
(
vmax
==
g
)
else
if
(
vmax
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if( h < 0.f ) h += 360.f;
if
(
h
<
0.f
)
h
+=
360.f
;
}
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_index]
=
convert_uchar_sat_rte
(
h*hscale
)
;
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_index
+
1]
=
convert_uchar_sat_rte
(
l*255.f
)
;
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
dst[dst_index
+
2]
=
convert_uchar_sat_rte
(
s*255.f
)
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -1023,14 +1102,15 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -1023,14 +1102,15 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float
h
=
src_pix.x,
l
=
src_pix.y*
(
1.f/255.f
)
,
s
=
src_pix.z*
(
1.f/255.f
)
;
float
h
=
src_pix.x,
l
=
src_pix.y*
(
1.f/255.f
)
,
s
=
src_pix.z*
(
1.f/255.f
)
;
float
b,
g,
r
;
float
b,
g,
r
;
...
@@ -1053,8 +1133,8 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -1053,8 +1133,8 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
tab[0]
=
p2
;
tab[0]
=
p2
;
tab[1]
=
p1
;
tab[1]
=
p1
;
tab[2] =
p1 + (p2 - p1)*(1-h
);
tab[2]
=
fma
(
p2
-
p1,
1-h,
p1
)
;
tab[3] =
p1 + (p2 - p1)*h
;
tab[3]
=
fma
(
p2
-
p1,
h,
p1
)
;
b
=
tab[sector_data[sector][0]]
;
b
=
tab[sector_data[sector][0]]
;
g
=
tab[sector_data[sector][1]]
;
g
=
tab[sector_data[sector][1]]
;
...
@@ -1063,14 +1143,17 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -1063,14 +1143,17 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
else
else
b
=
g
=
r
=
l
;
b
=
g
=
r
=
l
;
dst[dst_i
d
x + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_i
nde
x
+
bidx]
=
convert_uchar_sat_rte
(
b*255.f
)
;
dst[dst_i
d
x + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_i
nde
x
+
1]
=
convert_uchar_sat_rte
(
g*255.f
)
;
dst[dst_i
d
x + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
dst[dst_i
nde
x
+
(
bidx^2
)
]
=
convert_uchar_sat_rte
(
r*255.f
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[dst_i
d
x + 3] = MAX_NUM;
dst[dst_i
nde
x
+
3]
=
MAX_NUM
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -1086,16 +1169,16 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1086,16 +1169,16 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4
src_pix
=
vload4
(
0
,
src
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
...
@@ -1119,9 +1202,9 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1119,9 +1202,9 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
if
(
vmax
==
r
)
if
(
vmax
==
r
)
h
=
(
g
-
b
)
*diff
;
h
=
(
g
-
b
)
*diff
;
else
if
(
vmax
==
g
)
else
if
(
vmax
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if
(
h
<
0.f
)
h
+=
360.f
;
if
(
h
<
0.f
)
h
+=
360.f
;
}
}
...
@@ -1129,8 +1212,11 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1129,8 +1212,11 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
dst[0]
=
h*hscale
;
dst[0]
=
h*hscale
;
dst[1]
=
l
;
dst[1]
=
l
;
dst[2]
=
s
;
dst[2]
=
s
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -1144,16 +1230,16 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1144,16 +1230,16 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4
src_pix
=
vload4
(
0
,
src
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
h
=
src_pix.x,
l
=
src_pix.y,
s
=
src_pix.z
;
float
h
=
src_pix.x,
l
=
src_pix.y,
s
=
src_pix.z
;
...
@@ -1178,8 +1264,8 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1178,8 +1264,8 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
tab[0]
=
p2
;
tab[0]
=
p2
;
tab[1]
=
p1
;
tab[1]
=
p1
;
tab[2] =
p1 + (p2 - p1)*(1-h
);
tab[2]
=
fma
(
p2
-
p1,
1-h,
p1
)
;
tab[3] =
p1 + (p2 - p1)*h
;
tab[3]
=
fma
(
p2
-
p1,
h,
p1
)
;
b
=
tab[sector_data[sector][0]]
;
b
=
tab[sector_data[sector][0]]
;
g
=
tab[sector_data[sector][1]]
;
g
=
tab[sector_data[sector][1]]
;
...
@@ -1194,8 +1280,11 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -1194,8 +1280,11 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -1215,24 +1304,25 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
...
@@ -1215,24 +1304,25 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
(
x
<<
2
))
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
2
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
uchar4
src_pix
=
*
(
__global
const
uchar4
*
)(
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar v0 = src_pix.x, v1 = src_pix.y;
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
uchar v2 = src_pix.z, v3 = src_pix.w;
(
uchar4
)(
mad24
(
src_pix.x,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.y,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.z,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
src_pix.w
)
;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
++y
;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst_index
+=
dst_step
;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
src_index
+=
src_step
;
dst[dst_idx + 3] = v3;
}
}
++y;
}
}
}
}
}
}
...
@@ -1246,25 +1336,29 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
...
@@ -1246,25 +1336,29 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
if
(
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
4
,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
4
,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
{
if
(
y
<
rows
)
if
(
y
<
rows
)
{
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
uchar4
src_pix
=
*
(
__global
const
uchar4
*
)(
src
+
src_index
)
;
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar
v3
=
src_pix.w,
v3_half
=
v3
/
2
;
uchar4 src_pix = vload4(0, src + src_idx);
if
(
v3
==
0
)
uchar v0 = src_pix.x, v1 = src_pix.y;
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
0
,
0
,
0
,
0
)
;
uchar v2 = src_pix.z, v3 = src_pix.w;
else
uchar v3_half = v3 / 2;
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
mad24
(
src_pix.x,
MAX_NUM,
v3_half
)
/
v3,
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
mad24
(
src_pix.y,
MAX_NUM,
v3_half
)
/
v3,
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
mad24
(
src_pix.z,
MAX_NUM,
v3_half
)
/
v3,
v3
)
;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 3] = v3;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
++y;
}
}
}
}
}
}
...
@@ -1283,8 +1377,8 @@ inline float splineInterpolate(float x, __global const float * tab, int n)
...
@@ -1283,8 +1377,8 @@ inline float splineInterpolate(float x, __global const float * tab, int n)
{
{
int ix = clamp(convert_int_sat_rtn(x), 0, n-1);
int ix = clamp(convert_int_sat_rtn(x), 0, n-1);
x -= ix;
x -= ix;
tab
+=
ix
*4
;
tab += ix
<< 2
;
return
((
tab[3]*x
+
tab[2]
)
*x
+
tab[1]
)
*x
+
tab[0]
;
return
fma(fma(fma(tab[3], x, tab[2]), x, tab[1]), x, tab[0])
;
}
}
#ifdef DEPTH_0
#ifdef DEPTH_0
...
@@ -1299,16 +1393,16 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
...
@@ -1299,16 +1393,16 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const uchar* src_ptr = src + src_index;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global uchar* dst_ptr = dst + dst_index;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4 src_pix = vload4(0, src_ptr);
uchar4 src_pix = vload4(0, src_ptr);
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
...
@@ -1316,19 +1410,22 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
...
@@ -1316,19 +1410,22 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z];
int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z];
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2
,
lab_shift
)
]
;
int fX = LabCbrtTab_b[CV_DESCALE(
mad24(R, C0, mad24(G, C1, B*C2))
, lab_shift)];
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5
,
lab_shift
)
]
;
int fY = LabCbrtTab_b[CV_DESCALE(
mad24(R, C3, mad24(G, C4, B*C5))
, lab_shift)];
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8
,
lab_shift
)
]
;
int fZ = LabCbrtTab_b[CV_DESCALE(
mad24(R, C6, mad24(G, C7, B*C8))
, lab_shift)];
int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int a = CV_DESCALE(
mad24(500, fX - fY, 128*(1 << lab_shift2)
), lab_shift2 );
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int b = CV_DESCALE(
mad24(200, fY - fZ, 128*(1 << lab_shift2)
), lab_shift2 );
dst_ptr[0] = SAT_CAST(L);
dst_ptr[0] = SAT_CAST(L);
dst_ptr[1] = SAT_CAST(a);
dst_ptr[1] = SAT_CAST(a);
dst_ptr[2] = SAT_CAST(b);
dst_ptr[2] = SAT_CAST(b);
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -1347,16 +1444,16 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1347,16 +1444,16 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const float * src = (__global const float *)(srcptr + src_index);
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global float * dst = (__global float *)(dstptr + dst_index);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4 src_pix = vload4(0, src);
float4 src_pix = vload4(0, src);
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
...
@@ -1373,23 +1470,26 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1373,23 +1470,26 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif
#endif
float
X
=
R*C0
+
G*C1
+
B*C2
;
float X =
fma(R, C0, fma(G, C1, B*C2))
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float Y =
fma(R, C3, fma(G, C4, B*C5))
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float Z =
fma(R, C6, fma(G, C7, B*C8))
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float FX = X > 0.008856f ?
rootn(X, 3) : fma(7.787f, X,
_a);
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float FY = Y > 0.008856f ?
rootn(Y, 3) : fma(7.787f, Y,
_a);
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float FZ = Z > 0.008856f ?
rootn(Z, 3) : fma(7.787f, Z,
_a);
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float L = Y > 0.008856f ?
fma(116.f, FY, -
16.f) : (903.3f * Y);
float a = 500.f * (FX - FY);
float a = 500.f * (FX - FY);
float b = 200.f * (FY - FZ);
float b = 200.f * (FY - FZ);
dst[0] = L;
dst[0] = L;
dst[1] = a;
dst[1] = a;
dst[2] = b;
dst[2] = b;
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -1412,7 +1512,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
...
@@ -1412,7 +1512,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
if (li <= lThresh)
if (li <= lThresh)
{
{
y = li / 903.3f;
y = li / 903.3f;
fy
=
7.787f
*
y
+
16.0f
/
116.0f
;
fy =
fma(7.787f, y, 16.0f / 116.0f)
;
}
}
else
else
{
{
...
@@ -1422,6 +1522,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
...
@@ -1422,6 +1522,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
#pragma unroll
for (int j = 0; j < 2; j++)
for (int j = 0; j < 2; j++)
if (fxz[j] <= fThresh)
if (fxz[j] <= fThresh)
fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
...
@@ -1429,9 +1530,9 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
...
@@ -1429,9 +1530,9 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
fxz[j] = fxz[j] * fxz[j] * fxz[j];
fxz[j] = fxz[j] * fxz[j] * fxz[j];
float x = fxz[0], z = fxz[1];
float x = fxz[0], z = fxz[1];
float
ro
=
clamp
(
C0
*
x
+
C1
*
y
+
C2
*
z
,
0.0f,
1.0f
)
;
float ro = clamp(
fma(C0, x, fma(C1, y, C2 * z))
, 0.0f, 1.0f);
float
go
=
clamp
(
C3
*
x
+
C4
*
y
+
C5
*
z
,
0.0f,
1.0f
)
;
float go = clamp(
fma(C3, x, fma(C4, y, C5 * z))
, 0.0f, 1.0f);
float
bo
=
clamp
(
C6
*
x
+
C7
*
y
+
C8
*
z
,
0.0f,
1.0f
)
;
float bo = clamp(
fma(C6, x, fma(C7, y, C8 * z))
, 0.0f, 1.0f);
#ifdef SRGB
#ifdef SRGB
ro = splineInterpolate(ro * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
ro = splineInterpolate(ro * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
...
@@ -1456,16 +1557,16 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
...
@@ -1456,16 +1557,16 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const uchar* src_ptr = src + src_index;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global uchar * dst_ptr = dst + dst_index;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4 src_pix = vload4(0, src_ptr);
uchar4 src_pix = vload4(0, src_ptr);
float srcbuf[3], dstbuf[3];
float srcbuf[3], dstbuf[3];
...
@@ -1479,14 +1580,18 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
...
@@ -1479,14 +1580,18 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
#endif
#endif
coeffs, lThresh, fThresh);
coeffs, lThresh, fThresh);
#if dcn == 3
dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f);
dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f);
dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f);
dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f);
dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f);
dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f);
#
if
dcn
==
4
#else
dst_ptr[3]
=
MAX_NUM
;
*(__global uchar4 *)dst_ptr = (uchar4)(SAT_CAST(dstbuf[0] * 255.0f),
SAT_CAST(dstbuf[1] * 255.0f), SAT_CAST(dstbuf[2] * 255.0f), MAX_NUM);
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -1505,16 +1610,16 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1505,16 +1610,16 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
if (x < cols)
if (x < cols)
{
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
{
if (y < rows)
if (y < rows)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
__global const float * src = (__global const float *)(srcptr + src_index);
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global float * dst = (__global float *)(dstptr + dst_index);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4 src_pix = vload4(0, src);
float4 src_pix = vload4(0, src);
float srcbuf[3], dstbuf[3];
float srcbuf[3], dstbuf[3];
...
@@ -1530,8 +1635,10 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1530,8 +1635,10 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
}
++y
;
}
}
}
}
}
}
...
@@ -1555,37 +1662,46 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1555,37 +1662,46 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
{
int
src_i
d
x
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
src_i
nde
x
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_i
d
x
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
int
dst_i
nde
x
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
#
pragma
unroll
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
{
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float
R
=
src[0],
G
=
src[1],
B
=
src[2]
;
float
R
=
src[0],
G
=
src[1],
B
=
src[2]
;
#
ifdef
SRGB
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
#
endif
float
X
=
R*coeffs[0]
+
G*coeffs[1]
+
B*coeffs[2]
;
float
X
=
fma
(
R,
coeffs[0],
fma
(
G,
coeffs[1],
B*coeffs[2]
))
;
float
Y
=
R*coeffs[3]
+
G*coeffs[4]
+
B*coeffs[5]
;
float
Y
=
fma
(
R,
coeffs[3],
fma
(
G,
coeffs[4],
B*coeffs[5]
))
;
float
Z
=
R*coeffs[6]
+
G*coeffs[7]
+
B*coeffs[8]
;
float
Z
=
fma
(
R,
coeffs[6],
fma
(
G,
coeffs[7],
B*coeffs[8]
))
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
116.f*L
-
16.f
;
L
=
fma
(
116.f,
L,
-16.f
)
;
float
d
=
(
4*13
)
/
max
(
X
+
15
*
Y
+
3
*
Z
,
FLT_EPSILON
)
;
float
d
=
52.0f
/
fmax
(
fma
(
15.0f,
Y,
fma
(
3.0f,
Z,
X
))
,
FLT_EPSILON
)
;
float
u
=
L*
(
X*d
-
_un
)
;
float
u
=
L*fma
(
X,
d,
-
_un
)
;
float
v
=
L*
((
9*0.25f
)
*Y*
d
-
_vn
)
;
float
v
=
L*fma
(
2.25f,
Y*d,
-
_vn
)
;
dst[0]
=
L
;
dst[0]
=
L
;
dst[1]
=
u
;
dst[1]
=
u
;
dst[2]
=
v
;
dst[2]
=
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
}
}
}
...
@@ -1599,38 +1715,44 @@ __kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
...
@@ -1599,38 +1715,44 @@ __kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
src
+
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
dst
+
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
src
+=
src_idx
;
#
pragma
unroll
dst
+=
dst_idx
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
float
scale
=
1.0f
/
255.0f
;
{
float
R
=
src[0]*scale,
G
=
src[1]*scale,
B
=
src[2]*scale
;
float
scale
=
1.0f
/
255.0f
;
float
R
=
src[0]*scale,
G
=
src[1]*scale,
B
=
src[2]*scale
;
#
ifdef
SRGB
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
#
endif
float
X
=
R*coeffs[0]
+
G*coeffs[1]
+
B*coeffs[2]
;
float
X
=
fma
(
R,
coeffs[0],
fma
(
G,
coeffs[1],
B*coeffs[2]
))
;
float
Y
=
R*coeffs[3]
+
G*coeffs[4]
+
B*coeffs[5]
;
float
Y
=
fma
(
R,
coeffs[3],
fma
(
G,
coeffs[4],
B*coeffs[5]
))
;
float
Z
=
R*coeffs[6]
+
G*coeffs[7]
+
B*coeffs[8]
;
float
Z
=
fma
(
R,
coeffs[6],
fma
(
G,
coeffs[7],
B*coeffs[8]
))
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
116.f*L
-
16.f
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
float
d
=
(
4*13
)
/
fmax
(
fma
(
15.0f,
Y,
fma
(
3.0f,
Z,
X
))
,
FLT_EPSILON
)
;
L
=
116.f*L
-
16.f
;
float
u
=
L*
(
X*d
-
_un
)
;
float
v
=
L*fma
(
2.25f,
Y*d,
-_vn
)
;
float
d
=
(
4*13
)
/
max
(
X
+
15
*
Y
+
3
*
Z,
FLT_EPSILON
)
;
dst[0]
=
SAT_CAST
(
L
*
2.55f
)
;
float
u
=
L*
(
X*d
-
_un
)
;
dst[1]
=
SAT_CAST
(
fma
(
u,
0.72033898305084743f,
96.525423728813564f
)
)
;
float
v
=
L*
((
9*0.25f
)
*Y*
d
-
_vn
)
;
dst[2]
=
SAT_CAST
(
fma
(
v,
0.99609375f,
139.453125f
)
)
;
dst[0]
=
SAT_CAST
(
L
*
2.55f
)
;
++y
;
dst[1]
=
SAT_CAST
(
mad
(
u,
0.72033898305084743f,
96.525423728813564f
))
;
dst
+=
dst_step
;
dst[2]
=
SAT_CAST
(
mad
(
v,
0.99609375f,
139.453125f
))
;
src
+=
src_step
;
}
}
}
}
}
...
@@ -1646,42 +1768,50 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1646,42 +1768,50 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
_un,
float
_vn
)
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
#
pragma
unroll
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
float
L
=
src[0],
u
=
src[1],
v
=
src[2],
d,
X,
Y,
Z
;
{
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
Y
=
Y*Y*Y
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
d
=
(
1.f/13.f
)
/L
;
u
=
u*d
+
_un
;
float
L
=
src[0],
u
=
src[1],
v
=
src[2],
d,
X,
Y,
Z
;
v
=
v*d
+
_vn
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
float
iv
=
1.f/v
;
Y
=
Y*Y*Y
;
X
=
2.25f
*
u
*
Y
*
iv
;
d
=
(
1.f/13.f
)
/L
;
Z
=
(
12
-
3
*
u
-
20
*
v
)
*
Y
*
0.25f
*
iv
;
u
=
fma
(
u,
d,
_un
)
;
v
=
fma
(
v,
d,
_vn
)
;
float
R
=
X*coeffs[0]
+
Y*coeffs[1]
+
Z*coeffs[2]
;
float
iv
=
1.f/v
;
float
G
=
X*coeffs[3]
+
Y*coeffs[4]
+
Z*coeffs[5]
;
X
=
2.25f
*
u
*
Y
*
iv
;
float
B
=
X*coeffs[6]
+
Y*coeffs[7]
+
Z*coeffs[8]
;
Z
=
(
12
-
fma
(
3.0f,
u,
20.0f
*
v
))
*
Y
*
0.25f
*
iv
;
float
R
=
fma
(
X,
coeffs[0],
fma
(
Y,
coeffs[1],
Z
*
coeffs[2]
))
;
float
G
=
fma
(
X,
coeffs[3],
fma
(
Y,
coeffs[4],
Z
*
coeffs[5]
))
;
float
B
=
fma
(
X,
coeffs[6],
fma
(
Y,
coeffs[7],
Z
*
coeffs[8]
))
;
#
ifdef
SRGB
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
#
endif
dst[0]
=
R
;
dst[0]
=
R
;
dst[1]
=
G
;
dst[1]
=
G
;
dst[2]
=
B
;
dst[2]
=
B
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
}
}
}
...
@@ -1695,46 +1825,56 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
...
@@ -1695,46 +1825,56 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
src
+=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
dst
+=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
src
+=
src_idx
;
#
pragma
unroll
dst
+=
dst_idx
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
float
d,
X,
Y,
Z
;
{
float
L
=
src[0]*
(
100.f/255.f
)
;
float
d,
X,
Y,
Z
;
float
u
=
(
float
)(
src[1]*1.388235294117647f
-
134.f
)
;
float
L
=
src[0]*
(
100.f/255.f
)
;
float
v
=
(
float
)(
src[2]*1.003921568627451f
-
140.f
)
;
float
u
=
fma
(
convert_float
(
src[1]
)
,
1.388235294117647f,
-134.f
)
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
float
v
=
fma
(
convert_float
(
src[2]
)
,
1.003921568627451f,
-
140.f
)
;
Y
=
Y*Y*Y
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
d
=
(
1.f/13.f
)
/L
;
Y
=
Y*Y*Y
;
u
=
u*d
+
_un
;
d
=
(
1.f/13.f
)
/L
;
v
=
v*d
+
_vn
;
u
=
fma
(
u,
d,
_un
)
;
float
iv
=
1.f/v
;
v
=
fma
(
v,
d,
_vn
)
;
X
=
2.25f
*
u
*
Y
*
iv
;
float
iv
=
1.f/v
;
Z
=
(
12
-
3
*
u
-
20
*
v
)
*
Y
*
0.25f
*
iv
;
X
=
2.25f
*
u
*
Y
*
iv
;
Z
=
(
12
-
fma
(
3.0f,
u,
20.0f
*
v
))
*
Y
*
0.25f
*
iv
;
float
R
=
X*coeffs[0]
+
Y*coeffs[1]
+
Z*coeffs[2]
;
float
G
=
X*coeffs[3]
+
Y*coeffs[4]
+
Z*coeffs[5]
;
float
R
=
fma
(
X,
coeffs[0],
fma
(
Y,
coeffs[1],
Z
*
coeffs[2]
))
;
float
B
=
X*coeffs[6]
+
Y*coeffs[7]
+
Z*coeffs[8]
;
float
G
=
fma
(
X,
coeffs[3],
fma
(
Y,
coeffs[4],
Z
*
coeffs[5]
))
;
float
B
=
fma
(
X,
coeffs[6],
fma
(
Y,
coeffs[7],
Z
*
coeffs[8]
))
;
#
ifdef
SRGB
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
#
endif
dst[0]
=
SAT_CAST
(
R
*
255.0f
)
;
uchar
dst0
=
SAT_CAST
(
R
*
255.0f
)
;
dst[1]
=
SAT_CAST
(
G
*
255.0f
)
;
uchar
dst1
=
SAT_CAST
(
G
*
255.0f
)
;
dst[2]
=
SAT_CAST
(
B
*
255.0f
)
;
uchar
dst2
=
SAT_CAST
(
B
*
255.0f
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
*
(
__global
uchar4
*
)
dst
=
(
uchar4
)(
dst0,
dst1,
dst2,
MAX_NUM
)
;
#
else
dst[0]
=
dst0
;
dst[1]
=
dst1
;
dst[2]
=
dst2
;
#
endif
#
endif
++y
;
dst
+=
dst_step
;
src
+=
src_step
;
}
}
}
}
}
...
...
modules/imgproc/test/ocl/test_color.cpp
View file @
667a9328
...
@@ -305,11 +305,11 @@ OCL_TEST_P(CvtColor8u32f, Lab2LRGBA) { performTest(3, 4, CVTCODE(Lab2LRGB), dept
...
@@ -305,11 +305,11 @@ OCL_TEST_P(CvtColor8u32f, Lab2LRGBA) { performTest(3, 4, CVTCODE(Lab2LRGB), dept
OCL_TEST_P
(
CvtColor8u32f
,
BGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
BGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
4e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
4e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
BGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
8e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
BGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
8e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
9e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
9e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2BGR
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2BGR
),
depth
==
CV_8U
?
1
:
7e-5
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2BGR
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2BGR
),
depth
==
CV_8U
?
1
:
7e-5
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2RGB
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2RGB
),
depth
==
CV_8U
?
1
:
7e-5
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2RGB
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2RGB
),
depth
==
CV_8U
?
1
:
7e-5
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment