Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
667a9328
Commit
667a9328
authored
Jun 30, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2829 from ilya-lavrenov:tapi_cvtcolor
parents
2d81595e
007593ca
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
524 additions
and
392 deletions
+524
-392
color.cpp
modules/imgproc/src/color.cpp
+5
-13
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+516
-376
test_color.cpp
modules/imgproc/test/ocl/test_color.cpp
+3
-3
No files found.
modules/imgproc/src/color.cpp
View file @
667a9328
...
...
@@ -2730,8 +2730,6 @@ struct mRGBA2RGBA
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain))
static
bool
ocl_cvtColor
(
InputArray
_src
,
OutputArray
_dst
,
int
code
,
int
dcn
)
{
bool
ok
=
false
;
...
...
@@ -2739,23 +2737,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
Size
sz
=
src
.
size
(),
dstSz
=
sz
;
int
scn
=
src
.
channels
(),
depth
=
src
.
depth
(),
bidx
;
int
dims
=
2
,
stripeSize
=
1
;
size_t
globalsize
[]
=
{
src
.
cols
,
src
.
rows
};
ocl
::
Kernel
k
;
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
return
false
;
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d "
,
depth
,
scn
);
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
1
;
if
(
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
&&
!
(
code
==
CV_BGR2Luv
||
code
==
CV_RGB2Luv
||
code
==
CV_LBGR2Luv
||
code
==
CV_LRGB2Luv
||
code
==
CV_Luv2BGR
||
code
==
CV_Luv2RGB
||
code
==
CV_Luv2LBGR
||
code
==
CV_Luv2LRGB
))
pxPerWIy
=
4
;
globalsize
[
1
]
=
DIVUP
(
globalsize
[
1
],
pxPerWIy
);
opts
+=
format
(
"-D PIX_PER_WI_Y=%d "
,
pxPerWIy
);
int
pxPerWIy
=
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
?
4
:
1
;
size_t
globalsize
[]
=
{
src
.
cols
,
(
src
.
rows
+
pxPerWIy
-
1
)
/
pxPerWIy
};
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d "
,
depth
,
scn
,
pxPerWIy
);
switch
(
code
)
{
...
...
modules/imgproc/src/opencl/cvtcolor.cl
View file @
667a9328
...
...
@@ -71,10 +71,6 @@
#
error
"invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#
endif
#
ifndef
STRIPE_SIZE
#
define
STRIPE_SIZE
1
#
endif
#
define
CV_DESCALE
(
x,n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
enum
...
...
@@ -122,8 +118,8 @@ enum
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
__kernel
void
RGB2Gray
(
__global
const
uchar
*
srcptr,
int
srcstep,
int
src
offset,
__global
uchar
*
dstptr,
int
dststep,
int
dst
offset,
__kernel
void
RGB2Gray
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_
offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_
offset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -131,27 +127,32 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
src_index
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
dst_index
)
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
DEPTH_5
dst[0]
=
src_pix.B_COMP
*
0.114f
+
src_pix.G_COMP
*
0.587f
+
src_pix.R_COMP
*
0.299f
;
dst[0]
=
fma
(
src_pix.B_COMP,
0.114f,
fma
(
src_pix.G_COMP,
0.587f,
src_pix.R_COMP
*
0.299f
))
;
#
else
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
(
(
src_pix.B_COMP
*
B2Y
+
src_pix.G_COMP
*
G2Y
+
src_pix.R_COMP
*
R2Y
)
,
yuv_shift
)
;
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
(
mad24
(
src_pix.B_COMP,
B2Y,
mad24
(
src_pix.G_COMP,
G2Y,
src_pix.R_COMP
*
R2Y
)
)
,
yuv_shift
)
;
#
endif
++y
;
src_index
+=
src_step
;
dst_index
+=
dst_step
;
}
++y
;
}
}
}
__kernel
void
Gray2RGB
(
__global
const
uchar
*
srcptr,
int
srcstep,
int
src
offset,
__global
uchar
*
dstptr,
int
dststep,
int
dst
offset,
__kernel
void
Gray2RGB
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_
offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_
offset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -159,20 +160,29 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
src_index
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
dst_index
)
;
DATA_TYPE
val
=
src[0]
;
#
if
dcn
==
3
|
| defined DEPTH_5
dst[0] = dst[1] = dst[2] = val;
#if dcn == 4
dst[3] = MAX_NUM;
#endif
#else
*(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(val, val, val, MAX_NUM);
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -182,8 +192,8 @@ __kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 };
__kernel
void
RGB2YUV
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__kernel void RGB2YUV(__global const uchar* srcptr, int src
_step, int src_
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
{
int x = get_global_id(0);
...
...
@@ -191,34 +201,40 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE
b
=src_pix.B_COMP,
g=src_pix.G_COMP,
r=
src_pix.R_COMP
;
DATA_TYPE b
= src_pix.B_COMP, g = src_pix.G_COMP, r =
src_pix.R_COMP;
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YUVCoeffs_f;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
const DATA_TYPE Y =
fma(b, coeffs[0], fma(g, coeffs[1], r * coeffs[2]))
;
const DATA_TYPE U =
fma(b - Y, coeffs[3], HALF_MAX)
;
const DATA_TYPE V =
fma(r - Y, coeffs[4], HALF_MAX)
;
#else
__constant int * coeffs = c_RGB2YUVCoeffs_i;
const int delta = HALF_MAX * (1 << yuv_shift);
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
,
yuv_shift
)
;
const
int
U
=
CV_DESCALE
(
(
b
-
Y
)
*
coeffs[3]
+
delta
,
yuv_shift
)
;
const
int
V
=
CV_DESCALE
(
(
r
-
Y
)
*
coeffs[4]
+
delta
,
yuv_shift
)
;
const int Y = CV_DESCALE(
mad24(b, coeffs[0], mad24(g, coeffs[1], r * coeffs[2]))
, yuv_shift);
const int U = CV_DESCALE(
mad24(b - Y, coeffs[3], delta)
, yuv_shift);
const int V = CV_DESCALE(
mad24(r - Y, coeffs[4], delta)
, yuv_shift);
#endif
dst[0] = SAT_CAST( Y );
dst[1] = SAT_CAST( U );
dst[2] = SAT_CAST( V );
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -226,8 +242,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
__constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant int c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
__kernel
void
YUV2RGB
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__kernel void YUV2RGB(__global const uchar* srcptr, int src
_step, int src_
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
{
int x = get_global_id(0);
...
...
@@ -235,25 +251,28 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z;
#ifdef DEPTH_5
__constant float * coeffs = c_YUV2RGBCoeffs_f;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
float r = fma(V - HALF_MAX, coeffs[3], Y)
;
float g = fma(V - HALF_MAX, coeffs[2], fma(U - HALF_MAX, coeffs[1], Y))
;
float b = fma(U - HALF_MAX, coeffs[0], Y)
;
#else
__constant int * coeffs = c_YUV2RGBCoeffs_i;
const int r = Y + CV_DESCALE((V - HALF_MAX) * coeffs[3], yuv_shift);
const
int
g
=
Y
+
CV_DESCALE
(
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
,
yuv_shift
)
;
const int g = Y + CV_DESCALE(
mad24(V - HALF_MAX, coeffs[2], (U - HALF_MAX) * coeffs[1])
, yuv_shift);
const int b = Y + CV_DESCALE((U - HALF_MAX) * coeffs[0], yuv_shift);
#endif
...
...
@@ -263,8 +282,10 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
#if dcn == 4
dst[3] = MAX_NUM;
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -276,8 +297,8 @@ __constant int ITUR_BT_601_CVG = 852492;
__constant int ITUR_BT_601_CVR = 1673527;
__constant int ITUR_BT_601_SHIFT = 20;
__kernel
void
YUV2RGB_NV12
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__kernel void YUV2RGB_NV12(__global const uchar* srcptr, int src
_step, int src_
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
{
int x = get_global_id(0);
...
...
@@ -290,15 +311,15 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
{
if (y < rows / 2 )
{
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
src
step,
(
x
<<
1
)
+
src
offset
)
;
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
src
step,
(
x
<<
1
)
+
src
offset
)
;
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dst
step,
x
*
(
dcn<<1
)
+
dst
offset
)
;
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dst
step,
x
*
(
dcn<<1
)
+
dst
offset
)
;
__global const uchar* ysrc = srcptr + mad24(y << 1, src
_step, (x << 1) + src_
offset);
__global const uchar* usrc = srcptr + mad24(rows + y, src
_step, (x << 1) + src_
offset);
__global uchar* dst1 = dstptr + mad24(y << 1, dst
_step, x * (dcn<<1) + dt_
offset);
__global uchar* dst2 = dstptr + mad24((y << 1) + 1, dst
_step, x * (dcn<<1) + dt_
offset);
int Y1 = ysrc[0];
int Y2 = ysrc[1];
int
Y3
=
ysrc[srcstep]
;
int
Y4
=
ysrc[srcstep
+
1]
;
int Y3 = ysrc[src
_
step];
int Y4 = ysrc[src
_
step + 1];
int U = usrc[0] - 128;
int V = usrc[1] - 128;
...
...
@@ -349,8 +370,8 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
__kernel
void
RGB2YCrCb
(
__global
const
uchar*
srcptr,
int
src
step,
int
src
offset,
__global
uchar*
dstptr,
int
dst
step,
int
dst
offset,
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src
_step, int src_
offset,
__global uchar* dstptr, int dst
_step, int dt_
offset,
int rows, int cols)
{
int x = get_global_id(0);
...
...
@@ -358,34 +379,40 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
)
)
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
)
)
;
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
src_index
);
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr +
dst_index
);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE
b
=src_pix.B_COMP,
g=src_pix.G_COMP,
r=
src_pix.R_COMP
;
DATA_TYPE b
= src_pix.B_COMP, g = src_pix.G_COMP, r =
src_pix.R_COMP;
#ifdef DEPTH_5
__constant float * coeffs = c_RGB2YCrCbCoeffs_f;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
DATA_TYPE Y =
fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]))
;
DATA_TYPE Cr =
fma(r - Y, coeffs[3], HALF_MAX)
;
DATA_TYPE Cb =
fma(b - Y, coeffs[4], HALF_MAX)
;
#else
__constant int * coeffs = c_RGB2YCrCbCoeffs_i;
int delta = HALF_MAX * (1 << yuv_shift);
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
,
yuv_shift
)
;
int
Cr
=
CV_DESCALE
(
(
r
-
Y
)
*
coeffs[3]
+
delta
,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
(
(
b
-
Y
)
*
coeffs[4]
+
delta
,
yuv_shift
)
;
int Y = CV_DESCALE(
mad24(b, coeffs[2], mad24(g, coeffs[1], r * coeffs[0]))
, yuv_shift);
int Cr = CV_DESCALE(
mad24(r - Y, coeffs[3], delta)
, yuv_shift);
int Cb = CV_DESCALE(
mad24(b - Y, coeffs[4], delta)
, yuv_shift);
#endif
dst[0] = SAT_CAST( Y );
dst[1] = SAT_CAST( Cr );
dst[2] = SAT_CAST( Cb );
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -402,28 +429,29 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
__global const DATA_TYPE * srcptr = (__global const DATA_TYPE*)(src + src_index);
__global DATA_TYPE * dstptr = (__global DATA_TYPE*)(dst + dst_index);
DATA_TYPE_4 src_pix = vload4(0, srcptr);
DATA_TYPE y = src_pix.x, cr = src_pix.y, cb = src_pix.z;
#ifdef DEPTH_5
__constant float * coeff = c_YCrCb2RGBCoeffs_f;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
float r =
fma(coeff[0], cr - HALF_MAX, y
);
float g =
fma(coeff[1], cr - HALF_MAX, fma(coeff[2], cb - HALF_MAX, y)
);
float b =
fma(coeff[3], cb - HALF_MAX, y
);
#else
__constant int * coeff = c_YCrCb2RGBCoeffs_i;
int r = y + CV_DESCALE(coeff[0] * (cr - HALF_MAX), yuv_shift);
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int g = y + CV_DESCALE(
mad24(coeff[1], cr - HALF_MAX, coeff[2] * (cb - HALF_MAX)
), yuv_shift);
int b = y + CV_DESCALE(coeff[3] * (cb - HALF_MAX), yuv_shift);
#endif
...
...
@@ -433,8 +461,11 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
#if dcn == 4
dstptr[3] = MAX_NUM;
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -450,34 +481,37 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
if (dx < cols)
{
int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (dy < rows)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z;
#ifdef DEPTH_5
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
float x =
fma(r, coeffs[0], fma(g, coeffs[1], b * coeffs[2]))
;
float y =
fma(r, coeffs[3], fma(g, coeffs[4], b * coeffs[5]))
;
float z =
fma(r, coeffs[6], fma(g, coeffs[7], b * coeffs[8]))
;
#else
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
,
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
,
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
,
xyz_shift
)
;
int x = CV_DESCALE(
mad24(r, coeffs[0], mad24(g, coeffs[1], b * coeffs[2]))
, xyz_shift);
int y = CV_DESCALE(
mad24(r, coeffs[3], mad24(g, coeffs[4], b * coeffs[5]))
, xyz_shift);
int z = CV_DESCALE(
mad24(r, coeffs[6], mad24(g, coeffs[7], b * coeffs[8]))
, xyz_shift);
#endif
dst[0] = SAT_CAST(x);
dst[1] = SAT_CAST(y);
dst[2] = SAT_CAST(z);
++dy;
dst_index += dst_step;
src_index += src_step;
}
++dy
;
}
}
}
...
...
@@ -491,37 +525,48 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
if (dx < cols)
{
int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (dy < rows)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
DATA_TYPE_4 src_pix = vload4(0, src);
DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z;
#ifdef DEPTH_5
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
float b =
fma(x, coeffs[0], fma(y, coeffs[1], z * coeffs[2]))
;
float g =
fma(x, coeffs[3], fma(y, coeffs[4], z * coeffs[5]))
;
float r =
fma(x, coeffs[6], fma(y, coeffs[7], z * coeffs[8]))
;
#else
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
,
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
,
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
,
xyz_shift
)
;
int b = CV_DESCALE(
mad24(x, coeffs[0], mad24(y, coeffs[1], z * coeffs[2]))
, xyz_shift);
int g = CV_DESCALE(
mad24(x, coeffs[3], mad24(y, coeffs[4], z * coeffs[5]))
, xyz_shift);
int r = CV_DESCALE(
mad24(x, coeffs[6], mad24(y, coeffs[7], z * coeffs[8]))
, xyz_shift);
#endif
dst[0]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[2]
=
SAT_CAST
(
r
)
;
DATA_TYPE dst0 = SAT_CAST(b);
DATA_TYPE dst1 = SAT_CAST(g);
DATA_TYPE dst2 = SAT_CAST(r);
#if dcn == 3 || defined DEPTH_5
dst[0] = dst0;
dst[1] = dst1;
dst[2] = dst2;
#if dcn == 4
dst[3] = MAX_NUM;
#endif
#else
*(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(dst0, dst1, dst2, MAX_NUM);
#endif
++dy;
dst_index += dst_step;
src_index += src_step;
}
++dy
;
}
}
}
...
...
@@ -537,16 +582,16 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
__global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);
DATA_TYPE_4 src_pix = vload4(0, src);
#ifdef REVERSE
...
...
@@ -566,8 +611,11 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
dst[3] = src[3];
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -583,34 +631,38 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
ushort t = *((__global const ushort*)(src + src_index));
#if greenbits == 6
dst[dst_i
d
x
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_i
d
x
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_i
d
x
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
dst[dst_i
nde
x + bidx] = (uchar)(t << 3);
dst[dst_i
nde
x + 1] = (uchar)((t >> 3) & ~3);
dst[dst_i
nde
x + (bidx^2)] = (uchar)((t >> 8) & ~7);
#else
dst[dst_i
d
x
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_i
d
x
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_i
d
x
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
dst[dst_i
nde
x + bidx] = (uchar)(t << 3);
dst[dst_i
nde
x + 1] = (uchar)((t >> 2) & ~7);
dst[dst_i
nde
x + (bidx^2)] = (uchar)((t >> 7) & ~7);
#endif
#if dcn == 4
#if greenbits == 6
dst[dst_i
d
x
+
3]
=
255
;
dst[dst_i
nde
x + 3] = 255;
#else
dst[dst_i
d
x
+
3]
=
t
&
0x8000
?
255
:
0
;
dst[dst_i
nde
x + 3] = t & 0x8000 ? 255 : 0;
#endif
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -624,25 +676,29 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
uchar4
src_pix
=
vload4
(
0
,
src
+
src_idx
)
;
uchar4 src_pix = vload4(0, src + src_index);
#if greenbits == 6
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~3) << 3)|
((
src_pix.R_COMP&~7
)
<<
8
))
;
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~3
)
<<
3
)
|((src_pix.R_COMP&~7) << 8));
#elif scn == 3
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
((
src_pix.R_COMP&~7
)
<<
7
))
;
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~7
)
<<
2
)
|((src_pix.R_COMP&~7) << 7));
#else
*
((
__global
ushort*
)(
dst
+
dst_i
d
x
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
*((__global ushort*)(dst + dst_i
nde
x)) = (ushort)((src_pix.B_COMP >> 3)|
((
src_pix.G_COMP&~7
)
<<
2
)
|
((src_pix.R_COMP&~7) << 7)|
(
src_pix.w
?
0x8000
:
0
))
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -658,26 +714,25 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x);
int t = *((__global const ushort*)(src + src_idx));
int
t
=
*
((
__global
const
ushort*
)(
src
+
src_index
))
;
#
if
greenbits
==
6
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 3) & 0xfc)*G2Y +
((t >> 8) & 0xf8)*R2Y, yuv_shift);
dst[dst_index]
=
(
uchar
)
CV_DESCALE
(
mad24
((
t
<<
3
)
&
0xf8,
B2Y,
mad24
((
t
>>
3
)
&
0xfc,
G2Y,
((
t
>>
8
)
&
0xf8
)
*
R2Y
))
,
yuv_shift
)
;
#
else
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 2) & 0xf8)*G2Y +
((t >> 7) & 0xf8)*R2Y, yuv_shift);
dst[dst_index]
=
(
uchar
)
CV_DESCALE
(
mad24
((
t
<<
3
)
&
0xf8,
B2Y,
mad24
((
t
>>
2
)
&
0xf8,
G2Y,
((
t
>>
7
)
&
0xf8
)
*
R2Y
))
,
yuv_shift
)
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -691,23 +746,26 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int t = src[src_idx];
int
t
=
src[src_index]
;
#
if
greenbits
==
6
*((__global ushort*)(dst + dst_i
d
x)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
*
((
__global
ushort*
)(
dst
+
dst_i
nde
x
))
=
(
ushort
)((
t
>>
3
)
| ((t & ~3) << 3) |
((
t
&
~7
)
<<
8
))
;
#
else
t
>>=
3
;
*((__global ushort*)(dst + dst_i
d
x)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
*
((
__global
ushort*
)(
dst
+
dst_i
nde
x
))
=
(
ushort
)(
t
|(t << 5)|
(
t
<<
10
))
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -733,40 +791,44 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
int
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
int
h,
s,
v
=
b
;
int
vmin
=
b,
diff
;
int
vr,
vg
;
v = max(
v, g
);
v = max(
v, r
);
vmin = min(
vmin, g
);
vmin = min(
vmin, r
);
v
=
max
(
v,
g
)
;
v
=
max
(
v,
r
)
;
vmin
=
min
(
vmin,
g
)
;
vmin
=
min
(
vmin,
r
)
;
diff
=
v
-
vmin
;
vr
=
v
==
r
?
-1
:
0
;
vg
=
v
==
g
?
-1
:
0
;
s =
(diff * sdiv_table[v] +
(1 << (hsv_shift-1))) >> hsv_shift;
s
=
mad24
(
diff,
sdiv_table[v],
(
1
<<
(
hsv_shift-1
)))
>>
hsv_shift
;
h
=
(
vr
&
(
g
-
b
))
+
(~vr & ((vg &
(b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff
))));
h =
(h * hdiv_table[diff] +
(1 << (hsv_shift-1))) >> hsv_shift;
(
~vr
&
((
vg
&
mad24
(
diff,
2
,
b
-
r
))
+
((
~vg
)
&
mad24
(
4
,
diff,
r
-
g
))))
;
h
=
mad24
(
h,
hdiv_table[diff],
(
1
<<
(
hsv_shift-1
)))
>>
hsv_shift
;
h
+=
h
<
0
?
hrange
:
0
;
dst[dst_idx] = convert_uchar_sat_rte(h);
dst[dst_idx + 1] = (uchar)s;
dst[dst_idx + 2] = (uchar)v;
dst[dst_index]
=
convert_uchar_sat_rte
(
h
)
;
dst[dst_index
+
1]
=
(
uchar
)
s
;
dst[dst_index
+
2]
=
(
uchar
)
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -780,14 +842,15 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
float
h
=
src_pix.x,
s
=
src_pix.y*
(
1/255.f
)
,
v
=
src_pix.z*
(
1/255.f
)
;
float
b,
g,
r
;
...
...
@@ -821,14 +884,17 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
else
b
=
g
=
r
=
v
;
dst[dst_i
d
x + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_i
d
x + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_i
d
x + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
dst[dst_i
nde
x
+
bidx]
=
convert_uchar_sat_rte
(
b*255.f
)
;
dst[dst_i
nde
x
+
1]
=
convert_uchar_sat_rte
(
g*255.f
)
;
dst[dst_i
nde
x
+
(
bidx^2
)
]
=
convert_uchar_sat_rte
(
r*255.f
)
;
#
if
dcn
==
4
dst[dst_i
d
x + 3] = MAX_NUM;
dst[dst_i
nde
x
+
3]
=
MAX_NUM
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -844,16 +910,16 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
...
...
@@ -873,17 +939,21 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
if
(
v
==
r
)
h
=
(
g
-
b
)
*diff
;
else
if
(
v
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if( h < 0 ) h += 360.f;
if
(
h
<
0
)
h
+=
360.f
;
dst[0]
=
h*hscale
;
dst[1]
=
s
;
dst[2]
=
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -897,16 +967,17 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_i
d
x);
__global float * dst = (__global float *)(dstptr + dst_i
d
x);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_i
nde
x
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_i
nde
x
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
h
=
src_pix.x,
s
=
src_pix.y,
v
=
src_pix.z
;
...
...
@@ -947,8 +1018,11 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -968,14 +1042,15 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
float
b
=
src_pix.B_COMP*
(
1/255.f
)
,
g
=
src_pix.G_COMP*
(
1/255.f
)
,
r
=
src_pix.R_COMP*
(
1/255.f
)
;
float
h
=
0.f,
s
=
0.f,
l
;
...
...
@@ -998,18 +1073,22 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
if
(
vmax
==
r
)
h
=
(
g
-
b
)
*diff
;
else
if
(
vmax
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if( h < 0.f ) h += 360.f;
if
(
h
<
0.f
)
h
+=
360.f
;
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
dst[dst_index]
=
convert_uchar_sat_rte
(
h*hscale
)
;
dst[dst_index
+
1]
=
convert_uchar_sat_rte
(
l*255.f
)
;
dst[dst_index
+
2]
=
convert_uchar_sat_rte
(
s*255.f
)
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1023,14 +1102,15 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
uchar4
src_pix
=
vload4
(
0
,
src
+
src_index
)
;
float
h
=
src_pix.x,
l
=
src_pix.y*
(
1.f/255.f
)
,
s
=
src_pix.z*
(
1.f/255.f
)
;
float
b,
g,
r
;
...
...
@@ -1053,8 +1133,8 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
tab[0]
=
p2
;
tab[1]
=
p1
;
tab[2] =
p1 + (p2 - p1)*(1-h
);
tab[3] =
p1 + (p2 - p1)*h
;
tab[2]
=
fma
(
p2
-
p1,
1-h,
p1
)
;
tab[3]
=
fma
(
p2
-
p1,
h,
p1
)
;
b
=
tab[sector_data[sector][0]]
;
g
=
tab[sector_data[sector][1]]
;
...
...
@@ -1063,14 +1143,17 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
else
b
=
g
=
r
=
l
;
dst[dst_i
d
x + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_i
d
x + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_i
d
x + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
dst[dst_i
nde
x
+
bidx]
=
convert_uchar_sat_rte
(
b*255.f
)
;
dst[dst_i
nde
x
+
1]
=
convert_uchar_sat_rte
(
g*255.f
)
;
dst[dst_i
nde
x
+
(
bidx^2
)
]
=
convert_uchar_sat_rte
(
r*255.f
)
;
#
if
dcn
==
4
dst[dst_i
d
x + 3] = MAX_NUM;
dst[dst_i
nde
x
+
3]
=
MAX_NUM
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1086,16 +1169,16 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
b
=
src_pix.B_COMP,
g
=
src_pix.G_COMP,
r
=
src_pix.R_COMP
;
...
...
@@ -1119,9 +1202,9 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
if
(
vmax
==
r
)
h
=
(
g
-
b
)
*diff
;
else
if
(
vmax
==
g
)
h =
(b - r)*diff + 120.f
;
h
=
fma
(
b
-
r,
diff,
120.f
)
;
else
h =
(r - g)*diff + 240.f
;
h
=
fma
(
r
-
g,
diff,
240.f
)
;
if
(
h
<
0.f
)
h
+=
360.f
;
}
...
...
@@ -1129,8 +1212,11 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
dst[0]
=
h*hscale
;
dst[1]
=
l
;
dst[2]
=
s
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1144,16 +1230,16 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
h
=
src_pix.x,
l
=
src_pix.y,
s
=
src_pix.z
;
...
...
@@ -1178,8 +1264,8 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
tab[0]
=
p2
;
tab[1]
=
p1
;
tab[2] =
p1 + (p2 - p1)*(1-h
);
tab[3] =
p1 + (p2 - p1)*h
;
tab[2]
=
fma
(
p2
-
p1,
1-h,
p1
)
;
tab[3]
=
fma
(
p2
-
p1,
h,
p1
)
;
b
=
tab[sector_data[sector][0]]
;
g
=
tab[sector_data[sector][1]]
;
...
...
@@ -1194,8 +1280,11 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1215,24 +1304,25 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
(
x
<<
2
))
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
2
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar4
src_pix
=
*
(
__global
const
uchar4
*
)(
src
+
src_index
)
;
uchar v0 = src_pix.x, v1 = src_pix.y;
uchar v2 = src_pix.z, v3 = src_pix.w;
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
mad24
(
src_pix.x,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.y,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
mad24
(
src_pix.z,
src_pix.w,
HALF_MAX
)
/
MAX_NUM,
src_pix.w
)
;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 3] = v3;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1246,25 +1336,29 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
4
,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
4
,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar v0 = src_pix.x, v1 = src_pix.y;
uchar v2 = src_pix.z, v3 = src_pix.w;
uchar v3_half = v3 / 2;
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 3] = v3;
uchar4
src_pix
=
*
(
__global
const
uchar4
*
)(
src
+
src_index
)
;
uchar
v3
=
src_pix.w,
v3_half
=
v3
/
2
;
if
(
v3
==
0
)
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
0
,
0
,
0
,
0
)
;
else
*
(
__global
uchar4
*
)(
dst
+
dst_index
)
=
(
uchar4
)(
mad24
(
src_pix.x,
MAX_NUM,
v3_half
)
/
v3,
mad24
(
src_pix.y,
MAX_NUM,
v3_half
)
/
v3,
mad24
(
src_pix.z,
MAX_NUM,
v3_half
)
/
v3,
v3
)
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
++y;
}
}
}
...
...
@@ -1283,8 +1377,8 @@ inline float splineInterpolate(float x, __global const float * tab, int n)
{
int ix = clamp(convert_int_sat_rtn(x), 0, n-1);
x -= ix;
tab
+=
ix
*4
;
return
((
tab[3]*x
+
tab[2]
)
*x
+
tab[1]
)
*x
+
tab[0]
;
tab += ix
<< 2
;
return
fma(fma(fma(tab[3], x, tab[2]), x, tab[1]), x, tab[0])
;
}
#ifdef DEPTH_0
...
...
@@ -1299,16 +1393,16 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
__global const uchar* src_ptr = src + src_index;
__global uchar* dst_ptr = dst + dst_index;
uchar4 src_pix = vload4(0, src_ptr);
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
...
...
@@ -1316,19 +1410,22 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z];
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2
,
lab_shift
)
]
;
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5
,
lab_shift
)
]
;
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8
,
lab_shift
)
]
;
int fX = LabCbrtTab_b[CV_DESCALE(
mad24(R, C0, mad24(G, C1, B*C2))
, lab_shift)];
int fY = LabCbrtTab_b[CV_DESCALE(
mad24(R, C3, mad24(G, C4, B*C5))
, lab_shift)];
int fZ = LabCbrtTab_b[CV_DESCALE(
mad24(R, C6, mad24(G, C7, B*C8))
, lab_shift)];
int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int a = CV_DESCALE(
mad24(500, fX - fY, 128*(1 << lab_shift2)
), lab_shift2 );
int b = CV_DESCALE(
mad24(200, fY - fZ, 128*(1 << lab_shift2)
), lab_shift2 );
dst_ptr[0] = SAT_CAST(L);
dst_ptr[1] = SAT_CAST(a);
dst_ptr[2] = SAT_CAST(b);
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -1347,16 +1444,16 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
float4 src_pix = vload4(0, src);
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
...
...
@@ -1373,23 +1470,26 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif
float
X
=
R*C0
+
G*C1
+
B*C2
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float X =
fma(R, C0, fma(G, C1, B*C2))
;
float Y =
fma(R, C3, fma(G, C4, B*C5))
;
float Z =
fma(R, C6, fma(G, C7, B*C8))
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float FX = X > 0.008856f ?
rootn(X, 3) : fma(7.787f, X,
_a);
float FY = Y > 0.008856f ?
rootn(Y, 3) : fma(7.787f, Y,
_a);
float FZ = Z > 0.008856f ?
rootn(Z, 3) : fma(7.787f, Z,
_a);
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float L = Y > 0.008856f ?
fma(116.f, FY, -
16.f) : (903.3f * Y);
float a = 500.f * (FX - FY);
float b = 200.f * (FY - FZ);
dst[0] = L;
dst[1] = a;
dst[2] = b;
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -1412,7 +1512,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
if (li <= lThresh)
{
y = li / 903.3f;
fy
=
7.787f
*
y
+
16.0f
/
116.0f
;
fy =
fma(7.787f, y, 16.0f / 116.0f)
;
}
else
{
...
...
@@ -1422,6 +1522,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
#pragma unroll
for (int j = 0; j < 2; j++)
if (fxz[j] <= fThresh)
fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
...
...
@@ -1429,9 +1530,9 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
fxz[j] = fxz[j] * fxz[j] * fxz[j];
float x = fxz[0], z = fxz[1];
float
ro
=
clamp
(
C0
*
x
+
C1
*
y
+
C2
*
z
,
0.0f,
1.0f
)
;
float
go
=
clamp
(
C3
*
x
+
C4
*
y
+
C5
*
z
,
0.0f,
1.0f
)
;
float
bo
=
clamp
(
C6
*
x
+
C7
*
y
+
C8
*
z
,
0.0f,
1.0f
)
;
float ro = clamp(
fma(C0, x, fma(C1, y, C2 * z))
, 0.0f, 1.0f);
float go = clamp(
fma(C3, x, fma(C4, y, C5 * z))
, 0.0f, 1.0f);
float bo = clamp(
fma(C6, x, fma(C7, y, C8 * z))
, 0.0f, 1.0f);
#ifdef SRGB
ro = splineInterpolate(ro * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
...
...
@@ -1456,16 +1557,16 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
__global const uchar* src_ptr = src + src_index;
__global uchar * dst_ptr = dst + dst_index;
uchar4 src_pix = vload4(0, src_ptr);
float srcbuf[3], dstbuf[3];
...
...
@@ -1479,14 +1580,18 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
#endif
coeffs, lThresh, fThresh);
#if dcn == 3
dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f);
dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f);
dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f);
#
if
dcn
==
4
dst_ptr[3]
=
MAX_NUM
;
#else
*(__global uchar4 *)dst_ptr = (uchar4)(SAT_CAST(dstbuf[0] * 255.0f),
SAT_CAST(dstbuf[1] * 255.0f), SAT_CAST(dstbuf[2] * 255.0f), MAX_NUM);
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -1505,16 +1610,16 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
if (x < cols)
{
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
float4 src_pix = vload4(0, src);
float srcbuf[3], dstbuf[3];
...
...
@@ -1530,8 +1635,10 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
#if dcn == 4
dst[3] = MAX_NUM;
#endif
++y;
dst_index += dst_step;
src_index += src_step;
}
++y
;
}
}
}
...
...
@@ -1555,37 +1662,46 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
int
src_i
d
x
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_i
d
x
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
int
src_i
nde
x
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_i
nde
x
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
{
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float
R
=
src[0],
G
=
src[1],
B
=
src[2]
;
float
R
=
src[0],
G
=
src[1],
B
=
src[2]
;
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
float
X
=
R*coeffs[0]
+
G*coeffs[1]
+
B*coeffs[2]
;
float
Y
=
R*coeffs[3]
+
G*coeffs[4]
+
B*coeffs[5]
;
float
Z
=
R*coeffs[6]
+
G*coeffs[7]
+
B*coeffs[8]
;
float
X
=
fma
(
R,
coeffs[0],
fma
(
G,
coeffs[1],
B*coeffs[2]
))
;
float
Y
=
fma
(
R,
coeffs[3],
fma
(
G,
coeffs[4],
B*coeffs[5]
))
;
float
Z
=
fma
(
R,
coeffs[6],
fma
(
G,
coeffs[7],
B*coeffs[8]
))
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
116.f*L
-
16.f
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
fma
(
116.f,
L,
-16.f
)
;
float
d
=
(
4*13
)
/
max
(
X
+
15
*
Y
+
3
*
Z
,
FLT_EPSILON
)
;
float
u
=
L*
(
X*d
-
_un
)
;
float
v
=
L*
((
9*0.25f
)
*Y*
d
-
_vn
)
;
float
d
=
52.0f
/
fmax
(
fma
(
15.0f,
Y,
fma
(
3.0f,
Z,
X
))
,
FLT_EPSILON
)
;
float
u
=
L*fma
(
X,
d,
-
_un
)
;
float
v
=
L*fma
(
2.25f,
Y*d,
-
_vn
)
;
dst[0]
=
L
;
dst[1]
=
u
;
dst[2]
=
v
;
dst[0]
=
L
;
dst[1]
=
u
;
dst[2]
=
v
;
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
}
...
...
@@ -1599,38 +1715,44 @@ __kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
__global
const
float
*
LabCbrtTab,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
src
+
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
dst
+
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
src
+=
src_idx
;
dst
+=
dst_idx
;
float
scale
=
1.0f
/
255.0f
;
float
R
=
src[0]*scale,
G
=
src[1]*scale,
B
=
src[2]*scale
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
{
float
scale
=
1.0f
/
255.0f
;
float
R
=
src[0]*scale,
G
=
src[1]*scale,
B
=
src[2]*scale
;
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
float
X
=
R*coeffs[0]
+
G*coeffs[1]
+
B*coeffs[2]
;
float
Y
=
R*coeffs[3]
+
G*coeffs[4]
+
B*coeffs[5]
;
float
Z
=
R*coeffs[6]
+
G*coeffs[7]
+
B*coeffs[8]
;
float
X
=
fma
(
R,
coeffs[0],
fma
(
G,
coeffs[1],
B*coeffs[2]
))
;
float
Y
=
fma
(
R,
coeffs[3],
fma
(
G,
coeffs[4],
B*coeffs[5]
))
;
float
Z
=
fma
(
R,
coeffs[6],
fma
(
G,
coeffs[7],
B*coeffs[8]
))
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
116.f*L
-
16.f
;
float
L
=
splineInterpolate
(
Y*LabCbrtTabScale,
LabCbrtTab,
LAB_CBRT_TAB_SIZE
)
;
L
=
116.f*L
-
16.f
;
float
d
=
(
4*13
)
/
fmax
(
fma
(
15.0f,
Y,
fma
(
3.0f,
Z,
X
))
,
FLT_EPSILON
)
;
float
u
=
L*
(
X*d
-
_un
)
;
float
v
=
L*fma
(
2.25f,
Y*d,
-_vn
)
;
float
d
=
(
4*13
)
/
max
(
X
+
15
*
Y
+
3
*
Z,
FLT_EPSILON
)
;
float
u
=
L*
(
X*d
-
_un
)
;
float
v
=
L*
((
9*0.25f
)
*Y*
d
-
_vn
)
;
dst[0]
=
SAT_CAST
(
L
*
2.55f
)
;
dst[1]
=
SAT_CAST
(
fma
(
u,
0.72033898305084743f,
96.525423728813564f
)
)
;
dst[2]
=
SAT_CAST
(
fma
(
v,
0.99609375f,
139.453125f
)
)
;
dst[0]
=
SAT_CAST
(
L
*
2.55f
)
;
dst[1]
=
SAT_CAST
(
mad
(
u,
0.72033898305084743f,
96.525423728813564f
))
;
dst[2]
=
SAT_CAST
(
mad
(
v,
0.99609375f,
139.453125f
))
;
++y
;
dst
+=
dst_step
;
src
+=
src_step
;
}
}
}
...
...
@@ -1646,42 +1768,50 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float
L
=
src[0],
u
=
src[1],
v
=
src[2],
d,
X,
Y,
Z
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
Y
=
Y*Y*Y
;
d
=
(
1.f/13.f
)
/L
;
u
=
u*d
+
_un
;
v
=
v*d
+
_vn
;
float
iv
=
1.f/v
;
X
=
2.25f
*
u
*
Y
*
iv
;
Z
=
(
12
-
3
*
u
-
20
*
v
)
*
Y
*
0.25f
*
iv
;
float
R
=
X*coeffs[0]
+
Y*coeffs[1]
+
Z*coeffs[2]
;
float
G
=
X*coeffs[3]
+
Y*coeffs[4]
+
Z*coeffs[5]
;
float
B
=
X*coeffs[6]
+
Y*coeffs[7]
+
Z*coeffs[8]
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
{
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_index
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_index
)
;
float
L
=
src[0],
u
=
src[1],
v
=
src[2],
d,
X,
Y,
Z
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
Y
=
Y*Y*Y
;
d
=
(
1.f/13.f
)
/L
;
u
=
fma
(
u,
d,
_un
)
;
v
=
fma
(
v,
d,
_vn
)
;
float
iv
=
1.f/v
;
X
=
2.25f
*
u
*
Y
*
iv
;
Z
=
(
12
-
fma
(
3.0f,
u,
20.0f
*
v
))
*
Y
*
0.25f
*
iv
;
float
R
=
fma
(
X,
coeffs[0],
fma
(
Y,
coeffs[1],
Z
*
coeffs[2]
))
;
float
G
=
fma
(
X,
coeffs[3],
fma
(
Y,
coeffs[4],
Z
*
coeffs[5]
))
;
float
B
=
fma
(
X,
coeffs[6],
fma
(
Y,
coeffs[7],
Z
*
coeffs[8]
))
;
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
dst[0]
=
R
;
dst[1]
=
G
;
dst[2]
=
B
;
dst[0]
=
R
;
dst[1]
=
G
;
dst[2]
=
B
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
++y
;
dst_index
+=
dst_step
;
src_index
+=
src_step
;
}
}
}
...
...
@@ -1695,46 +1825,56 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
__constant
float
*
coeffs,
float
_un,
float
_vn
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
src
+=
src_idx
;
dst
+=
dst_idx
;
float
d,
X,
Y,
Z
;
float
L
=
src[0]*
(
100.f/255.f
)
;
float
u
=
(
float
)(
src[1]*1.388235294117647f
-
134.f
)
;
float
v
=
(
float
)(
src[2]*1.003921568627451f
-
140.f
)
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
Y
=
Y*Y*Y
;
d
=
(
1.f/13.f
)
/L
;
u
=
u*d
+
_un
;
v
=
v*d
+
_vn
;
float
iv
=
1.f/v
;
X
=
2.25f
*
u
*
Y
*
iv
;
Z
=
(
12
-
3
*
u
-
20
*
v
)
*
Y
*
0.25f
*
iv
;
float
R
=
X*coeffs[0]
+
Y*coeffs[1]
+
Z*coeffs[2]
;
float
G
=
X*coeffs[3]
+
Y*coeffs[4]
+
Z*coeffs[5]
;
float
B
=
X*coeffs[6]
+
Y*coeffs[7]
+
Z*coeffs[8]
;
src
+=
mad24
(
y,
src_step,
mad24
(
x,
scnbytes,
src_offset
))
;
dst
+=
mad24
(
y,
dst_step,
mad24
(
x,
dcnbytes,
dst_offset
))
;
#
pragma
unroll
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
if
(
y
<
rows
)
{
float
d,
X,
Y,
Z
;
float
L
=
src[0]*
(
100.f/255.f
)
;
float
u
=
fma
(
convert_float
(
src[1]
)
,
1.388235294117647f,
-134.f
)
;
float
v
=
fma
(
convert_float
(
src[2]
)
,
1.003921568627451f,
-
140.f
)
;
Y
=
(
L
+
16.f
)
*
(
1.f/116.f
)
;
Y
=
Y*Y*Y
;
d
=
(
1.f/13.f
)
/L
;
u
=
fma
(
u,
d,
_un
)
;
v
=
fma
(
v,
d,
_vn
)
;
float
iv
=
1.f/v
;
X
=
2.25f
*
u
*
Y
*
iv
;
Z
=
(
12
-
fma
(
3.0f,
u,
20.0f
*
v
))
*
Y
*
0.25f
*
iv
;
float
R
=
fma
(
X,
coeffs[0],
fma
(
Y,
coeffs[1],
Z
*
coeffs[2]
))
;
float
G
=
fma
(
X,
coeffs[3],
fma
(
Y,
coeffs[4],
Z
*
coeffs[5]
))
;
float
B
=
fma
(
X,
coeffs[6],
fma
(
Y,
coeffs[7],
Z
*
coeffs[8]
))
;
#
ifdef
SRGB
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B*GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
dst[0]
=
SAT_CAST
(
R
*
255.0f
)
;
dst[1]
=
SAT_CAST
(
G
*
255.0f
)
;
dst[2]
=
SAT_CAST
(
B
*
255.0f
)
;
uchar
dst0
=
SAT_CAST
(
R
*
255.0f
)
;
uchar
dst1
=
SAT_CAST
(
G
*
255.0f
)
;
uchar
dst2
=
SAT_CAST
(
B
*
255.0f
)
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
*
(
__global
uchar4
*
)
dst
=
(
uchar4
)(
dst0,
dst1,
dst2,
MAX_NUM
)
;
#
else
dst[0]
=
dst0
;
dst[1]
=
dst1
;
dst[2]
=
dst2
;
#
endif
++y
;
dst
+=
dst_step
;
src
+=
src_step
;
}
}
}
...
...
modules/imgproc/test/ocl/test_color.cpp
View file @
667a9328
...
...
@@ -305,11 +305,11 @@ OCL_TEST_P(CvtColor8u32f, Lab2LRGBA) { performTest(3, 4, CVTCODE(Lab2LRGB), dept
OCL_TEST_P
(
CvtColor8u32f
,
BGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
1e-2
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGR2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
4e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGB2Luv
)
{
performTest
(
3
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
BGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
BGR2Luv
),
depth
==
CV_8U
?
1
:
8e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
RGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
RGB2Luv
),
depth
==
CV_8U
?
1
:
9e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
4
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LBGRA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LBGR2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
LRGBA2Luv
)
{
performTest
(
4
,
3
,
CVTCODE
(
LRGB2Luv
),
depth
==
CV_8U
?
1
:
5
e-3
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2BGR
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2BGR
),
depth
==
CV_8U
?
1
:
7e-5
);
}
OCL_TEST_P
(
CvtColor8u32f
,
Luv2RGB
)
{
performTest
(
3
,
3
,
CVTCODE
(
Luv2RGB
),
depth
==
CV_8U
?
1
:
7e-5
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment