Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bea2515f
Commit
bea2515f
authored
Mar 20, 2014
by
Aaron Kunze
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimizations for OpenCL color conversion.
parent
5600bc54
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
884 additions
and
654 deletions
+884
-654
color.cpp
modules/imgproc/src/color.cpp
+38
-25
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+846
-629
No files found.
modules/imgproc/src/color.cpp
View file @
bea2515f
...
...
@@ -2703,6 +2703,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
return
false
;
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d "
,
depth
,
scn
);
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
1
;
if
(
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
))
{
pxPerWIy
=
4
;
}
globalsize
[
1
]
/=
pxPerWIy
;
opts
+=
format
(
"-D PIX_PER_WI_Y=%d "
,
pxPerWIy
);
switch
(
code
)
{
case
COLOR_BGR2BGRA
:
case
COLOR_RGB2BGRA
:
case
COLOR_BGRA2BGR
:
...
...
@@ -2712,7 +2723,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_RGB2BGRA
||
code
==
COLOR_BGRA2RGBA
?
4
:
3
;
bool
reverse
=
!
(
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_BGRA2BGR
);
k
.
create
(
"RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=%d -D bidx=0 -D %s"
,
depth
,
scn
,
dcn
,
opts
+
format
(
"-D dcn=%d -D bidx=0 -D %s"
,
dcn
,
reverse
?
"REVERSE"
:
"ORDER"
));
break
;
}
...
...
@@ -2726,7 +2737,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
int
greenbits
=
code
==
COLOR_BGR5652BGR
||
code
==
COLOR_BGR5652RGB
||
code
==
COLOR_BGR5652BGRA
||
code
==
COLOR_BGR5652RGBA
?
6
:
5
;
k
.
create
(
"RGB5x52RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=2 -D dcn=%d -D bidx=%d -D greenbits=%d"
,
depth
,
dcn
,
bidx
,
greenbits
));
opts
+
format
(
"-D dcn=%d -D bidx=%d -D greenbits=%d"
,
dcn
,
bidx
,
greenbits
));
break
;
}
case
COLOR_BGR2BGR565
:
case
COLOR_BGR2BGR555
:
case
COLOR_RGB2BGR565
:
case
COLOR_RGB2BGR555
:
...
...
@@ -2739,7 +2750,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
code
==
COLOR_BGRA2BGR565
||
code
==
COLOR_RGBA2BGR565
?
6
:
5
;
dcn
=
2
;
k
.
create
(
"RGB2RGB5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=2 -D bidx=%d -D greenbits=%d"
,
depth
,
scn
,
bidx
,
greenbits
));
opts
+
format
(
"-D dcn=2 -D bidx=%d -D greenbits=%d"
,
bidx
,
greenbits
));
break
;
}
case
COLOR_BGR5652GRAY
:
case
COLOR_BGR5552GRAY
:
...
...
@@ -2748,7 +2759,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
1
;
int
greenbits
=
code
==
COLOR_BGR5652GRAY
?
6
:
5
;
k
.
create
(
"BGR5x52Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=2 -D dcn=1 -D bidx=0 -D greenbits=%d"
,
depth
,
greenbits
));
opts
+
format
(
"-D dcn=1 -D bidx=0 -D greenbits=%d"
,
greenbits
));
break
;
}
case
COLOR_GRAY2BGR565
:
case
COLOR_GRAY2BGR555
:
...
...
@@ -2757,7 +2768,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
2
;
int
greenbits
=
code
==
COLOR_GRAY2BGR565
?
6
:
5
;
k
.
create
(
"Gray2BGR5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=1 -D dcn=2 -D bidx=0 -D greenbits=%d"
,
depth
,
greenbits
));
opts
+
format
(
"-D dcn=2 -D bidx=0 -D greenbits=%d"
,
greenbits
));
break
;
}
case
COLOR_BGR2GRAY
:
case
COLOR_BGRA2GRAY
:
...
...
@@ -2767,8 +2778,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_BGR2GRAY
||
code
==
COLOR_BGRA2GRAY
?
0
:
2
;
dcn
=
1
;
k
.
create
(
"RGB2Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d
-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d"
,
depth
,
scn
,
bidx
,
stripeSize
));
opts
+
format
(
"
-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d"
,
bidx
,
stripeSize
));
globalsize
[
0
]
=
(
src
.
cols
+
stripeSize
-
1
)
/
stripeSize
;
break
;
}
...
...
@@ -2778,7 +2789,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
scn
==
1
);
dcn
=
code
==
COLOR_GRAY2BGRA
?
4
:
3
;
k
.
create
(
"Gray2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D bidx=0 -D scn=1 -D dcn=%d"
,
depth
,
dcn
));
opts
+
format
(
"-D bidx=0 -D dcn=%d"
,
dcn
));
break
;
}
case
COLOR_BGR2YUV
:
...
...
@@ -2788,7 +2799,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_RGB2YUV
?
0
:
2
;
dcn
=
3
;
k
.
create
(
"RGB2YUV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
break
;
}
case
COLOR_YUV2BGR
:
...
...
@@ -2798,7 +2809,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
dcn
==
3
||
dcn
==
4
);
bidx
=
code
==
COLOR_YUV2RGB
?
0
:
2
;
k
.
create
(
"YUV2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d"
,
depth
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
}
case
COLOR_YUV2RGB_NV12
:
case
COLOR_YUV2BGR_NV12
:
...
...
@@ -2811,7 +2822,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dstSz
=
Size
(
sz
.
width
,
sz
.
height
*
2
/
3
);
k
.
create
(
"YUV2RGB_NV12"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=0 -D scn=1
-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
opts
+
format
(
"
-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
}
case
COLOR_BGR2YCrCb
:
...
...
@@ -2821,7 +2832,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_BGR2YCrCb
?
0
:
2
;
dcn
=
3
;
k
.
create
(
"RGB2YCrCb"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
break
;
}
case
COLOR_YCrCb2BGR
:
...
...
@@ -2832,7 +2843,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
scn
==
3
&&
(
dcn
==
3
||
dcn
==
4
));
bidx
=
code
==
COLOR_YCrCb2BGR
?
0
:
2
;
k
.
create
(
"YCrCb2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=%d -D bidx=%d"
,
depth
,
scn
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
}
case
COLOR_BGR2XYZ
:
case
COLOR_RGB2XYZ
:
...
...
@@ -2878,7 +2889,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dst
=
_dst
.
getUMat
();
k
.
create
(
"RGB2XYZ"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
...
...
@@ -2929,7 +2940,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dst
=
_dst
.
getUMat
();
k
.
create
(
"XYZ2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d"
,
depth
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
...
...
@@ -2984,8 +2995,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
_dst
.
create
(
dstSz
,
CV_8UC3
);
dst
=
_dst
.
getUMat
();
k
.
create
(
"RGB2HSV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D hrange=%d -D bidx=%d -D dcn=3 -D scn=%d"
,
depth
,
hrange
,
bidx
,
scn
));
k
.
create
(
"RGB2HSV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
opts
+
format
(
"-D hrange=%d -D bidx=%d -D dcn=3"
,
hrange
,
bidx
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -2997,7 +3009,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
}
else
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D hscale=%ff -D bidx=%d -D scn=%d -D dcn=3"
,
depth
,
hrange
*
(
1.
f
/
360.
f
),
bidx
,
scn
));
opts
+
format
(
"-D hscale=%ff -D bidx=%d -D dcn=3"
,
hrange
*
(
1.
f
/
360.
f
),
bidx
));
break
;
}
case
COLOR_HSV2BGR
:
case
COLOR_HSV2RGB
:
case
COLOR_HSV2BGR_FULL
:
case
COLOR_HSV2RGB_FULL
:
...
...
@@ -3015,8 +3028,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
String
kernelName
=
String
(
is_hsv
?
"HSV"
:
"HLS"
)
+
"2RGB"
;
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=%d -D scn=3
-D bidx=%d -D hrange=%d -D hscale=%ff"
,
depth
,
dcn
,
bidx
,
hrange
,
6.
f
/
hrange
));
opts
+
format
(
"-D dcn=%d
-D bidx=%d -D hrange=%d -D hscale=%ff"
,
dcn
,
bidx
,
hrange
,
6.
f
/
hrange
));
break
;
}
case
COLOR_RGBA2mRGBA
:
case
COLOR_mRGBA2RGBA
:
...
...
@@ -3025,7 +3038,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
4
;
k
.
create
(
code
==
COLOR_RGBA2mRGBA
?
"RGBA2mRGBA"
:
"mRGBA2RGBA"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=4 -D scn=4 -D bidx=3"
,
depth
)
);
opts
+
"-D dcn=4 -D bidx=3"
);
break
;
}
case
CV_BGR2Lab
:
case
CV_RGB2Lab
:
case
CV_LBGR2Lab
:
case
CV_LRGB2Lab
:
...
...
@@ -3037,8 +3050,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
3
;
k
.
create
(
"BGR2Lab"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=3 -D scn=%d
-D bidx=%d%s"
,
depth
,
scn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
opts
+
format
(
"-D dcn=3
-D bidx=%d%s"
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -3139,8 +3152,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bool
srgb
=
code
==
CV_Lab2BGR
||
code
==
CV_Lab2RGB
;
k
.
create
(
"Lab2BGR"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=%d -D scn=3
-D bidx=%d%s"
,
depth
,
dcn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
opts
+
format
(
"-D dcn=%d
-D bidx=%d%s"
,
dcn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
modules/imgproc/src/opencl/cvtcolor.cl
View file @
bea2515f
...
...
@@ -99,64 +99,79 @@ enum
#
define
hrange
0
#
endif
#
if
bidx
==
0
#
define
R_COMP
z
#
define
G_COMP
y
#
define
B_COMP
x
#
elif
bidx
==
2
#
define
R_COMP
x
#
define
G_COMP
y
#
define
B_COMP
z
#
elif
bidx
==
3
//
The
only
kernel
that
uses
bidx
==
3
doesn
't
use
these
macros.
//
But
we
still
need
to
make
the
compiler
happy.
#
define
R_COMP
w
#
define
G_COMP
w
#
define
B_COMP
w
#
endif
#
define
__CAT
(
x,
y
)
x##y
#
define
CAT
(
x,
y
)
__CAT
(
x,
y
)
#
define
DATA_TYPE_4
CAT
(
DATA_TYPE,
4
)
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
__kernel
void
RGB2Gray
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols
)
{
#
if
1
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
#
ifdef
DEPTH_5
dst[0]
=
src[bidx]
*
0.114f
+
src[1]
*
0.587f
+
src[
(
bidx^2
)
]
*
0.299f
;
#
else
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
((
src[bidx]
*
B2Y
+
src[1]
*
G2Y
+
src[
(
bidx^2
)
]
*
R2Y
)
,
yuv_shift
)
;
#
endif
}
#
else
const
int
x_min
=
get_global_id
(
0
)
*STRIPE_SIZE
;
const
int
x_max
=
min
(
x_min
+
STRIPE_SIZE,
cols
)
;
const
int
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
)
if
(
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
))
+
x_min*scn
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
))
;
int
x
;
for
(
x
=
x_min
; x < x_max; x++, src += scn )
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
DEPTH_5
dst[x]
=
src[bidx]
*
0.114f
+
src[1]
*
0.587f
+
src[
(
bidx^2
)
]
*
0.299f
;
dst[0]
=
src_pix.B_COMP
*
0.114f
+
src_pix.G_COMP
*
0.587f
+
src_pix.R_COMP
*
0.299f
;
#
else
dst[x]
=
(
DATA_TYPE
)(
mad24
(
src[bidx],
B2Y,
mad24
(
src[1],
G2Y,
mad24
(
src[
(
bidx^2
)
],
R2Y,
1
<<
(
yuv_shift-1
))))
>>
yuv_shift
)
;
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
((
src_pix.B_COMP
*
B2Y
+
src_pix.G_COMP
*
G2Y
+
src_pix.R_COMP
*
R2Y
)
,
yuv_shift
)
;
#
endif
}
++y
;
}
}
#
endif
}
__kernel
void
Gray2RGB
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
val
=
src[0]
;
dst[0]
=
dst[1]
=
dst[2]
=
val
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
val
=
src[0]
;
dst[0]
=
dst[1]
=
dst[2]
=
val
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
}
++y
;
}
}
}
...
...
@@ -170,30 +185,38 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
b=src[bidx],
g=src[1],
r=src[bidx^2]
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
b=src_pix.B_COMP,
g=src_pix.G_COMP,
r=src_pix.R_COMP
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YUVCoeffs_f
;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
__constant
float
*
coeffs
=
c_RGB2YUVCoeffs_f
;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
__constant
int
*
coeffs
=
c_RGB2YUVCoeffs_i
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2],
yuv_shift
)
;
const
int
U
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int
V
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
__constant
int
*
coeffs
=
c_RGB2YUVCoeffs_i
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2],
yuv_shift
)
;
const
int
U
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int
V
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
U
)
;
dst[2]
=
SAT_CAST
(
V
)
;
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
U
)
;
dst[2]
=
SAT_CAST
(
V
)
;
}
++y
;
}
}
}
...
...
@@ -205,32 +228,40 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
Y
=
src[0],
U
=
src[1],
V
=
src[2]
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
Y
=
src_pix.x,
U
=
src_pix.y,
V
=
src_pix.z
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_YUV2RGBCoeffs_f
;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
__constant
float
*
coeffs
=
c_YUV2RGBCoeffs_f
;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
#
else
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
const
int
r
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
const
int
g
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int
b
=
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
const
int
r
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
const
int
g
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int
b
=
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
#
endif
dst[bidx]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[bidx^2]
=
SAT_CAST
(
r
)
;
dst[bidx]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[bidx^2]
=
SAT_CAST
(
r
)
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
}
++y
;
}
}
}
...
...
@@ -246,58 +277,65 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
/
2
&&
x
<
cols
/
2
)
if
(
x
<
cols
/
2
)
{
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
int
Y1
=
ysrc[0]
;
int
Y2
=
ysrc[1]
;
int
Y3
=
ysrc[srcstep]
;
int
Y4
=
ysrc[srcstep
+
1]
;
int
U
=
usrc[0]
-
128
;
int
V
=
usrc[1]
-
128
;
int
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
V
;
int
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
-
ITUR_BT_601_CVG
*
V
-
ITUR_BT_601_CUG
*
U
;
int
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
U
;
Y1
=
max
(
0
,
Y1
-
16
)
*
ITUR_BT_601_CY
;
dst1[2
-
bidx]
=
convert_uchar_sat
((
Y1
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[1]
=
convert_uchar_sat
((
Y1
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[bidx]
=
convert_uchar_sat
((
Y1
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
/
2
)
{
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
int
Y1
=
ysrc[0]
;
int
Y2
=
ysrc[1]
;
int
Y3
=
ysrc[srcstep]
;
int
Y4
=
ysrc[srcstep
+
1]
;
int
U
=
usrc[0]
-
128
;
int
V
=
usrc[1]
-
128
;
int
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
V
;
int
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
-
ITUR_BT_601_CVG
*
V
-
ITUR_BT_601_CUG
*
U
;
int
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
U
;
Y1
=
max
(
0
,
Y1
-
16
)
*
ITUR_BT_601_CY
;
dst1[2
-
bidx]
=
convert_uchar_sat
((
Y1
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[1]
=
convert_uchar_sat
((
Y1
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[bidx]
=
convert_uchar_sat
((
Y1
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
dst1[3]
=
255
;
dst1[3]
=
255
;
#
endif
Y2
=
max
(
0
,
Y2
-
16
)
*
ITUR_BT_601_CY
;
dst1[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y2
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
1]
=
convert_uchar_sat
((
Y2
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
bidx]
=
convert_uchar_sat
((
Y2
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
Y2
=
max
(
0
,
Y2
-
16
)
*
ITUR_BT_601_CY
;
dst1[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y2
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
1]
=
convert_uchar_sat
((
Y2
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
bidx]
=
convert_uchar_sat
((
Y2
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
dst1[7]
=
255
;
dst1[7]
=
255
;
#
endif
Y3
=
max
(
0
,
Y3
-
16
)
*
ITUR_BT_601_CY
;
dst2[2
-
bidx]
=
convert_uchar_sat
((
Y3
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[1]
=
convert_uchar_sat
((
Y3
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[bidx]
=
convert_uchar_sat
((
Y3
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
Y3
=
max
(
0
,
Y3
-
16
)
*
ITUR_BT_601_CY
;
dst2[2
-
bidx]
=
convert_uchar_sat
((
Y3
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[1]
=
convert_uchar_sat
((
Y3
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[bidx]
=
convert_uchar_sat
((
Y3
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
dst2[3]
=
255
;
dst2[3]
=
255
;
#
endif
Y4
=
max
(
0
,
Y4
-
16
)
*
ITUR_BT_601_CY
;
dst2[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y4
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
1]
=
convert_uchar_sat
((
Y4
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
bidx]
=
convert_uchar_sat
((
Y4
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
Y4
=
max
(
0
,
Y4
-
16
)
*
ITUR_BT_601_CY
;
dst2[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y4
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
1]
=
convert_uchar_sat
((
Y4
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
bidx]
=
convert_uchar_sat
((
Y4
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
dst2[7]
=
255
;
dst2[7]
=
255
;
#
endif
}
++y
;
}
}
}
...
...
@@ -311,30 +349,38 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
b=src[bidx],
g=src[1],
r=src[bidx^2]
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
b=src_pix.B_COMP,
g=src_pix.G_COMP,
r=src_pix.R_COMP
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YCrCbCoeffs_f
;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
__constant
float
*
coeffs
=
c_RGB2YCrCbCoeffs_f
;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
__constant
int
*
coeffs
=
c_RGB2YCrCbCoeffs_i
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
__constant
int
*
coeffs
=
c_RGB2YCrCbCoeffs_i
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
Cr
)
;
dst[2]
=
SAT_CAST
(
Cb
)
;
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
Cr
)
;
dst[2]
=
SAT_CAST
(
Cb
)
;
}
++y
;
}
}
}
...
...
@@ -346,35 +392,43 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
DATA_TYPE
y
=
srcptr[0],
cr
=
srcptr[1],
cb
=
srcptr[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
srcptr
)
;
DATA_TYPE
y
=
src_pix.x,
cr
=
src_pix.y,
cb
=
src_pix.z
;
#
ifdef
DEPTH_5
__constant
float
*
coeff
=
c_YCrCb2RGBCoeffs_f
;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
__constant
float
*
coeff
=
c_YCrCb2RGBCoeffs_f
;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
#
else
__constant
int
*
coeff
=
c_YCrCb2RGBCoeffs_i
;
int
r
=
y
+
CV_DESCALE
(
coeff[0]
*
(
cr
-
HALF_MAX
)
,
yuv_shift
)
;
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int
b
=
y
+
CV_DESCALE
(
coeff[3]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
__constant
int
*
coeff
=
c_YCrCb2RGBCoeffs_i
;
int
r
=
y
+
CV_DESCALE
(
coeff[0]
*
(
cr
-
HALF_MAX
)
,
yuv_shift
)
;
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int
b
=
y
+
CV_DESCALE
(
coeff[3]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
#
endif
dstptr[
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
dstptr[1]
=
SAT_CAST
(
g
)
;
dstptr[bidx]
=
SAT_CAST
(
b
)
;
dstptr[
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
dstptr[1]
=
SAT_CAST
(
g
)
;
dstptr[bidx]
=
SAT_CAST
(
b
)
;
#
if
dcn
==
4
dstptr[3]
=
MAX_NUM
;
dstptr[3]
=
MAX_NUM
;
#
endif
}
++y
;
}
}
}
...
...
@@ -385,30 +439,38 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
d
y
<
rows
&&
d
x
<
cols
)
if
(
dx
<
cols
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
dy
<
rows
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE
r
=
src[0],
g
=
src[1],
b
=
src[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
r
=
src_pix.x,
g
=
src_pix.y,
b
=
src_pix.z
;
#
ifdef
DEPTH_5
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
#
else
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2],
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5],
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8],
xyz_shift
)
;
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2],
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5],
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8],
xyz_shift
)
;
#
endif
dst[0]
=
SAT_CAST
(
x
)
;
dst[1]
=
SAT_CAST
(
y
)
;
dst[2]
=
SAT_CAST
(
z
)
;
dst[0]
=
SAT_CAST
(
x
)
;
dst[1]
=
SAT_CAST
(
y
)
;
dst[2]
=
SAT_CAST
(
z
)
;
}
++dy
;
}
}
}
...
...
@@ -417,33 +479,41 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
d
y
<
rows
&&
d
x
<
cols
)
if
(
dx
<
cols
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
dy
<
rows
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE
x
=
src[0],
y
=
src[1],
z
=
src[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
x
=
src_pix.x,
y
=
src_pix.y,
z
=
src_pix.z
;
#
ifdef
DEPTH_5
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
#
else
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2],
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5],
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8],
xyz_shift
)
;
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2],
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5],
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8],
xyz_shift
)
;
#
endif
dst[0]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[2]
=
SAT_CAST
(
r
)
;
dst[0]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[2]
=
SAT_CAST
(
r
)
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
}
++dy
;
}
}
}
...
...
@@ -454,33 +524,41 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
REVERSE
dst[0]
=
src[2]
;
dst[1]
=
src[1]
;
dst[2]
=
src[0]
;
dst[0]
=
src_pix.z
;
dst[1]
=
src_pix.y
;
dst[2]
=
src_pix.x
;
#
else
dst[0]
=
src[0]
;
dst[1]
=
src[1]
;
dst[2]
=
src[2]
;
dst[0]
=
src_pix.x
;
dst[1]
=
src_pix.y
;
dst[2]
=
src_pix.z
;
#
endif
#
if
dcn
==
4
#
if
scn
==
3
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
else
dst[3]
=
src[3]
;
dst[3]
=
src[3]
;
#
endif
#
endif
}
++y
;
}
}
}
...
...
@@ -491,31 +569,38 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
#
if
greenbits
==
6
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
#
else
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
#
endif
#
if
dcn
==
4
#
if
greenbits
==
6
dst[dst_idx
+
3]
=
255
;
dst[dst_idx
+
3]
=
255
;
#
else
dst[dst_idx
+
3]
=
t
&
0x8000
?
255
:
0
;
dst[dst_idx
+
3]
=
t
&
0x8000
?
255
:
0
;
#
endif
#
endif
}
++y
;
}
}
}
...
...
@@ -524,21 +609,29 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
int
rows,
int
cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
uchar4
src_pix
=
vload4
(
0
,
src
+
src_idx
)
;
#
if
greenbits
==
6
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]&~3) << 3)|
((
src[src_idx
+
(
bidx^2
)
]
&~7
)
<<
8
))
;
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~3) << 3)|
((
src_pix.R_COMP
&~7
)
<<
8
))
;
#
elif
scn
==
3
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]&~7) << 2)|
((
src[src_idx
+
(
bidx^2
)
]
&~7
)
<<
7
))
;
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
((
src_pix.R_COMP
&~7
)
<<
7
))
;
#
else
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]
&~7) << 2)|
((
src[src_idx
+
(
bidx^2
)
]&~7
)
<<
7
)
|(src[src_idx + 3]
? 0x8000 : 0));
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP
&~7) << 2)|
((
src_pix.R_COMP&~7
)
<<
7
)
|(src_pix.w
? 0x8000 : 0));
#endif
}
++y;
}
}
}
...
...
@@ -549,23 +642,30 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x);
int t = *((__global const ushort*)(src + src_idx));
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x);
int t = *((__global const ushort*)(src + src_idx));
#if greenbits == 6
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 3) & 0xfc)*G2Y +
((t >> 8) & 0xf8)*R2Y, yuv_shift);
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 3) & 0xfc)*G2Y +
((t >> 8) & 0xf8)*R2Y, yuv_shift);
#else
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 2) & 0xf8)*G2Y +
((t >> 7) & 0xf8)*R2Y, yuv_shift);
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 2) & 0xf8)*G2Y +
((t >> 7) & 0xf8)*R2Y, yuv_shift);
#endif
}
++y;
}
}
}
...
...
@@ -574,20 +674,27 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int t = src[src_idx];
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int t = src[src_idx];
#if greenbits == 6
*((__global ushort*)(dst + dst_idx)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
*((__global ushort*)(dst + dst_idx)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
#else
t >>= 3;
*((__global ushort*)(dst + dst_idx)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
t >>= 3;
*((__global ushort*)(dst + dst_idx)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
#endif
}
++y;
}
}
}
...
...
@@ -608,36 +715,44 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
__constant int * sdiv_table, __constant int * hdiv_table)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
int h, s, v = b;
int vmin = b, diff;
int vr, vg;
v = max( v, g );
v = max( v, r );
vmin = min( vmin, g );
vmin = min( vmin, r );
diff = v - vmin;
vr = v == r ? -1 : 0;
vg = v == g ? -1 : 0;
s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
h = (vr & (g - b)) +
(~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
h += h < 0 ? hrange : 0;
dst[dst_idx] = convert_uchar_sat_rte(h);
dst[dst_idx + 1] = (uchar)s;
dst[dst_idx + 2] = (uchar)v;
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
int b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
int h, s, v = b;
int vmin = b, diff;
int vr, vg;
v = max( v, g );
v = max( v, r );
vmin = min( vmin, g );
vmin = min( vmin, r );
diff = v - vmin;
vr = v == r ? -1 : 0;
vg = v == g ? -1 : 0;
s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
h = (vr & (g - b)) +
(~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
h += h < 0 ? hrange : 0;
dst[dst_idx] = convert_uchar_sat_rte(h);
dst[dst_idx + 1] = (uchar)s;
dst[dst_idx + 2] = (uchar)v;
}
++y;
}
}
}
...
...
@@ -646,51 +761,59 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
float h = src[src_idx], s = src[src_idx + 1]*(1/255.f), v = src[src_idx + 2]*(1/255.f);
float b, g, r;
if (s != 0)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
float tab[4];
int sector;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
sector = convert_int_sat_rtn(h);
h -= sector;
if( (unsigned)sector >= 6u )
if (y < rows)
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float h = src_pix.x, s = src_pix.y*(1/255.f), v = src_pix.z*(1/255.f);
float b, g, r;
if (s != 0)
{
float tab[4];
int sector;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
sector = convert_int_sat_rtn(h);
h -= sector;
if( (unsigned)sector >= 6u )
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
dst[dst_idx + 3] = MAX_NUM;
dst[dst_idx + 3] = MAX_NUM;
#endif
}
++y;
}
}
}
...
...
@@ -701,42 +824,50 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float b = src[bidx], g = src[1], r = src[bidx^2];
float h, s, v;
float vmin, diff;
v = vmin = r;
if( v < g ) v = g;
if( v < b ) v = b;
if( vmin > g ) vmin = g;
if( vmin > b ) vmin = b;
diff = v - vmin;
s = diff/(float)(fabs(v) + FLT_EPSILON);
diff = (float)(60.f/(diff + FLT_EPSILON));
if( v == r )
h = (g - b)*diff;
else if( v == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0 ) h += 360.f;
dst[0] = h*hscale;
dst[1] = s;
dst[2] = v;
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4 src_pix = vload4(0, src);
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
float h, s, v;
float vmin, diff;
v = vmin = r;
if( v < g ) v = g;
if( v < b ) v = b;
if( vmin > g ) vmin = g;
if( vmin > b ) vmin = b;
diff = v - vmin;
s = diff/(float)(fabs(v) + FLT_EPSILON);
diff = (float)(60.f/(diff + FLT_EPSILON));
if( v == r )
h = (g - b)*diff;
else if( v == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0 ) h += 360.f;
dst[0] = h*hscale;
dst[1] = s;
dst[2] = v;
}
++y;
}
}
}
...
...
@@ -745,54 +876,62 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float h = src[0], s = src[1], v = src[2];
float b, g, r;
if (s != 0)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
float tab[4];
int sector;
h *= hscale;
if(h < 0)
do h += 6; while (h < 0);
else if (h >= 6)
do h -= 6; while (h >= 6);
sector = convert_int_sat_rtn(h);
h -= sector;
if ((unsigned)sector >= 6u)
if (y < rows)
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4 src_pix = vload4(0, src);
float h = src_pix.x, s = src_pix.y, v = src_pix.z;
float b, g, r;
if (s != 0)
{
float tab[4];
int sector;
h *= hscale;
if(h < 0)
do h += 6; while (h < 0);
else if (h >= 6)
do h -= 6; while (h >= 6);
sector = convert_int_sat_rtn(h);
h -= sector;
if ((unsigned)sector >= 6u)
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
}
++y;
}
}
}
...
...
@@ -807,44 +946,52 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
float b = src[src_idx + bidx]*(1/255.f), g = src[src_idx + 1]*(1/255.f), r = src[src_idx + (bidx^2)]*(1/255.f);
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float b = src_pix.B_COMP*(1/255.f), g = src_pix.G_COMP*(1/255.f), r = src_pix.R_COMP*(1/255.f);
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
}
++y;
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
}
}
...
...
@@ -853,50 +1000,58 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f);
float b, g, r;
if (s != 0)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
float tab[4];
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
float p1 = 2*l - p2;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
int sector = convert_int_sat_rtn(h);
h -= sector;
tab[0] = p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
float h = src_pix.x, l = src_pix.y*(1.f/255.f), s = src_pix.z*(1.f/255.f);
float b, g, r;
if (s != 0)
{
float tab[4];
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
float p1 = 2*l - p2;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
int sector = convert_int_sat_rtn(h);
h -= sector;
tab[0] = p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
dst[dst_idx + 3] = MAX_NUM;
dst[dst_idx + 3] = MAX_NUM;
#endif
}
++y;
}
}
}
...
...
@@ -907,47 +1062,55 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float b = src[bidx], g = src[1], r = src[bidx^2];
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4 src_pix = vload4(0, src);
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
}
dst[0] = h*hscale;
dst[1] = l;
dst[2] = s;
}
++y;
}
dst[0] = h*hscale;
dst[1] = l;
dst[2] = s;
}
}
...
...
@@ -956,54 +1119,62 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float h = src[0], l = src[1], s = src[2];
float b, g, r;
if (s != 0)
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
float tab[4];
int sector;
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
float p1 = 2*l - p2;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
sector = convert_int_sat_rtn(h);
h -= sector;
tab[0] = p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float4 src_pix = vload4(0, src);
float h = src_pix.x, l = src_pix.y, s = src_pix.z;
float b, g, r;
if (s != 0)
{
float tab[4];
int sector;
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
float p1 = 2*l - p2;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
sector = convert_int_sat_rtn(h);
h -= sector;
tab[0] = p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
}
++y;
}
}
}
...
...
@@ -1018,21 +1189,28 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
x <<= 2;
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar v0 = src[src_idx], v1 = src[src_idx + 1]
;
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3]
;
uchar v0 = src_pix.x, v1 = src_pix.y
;
uchar v2 = src_pix.z, v3 = src_pix.w
;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 3] = v3;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 3] = v3;
}
++y;
}
}
}
...
...
@@ -1041,22 +1219,29 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
x <<= 2;
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x);
uchar v0 = src[src_idx], v1 = src[src_idx + 1];
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3];
uchar v3_half = v3 / 2;
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 3] = v3;
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar v0 = src_pix.x, v1 = src_pix.y;
uchar v2 = src_pix.z, v3 = src_pix.w;
uchar v3_half = v3 / 2;
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 3] = v3;
}
++y;
}
}
}
...
...
@@ -1086,32 +1271,40 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
__constant
int
*
coeffs,
int
Lscale,
int
Lshift
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
src
+=
src_idx
;
dst
+=
dst_idx
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4
src_pix
=
vload4
(
0
,
src_ptr
)
;
int
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
int
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
int
R
=
gammaTab[src[0]],
G
=
gammaTab[src[1]],
B
=
gammaTab[src[2]
]
;
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2,
lab_shift
)
]
;
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5,
lab_shift
)
]
;
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8,
lab_shift
)
]
;
int
R
=
gammaTab[src_pix.x],
G
=
gammaTab[src_pix.y],
B
=
gammaTab[src_pix.z
]
;
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2,
lab_shift
)
]
;
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5,
lab_shift
)
]
;
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8,
lab_shift
)
]
;
int
L
=
CV_DESCALE
(
Lscale*fY
+
Lshift,
lab_shift2
)
;
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
L
=
CV_DESCALE
(
Lscale*fY
+
Lshift,
lab_shift2
)
;
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
dst[0]
=
SAT_CAST
(
L
)
;
dst[1]
=
SAT_CAST
(
a
)
;
dst[2]
=
SAT_CAST
(
b
)
;
dst_ptr[0]
=
SAT_CAST
(
L
)
;
dst_ptr[1]
=
SAT_CAST
(
a
)
;
dst_ptr[2]
=
SAT_CAST
(
b
)
;
}
++y
;
}
}
}
...
...
@@ -1125,45 +1318,53 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
_1_3,
float
_a
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
float
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
float
R
=
clamp
(
src[0]
,
0.0f,
1.0f
)
;
float
G
=
clamp
(
src[1]
,
0.0f,
1.0f
)
;
float
B
=
clamp
(
src[2]
,
0.0f,
1.0f
)
;
float
R
=
clamp
(
src_pix.x
,
0.0f,
1.0f
)
;
float
G
=
clamp
(
src_pix.y
,
0.0f,
1.0f
)
;
float
B
=
clamp
(
src_pix.z
,
0.0f,
1.0f
)
;
#
ifdef
SRGB
R
=
splineInterpolate
(
R
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
float
X
=
R*C0
+
G*C1
+
B*C2
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float
X
=
R*C0
+
G*C1
+
B*C2
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float
a
=
500.f
*
(
FX
-
FY
)
;
float
b
=
200.f
*
(
FY
-
FZ
)
;
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float
a
=
500.f
*
(
FX
-
FY
)
;
float
b
=
200.f
*
(
FY
-
FZ
)
;
dst[0]
=
L
;
dst[1]
=
a
;
dst[2]
=
b
;
dst[0]
=
L
;
dst[1]
=
a
;
dst[2]
=
b
;
}
++y
;
}
}
}
...
...
@@ -1225,33 +1426,41 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
src
+=
src_idx
;
dst
+=
dst_idx
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4
src_pix
=
vload4
(
0
,
src_ptr
)
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src[0]
*
(
100.f/255.f
)
;
srcbuf[1]
=
convert_float
(
src[1]
-
128
)
;
srcbuf[2]
=
convert_float
(
src[2]
-
128
)
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src_pix.x
*
(
100.f/255.f
)
;
srcbuf[1]
=
convert_float
(
src_pix.y
-
128
)
;
srcbuf[2]
=
convert_float
(
src_pix.z
-
128
)
;
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
#
ifdef
SRGB
gammaTab,
gammaTab,
#
endif
coeffs,
lThresh,
fThresh
)
;
coeffs,
lThresh,
fThresh
)
;
dst
[0]
=
SAT_CAST
(
dstbuf[0]
*
255.0f
)
;
dst
[1]
=
SAT_CAST
(
dstbuf[1]
*
255.0f
)
;
dst
[2]
=
SAT_CAST
(
dstbuf[2]
*
255.0f
)
;
dst_ptr
[0]
=
SAT_CAST
(
dstbuf[0]
*
255.0f
)
;
dst_ptr
[1]
=
SAT_CAST
(
dstbuf[1]
*
255.0f
)
;
dst_ptr
[2]
=
SAT_CAST
(
dstbuf[2]
*
255.0f
)
;
#
if
dcn
==
4
dst
[3]
=
MAX_NUM
;
dst_ptr
[3]
=
MAX_NUM
;
#
endif
}
++y
;
}
}
}
...
...
@@ -1265,29 +1474,37 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src[0],
srcbuf[1]
=
src[1],
srcbuf[2]
=
src[2]
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src_pix.x,
srcbuf[1]
=
src_pix.y,
srcbuf[2]
=
src_pix.z
;
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
#
ifdef
SRGB
gammaTab,
gammaTab,
#
endif
coeffs,
lThresh,
fThresh
)
;
coeffs,
lThresh,
fThresh
)
;
dst[0]
=
dstbuf[0],
dst[1]
=
dstbuf[1],
dst[2]
=
dstbuf[2]
;
dst[0]
=
dstbuf[0],
dst[1]
=
dstbuf[1],
dst[2]
=
dstbuf[2]
;
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
}
++y
;
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment