Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fd90efc9
Commit
fd90efc9
authored
Apr 11, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Apr 11, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2593 from arkunze:pullreq/140319-color
parents
6bfbcf02
785acc18
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
914 additions
and
654 deletions
+914
-654
color.cpp
modules/imgproc/src/color.cpp
+40
-25
cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+874
-629
No files found.
modules/imgproc/src/color.cpp
View file @
fd90efc9
...
@@ -2716,6 +2716,8 @@ struct mRGBA2RGBA
...
@@ -2716,6 +2716,8 @@ struct mRGBA2RGBA
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) (((total) + (grain) - 1) / (grain))
static
bool
ocl_cvtColor
(
InputArray
_src
,
OutputArray
_dst
,
int
code
,
int
dcn
)
static
bool
ocl_cvtColor
(
InputArray
_src
,
OutputArray
_dst
,
int
code
,
int
dcn
)
{
{
bool
ok
=
false
;
bool
ok
=
false
;
...
@@ -2729,6 +2731,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2729,6 +2731,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
&&
depth
!=
CV_32F
)
return
false
;
return
false
;
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d "
,
depth
,
scn
);
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
1
;
if
(
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
))
{
pxPerWIy
=
4
;
}
globalsize
[
1
]
=
DIVUP
(
globalsize
[
1
],
pxPerWIy
);
opts
+=
format
(
"-D PIX_PER_WI_Y=%d "
,
pxPerWIy
);
switch
(
code
)
switch
(
code
)
{
{
case
COLOR_BGR2BGRA
:
case
COLOR_RGB2BGRA
:
case
COLOR_BGRA2BGR
:
case
COLOR_BGR2BGRA
:
case
COLOR_RGB2BGRA
:
case
COLOR_BGRA2BGR
:
...
@@ -2738,7 +2751,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2738,7 +2751,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_RGB2BGRA
||
code
==
COLOR_BGRA2RGBA
?
4
:
3
;
dcn
=
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_RGB2BGRA
||
code
==
COLOR_BGRA2RGBA
?
4
:
3
;
bool
reverse
=
!
(
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_BGRA2BGR
);
bool
reverse
=
!
(
code
==
COLOR_BGR2BGRA
||
code
==
COLOR_BGRA2BGR
);
k
.
create
(
"RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=%d -D bidx=0 -D %s"
,
depth
,
scn
,
dcn
,
opts
+
format
(
"-D dcn=%d -D bidx=0 -D %s"
,
dcn
,
reverse
?
"REVERSE"
:
"ORDER"
));
reverse
?
"REVERSE"
:
"ORDER"
));
break
;
break
;
}
}
...
@@ -2752,7 +2765,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2752,7 +2765,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
int
greenbits
=
code
==
COLOR_BGR5652BGR
||
code
==
COLOR_BGR5652RGB
||
int
greenbits
=
code
==
COLOR_BGR5652BGR
||
code
==
COLOR_BGR5652RGB
||
code
==
COLOR_BGR5652BGRA
||
code
==
COLOR_BGR5652RGBA
?
6
:
5
;
code
==
COLOR_BGR5652BGRA
||
code
==
COLOR_BGR5652RGBA
?
6
:
5
;
k
.
create
(
"RGB5x52RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB5x52RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=2 -D dcn=%d -D bidx=%d -D greenbits=%d"
,
depth
,
dcn
,
bidx
,
greenbits
));
opts
+
format
(
"-D dcn=%d -D bidx=%d -D greenbits=%d"
,
dcn
,
bidx
,
greenbits
));
break
;
break
;
}
}
case
COLOR_BGR2BGR565
:
case
COLOR_BGR2BGR555
:
case
COLOR_RGB2BGR565
:
case
COLOR_RGB2BGR555
:
case
COLOR_BGR2BGR565
:
case
COLOR_BGR2BGR555
:
case
COLOR_RGB2BGR565
:
case
COLOR_RGB2BGR555
:
...
@@ -2765,7 +2778,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2765,7 +2778,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
code
==
COLOR_BGRA2BGR565
||
code
==
COLOR_RGBA2BGR565
?
6
:
5
;
code
==
COLOR_BGRA2BGR565
||
code
==
COLOR_RGBA2BGR565
?
6
:
5
;
dcn
=
2
;
dcn
=
2
;
k
.
create
(
"RGB2RGB5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB2RGB5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=2 -D bidx=%d -D greenbits=%d"
,
depth
,
scn
,
bidx
,
greenbits
));
opts
+
format
(
"-D dcn=2 -D bidx=%d -D greenbits=%d"
,
bidx
,
greenbits
));
break
;
break
;
}
}
case
COLOR_BGR5652GRAY
:
case
COLOR_BGR5552GRAY
:
case
COLOR_BGR5652GRAY
:
case
COLOR_BGR5552GRAY
:
...
@@ -2774,7 +2787,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2774,7 +2787,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
1
;
dcn
=
1
;
int
greenbits
=
code
==
COLOR_BGR5652GRAY
?
6
:
5
;
int
greenbits
=
code
==
COLOR_BGR5652GRAY
?
6
:
5
;
k
.
create
(
"BGR5x52Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"BGR5x52Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=2 -D dcn=1 -D bidx=0 -D greenbits=%d"
,
depth
,
greenbits
));
opts
+
format
(
"-D dcn=1 -D bidx=0 -D greenbits=%d"
,
greenbits
));
break
;
break
;
}
}
case
COLOR_GRAY2BGR565
:
case
COLOR_GRAY2BGR555
:
case
COLOR_GRAY2BGR565
:
case
COLOR_GRAY2BGR555
:
...
@@ -2783,7 +2796,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2783,7 +2796,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
2
;
dcn
=
2
;
int
greenbits
=
code
==
COLOR_GRAY2BGR565
?
6
:
5
;
int
greenbits
=
code
==
COLOR_GRAY2BGR565
?
6
:
5
;
k
.
create
(
"Gray2BGR5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"Gray2BGR5x5"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=1 -D dcn=2 -D bidx=0 -D greenbits=%d"
,
depth
,
greenbits
));
opts
+
format
(
"-D dcn=2 -D bidx=0 -D greenbits=%d"
,
greenbits
));
break
;
break
;
}
}
case
COLOR_BGR2GRAY
:
case
COLOR_BGRA2GRAY
:
case
COLOR_BGR2GRAY
:
case
COLOR_BGRA2GRAY
:
...
@@ -2793,8 +2806,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2793,8 +2806,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_BGR2GRAY
||
code
==
COLOR_BGRA2GRAY
?
0
:
2
;
bidx
=
code
==
COLOR_BGR2GRAY
||
code
==
COLOR_BGRA2GRAY
?
0
:
2
;
dcn
=
1
;
dcn
=
1
;
k
.
create
(
"RGB2Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB2Gray"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d
-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d"
,
opts
+
format
(
"
-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d"
,
depth
,
scn
,
bidx
,
stripeSize
));
bidx
,
stripeSize
));
globalsize
[
0
]
=
(
src
.
cols
+
stripeSize
-
1
)
/
stripeSize
;
globalsize
[
0
]
=
(
src
.
cols
+
stripeSize
-
1
)
/
stripeSize
;
break
;
break
;
}
}
...
@@ -2804,7 +2817,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2804,7 +2817,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
scn
==
1
);
CV_Assert
(
scn
==
1
);
dcn
=
code
==
COLOR_GRAY2BGRA
?
4
:
3
;
dcn
=
code
==
COLOR_GRAY2BGRA
?
4
:
3
;
k
.
create
(
"Gray2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"Gray2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D bidx=0 -D scn=1 -D dcn=%d"
,
depth
,
dcn
));
opts
+
format
(
"-D bidx=0 -D dcn=%d"
,
dcn
));
break
;
break
;
}
}
case
COLOR_BGR2YUV
:
case
COLOR_BGR2YUV
:
...
@@ -2814,7 +2827,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2814,7 +2827,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_RGB2YUV
?
0
:
2
;
bidx
=
code
==
COLOR_RGB2YUV
?
0
:
2
;
dcn
=
3
;
dcn
=
3
;
k
.
create
(
"RGB2YUV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB2YUV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
break
;
break
;
}
}
case
COLOR_YUV2BGR
:
case
COLOR_YUV2BGR
:
...
@@ -2824,7 +2837,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2824,7 +2837,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
dcn
==
3
||
dcn
==
4
);
CV_Assert
(
dcn
==
3
||
dcn
==
4
);
bidx
=
code
==
COLOR_YUV2RGB
?
0
:
2
;
bidx
=
code
==
COLOR_YUV2RGB
?
0
:
2
;
k
.
create
(
"YUV2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"YUV2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d"
,
depth
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
break
;
}
}
case
COLOR_YUV2RGB_NV12
:
case
COLOR_YUV2BGR_NV12
:
case
COLOR_YUV2RGB_NV12
:
case
COLOR_YUV2BGR_NV12
:
...
@@ -2837,7 +2850,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2837,7 +2850,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dstSz
=
Size
(
sz
.
width
,
sz
.
height
*
2
/
3
);
dstSz
=
Size
(
sz
.
width
,
sz
.
height
*
2
/
3
);
k
.
create
(
"YUV2RGB_NV12"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"YUV2RGB_NV12"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=0 -D scn=1
-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
opts
+
format
(
"
-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
break
;
}
}
case
COLOR_BGR2YCrCb
:
case
COLOR_BGR2YCrCb
:
...
@@ -2847,7 +2860,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2847,7 +2860,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx
=
code
==
COLOR_BGR2YCrCb
?
0
:
2
;
bidx
=
code
==
COLOR_BGR2YCrCb
?
0
:
2
;
dcn
=
3
;
dcn
=
3
;
k
.
create
(
"RGB2YCrCb"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB2YCrCb"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
break
;
break
;
}
}
case
COLOR_YCrCb2BGR
:
case
COLOR_YCrCb2BGR
:
...
@@ -2858,7 +2871,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2858,7 +2871,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
scn
==
3
&&
(
dcn
==
3
||
dcn
==
4
));
CV_Assert
(
scn
==
3
&&
(
dcn
==
3
||
dcn
==
4
));
bidx
=
code
==
COLOR_YCrCb2BGR
?
0
:
2
;
bidx
=
code
==
COLOR_YCrCb2BGR
?
0
:
2
;
k
.
create
(
"YCrCb2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"YCrCb2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=%d -D bidx=%d"
,
depth
,
scn
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
break
;
break
;
}
}
case
COLOR_BGR2XYZ
:
case
COLOR_RGB2XYZ
:
case
COLOR_BGR2XYZ
:
case
COLOR_RGB2XYZ
:
...
@@ -2904,7 +2917,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2904,7 +2917,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dst
=
_dst
.
getUMat
();
dst
=
_dst
.
getUMat
();
k
.
create
(
"RGB2XYZ"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"RGB2XYZ"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d"
,
depth
,
scn
,
bidx
));
opts
+
format
(
"-D dcn=3 -D bidx=%d"
,
bidx
));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
...
@@ -2955,7 +2968,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -2955,7 +2968,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dst
=
_dst
.
getUMat
();
dst
=
_dst
.
getUMat
();
k
.
create
(
"XYZ2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"XYZ2RGB"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d"
,
depth
,
dcn
,
bidx
));
opts
+
format
(
"-D dcn=%d -D bidx=%d"
,
dcn
,
bidx
));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
c
));
...
@@ -3010,8 +3023,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3010,8 +3023,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
_dst
.
create
(
dstSz
,
CV_8UC3
);
_dst
.
create
(
dstSz
,
CV_8UC3
);
dst
=
_dst
.
getUMat
();
dst
=
_dst
.
getUMat
();
k
.
create
(
"RGB2HSV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D hrange=%d -D bidx=%d -D dcn=3 -D scn=%d"
,
k
.
create
(
"RGB2HSV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
depth
,
hrange
,
bidx
,
scn
));
opts
+
format
(
"-D hrange=%d -D bidx=%d -D dcn=3"
,
hrange
,
bidx
));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -3023,7 +3037,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3023,7 +3037,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
}
}
else
else
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D hscale=%ff -D bidx=%d -D scn=%d -D dcn=3"
,
depth
,
hrange
*
(
1.
f
/
360.
f
),
bidx
,
scn
));
opts
+
format
(
"-D hscale=%ff -D bidx=%d -D dcn=3"
,
hrange
*
(
1.
f
/
360.
f
),
bidx
));
break
;
break
;
}
}
case
COLOR_HSV2BGR
:
case
COLOR_HSV2RGB
:
case
COLOR_HSV2BGR_FULL
:
case
COLOR_HSV2RGB_FULL
:
case
COLOR_HSV2BGR
:
case
COLOR_HSV2RGB
:
case
COLOR_HSV2BGR_FULL
:
case
COLOR_HSV2RGB_FULL
:
...
@@ -3041,8 +3056,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3041,8 +3056,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
String
kernelName
=
String
(
is_hsv
?
"HSV"
:
"HLS"
)
+
"2RGB"
;
String
kernelName
=
String
(
is_hsv
?
"HSV"
:
"HLS"
)
+
"2RGB"
;
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
kernelName
.
c_str
(),
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=%d -D scn=3
-D bidx=%d -D hrange=%d -D hscale=%ff"
,
opts
+
format
(
"-D dcn=%d
-D bidx=%d -D hrange=%d -D hscale=%ff"
,
depth
,
dcn
,
bidx
,
hrange
,
6.
f
/
hrange
));
dcn
,
bidx
,
hrange
,
6.
f
/
hrange
));
break
;
break
;
}
}
case
COLOR_RGBA2mRGBA
:
case
COLOR_mRGBA2RGBA
:
case
COLOR_RGBA2mRGBA
:
case
COLOR_mRGBA2RGBA
:
...
@@ -3051,7 +3066,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3051,7 +3066,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
4
;
dcn
=
4
;
k
.
create
(
code
==
COLOR_RGBA2mRGBA
?
"RGBA2mRGBA"
:
"mRGBA2RGBA"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
code
==
COLOR_RGBA2mRGBA
?
"RGBA2mRGBA"
:
"mRGBA2RGBA"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=4 -D scn=4 -D bidx=3"
,
depth
)
);
opts
+
"-D dcn=4 -D bidx=3"
);
break
;
break
;
}
}
case
CV_BGR2Lab
:
case
CV_RGB2Lab
:
case
CV_LBGR2Lab
:
case
CV_LRGB2Lab
:
case
CV_BGR2Lab
:
case
CV_RGB2Lab
:
case
CV_LBGR2Lab
:
case
CV_LRGB2Lab
:
...
@@ -3063,8 +3078,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3063,8 +3078,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
dcn
=
3
;
dcn
=
3
;
k
.
create
(
"BGR2Lab"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"BGR2Lab"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=3 -D scn=%d
-D bidx=%d%s"
,
opts
+
format
(
"-D dcn=3
-D bidx=%d%s"
,
depth
,
scn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
bidx
,
srgb
?
" -D SRGB"
:
""
));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -3165,8 +3180,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
...
@@ -3165,8 +3180,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bool
srgb
=
code
==
CV_Lab2BGR
||
code
==
CV_Lab2RGB
;
bool
srgb
=
code
==
CV_Lab2BGR
||
code
==
CV_Lab2RGB
;
k
.
create
(
"Lab2BGR"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
k
.
create
(
"Lab2BGR"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
format
(
"-D depth=%d -D dcn=%d -D scn=3
-D bidx=%d%s"
,
opts
+
format
(
"-D dcn=%d
-D bidx=%d%s"
,
depth
,
dcn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
dcn
,
bidx
,
srgb
?
" -D SRGB"
:
""
));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
...
modules/imgproc/src/opencl/cvtcolor.cl
View file @
fd90efc9
...
@@ -99,64 +99,81 @@ enum
...
@@ -99,64 +99,81 @@ enum
#
define
hrange
0
#
define
hrange
0
#
endif
#
endif
#
if
bidx
==
0
#
define
R_COMP
z
#
define
G_COMP
y
#
define
B_COMP
x
#
elif
bidx
==
2
#
define
R_COMP
x
#
define
G_COMP
y
#
define
B_COMP
z
#
elif
bidx
==
3
//
The
only
kernel
that
uses
bidx
==
3
doesn
't
use
these
macros.
//
But
we
still
need
to
make
the
compiler
happy.
#
define
R_COMP
w
#
define
G_COMP
w
#
define
B_COMP
w
#
endif
#
define
__CAT
(
x,
y
)
x##y
#
define
CAT
(
x,
y
)
__CAT
(
x,
y
)
#
define
DATA_TYPE_4
CAT
(
DATA_TYPE,
4
)
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
__kernel
void
RGB2Gray
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__kernel
void
RGB2Gray
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
#
if
1
int
x
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
const
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
#
ifdef
DEPTH_5
dst[0]
=
src[bidx]
*
0.114f
+
src[1]
*
0.587f
+
src[
(
bidx^2
)
]
*
0.299f
;
#
else
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
((
src[bidx]
*
B2Y
+
src[1]
*
G2Y
+
src[
(
bidx^2
)
]
*
R2Y
)
,
yuv_shift
)
;
#
endif
}
#
else
const
int
x_min
=
get_global_id
(
0
)
*STRIPE_SIZE
;
const
int
x_max
=
min
(
x_min
+
STRIPE_SIZE,
cols
)
;
const
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
)
if
(
x
<
cols
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
#
pragma
unroll
mad24
(
y,
srcstep,
srcoffset
))
+
x_min*scn
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
))
;
{
int
x
;
if
(
y
<
rows
)
for
(
x
=
x_min
; x < x_max; x++, src += scn )
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
dst[x]
=
src[bidx]
*
0.114f
+
src[1]
*
0.587f
+
src[
(
bidx^2
)
]
*
0.299f
;
dst[0]
=
src_pix.B_COMP
*
0.114f
+
src_pix.G_COMP
*
0.587f
+
src_pix.R_COMP
*
0.299f
;
#
else
#
else
dst[x]
=
(
DATA_TYPE
)(
mad24
(
src[bidx],
B2Y,
mad24
(
src[1],
G2Y,
dst[0]
=
(
DATA_TYPE
)
CV_DESCALE
((
src_pix.B_COMP
*
B2Y
+
src_pix.G_COMP
*
G2Y
+
src_pix.R_COMP
*
R2Y
)
,
yuv_shift
)
;
mad24
(
src[
(
bidx^2
)
],
R2Y,
1
<<
(
yuv_shift-1
))))
>>
yuv_shift
)
;
#
endif
#
endif
}
++y
;
}
}
}
#
endif
}
}
__kernel
void
Gray2RGB
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__kernel
void
Gray2RGB
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
const
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
#
pragma
unroll
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
DATA_TYPE
val
=
src[0]
;
{
dst[0]
=
dst[1]
=
dst[2]
=
val
;
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE
val
=
src[0]
;
dst[0]
=
dst[1]
=
dst[2]
=
val
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -170,30 +187,39 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -170,30 +187,39 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int srcstep, int srcoffset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
#
pragma
unroll
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
DATA_TYPE
b=src[bidx],
g=src[1],
r=src[bidx^2]
;
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
b=src_pix.B_COMP,
g=src_pix.G_COMP,
r=src_pix.R_COMP
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YUVCoeffs_f
;
__constant
float
*
coeffs
=
c_RGB2YUVCoeffs_f
;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const
DATA_TYPE
Y
=
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2]
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const
DATA_TYPE
U
=
(
b
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
const
DATA_TYPE
V
=
(
r
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
#
else
__constant
int
*
coeffs
=
c_RGB2YUVCoeffs_i
;
__constant
int
*
coeffs
=
c_RGB2YUVCoeffs_i
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2],
yuv_shift
)
;
const
int
Y
=
CV_DESCALE
(
b
*
coeffs[0]
+
g
*
coeffs[1]
+
r
*
coeffs[2],
yuv_shift
)
;
const
int
U
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int
U
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int
V
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
int
V
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
#
endif
dst[0]
=
SAT_CAST
(
Y
)
;
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
U
)
;
dst[1]
=
SAT_CAST
(
U
)
;
dst[2]
=
SAT_CAST
(
V
)
;
dst[2]
=
SAT_CAST
(
V
)
;
}
++y
;
}
}
}
}
}
...
@@ -205,32 +231,41 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
...
@@ -205,32 +231,41 @@ __kernel void YUV2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
#
pragma
unroll
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
DATA_TYPE
Y
=
src[0],
U
=
src[1],
V
=
src[2]
;
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
Y
=
src_pix.x,
U
=
src_pix.y,
V
=
src_pix.z
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_YUV2RGBCoeffs_f
;
__constant
float
*
coeffs
=
c_YUV2RGBCoeffs_f
;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
const
float
r
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float
g
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
const
float
b
=
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
;
#
else
#
else
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
const
int
r
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
const
int
r
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
const
int
g
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int
g
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int
b
=
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
const
int
b
=
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
#
endif
#
endif
dst[bidx]
=
SAT_CAST
(
b
)
;
dst[bidx]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[bidx^2]
=
SAT_CAST
(
r
)
;
dst[bidx^2]
=
SAT_CAST
(
r
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -246,58 +281,66 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
...
@@ -246,58 +281,66 @@ __kernel void YUV2RGB_NV12(__global const uchar* srcptr, int srcstep, int srcoff
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
/
2
&&
x
<
cols
/
2
)
if
(
x
<
cols
/
2
)
{
{
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
#
pragma
unroll
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
{
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
if
(
y
<
rows
/
2
)
{
int
Y1
=
ysrc[0]
;
__global
const
uchar*
ysrc
=
srcptr
+
mad24
(
y
<<
1
,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
int
Y2
=
ysrc[1]
;
__global
const
uchar*
usrc
=
srcptr
+
mad24
(
rows
+
y,
srcstep,
(
x
<<
1
)
+
srcoffset
)
;
int
Y3
=
ysrc[srcstep]
;
__global
uchar*
dst1
=
dstptr
+
mad24
(
y
<<
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
int
Y4
=
ysrc[srcstep
+
1]
;
__global
uchar*
dst2
=
dstptr
+
mad24
((
y
<<
1
)
+
1
,
dststep,
x
*
(
dcn<<1
)
+
dstoffset
)
;
int
U
=
usrc[0]
-
128
;
int
Y1
=
ysrc[0]
;
int
V
=
usrc[1]
-
128
;
int
Y2
=
ysrc[1]
;
int
Y3
=
ysrc[srcstep]
;
int
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
V
;
int
Y4
=
ysrc[srcstep
+
1]
;
int
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
-
ITUR_BT_601_CVG
*
V
-
ITUR_BT_601_CUG
*
U
;
int
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
U
;
int
U
=
usrc[0]
-
128
;
int
V
=
usrc[1]
-
128
;
Y1
=
max
(
0
,
Y1
-
16
)
*
ITUR_BT_601_CY
;
dst1[2
-
bidx]
=
convert_uchar_sat
((
Y1
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
int
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
V
;
dst1[1]
=
convert_uchar_sat
((
Y1
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
int
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
-
ITUR_BT_601_CVG
*
V
-
ITUR_BT_601_CUG
*
U
;
dst1[bidx]
=
convert_uchar_sat
((
Y1
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
int
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
U
;
Y1
=
max
(
0
,
Y1
-
16
)
*
ITUR_BT_601_CY
;
dst1[2
-
bidx]
=
convert_uchar_sat
((
Y1
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[1]
=
convert_uchar_sat
((
Y1
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[bidx]
=
convert_uchar_sat
((
Y1
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst1[3]
=
255
;
dst1[3]
=
255
;
#
endif
#
endif
Y2
=
max
(
0
,
Y2
-
16
)
*
ITUR_BT_601_CY
;
Y2
=
max
(
0
,
Y2
-
16
)
*
ITUR_BT_601_CY
;
dst1[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y2
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y2
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
1]
=
convert_uchar_sat
((
Y2
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
1]
=
convert_uchar_sat
((
Y2
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
bidx]
=
convert_uchar_sat
((
Y2
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
dst1[dcn
+
bidx]
=
convert_uchar_sat
((
Y2
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst1[7]
=
255
;
dst1[7]
=
255
;
#
endif
#
endif
Y3
=
max
(
0
,
Y3
-
16
)
*
ITUR_BT_601_CY
;
Y3
=
max
(
0
,
Y3
-
16
)
*
ITUR_BT_601_CY
;
dst2[2
-
bidx]
=
convert_uchar_sat
((
Y3
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[2
-
bidx]
=
convert_uchar_sat
((
Y3
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[1]
=
convert_uchar_sat
((
Y3
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[1]
=
convert_uchar_sat
((
Y3
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[bidx]
=
convert_uchar_sat
((
Y3
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[bidx]
=
convert_uchar_sat
((
Y3
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst2[3]
=
255
;
dst2[3]
=
255
;
#
endif
#
endif
Y4
=
max
(
0
,
Y4
-
16
)
*
ITUR_BT_601_CY
;
Y4
=
max
(
0
,
Y4
-
16
)
*
ITUR_BT_601_CY
;
dst2[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y4
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
2
-
bidx]
=
convert_uchar_sat
((
Y4
+
ruv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
1]
=
convert_uchar_sat
((
Y4
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
1]
=
convert_uchar_sat
((
Y4
+
guv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
bidx]
=
convert_uchar_sat
((
Y4
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
dst2[dcn
+
bidx]
=
convert_uchar_sat
((
Y4
+
buv
)
>>
ITUR_BT_601_SHIFT
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst2[7]
=
255
;
dst2[7]
=
255
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -311,30 +354,39 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
...
@@ -311,30 +354,39 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int srcstep, int srcoffset
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
#
pragma
unroll
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
DATA_TYPE
b=src[bidx],
g=src[1],
r=src[bidx^2]
;
{
if
(
y
<
rows
)
{
__global
const
DATA_TYPE*
src
=
(
__global
const
DATA_TYPE*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
scnbytes
))
;
__global
DATA_TYPE*
dst
=
(
__global
DATA_TYPE*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
dcnbytes
))
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
b=src_pix.B_COMP,
g=src_pix.G_COMP,
r=src_pix.R_COMP
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YCrCbCoeffs_f
;
__constant
float
*
coeffs
=
c_RGB2YCrCbCoeffs_f
;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE
Y
=
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0]
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cr
=
(
r
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
b
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
#
else
__constant
int
*
coeffs
=
c_RGB2YCrCbCoeffs_i
;
__constant
int
*
coeffs
=
c_RGB2YCrCbCoeffs_i
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0],
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
b
*
coeffs[2]
+
g
*
coeffs[1]
+
r
*
coeffs[0],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
r
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
b
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
#
endif
dst[0]
=
SAT_CAST
(
Y
)
;
dst[0]
=
SAT_CAST
(
Y
)
;
dst[1]
=
SAT_CAST
(
Cr
)
;
dst[1]
=
SAT_CAST
(
Cr
)
;
dst[2]
=
SAT_CAST
(
Cb
)
;
dst[2]
=
SAT_CAST
(
Cb
)
;
}
++y
;
}
}
}
}
}
...
@@ -346,35 +398,44 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -346,35 +398,44 @@ __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
{
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
srcptr
=
(
__global
const
DATA_TYPE*
)(
src
+
src_idx
)
;
__global
DATA_TYPE
*
dstptr
=
(
__global
DATA_TYPE*
)(
dst
+
dst_idx
)
;
DATA_TYPE
y
=
srcptr[0],
cr
=
srcptr[1],
cb
=
srcptr[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
srcptr
)
;
DATA_TYPE
y
=
src_pix.x,
cr
=
src_pix.y,
cb
=
src_pix.z
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
__constant
float
*
coeff
=
c_YCrCb2RGBCoeffs_f
;
__constant
float
*
coeff
=
c_YCrCb2RGBCoeffs_f
;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float
r
=
y
+
coeff[0]
*
(
cr
-
HALF_MAX
)
;
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float
g
=
y
+
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
;
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
float
b
=
y
+
coeff[3]
*
(
cb
-
HALF_MAX
)
;
#
else
#
else
__constant
int
*
coeff
=
c_YCrCb2RGBCoeffs_i
;
__constant
int
*
coeff
=
c_YCrCb2RGBCoeffs_i
;
int
r
=
y
+
CV_DESCALE
(
coeff[0]
*
(
cr
-
HALF_MAX
)
,
yuv_shift
)
;
int
r
=
y
+
CV_DESCALE
(
coeff[0]
*
(
cr
-
HALF_MAX
)
,
yuv_shift
)
;
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int
g
=
y
+
CV_DESCALE
(
coeff[1]
*
(
cr
-
HALF_MAX
)
+
coeff[2]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int
b
=
y
+
CV_DESCALE
(
coeff[3]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
int
b
=
y
+
CV_DESCALE
(
coeff[3]
*
(
cb
-
HALF_MAX
)
,
yuv_shift
)
;
#
endif
#
endif
dstptr[
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
dstptr[
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
dstptr[1]
=
SAT_CAST
(
g
)
;
dstptr[1]
=
SAT_CAST
(
g
)
;
dstptr[bidx]
=
SAT_CAST
(
b
)
;
dstptr[bidx]
=
SAT_CAST
(
b
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dstptr[3]
=
MAX_NUM
;
dstptr[3]
=
MAX_NUM
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -385,30 +446,39 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -385,30 +446,39 @@ __kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offse
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
d
y
<
rows
&&
d
x
<
cols
)
if
(
dx
<
cols
)
{
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
dy
<
rows
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE
r
=
src[0],
g
=
src[1],
b
=
src[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
r
=
src_pix.x,
g
=
src_pix.y,
b
=
src_pix.z
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
#
else
#
else
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2],
xyz_shift
)
;
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2],
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5],
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5],
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8],
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8],
xyz_shift
)
;
#
endif
#
endif
dst[0]
=
SAT_CAST
(
x
)
;
dst[0]
=
SAT_CAST
(
x
)
;
dst[1]
=
SAT_CAST
(
y
)
;
dst[1]
=
SAT_CAST
(
y
)
;
dst[2]
=
SAT_CAST
(
z
)
;
dst[2]
=
SAT_CAST
(
z
)
;
}
++dy
;
}
}
}
}
}
...
@@ -417,33 +487,42 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -417,33 +487,42 @@ __kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offse
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
int
rows,
int
cols,
__constant
COEFF_TYPE
*
coeffs
)
{
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
dy
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
d
y
<
rows
&&
d
x
<
cols
)
if
(
dx
<
cols
)
{
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
dy
<
rows
)
{
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
*
scnbytes
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE
x
=
src[0],
y
=
src[1],
z
=
src[2]
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
DATA_TYPE
x
=
src_pix.x,
y
=
src_pix.y,
z
=
src_pix.z
;
#
ifdef
DEPTH_5
#
ifdef
DEPTH_5
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
#
else
#
else
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2],
xyz_shift
)
;
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2],
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5],
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5],
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8],
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8],
xyz_shift
)
;
#
endif
#
endif
dst[0]
=
SAT_CAST
(
b
)
;
dst[0]
=
SAT_CAST
(
b
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[1]
=
SAT_CAST
(
g
)
;
dst[2]
=
SAT_CAST
(
r
)
;
dst[2]
=
SAT_CAST
(
r
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
}
++dy
;
}
}
}
}
}
...
@@ -454,33 +533,42 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
...
@@ -454,33 +533,42 @@ __kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
const
DATA_TYPE
*
src
=
(
__global
const
DATA_TYPE
*
)(
srcptr
+
src_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
__global
DATA_TYPE
*
dst
=
(
__global
DATA_TYPE
*
)(
dstptr
+
dst_idx
)
;
DATA_TYPE_4
src_pix
=
vload4
(
0
,
src
)
;
#
ifdef
REVERSE
#
ifdef
REVERSE
dst[0]
=
src[2]
;
dst[0]
=
src_pix.z
;
dst[1]
=
src[1]
;
dst[1]
=
src_pix.y
;
dst[2]
=
src[0]
;
dst[2]
=
src_pix.x
;
#
else
#
else
dst[0]
=
src[0]
;
dst[0]
=
src_pix.x
;
dst[1]
=
src[1]
;
dst[1]
=
src_pix.y
;
dst[2]
=
src[2]
;
dst[2]
=
src_pix.z
;
#
endif
#
endif
#
if
dcn
==
4
#
if
dcn
==
4
#
if
scn
==
3
#
if
scn
==
3
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
else
#
else
dst[3]
=
src[3]
;
dst[3]
=
src[3]
;
#
endif
#
endif
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -491,31 +579,39 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
...
@@ -491,31 +579,39 @@ __kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
ushort
t
=
*
((
__global
const
ushort*
)(
src
+
src_idx
))
;
#
if
greenbits
==
6
#
if
greenbits
==
6
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
3
)
&
~3
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
8
)
&
~7
)
;
#
else
#
else
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
bidx]
=
(
uchar
)(
t
<<
3
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_idx
+
1]
=
(
uchar
)((
t
>>
2
)
&
~7
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
(
uchar
)((
t
>>
7
)
&
~7
)
;
#
endif
#
endif
#
if
dcn
==
4
#
if
dcn
==
4
#
if
greenbits
==
6
#
if
greenbits
==
6
dst[dst_idx
+
3]
=
255
;
dst[dst_idx
+
3]
=
255
;
#
else
#
else
dst[dst_idx
+
3]
=
t
&
0x8000
?
255
:
0
;
dst[dst_idx
+
3]
=
t
&
0x8000
?
255
:
0
;
#
endif
#
endif
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -524,21 +620,30 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
...
@@ -524,21 +620,30 @@ __kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset
int
rows,
int
cols
)
int
rows,
int
cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
uchar4
src_pix
=
vload4
(
0
,
src
+
src_idx
)
;
#
if
greenbits
==
6
#
if
greenbits
==
6
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]&~3) << 3)|
((
src[src_idx
+
(
bidx^2
)
]
&~7
)
<<
8
))
;
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~3) << 3)|
((
src_pix.R_COMP
&~7
)
<<
8
))
;
#
elif
scn
==
3
#
elif
scn
==
3
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]&~7) << 2)|
((
src[src_idx
+
(
bidx^2
)
]
&~7
)
<<
7
))
;
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP&~7) << 2)|
((
src_pix.R_COMP
&~7
)
<<
7
))
;
#
else
#
else
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src[src_idx
+
bidx]
>>
3
)
|((src[src_idx + 1]
&~7) << 2)|
*
((
__global
ushort*
)(
dst
+
dst_idx
))
=
(
ushort
)((
src_pix.B_COMP
>>
3
)
|((src_pix.G_COMP
&~7) << 2)|
((
src[src_idx
+
(
bidx^2
)
]&~7
)
<<
7
)
|(src[src_idx + 3]
? 0x8000 : 0));
((
src_pix.R_COMP&~7
)
<<
7
)
|(src_pix.w
? 0x8000 : 0));
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -549,23 +654,31 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
...
@@ -549,23 +654,31 @@ __kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offse
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
int t = *((__global const ushort*)(src + src_idx));
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int dst_idx = mad24(y, dst_step, dst_offset + x);
int t = *((__global const ushort*)(src + src_idx));
#if greenbits == 6
#if greenbits == 6
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 3) & 0xfc)*G2Y +
((t >> 3) & 0xfc)*G2Y +
((t >> 8) & 0xf8)*R2Y, yuv_shift);
((t >> 8) & 0xf8)*R2Y, yuv_shift);
#else
#else
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
((t >> 2) & 0xf8)*G2Y +
((t >> 2) & 0xf8)*G2Y +
((t >> 7) & 0xf8)*R2Y, yuv_shift);
((t >> 7) & 0xf8)*R2Y, yuv_shift);
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -574,20 +687,28 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
...
@@ -574,20 +687,28 @@ __kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offse
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
int t = src[src_idx];
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + x);
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
int t = src[src_idx];
#if greenbits == 6
#if greenbits == 6
*((__global ushort*)(dst + dst_idx)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
*((__global ushort*)(dst + dst_idx)) = (ushort)((t >> 3) |
((
t
&
~3
)
<<
3
)
| ((t & ~7) << 8));
#else
#else
t >>= 3;
t >>= 3;
*((__global ushort*)(dst + dst_idx)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
*((__global ushort*)(dst + dst_idx)) = (ushort)(t|
(
t
<<
5
)
|(t << 10));
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -608,36 +729,45 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
...
@@ -608,36 +729,45 @@ __kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
__constant int * sdiv_table, __constant int * hdiv_table)
__constant int * sdiv_table, __constant int * hdiv_table)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
int b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)];
if (y < rows)
int h, s, v = b;
{
int vmin = b, diff;
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
int vr, vg;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
v = max( v, g );
v = max( v, r );
int b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
vmin = min( vmin, g );
int h, s, v = b;
vmin = min( vmin, r );
int vmin = b, diff;
int vr, vg;
diff = v - vmin;
vr = v == r ? -1 : 0;
v = max( v, g );
vg = v == g ? -1 : 0;
v = max( v, r );
vmin = min( vmin, g );
s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
vmin = min( vmin, r );
h = (vr & (g - b)) +
(~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
diff = v - vmin;
h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
vr = v == r ? -1 : 0;
h += h < 0 ? hrange : 0;
vg = v == g ? -1 : 0;
dst[dst_idx] = convert_uchar_sat_rte(h);
s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift;
dst[dst_idx + 1] = (uchar)s;
h = (vr & (g - b)) +
dst[dst_idx + 2] = (uchar)v;
(~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
h += h < 0 ? hrange : 0;
dst[dst_idx] = convert_uchar_sat_rte(h);
dst[dst_idx + 1] = (uchar)s;
dst[dst_idx + 2] = (uchar)v;
}
++y;
}
}
}
}
}
...
@@ -646,51 +776,60 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -646,51 +776,60 @@ __kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
float h = src[src_idx], s = src[src_idx + 1]*(1/255.f), v = src[src_idx + 2]*(1/255.f);
float b, g, r;
if (s != 0)
{
{
float tab[4];
if (y < rows)
int sector;
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
sector = convert_int_sat_rtn(h);
h -= sector;
if( (unsigned)sector >= 6u )
{
{
sector = 0;
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
h = 0.f;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
}
uchar4 src_pix = vload4(0, src + src_idx);
tab[0] = v;
float h = src_pix.x, s = src_pix.y*(1/255.f), v = src_pix.z*(1/255.f);
tab[1] = v*(1.f - s);
float b, g, r;
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
if (s != 0)
{
b = tab[sector_data[sector][0]];
float tab[4];
g = tab[sector_data[sector][1]];
int sector;
r = tab[sector_data[sector][2]];
h *= hscale;
}
if( h < 0 )
else
do h += 6; while( h < 0 );
b = g = r = v;
else if( h >= 6 )
do h -= 6; while( h >= 6 );
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
sector = convert_int_sat_rtn(h);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
h -= sector;
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
if( (unsigned)sector >= 6u )
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
#if dcn == 4
dst[dst_idx + 3] = MAX_NUM;
dst[dst_idx + 3] = MAX_NUM;
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -701,42 +840,51 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -701,42 +840,51 @@ __kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
__global const float * src = (__global const float *)(srcptr + src_idx);
if (y < rows)
__global float * dst = (__global float *)(dstptr + dst_idx);
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
float b = src[bidx], g = src[1], r = src[bidx^2];
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
float h, s, v;
__global const float * src = (__global const float *)(srcptr + src_idx);
float vmin, diff;
__global float * dst = (__global float *)(dstptr + dst_idx);
float4 src_pix = vload4(0, src);
v = vmin = r;
if( v < g ) v = g;
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
if( v < b ) v = b;
float h, s, v;
if( vmin > g ) vmin = g;
if( vmin > b ) vmin = b;
float vmin, diff;
diff = v - vmin;
v = vmin = r;
s = diff/(float)(fabs(v) + FLT_EPSILON);
if( v < g ) v = g;
diff = (float)(60.f/(diff + FLT_EPSILON));
if( v < b ) v = b;
if( v == r )
if( vmin > g ) vmin = g;
h = (g - b)*diff;
if( vmin > b ) vmin = b;
else if( v == g )
h = (b - r)*diff + 120.f;
diff = v - vmin;
else
s = diff/(float)(fabs(v) + FLT_EPSILON);
h = (r - g)*diff + 240.f;
diff = (float)(60.f/(diff + FLT_EPSILON));
if( v == r )
if( h < 0 ) h += 360.f;
h = (g - b)*diff;
else if( v == g )
dst[0] = h*hscale;
h = (b - r)*diff + 120.f;
dst[1] = s;
else
dst[2] = v;
h = (r - g)*diff + 240.f;
if( h < 0 ) h += 360.f;
dst[0] = h*hscale;
dst[1] = s;
dst[2] = v;
}
++y;
}
}
}
}
}
...
@@ -745,54 +893,63 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -745,54 +893,63 @@ __kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float h = src[0], s = src[1], v = src[2];
float b, g, r;
if (s != 0)
{
{
float tab[4];
if (y < rows)
int sector;
h *= hscale;
if(h < 0)
do h += 6; while (h < 0);
else if (h >= 6)
do h -= 6; while (h >= 6);
sector = convert_int_sat_rtn(h);
h -= sector;
if ((unsigned)sector >= 6u)
{
{
sector = 0;
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
h = 0.f;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
}
__global const float * src = (__global const float *)(srcptr + src_idx);
tab[0] = v;
__global float * dst = (__global float *)(dstptr + dst_idx);
tab[1] = v*(1.f - s);
float4 src_pix = vload4(0, src);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
float h = src_pix.x, s = src_pix.y, v = src_pix.z;
float b, g, r;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
if (s != 0)
r = tab[sector_data[sector][2]];
{
}
float tab[4];
else
int sector;
b = g = r = v;
h *= hscale;
if(h < 0)
dst[bidx] = b;
do h += 6; while (h < 0);
dst[1] = g;
else if (h >= 6)
dst[bidx^2] = r;
do h -= 6; while (h >= 6);
sector = convert_int_sat_rtn(h);
h -= sector;
if ((unsigned)sector >= 6u)
{
sector = 0;
h = 0.f;
}
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = v;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -807,44 +964,53 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
...
@@ -807,44 +964,53 @@ __kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
float b = src[src_idx + bidx]*(1/255.f), g = src[src_idx + 1]*(1/255.f), r = src[src_idx + (bidx^2)]*(1/255.f);
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
if (y < rows)
diff = 60.f/diff;
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
if( vmax == r )
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
h = (g - b)*diff;
uchar4 src_pix = vload4(0, src + src_idx);
else if( vmax == g )
h = (b - r)*diff + 120.f;
float b = src_pix.B_COMP*(1/255.f), g = src_pix.G_COMP*(1/255.f), r = src_pix.R_COMP*(1/255.f);
else
float h = 0.f, s = 0.f, l;
h = (r - g)*diff + 240.f;
float vmin, vmax, diff;
if( h < 0.f ) h += 360.f;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
}
++y;
}
}
dst[dst_idx] = convert_uchar_sat_rte(h*hscale);
dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f);
dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
}
}
}
}
...
@@ -853,50 +1019,59 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
...
@@ -853,50 +1019,59 @@ __kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f);
float b, g, r;
if (s != 0)
{
{
float tab[4];
if (y < rows)
{
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
float p1 = 2*l - p2;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
uchar4 src_pix = vload4(0, src + src_idx);
h *= hscale;
if( h < 0 )
float h = src_pix.x, l = src_pix.y*(1.f/255.f), s = src_pix.z*(1.f/255.f);
do h += 6; while( h < 0 );
float b, g, r;
else if( h >= 6 )
do h -= 6; while( h >= 6 );
if (s != 0)
{
int sector = convert_int_sat_rtn(h);
float tab[4];
h -= sector;
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
tab[0] = p2;
float p1 = 2*l - p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
h *= hscale;
tab[3] = p1 + (p2 - p1)*h;
if( h < 0 )
do h += 6; while( h < 0 );
b = tab[sector_data[sector][0]];
else if( h >= 6 )
g = tab[sector_data[sector][1]];
do h -= 6; while( h >= 6 );
r = tab[sector_data[sector][2]];
}
int sector = convert_int_sat_rtn(h);
else
h -= sector;
b = g = r = l;
tab[0] = p2;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
tab[1] = p1;
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
tab[2] = p1 + (p2 - p1)*(1-h);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
#if dcn == 4
dst[dst_idx + 3] = MAX_NUM;
dst[dst_idx + 3] = MAX_NUM;
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -907,47 +1082,56 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -907,47 +1082,56 @@ __kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float b = src[bidx], g = src[1], r = src[bidx^2];
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
if (y < rows)
diff = 60.f/diff;
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
if( vmax == r )
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
h = (g - b)*diff;
else if( vmax == g )
__global const float * src = (__global const float *)(srcptr + src_idx);
h = (b - r)*diff + 120.f;
__global float * dst = (__global float *)(dstptr + dst_idx);
else
float4 src_pix = vload4(0, src);
h = (r - g)*diff + 240.f;
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
if( h < 0.f ) h += 360.f;
float h = 0.f, s = 0.f, l;
float vmin, vmax, diff;
vmax = vmin = r;
if (vmax < g) vmax = g;
if (vmax < b) vmax = b;
if (vmin > g) vmin = g;
if (vmin > b) vmin = b;
diff = vmax - vmin;
l = (vmax + vmin)*0.5f;
if (diff > FLT_EPSILON)
{
s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
diff = 60.f/diff;
if( vmax == r )
h = (g - b)*diff;
else if( vmax == g )
h = (b - r)*diff + 120.f;
else
h = (r - g)*diff + 240.f;
if( h < 0.f ) h += 360.f;
}
dst[0] = h*hscale;
dst[1] = l;
dst[2] = s;
}
++y;
}
}
dst[0] = h*hscale;
dst[1] = l;
dst[2] = s;
}
}
}
}
...
@@ -956,54 +1140,63 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
...
@@ -956,54 +1140,63 @@ __kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
#pragma unroll
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
__global const float * src = (__global const float *)(srcptr + src_idx);
__global float * dst = (__global float *)(dstptr + dst_idx);
float h = src[0], l = src[1], s = src[2];
float b, g, r;
if (s != 0)
{
{
float tab[4];
if (y < rows)
int sector;
{
int src_idx = mad24(y, src_step, src_offset + x * scnbytes);
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
int dst_idx = mad24(y, dst_step, dst_offset + x * dcnbytes);
float p1 = 2*l - p2;
__global const float * src = (__global const float *)(srcptr + src_idx);
h *= hscale;
__global float * dst = (__global float *)(dstptr + dst_idx);
if( h < 0 )
float4 src_pix = vload4(0, src);
do h += 6; while( h < 0 );
else if( h >= 6 )
float h = src_pix.x, l = src_pix.y, s = src_pix.z;
do h -= 6; while( h >= 6 );
float b, g, r;
sector = convert_int_sat_rtn(h);
if (s != 0)
h -= sector;
{
float tab[4];
tab[0] = p2;
int sector;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
tab[3] = p1 + (p2 - p1)*h;
float p1 = 2*l - p2;
b = tab[sector_data[sector][0]];
h *= hscale;
g = tab[sector_data[sector][1]];
if( h < 0 )
r = tab[sector_data[sector][2]];
do h += 6; while( h < 0 );
}
else if( h >= 6 )
else
do h -= 6; while( h >= 6 );
b = g = r = l;
sector = convert_int_sat_rtn(h);
dst[bidx] = b;
h -= sector;
dst[1] = g;
dst[bidx^2] = r;
tab[0] = p2;
tab[1] = p1;
tab[2] = p1 + (p2 - p1)*(1-h);
tab[3] = p1 + (p2 - p1)*h;
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
b = g = r = l;
dst[bidx] = b;
dst[1] = g;
dst[bidx^2] = r;
#if dcn == 4
#if dcn == 4
dst[3] = MAX_NUM;
dst[3] = MAX_NUM;
#endif
#endif
}
++y;
}
}
}
}
}
...
@@ -1018,21 +1211,29 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
...
@@ -1018,21 +1211,29 @@ __kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
x <<= 2;
#pragma unroll
int src_idx = mad24(y, src_step, src_offset + x);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
int dst_idx = mad24(y, dst_step, dst_offset + x);
{
if (y < rows)
{
int src_idx = mad24(y, src_step, src_offset + (x << 2));
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
uchar v0 = src[src_idx], v1 = src[src_idx + 1]
;
uchar v0 = src_pix.x, v1 = src_pix.y
;
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3]
;
uchar v2 = src_pix.z, v3 = src_pix.w
;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
dst[dst_idx + 3] = v3;
dst[dst_idx + 3] = v3;
}
++y;
}
}
}
}
}
...
@@ -1041,22 +1242,30 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
...
@@ -1041,22 +1242,30 @@ __kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset
int rows, int cols)
int rows, int cols)
{
{
int x = get_global_id(0);
int x = get_global_id(0);
int y = get_global_id(1);
int y = get_global_id(1)
* PIX_PER_WI_Y
;
if (
y < rows &&
x < cols)
if (x < cols)
{
{
x <<= 2;
#pragma unroll
int src_idx = mad24(y, src_step, src_offset + x);
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
int dst_idx = mad24(y, dst_step, dst_offset + x);
{
if (y < rows)
uchar v0 = src[src_idx], v1 = src[src_idx + 1];
{
uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3];
int src_idx = mad24(y, src_step, src_offset + (x << 2));
uchar v3_half = v3 / 2;
int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
uchar4 src_pix = vload4(0, src + src_idx);
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
uchar v0 = src_pix.x, v1 = src_pix.y;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
uchar v2 = src_pix.z, v3 = src_pix.w;
dst[dst_idx + 3] = v3;
uchar v3_half = v3 / 2;
dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
dst[dst_idx + 3] = v3;
}
++y;
}
}
}
}
}
...
@@ -1086,32 +1295,41 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
...
@@ -1086,32 +1295,41 @@ __kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
__constant
int
*
coeffs,
int
Lscale,
int
Lshift
)
__constant
int
*
coeffs,
int
Lscale,
int
Lshift
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
src
+=
src_idx
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
dst
+=
dst_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4
src_pix
=
vload4
(
0
,
src_ptr
)
;
int
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
int
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
int
R
=
gammaTab[src[0]],
G
=
gammaTab[src[1]],
B
=
gammaTab[src[2]
]
;
int
R
=
gammaTab[src_pix.x],
G
=
gammaTab[src_pix.y],
B
=
gammaTab[src_pix.z
]
;
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2,
lab_shift
)
]
;
int
fX
=
LabCbrtTab_b[CV_DESCALE
(
R*C0
+
G*C1
+
B*C2,
lab_shift
)
]
;
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5,
lab_shift
)
]
;
int
fY
=
LabCbrtTab_b[CV_DESCALE
(
R*C3
+
G*C4
+
B*C5,
lab_shift
)
]
;
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8,
lab_shift
)
]
;
int
fZ
=
LabCbrtTab_b[CV_DESCALE
(
R*C6
+
G*C7
+
B*C8,
lab_shift
)
]
;
int
L
=
CV_DESCALE
(
Lscale*fY
+
Lshift,
lab_shift2
)
;
int
L
=
CV_DESCALE
(
Lscale*fY
+
Lshift,
lab_shift2
)
;
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
a
=
CV_DESCALE
(
500*
(
fX
-
fY
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
int
b
=
CV_DESCALE
(
200*
(
fY
-
fZ
)
+
128*
(
1
<<
lab_shift2
)
,
lab_shift2
)
;
dst[0]
=
SAT_CAST
(
L
)
;
dst_ptr[0]
=
SAT_CAST
(
L
)
;
dst[1]
=
SAT_CAST
(
a
)
;
dst_ptr[1]
=
SAT_CAST
(
a
)
;
dst[2]
=
SAT_CAST
(
b
)
;
dst_ptr[2]
=
SAT_CAST
(
b
)
;
}
++y
;
}
}
}
}
}
...
@@ -1125,45 +1343,54 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1125,45 +1343,54 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
_1_3,
float
_a
)
__constant
float
*
coeffs,
float
_1_3,
float
_a
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
float
C0
=
coeffs[0],
C1
=
coeffs[1],
C2
=
coeffs[2],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C3
=
coeffs[3],
C4
=
coeffs[4],
C5
=
coeffs[5],
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
C6
=
coeffs[6],
C7
=
coeffs[7],
C8
=
coeffs[8]
;
float
R
=
clamp
(
src[0]
,
0.0f,
1.0f
)
;
float
R
=
clamp
(
src_pix.x
,
0.0f,
1.0f
)
;
float
G
=
clamp
(
src[1]
,
0.0f,
1.0f
)
;
float
G
=
clamp
(
src_pix.y
,
0.0f,
1.0f
)
;
float
B
=
clamp
(
src[2]
,
0.0f,
1.0f
)
;
float
B
=
clamp
(
src_pix.z
,
0.0f,
1.0f
)
;
#
ifdef
SRGB
#
ifdef
SRGB
R
=
splineInterpolate
(
R
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
R
=
splineInterpolate
(
R
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
G
=
splineInterpolate
(
G
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
B
=
splineInterpolate
(
B
*
GammaTabScale,
gammaTab,
GAMMA_TAB_SIZE
)
;
#
endif
#
endif
float
X
=
R*C0
+
G*C1
+
B*C2
;
float
X
=
R*C0
+
G*C1
+
B*C2
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float
Y
=
R*C3
+
G*C4
+
B*C5
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float
Z
=
R*C6
+
G*C7
+
B*C8
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float
FX
=
X
>
0.008856f
?
pow
(
X,
_1_3
)
:
(
7.787f
*
X
+
_a
)
;
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float
FY
=
Y
>
0.008856f
?
pow
(
Y,
_1_3
)
:
(
7.787f
*
Y
+
_a
)
;
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float
FZ
=
Z
>
0.008856f
?
pow
(
Z,
_1_3
)
:
(
7.787f
*
Z
+
_a
)
;
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float
L
=
Y
>
0.008856f
?
(
116.f
*
FY
-
16.f
)
:
(
903.3f
*
Y
)
;
float
a
=
500.f
*
(
FX
-
FY
)
;
float
a
=
500.f
*
(
FX
-
FY
)
;
float
b
=
200.f
*
(
FY
-
FZ
)
;
float
b
=
200.f
*
(
FY
-
FZ
)
;
dst[0]
=
L
;
dst[0]
=
L
;
dst[1]
=
a
;
dst[1]
=
a
;
dst[2]
=
b
;
dst[2]
=
b
;
}
++y
;
}
}
}
}
}
...
@@ -1225,33 +1452,42 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
...
@@ -1225,33 +1452,42 @@ __kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
src
+=
src_idx
;
__global
const
uchar*
src_ptr
=
src
+
src_idx
;
dst
+=
dst_idx
;
__global
uchar*
dst_ptr
=
dst
+
dst_idx
;
uchar4
src_pix
=
vload4
(
0
,
src_ptr
)
;
float
srcbuf[3],
dstbuf[3]
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src[0]
*
(
100.f/255.f
)
;
srcbuf[0]
=
src_pix.x
*
(
100.f/255.f
)
;
srcbuf[1]
=
convert_float
(
src[1]
-
128
)
;
srcbuf[1]
=
convert_float
(
src_pix.y
-
128
)
;
srcbuf[2]
=
convert_float
(
src[2]
-
128
)
;
srcbuf[2]
=
convert_float
(
src_pix.z
-
128
)
;
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
#
ifdef
SRGB
#
ifdef
SRGB
gammaTab,
gammaTab,
#
endif
#
endif
coeffs,
lThresh,
fThresh
)
;
coeffs,
lThresh,
fThresh
)
;
dst
[0]
=
SAT_CAST
(
dstbuf[0]
*
255.0f
)
;
dst_ptr
[0]
=
SAT_CAST
(
dstbuf[0]
*
255.0f
)
;
dst
[1]
=
SAT_CAST
(
dstbuf[1]
*
255.0f
)
;
dst_ptr
[1]
=
SAT_CAST
(
dstbuf[1]
*
255.0f
)
;
dst
[2]
=
SAT_CAST
(
dstbuf[2]
*
255.0f
)
;
dst_ptr
[2]
=
SAT_CAST
(
dstbuf[2]
*
255.0f
)
;
#
if
dcn
==
4
#
if
dcn
==
4
dst
[3]
=
MAX_NUM
;
dst_ptr
[3]
=
MAX_NUM
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
@@ -1265,29 +1501,38 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
...
@@ -1265,29 +1501,38 @@ __kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offse
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
__constant
float
*
coeffs,
float
lThresh,
float
fThresh
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
PIX_PER_WI_Y
;
if
(
y
<
rows
&&
x
<
cols
)
if
(
x
<
cols
)
{
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
#
pragma
unroll
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
for
(
int
cy
=
0
; cy < PIX_PER_WI_Y; ++cy)
{
if
(
y
<
rows
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
*
scnbytes
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
dcnbytes
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
const
float
*
src
=
(
__global
const
float
*
)(
srcptr
+
src_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
__global
float
*
dst
=
(
__global
float
*
)(
dstptr
+
dst_idx
)
;
float4
src_pix
=
vload4
(
0
,
src
)
;
float
srcbuf[3],
dstbuf[3]
;
float
srcbuf[3],
dstbuf[3]
;
srcbuf[0]
=
src[0],
srcbuf[1]
=
src[1],
srcbuf[2]
=
src[2]
;
srcbuf[0]
=
src_pix.x,
srcbuf[1]
=
src_pix.y,
srcbuf[2]
=
src_pix.z
;
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
Lab2BGR_f
(
&srcbuf[0],
&dstbuf[0],
#
ifdef
SRGB
#
ifdef
SRGB
gammaTab,
gammaTab,
#
endif
#
endif
coeffs,
lThresh,
fThresh
)
;
coeffs,
lThresh,
fThresh
)
;
dst[0]
=
dstbuf[0],
dst[1]
=
dstbuf[1],
dst[2]
=
dstbuf[2]
;
dst[0]
=
dstbuf[0],
dst[1]
=
dstbuf[1],
dst[2]
=
dstbuf[2]
;
#
if
dcn
==
4
#
if
dcn
==
4
dst[3]
=
MAX_NUM
;
dst[3]
=
MAX_NUM
;
#
endif
#
endif
}
++y
;
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment