Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f9268d34
Commit
f9268d34
authored
Dec 21, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 21, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2023 from krodyush:pullreq/2.4-opt-131126-cvt
parents
6e22be41
e8dd31aa
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
864 additions
and
132 deletions
+864
-132
color.cpp
modules/ocl/src/color.cpp
+167
-17
cvt_color.cl
modules/ocl/src/opencl/cvt_color.cl
+697
-115
No files found.
modules/ocl/src/color.cpp
View file @
f9268d34
...
...
@@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
{
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
int
pixels_per_work_item
=
1
;
std
::
string
build_options
=
format
(
"-D DEPTH_%d"
,
src
.
depth
());
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
))
{
if
((
src
.
cols
%
4
==
0
)
&&
(
src
.
depth
()
==
CV_8U
))
pixels_per_work_item
=
4
;
else
if
(
src
.
cols
%
2
==
0
)
pixels_per_work_item
=
2
;
else
pixels_per_work_item
=
1
;
}
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d"
,
src
.
depth
(),
src
.
oclchannels
(),
bidx
,
pixels_per_work_item
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
...
...
@@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
bidx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
...
...
@@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
if
(
!
data2
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data2
.
data
));
size_t
gt
[
3
]
=
{
dst
.
cols
/
pixels_per_work_item
,
dst
.
rows
,
1
};
#ifdef ANDROID
size_t
lt
[
3
]
=
{
16
,
10
,
1
};
#else
size_t
lt
[
3
]
=
{
16
,
16
,
1
};
#endif
openCLExecuteKernel
(
src
.
clCxt
,
&
cvt_color
,
kernelName
.
c_str
(),
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
.
c_str
());
}
static
void
toHSV_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
const
std
::
string
&
kernelName
,
const
std
::
string
&
additionalOptions
=
std
::
string
(),
const
oclMat
&
data1
=
oclMat
(),
const
oclMat
&
data2
=
oclMat
())
{
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D scn=%d -D bidx=%d"
,
src
.
depth
(),
src
.
oclchannels
(),
bidx
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_offset
));
if
(
!
data1
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data1
.
data
));
if
(
!
data2
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data2
.
data
));
size_t
gt
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
#ifdef ANDROID
size_t
lt
[
3
]
=
{
16
,
10
,
1
};
#else
size_t
lt
[
3
]
=
{
16
,
16
,
1
};
#endif
openCLExecuteKernel
(
src
.
clCxt
,
&
cvt_color
,
kernelName
.
c_str
(),
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
.
c_str
());
}
static
void
fromGray_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
const
std
::
string
&
kernelName
,
const
std
::
string
&
additionalOptions
=
std
::
string
(),
const
oclMat
&
data
=
oclMat
())
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D bidx=%d"
,
src
.
depth
(),
dst
.
channels
(),
bidx
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_offset
));
if
(
!
data
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data
.
data
));
size_t
gt
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
#ifdef ANDROID
size_t
lt
[
3
]
=
{
16
,
10
,
1
};
...
...
@@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
static
void
toRGB_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
const
std
::
string
&
kernelName
,
const
std
::
string
&
additionalOptions
=
std
::
string
(),
const
oclMat
&
data
=
oclMat
())
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d"
,
src
.
depth
(),
dst
.
channels
());
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
int
pixels_per_work_item
=
1
;
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
))
{
if
((
src
.
cols
%
4
==
0
)
&&
(
src
.
depth
()
==
CV_8U
))
pixels_per_work_item
=
4
;
else
if
(
src
.
cols
%
2
==
0
)
pixels_per_work_item
=
2
;
else
pixels_per_work_item
=
1
;
}
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d"
,
src
.
depth
(),
dst
.
channels
(),
bidx
,
pixels_per_work_item
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_offset
));
if
(
!
data
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data
.
data
));
size_t
gt
[
3
]
=
{
dst
.
cols
/
pixels_per_work_item
,
dst
.
rows
,
1
};
#ifdef ANDROID
size_t
lt
[
3
]
=
{
16
,
10
,
1
};
#else
size_t
lt
[
3
]
=
{
16
,
16
,
1
};
#endif
openCLExecuteKernel
(
src
.
clCxt
,
&
cvt_color
,
kernelName
.
c_str
(),
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
.
c_str
());
}
static
void
toRGB_NV12_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
const
std
::
string
&
kernelName
,
const
std
::
string
&
additionalOptions
=
std
::
string
(),
const
oclMat
&
data
=
oclMat
())
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D bidx=%d"
,
src
.
depth
(),
dst
.
channels
(),
bidx
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
...
...
@@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
bidx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
...
...
@@ -119,13 +238,46 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
openCLExecuteKernel
(
src
.
clCxt
,
&
cvt_color
,
kernelName
.
c_str
(),
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
.
c_str
());
}
static
void
fromHSV_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
const
std
::
string
&
kernelName
,
const
std
::
string
&
additionalOptions
=
std
::
string
(),
const
oclMat
&
data
=
oclMat
())
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D bidx=%d"
,
src
.
depth
(),
dst
.
channels
(),
bidx
);
if
(
!
additionalOptions
.
empty
())
build_options
+=
additionalOptions
;
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_offset
));
if
(
!
data
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
data
.
data
));
size_t
gt
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
#ifdef ANDROID
size_t
lt
[
3
]
=
{
16
,
10
,
1
};
#else
size_t
lt
[
3
]
=
{
16
,
16
,
1
};
#endif
openCLExecuteKernel
(
src
.
clCxt
,
&
cvt_color
,
kernelName
.
c_str
(),
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
.
c_str
());
}
static
void
RGB_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
bool
reverse
)
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D scn=%d -D %s"
,
src
.
depth
(),
dst
.
channels
(),
src
.
channels
(),
reverse
?
"REVERSE"
:
"ORDER"
);
int
src_offset
=
src
.
offset
/
src
.
elemSize1
(),
src_step
=
src
.
step1
();
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step1
();
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D dcn=%d -D scn=%d -D %s"
,
src
.
depth
(),
dst
.
channels
(),
src
.
channels
(),
reverse
?
"REVERSE"
:
"ORDER"
);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
...
...
@@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static
void
fromRGB5x5_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
int
greenbits
,
const
std
::
string
&
kernelName
)
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D greenbits=%d -D dcn=%d"
,
src
.
depth
(),
greenbits
,
dst
.
channels
());
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D greenbits=%d -D dcn=%d
-D bidx=%d
"
,
src
.
depth
(),
greenbits
,
dst
.
channels
()
,
bidx
);
int
src_offset
=
src
.
offset
>>
1
,
src_step
=
src
.
step
>>
1
;
int
dst_offset
=
dst
.
offset
/
dst
.
elemSize1
(),
dst_step
=
dst
.
step
/
dst
.
elemSize1
();
...
...
@@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
bidx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
...
...
@@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
static
void
toRGB5x5_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
bidx
,
int
greenbits
,
const
std
::
string
&
kernelName
)
{
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D greenbits=%d -D scn=%d"
,
src
.
depth
(),
greenbits
,
src
.
channels
());
std
::
string
build_options
=
format
(
"-D DEPTH_%d -D greenbits=%d -D scn=%d
-D bidx=%d
"
,
src
.
depth
(),
greenbits
,
src
.
channels
()
,
bidx
);
int
src_offset
=
(
int
)
src
.
offset
,
src_step
=
(
int
)
src
.
step
;
int
dst_offset
=
dst
.
offset
>>
1
,
dst_step
=
dst
.
step
>>
1
;
...
...
@@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
bidx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
...
...
@@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
CV_Assert
(
scn
==
1
);
dcn
=
code
==
CV_GRAY2BGRA
?
4
:
3
;
dst
.
create
(
sz
,
CV_MAKETYPE
(
depth
,
dcn
));
toRGB
_caller
(
src
,
dst
,
0
,
"Gray2RGB"
);
fromGray
_caller
(
src
,
dst
,
0
,
"Gray2RGB"
);
break
;
}
case
CV_BGR2YUV
:
case
CV_RGB2YUV
:
...
...
@@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
Size
dstSz
(
sz
.
width
,
sz
.
height
*
2
/
3
);
dst
.
create
(
dstSz
,
CV_MAKETYPE
(
depth
,
dcn
));
toRGB_caller
(
src
,
dst
,
bidx
,
"YUV2RGBA_NV12"
);
toRGB_
NV12_
caller
(
src
,
dst
,
bidx
,
"YUV2RGBA_NV12"
);
break
;
}
case
CV_BGR2YCrCb
:
case
CV_RGB2YCrCb
:
...
...
@@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
initialized
=
true
;
}
fromRGB
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hrange=%d"
,
hrange
),
sdiv_data
,
hrange
==
256
?
hdiv_data256
:
hdiv_data180
);
toHSV
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hrange=%d"
,
hrange
),
sdiv_data
,
hrange
==
256
?
hdiv_data256
:
hdiv_data180
);
return
;
}
fromRGB
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hscale=%f"
,
hrange
*
(
1.
f
/
360.
f
)));
toHSV
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hscale=%f"
,
hrange
*
(
1.
f
/
360.
f
)));
break
;
}
case
CV_HSV2BGR
:
case
CV_HSV2RGB
:
case
CV_HSV2BGR_FULL
:
case
CV_HSV2RGB_FULL
:
...
...
@@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
dst
.
create
(
sz
,
CV_MAKETYPE
(
depth
,
dcn
));
std
::
string
kernelName
=
std
::
string
(
is_hsv
?
"HSV"
:
"HLS"
)
+
"2RGB"
;
toRGB
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hrange=%d -D hscale=%f"
,
hrange
,
6.
f
/
hrange
));
fromHSV
_caller
(
src
,
dst
,
bidx
,
kernelName
,
format
(
" -D hrange=%d -D hscale=%f"
,
hrange
,
6.
f
/
hrange
));
break
;
}
case
CV_RGBA2mRGBA
:
case
CV_mRGBA2RGBA
:
...
...
modules/ocl/src/opencl/cvt_color.cl
View file @
f9268d34
...
...
@@ -56,35 +56,59 @@
#
ifdef
DEPTH_0
#
define
DATA_TYPE
uchar
#
define
VECTOR2
uchar2
#
define
VECTOR4
uchar4
#
define
VECTOR8
uchar8
#
define
VECTOR16
uchar16
#
define
COEFF_TYPE
int
#
define
MAX_NUM
255
#
define
HALF_MAX
128
#
define
SAT_CAST
(
num
)
convert_uchar_sat_rte
(
num
)
#
define
SAT_CAST2
(
num
)
convert_uchar2_sat
(
num
)
#
define
SAT_CAST4
(
num
)
convert_uchar4_sat
(
num
)
#
endif
#
ifdef
DEPTH_2
#
define
DATA_TYPE
ushort
#
define
VECTOR2
ushort2
#
define
VECTOR4
ushort4
#
define
VECTOR8
ushort8
#
define
VECTOR16
ushort16
#
define
COEFF_TYPE
int
#
define
MAX_NUM
65535
#
define
HALF_MAX
32768
#
define
SAT_CAST
(
num
)
convert_ushort_sat_rte
(
num
)
#
define
SAT_CAST2
(
num
)
convert_ushort2_sat
(
num
)
#
define
SAT_CAST4
(
num
)
convert_ushort4_sat
(
num
)
#
endif
#
ifdef
DEPTH_5
#
define
DATA_TYPE
float
#
define
VECTOR2
float2
#
define
VECTOR4
float4
#
define
VECTOR8
float8
#
define
VECTOR16
float16
#
define
COEFF_TYPE
float
#
define
MAX_NUM
1.0f
#
define
HALF_MAX
0.5f
#
define
SAT_CAST
(
num
)
(
num
)
#
endif
#
ifndef
bidx
#
define
bidx
0
#
endif
#
ifndef
pixels_per_work_item
#
define
pixels_per_work_item
1
#
endif
#
define
CV_DESCALE
(
x,
n
)
(((
x
)
+
(
1
<<
((
n
)
-1
)))
>>
(
n
))
enum
{
yuv_shift
=
14
,
xyz_shift
=
12
,
hsv_shift
=
12
,
hsv_shift
=
12
,
R2Y
=
4899
,
G2Y
=
9617
,
B2Y
=
1868
,
...
...
@@ -93,26 +117,87 @@ enum
/////////////////////////////////////
RGB
<->
GRAY
//////////////////////////////////////
__constant
float
c_RGB2GrayCoeffs_f[3]
=
{
0.114f,
0.587f,
0.299f
}
;
__constant
int
c_RGB2GrayCoeffs_i[3]
=
{
B2Y,
G2Y,
R2Y
}
;
__kernel
void
RGB2Gray
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
{
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
(
x
<<
2
))
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
ifndef
INTEL_DEVICE
#
ifdef
DEPTH_5
dst[dst_idx]
=
src[src_idx
+
bidx]
*
0.114f
+
src[src_idx
+
1]
*
0.587f
+
src[src_idx
+
(
bidx^2
)
]
*
0.299f
;
#
else
dst[dst_idx]
=
(
DATA_TYPE
)
CV_DESCALE
((
src[src_idx
+
bidx]
*
B2Y
+
src[src_idx
+
1]
*
G2Y
+
src[src_idx
+
(
bidx^2
)
]
*
R2Y
)
,
yuv_shift
)
;
#
endif
#
else
//INTEL_DEVICE
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2GrayCoeffs_f
;
#
else
__constant
int
*
coeffs
=
c_RGB2GrayCoeffs_i
;
#
endif
#
if
(
1
==
pixels_per_work_item
)
{
#
ifdef
DEPTH_5
*dst_ptr
=
src_ptr[bidx]
*
coeffs[0]
+
src_ptr[1]
*
coeffs[1]
+
src_ptr[
(
bidx^2
)
]
*coeffs[2]
;
#
else
*dst_ptr
=
(
DATA_TYPE
)
CV_DESCALE
((
src_ptr[bidx]
*
coeffs[0]
+
src_ptr[1]
*
coeffs[1]
+
src_ptr[
(
bidx^2
)
]
*
coeffs[2]
)
,
yuv_shift
)
;
#
endif
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
c0
=
r0.s04
;
const
float2
c1
=
r0.s15
;
const
float2
c2
=
r0.s26
;
const
float2
Y
=
c0
*
coeffs[bidx]
+
c1
*
coeffs[1]
+
c2
*
coeffs[bidx^2]
;
#
else
const
int2
c0
=
convert_int2
(
r0.s04
)
;
const
int2
c1
=
convert_int2
(
r0.s15
)
;
const
int2
c2
=
convert_int2
(
r0.s26
)
;
const
int2
yi
=
CV_DESCALE
(
c0
*
coeffs[bidx]
+
c1
*
coeffs[1]
+
c2
*
coeffs[bidx^2],
yuv_shift
)
;
const
VECTOR2
Y
=
SAT_CAST2
(
yi
)
;
#
endif
vstore2
(
Y,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
c0
=
convert_int4
(
r0.s048c
)
;
const
int4
c1
=
convert_int4
(
r0.s159d
)
;
const
int4
c2
=
convert_int4
(
r0.s26ae
)
;
const
int4
Y
=
CV_DESCALE
(
c0
*
coeffs[bidx]
+
c1
*
coeffs[1]
+
c2
*
coeffs[bidx^2],
yuv_shift
)
;
vstore4
(
SAT_CAST4
(
Y
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//pixels_per_work_item
#
endif
//INTEL_DEVICE
}
}
__kernel
void
Gray2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
Gray2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -140,10 +225,10 @@ __constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877
__constant
int
c_RGB2YUVCoeffs_i[5]
=
{
B2Y,
G2Y,
R2Y,
8061
,
14369
}
;
__kernel
void
RGB2YUV
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -151,24 +236,84 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
x
<<=
2
;
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
DATA_TYPE
rgb[]
=
{
src[src_idx],
src[src_idx
+
1],
src[src_idx
+
2]
}
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YUVCoeffs_f
;
DATA_TYPE
Y
=
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx]
;
DATA_TYPE
Cr
=
(
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
rgb[bidx]
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
__constant
int
*
coeffs
=
c_RGB2YUVCoeffs_i
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
rgb[bidx]
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
#
endif
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
rgb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
#
ifdef
DEPTH_5
float
Y
=
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx]
;
float
U
=
(
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
float
V
=
(
rgb[bidx]
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
int
Y
=
CV_DESCALE
(
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx],
yuv_shift
)
;
int
U
=
CV_DESCALE
((
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
V
=
CV_DESCALE
((
rgb[bidx]
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
dst[dst_idx]
=
SAT_CAST
(
Y
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
Cr
)
;
dst[dst_idx
+
2]
=
SAT_CAST
(
Cb
)
;
dst_ptr[0]
=
SAT_CAST
(
Y
)
;
dst_ptr[1]
=
SAT_CAST
(
U
)
;
dst_ptr[2]
=
SAT_CAST
(
V
)
;
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
c0
=
r0.s04
;
const
float2
c1
=
r0.s15
;
const
float2
c2
=
r0.s26
;
const
float2
Y
=
(
bidx
==
0
)
?
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0]
)
:
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2]
)
;
const
float2
U
=
(
bidx
==
0
)
?
((
c2
-
Y
)
*
coeffs[3]
+
HALF_MAX
)
:
((
c0
-
Y
)
*
coeffs[3]
+
HALF_MAX
)
;
const
float2
V
=
(
bidx
==
0
)
?
((
c0
-
Y
)
*
coeffs[4]
+
HALF_MAX
)
:
((
c2
-
Y
)
*
coeffs[4]
+
HALF_MAX
)
;
#
else
const
int2
c0
=
convert_int2
(
r0.s04
)
;
const
int2
c1
=
convert_int2
(
r0.s15
)
;
const
int2
c2
=
convert_int2
(
r0.s26
)
;
const
int2
yi
=
(
bidx
==
0
)
?
CV_DESCALE
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0],
yuv_shift
)
:
CV_DESCALE
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2],
yuv_shift
)
;
const
int2
ui
=
(
bidx
==
0
)
?
CV_DESCALE
((
c2
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c0
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int2
vi
=
(
bidx
==
0
)
?
CV_DESCALE
((
c0
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c2
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
VECTOR2
Y
=
SAT_CAST2
(
yi
)
;
const
VECTOR2
U
=
SAT_CAST2
(
ui
)
;
const
VECTOR2
V
=
SAT_CAST2
(
vi
)
;
#
endif
vstore8
((
VECTOR8
)(
Y.s0,
U.s0,
V.s0,
0
,
Y.s1,
U.s1,
V.s1,
0
)
,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
c0
=
convert_int4
(
r0.s048c
)
;
const
int4
c1
=
convert_int4
(
r0.s159d
)
;
const
int4
c2
=
convert_int4
(
r0.s26ae
)
;
const
int4
yi
=
(
bidx
==
0
)
?
CV_DESCALE
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0],
yuv_shift
)
:
CV_DESCALE
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2],
yuv_shift
)
;
const
int4
ui
=
(
bidx
==
0
)
?
CV_DESCALE
((
c2
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c0
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int4
vi
=
(
bidx
==
0
)
?
CV_DESCALE
((
c0
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c2
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
VECTOR4
Y
=
SAT_CAST4
(
yi
)
;
const
VECTOR4
U
=
SAT_CAST4
(
ui
)
;
const
VECTOR4
V
=
SAT_CAST4
(
vi
)
;
vstore16
((
VECTOR16
)(
Y.s0,
U.s0,
V.s0,
0
,
Y.s1,
U.s1,
V.s1,
0
,
Y.s2,
U.s2,
V.s2,
0
,
Y.s3,
U.s3,
V.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//pixels_per_work_item
}
}
...
...
@@ -176,10 +321,10 @@ __constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
__constant
int
c_YUV2RGBCoeffs_i[5]
=
{
33292
,
-6472
,
-9519
,
18678
}
;
__kernel
void
YUV2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -187,26 +332,94 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
x
<<=
2
;
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
DATA_TYPE
yuv[]
=
{
src[src_idx],
src[src_idx
+
1],
src[src_idx
+
2]
}
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_YUV2RGBCoeffs_f
;
float
b
=
yuv[0]
+
(
yuv[2]
-
HALF_MAX
)
*
coeffs[3]
;
float
g
=
yuv[0]
+
(
yuv[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[1]
;
float
r
=
yuv[0]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[0]
;
#
else
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
int
b
=
yuv[0]
+
CV_DESCALE
((
yuv[2]
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
int
g
=
yuv[0]
+
CV_DESCALE
((
yuv[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
int
r
=
yuv[0]
+
CV_DESCALE
((
yuv[1]
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
#
endif
dst[dst_idx
+
bidx]
=
SAT_CAST
(
b
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
g
)
;
dst[dst_idx
+
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
yuv[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
#
ifdef
DEPTH_5
float
B
=
yuv[0]
+
(
yuv[2]
-
HALF_MAX
)
*
coeffs[3]
;
float
G
=
yuv[0]
+
(
yuv[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[1]
;
float
R
=
yuv[0]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[0]
;
#
else
int
B
=
yuv[0]
+
CV_DESCALE
((
yuv[2]
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
int
G
=
yuv[0]
+
CV_DESCALE
((
yuv[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
yuv[1]
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
int
R
=
yuv[0]
+
CV_DESCALE
((
yuv[1]
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
#
endif
dst_ptr[bidx]
=
SAT_CAST
(
B
)
;
dst_ptr[1]
=
SAT_CAST
(
G
)
;
dst_ptr[
(
bidx^2
)
]
=
SAT_CAST
(
R
)
;
#
if
dcn
==
4
dst[dst_idx
+
3]
=
MAX_NUM
;
dst_ptr[3]
=
MAX_NUM
;
#
endif
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
Y
=
r0.s04
;
const
float2
U
=
r0.s15
;
const
float2
V
=
r0.s26
;
const
float2
c0
=
(
bidx
==
0
)
?
(
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
)
:
(
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
)
;
const
float2
c1
=
Y
+
(
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1]
;
const
float2
c2
=
(
bidx
==
0
)
?
(
Y
+
(
U
-
HALF_MAX
)
*
coeffs[0]
)
:
(
Y
+
(
V
-
HALF_MAX
)
*
coeffs[3]
)
;
#
else
const
int2
Y
=
convert_int2
(
r0.s04
)
;
const
int2
U
=
convert_int2
(
r0.s15
)
;
const
int2
V
=
convert_int2
(
r0.s26
)
;
const
int2
c0i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
;
const
int2
c1i
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int2
c2i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
;
const
VECTOR2
c0
=
SAT_CAST2
(
c0i
)
;
const
VECTOR2
c1
=
SAT_CAST2
(
c1i
)
;
const
VECTOR2
c2
=
SAT_CAST2
(
c2i
)
;
#
endif
#
if
dcn
==
4
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
MAX_NUM,
c0.s1,
c1.s1,
c2.s1,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
Y
=
convert_int4
(
r0.s048c
)
;
const
int4
U
=
convert_int4
(
r0.s159d
)
;
const
int4
V
=
convert_int4
(
r0.s26ae
)
;
const
int4
c0i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
;
const
int4
c1i
=
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[2]
+
(
U
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int4
c2i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
U
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
V
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
;
const
VECTOR4
c0
=
SAT_CAST4
(
c0i
)
;
const
VECTOR4
c1
=
SAT_CAST4
(
c1i
)
;
const
VECTOR4
c2
=
SAT_CAST4
(
c2i
)
;
#
if
dcn
==
4
vstore16
((
VECTOR16
)(
c0.s0,
c1.s0,
c2.s0,
MAX_NUM,
c0.s1,
c1.s1,
c2.s1,
MAX_NUM,
c0.s2,
c1.s2,
c2.s2,
MAX_NUM,
c0.s3,
c1.s3,
c2.s3,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore16
((
VECTOR16
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
,
c0.s2,
c1.s2,
c2.s2,
0
,
c0.s3,
c1.s3,
c2.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
#
endif
}
#
endif
//pixels_per_work_item
}
}
...
...
@@ -218,7 +431,7 @@ __constant int ITUR_BT_601_CVR = 1673527;
__constant
int
ITUR_BT_601_SHIFT
=
20
;
__kernel
void
YUV2RGBA_NV12
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
uchar*
src,
__global
uchar*
dst,
__global
const
uchar*
src,
__global
uchar*
dst,
int
src_offset,
int
dst_offset
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -275,10 +488,10 @@ __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564
__constant
int
c_RGB2YCrCbCoeffs_i[5]
=
{R2Y,
G2Y,
B2Y,
11682
,
9241}
;
__kernel
void
RGB2YCrCb
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -287,24 +500,82 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
DATA_TYPE
rgb[]
=
{
src[src_idx],
src[src_idx
+
1],
src[src_idx
+
2]
}
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
ifdef
DEPTH_5
__constant
float
*
coeffs
=
c_RGB2YCrCbCoeffs_f
;
DATA_TYPE
Y
=
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx]
;
DATA_TYPE
Cr
=
(
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
DATA_TYPE
Cb
=
(
rgb[bidx]
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
__constant
int
*
coeffs
=
c_RGB2YCrCbCoeffs_i
;
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
int
Y
=
CV_DESCALE
(
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
rgb[bidx]
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
#
endif
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
rgb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
#
ifdef
DEPTH_5
float
Y
=
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx]
;
float
Cr
=
(
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
HALF_MAX
;
float
Cb
=
(
rgb[bidx]
-
Y
)
*
coeffs[4]
+
HALF_MAX
;
#
else
int
Y
=
CV_DESCALE
(
rgb[0]
*
coeffs[bidx^2]
+
rgb[1]
*
coeffs[1]
+
rgb[2]
*
coeffs[bidx],
yuv_shift
)
;
int
Cr
=
CV_DESCALE
((
rgb[bidx^2]
-
Y
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
int
Cb
=
CV_DESCALE
((
rgb[bidx]
-
Y
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
#
endif
dst[dst_idx]
=
SAT_CAST
(
Y
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
Cr
)
;
dst[dst_idx
+
2]
=
SAT_CAST
(
Cb
)
;
dst_ptr[0]
=
SAT_CAST
(
Y
)
;
dst_ptr[1]
=
SAT_CAST
(
Cr
)
;
dst_ptr[2]
=
SAT_CAST
(
Cb
)
;
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
c0
=
r0.s04
;
const
float2
c1
=
r0.s15
;
const
float2
c2
=
r0.s26
;
const
float2
Y
=
(
bidx
==
0
)
?
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0]
)
:
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2]
)
;
const
float2
Cr
=
(
bidx
==
0
)
?
((
c2
-
Y
)
*
coeffs[3]
+
HALF_MAX
)
:
((
c0
-
Y
)
*
coeffs[3]
+
HALF_MAX
)
;
const
float2
Cb
=
(
bidx
==
0
)
?
((
c0
-
Y
)
*
coeffs[4]
+
HALF_MAX
)
:
((
c2
-
Y
)
*
coeffs[4]
+
HALF_MAX
)
;
#
else
const
int2
c0
=
convert_int2
(
r0.s04
)
;
const
int2
c1
=
convert_int2
(
r0.s15
)
;
const
int2
c2
=
convert_int2
(
r0.s26
)
;
const
int2
yi
=
(
bidx
==
0
)
?
CV_DESCALE
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0],
yuv_shift
)
:
CV_DESCALE
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2],
yuv_shift
)
;
const
int2
ui
=
(
bidx
==
0
)
?
CV_DESCALE
((
c2
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c0
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int2
vi
=
(
bidx
==
0
)
?
CV_DESCALE
((
c0
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c2
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
VECTOR2
Y
=
SAT_CAST2
(
yi
)
;
const
VECTOR2
Cr
=
SAT_CAST2
(
ui
)
;
const
VECTOR2
Cb
=
SAT_CAST2
(
vi
)
;
#
endif
vstore8
((
VECTOR8
)(
Y.s0,
Cr.s0,
Cb.s0,
0
,
Y.s1,
Cr.s1,
Cb.s1,
0
)
,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
c0
=
convert_int4
(
r0.s048c
)
;
const
int4
c1
=
convert_int4
(
r0.s159d
)
;
const
int4
c2
=
convert_int4
(
r0.s26ae
)
;
const
int4
yi
=
(
bidx
==
0
)
?
CV_DESCALE
(
c0
*
coeffs[2]
+
c1
*
coeffs[1]
+
c2
*
coeffs[0],
yuv_shift
)
:
CV_DESCALE
(
c0
*
coeffs[0]
+
c1
*
coeffs[1]
+
c2
*
coeffs[2],
yuv_shift
)
;
const
int4
ui
=
(
bidx
==
0
)
?
CV_DESCALE
((
c2
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c0
-
yi
)
*
coeffs[3]
+
delta,
yuv_shift
)
;
const
int4
vi
=
(
bidx
==
0
)
?
CV_DESCALE
((
c0
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
:
CV_DESCALE
((
c2
-
yi
)
*
coeffs[4]
+
delta,
yuv_shift
)
;
const
VECTOR4
Y
=
SAT_CAST4
(
yi
)
;
const
VECTOR4
Cr
=
SAT_CAST4
(
ui
)
;
const
VECTOR4
Cb
=
SAT_CAST4
(
vi
)
;
vstore16
((
VECTOR16
)(
Y.s0,
Cr.s0,
Cb.s0,
0
,
Y.s1,
Cr.s1,
Cb.s1,
0
,
Y.s2,
Cr.s2,
Cb.s2,
0
,
Y.s3,
Cr.s3,
Cb.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//pixels_per_work_item
}
}
...
...
@@ -312,10 +583,10 @@ __constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
__constant
int
c_YCrCb2RGBCoeffs_i[4]
=
{
22987
,
-11698
,
-5636
,
29049
}
;
__kernel
void
YCrCb2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -324,36 +595,103 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
DATA_TYPE
ycrcb[]
=
{
src[src_idx],
src[src_idx
+
1],
src[src_idx
+
2]
}
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
ifdef
DEPTH_5
__constant
float
*
coeff
=
c_YCrCb2RGBCoeffs_f
;
float
r
=
ycrcb[0]
+
coeff[0]
*
(
ycrcb[1]
-
HALF_MAX
)
;
float
g
=
ycrcb[0]
+
coeff[1]
*
(
ycrcb[1]
-
HALF_MAX
)
+
coeff[2]
*
(
ycrcb[2]
-
HALF_MAX
)
;
float
b
=
ycrcb[0]
+
coeff[3]
*
(
ycrcb[2]
-
HALF_MAX
)
;
__constant
float
*
coeffs
=
c_YCrCb2RGBCoeffs_f
;
#
else
__constant
int
*
coeff
=
c_YCrCb2RGBCoeffs_i
;
int
r
=
ycrcb[0]
+
CV_DESCALE
(
coeff[0]
*
(
ycrcb[1]
-
HALF_MAX
)
,
yuv_shift
)
;
int
g
=
ycrcb[0]
+
CV_DESCALE
(
coeff[1]
*
(
ycrcb[1]
-
HALF_MAX
)
+
coeff[2]
*
(
ycrcb[2]
-
HALF_MAX
)
,
yuv_shift
)
;
int
b
=
ycrcb[0]
+
CV_DESCALE
(
coeff[3]
*
(
ycrcb[2]
-
HALF_MAX
)
,
yuv_shift
)
;
__constant
int
*
coeffs
=
c_YCrCb2RGBCoeffs_i
;
#
endif
dst[dst_idx
+
(
bidx^2
)
]
=
SAT_CAST
(
r
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
g
)
;
dst[dst_idx
+
bidx]
=
SAT_CAST
(
b
)
;
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
ycrcb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
#
ifdef
DEPTH_5
float
B
=
ycrcb[0]
+
(
ycrcb[2]
-
HALF_MAX
)
*
coeffs[3]
;
float
G
=
ycrcb[0]
+
(
ycrcb[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
ycrcb[1]
-
HALF_MAX
)
*
coeffs[1]
;
float
R
=
ycrcb[0]
+
(
ycrcb[1]
-
HALF_MAX
)
*
coeffs[0]
;
#
else
int
B
=
ycrcb[0]
+
CV_DESCALE
((
ycrcb[2]
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
)
;
int
G
=
ycrcb[0]
+
CV_DESCALE
((
ycrcb[2]
-
HALF_MAX
)
*
coeffs[2]
+
(
ycrcb[1]
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
int
R
=
ycrcb[0]
+
CV_DESCALE
((
ycrcb[1]
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
)
;
#
endif
dst_ptr[bidx]
=
SAT_CAST
(
B
)
;
dst_ptr[1]
=
SAT_CAST
(
G
)
;
dst_ptr[
(
bidx^2
)
]
=
SAT_CAST
(
R
)
;
#
if
dcn
==
4
dst[dst_idx
+
3]
=
MAX_NUM
;
dst_ptr[3]
=
MAX_NUM
;
#
endif
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
Y
=
r0.s04
;
const
float2
Cr
=
r0.s15
;
const
float2
Cb
=
r0.s26
;
const
float2
c0
=
(
bidx
==
0
)
?
(
Y
+
(
Cb
-
HALF_MAX
)
*
coeffs[3]
)
:
(
Y
+
(
Cr
-
HALF_MAX
)
*
coeffs[0]
)
;
const
float2
c1
=
Y
+
(
Cb
-
HALF_MAX
)
*
coeffs[2]
+
(
Cr
-
HALF_MAX
)
*
coeffs[1]
;
const
float2
c2
=
(
bidx
==
0
)
?
(
Y
+
(
Cr
-
HALF_MAX
)
*
coeffs[0]
)
:
(
Y
+
(
Cb
-
HALF_MAX
)
*
coeffs[3]
)
;
#
else
const
int2
Y
=
convert_int2
(
r0.s04
)
;
const
int2
Cr
=
convert_int2
(
r0.s15
)
;
const
int2
Cb
=
convert_int2
(
r0.s26
)
;
const
int2
c0i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
Cr
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
;
const
int2
c1i
=
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[2]
+
(
Cr
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int2
c2i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
Cr
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
;
const
VECTOR2
c0
=
SAT_CAST2
(
c0i
)
;
const
VECTOR2
c1
=
SAT_CAST2
(
c1i
)
;
const
VECTOR2
c2
=
SAT_CAST2
(
c2i
)
;
#
endif
#
if
dcn
==
4
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
MAX_NUM,
c0.s1,
c1.s1,
c2.s1,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
Y
=
convert_int4
(
r0.s048c
)
;
const
int4
Cr
=
convert_int4
(
r0.s159d
)
;
const
int4
Cb
=
convert_int4
(
r0.s26ae
)
;
const
int4
c0i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
Cr
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
;
const
int4
c1i
=
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[2]
+
(
Cr
-
HALF_MAX
)
*
coeffs[1],
yuv_shift
)
;
const
int4
c2i
=
(
bidx
==
0
)
?
(
Y
+
CV_DESCALE
((
Cr
-
HALF_MAX
)
*
coeffs[0],
yuv_shift
))
:
(
Y
+
CV_DESCALE
((
Cb
-
HALF_MAX
)
*
coeffs[3],
yuv_shift
))
;
const
VECTOR4
c0
=
SAT_CAST4
(
c0i
)
;
const
VECTOR4
c1
=
SAT_CAST4
(
c1i
)
;
const
VECTOR4
c2
=
SAT_CAST4
(
c2i
)
;
#
if
dcn
==
4
vstore16
((
VECTOR16
)(
c0.s0,
c1.s0,
c2.s0,
MAX_NUM,
c0.s1,
c1.s1,
c2.s1,
MAX_NUM,
c0.s2,
c1.s2,
c2.s2,
MAX_NUM,
c0.s3,
c1.s3,
c2.s3,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore16
((
VECTOR16
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
,
c0.s2,
c1.s2,
c2.s2,
0
,
c0.s3,
c1.s3,
c2.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
#
endif
}
#
endif
//pixels_per_work_item
}
}
/////////////////////////////////////
RGB
<->
XYZ
//////////////////////////////////////
__kernel
void
RGB2XYZ
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset,
__constant
COEFF_TYPE
*
coeffs
)
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
dy
=
get_global_id
(
1
)
;
if
(
dy
<
rows
&&
dx
<
cols
)
...
...
@@ -362,28 +700,84 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
)
;
DATA_TYPE
r
=
src[src_idx],
g
=
src[src_idx
+
1],
b
=
src[src_idx
+
2]
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
if
(
1
==
pixels_per_work_item
)
{
DATA_TYPE
R
=
src_ptr[0],
G
=
src_ptr[1],
B
=
src_ptr[2]
;
#
ifdef
DEPTH_5
float
x
=
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2]
;
float
y
=
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5]
;
float
z
=
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8]
;
float
X
=
R
*
coeffs[0]
+
G
*
coeffs[1]
+
B
*
coeffs[2]
;
float
Y
=
R
*
coeffs[3]
+
G
*
coeffs[4]
+
B
*
coeffs[5]
;
float
Z
=
R
*
coeffs[6]
+
G
*
coeffs[7]
+
B
*
coeffs[8]
;
#
else
int
x
=
CV_DESCALE
(
r
*
coeffs[0]
+
g
*
coeffs[1]
+
b
*
coeffs[2],
xyz_shift
)
;
int
y
=
CV_DESCALE
(
r
*
coeffs[3]
+
g
*
coeffs[4]
+
b
*
coeffs[5],
xyz_shift
)
;
int
z
=
CV_DESCALE
(
r
*
coeffs[6]
+
g
*
coeffs[7]
+
b
*
coeffs[8],
xyz_shift
)
;
int
X
=
CV_DESCALE
(
R
*
coeffs[0]
+
G
*
coeffs[1]
+
B
*
coeffs[2],
xyz_shift
)
;
int
Y
=
CV_DESCALE
(
R
*
coeffs[3]
+
G
*
coeffs[4]
+
B
*
coeffs[5],
xyz_shift
)
;
int
Z
=
CV_DESCALE
(
R
*
coeffs[6]
+
G
*
coeffs[7]
+
B
*
coeffs[8],
xyz_shift
)
;
#
endif
dst[dst_idx]
=
SAT_CAST
(
x
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
y
)
;
dst[dst_idx
+
2]
=
SAT_CAST
(
z
)
;
dst_ptr[0]
=
SAT_CAST
(
X
)
;
dst_ptr[1]
=
SAT_CAST
(
Y
)
;
dst_ptr[2]
=
SAT_CAST
(
Z
)
;
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
R
=
r0.s04
;
const
float2
G
=
r0.s15
;
const
float2
B
=
r0.s26
;
const
float2
X
=
R
*
coeffs[0]
+
G
*
coeffs[1]
+
B
*
coeffs[2]
;
const
float2
Y
=
R
*
coeffs[3]
+
G
*
coeffs[4]
+
B
*
coeffs[5]
;
const
float2
Z
=
R
*
coeffs[6]
+
G
*
coeffs[7]
+
B
*
coeffs[8]
;
#
else
const
int2
R
=
convert_int2
(
r0.s04
)
;
const
int2
G
=
convert_int2
(
r0.s15
)
;
const
int2
B
=
convert_int2
(
r0.s26
)
;
const
int2
xi
=
CV_DESCALE
(
R
*
coeffs[0]
+
G
*
coeffs[1]
+
B
*
coeffs[2],
xyz_shift
)
;
const
int2
yi
=
CV_DESCALE
(
R
*
coeffs[3]
+
G
*
coeffs[4]
+
B
*
coeffs[5],
xyz_shift
)
;
const
int2
zi
=
CV_DESCALE
(
R
*
coeffs[6]
+
G
*
coeffs[7]
+
B
*
coeffs[8],
xyz_shift
)
;
const
VECTOR2
X
=
SAT_CAST2
(
xi
)
;
const
VECTOR2
Y
=
SAT_CAST2
(
yi
)
;
const
VECTOR2
Z
=
SAT_CAST2
(
zi
)
;
#
endif
vstore8
((
VECTOR8
)(
X.s0,
Y.s0,
Z.s0,
0
,
X.s1,
Y.s1,
Z.s1,
0
)
,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
R
=
convert_int4
(
r0.s048c
)
;
const
int4
G
=
convert_int4
(
r0.s159d
)
;
const
int4
B
=
convert_int4
(
r0.s26ae
)
;
const
int4
xi
=
CV_DESCALE
(
R
*
coeffs[0]
+
G
*
coeffs[1]
+
B
*
coeffs[2],
xyz_shift
)
;
const
int4
yi
=
CV_DESCALE
(
R
*
coeffs[3]
+
G
*
coeffs[4]
+
B
*
coeffs[5],
xyz_shift
)
;
const
int4
zi
=
CV_DESCALE
(
R
*
coeffs[6]
+
G
*
coeffs[7]
+
B
*
coeffs[8],
xyz_shift
)
;
const
VECTOR4
X
=
SAT_CAST4
(
xi
)
;
const
VECTOR4
Y
=
SAT_CAST4
(
yi
)
;
const
VECTOR4
Z
=
SAT_CAST4
(
zi
)
;
vstore16
((
VECTOR16
)(
X.s0,
Y.s0,
Z.s0,
0
,
X.s1,
Y.s1,
Z.s1,
0
,
X.s2,
Y.s2,
Z.s2,
0
,
X.s3,
Y.s3,
Z.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//pixels_per_work_item
}
}
__kernel
void
XYZ2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
__global
const
DATA_TYPE*
src,
__global
DATA_TYPE*
dst,
int
src_offset,
int
dst_offset,
__constant
COEFF_TYPE
*
coeffs
)
{
int
dx
=
get_global_id
(
0
)
;
int
dx
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
dy
=
get_global_id
(
1
)
;
if
(
dy
<
rows
&&
dx
<
cols
)
...
...
@@ -392,23 +786,87 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
dy,
src_step,
src_offset
+
dx
)
;
int
dst_idx
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
)
;
DATA_TYPE
x
=
src[src_idx],
y
=
src[src_idx
+
1],
z
=
src[src_idx
+
2]
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
X
=
src_ptr[0],
Y
=
src_ptr[1],
Z
=
src_ptr[2]
;
#
ifdef
DEPTH_5
float
b
=
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2]
;
float
g
=
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5]
;
float
r
=
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8]
;
float
B
=
X
*
coeffs[0]
+
Y
*
coeffs[1]
+
Z
*
coeffs[2]
;
float
G
=
X
*
coeffs[3]
+
Y
*
coeffs[4]
+
Z
*
coeffs[5]
;
float
R
=
X
*
coeffs[6]
+
Y
*
coeffs[7]
+
Z
*
coeffs[8]
;
#
else
int
b
=
CV_DESCALE
(
x
*
coeffs[0]
+
y
*
coeffs[1]
+
z
*
coeffs[2],
xyz_shift
)
;
int
g
=
CV_DESCALE
(
x
*
coeffs[3]
+
y
*
coeffs[4]
+
z
*
coeffs[5],
xyz_shift
)
;
int
r
=
CV_DESCALE
(
x
*
coeffs[6]
+
y
*
coeffs[7]
+
z
*
coeffs[8],
xyz_shift
)
;
int
B
=
CV_DESCALE
(
X
*
coeffs[0]
+
Y
*
coeffs[1]
+
Z
*
coeffs[2],
xyz_shift
)
;
int
G
=
CV_DESCALE
(
X
*
coeffs[3]
+
Y
*
coeffs[4]
+
Z
*
coeffs[5],
xyz_shift
)
;
int
R
=
CV_DESCALE
(
X
*
coeffs[6]
+
Y
*
coeffs[7]
+
Z
*
coeffs[8],
xyz_shift
)
;
#
endif
dst[dst_idx]
=
SAT_CAST
(
b
)
;
dst[dst_idx
+
1]
=
SAT_CAST
(
g
)
;
dst[dst_idx
+
2]
=
SAT_CAST
(
r
)
;
dst_ptr[0]
=
SAT_CAST
(
B
)
;
dst_ptr[1]
=
SAT_CAST
(
G
)
;
dst_ptr[2]
=
SAT_CAST
(
R
)
;
#
if
dcn
==
4
dst[dst_idx
+
3]
=
MAX_NUM
;
dst_ptr[
3]
=
MAX_NUM
;
#
endif
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
#
ifdef
DEPTH_5
const
float2
X
=
r0.s04
;
const
float2
Y
=
r0.s15
;
const
float2
Z
=
r0.s26
;
float2
B
=
X
*
coeffs[0]
+
Y
*
coeffs[1]
+
Z
*
coeffs[2]
;
float2
G
=
X
*
coeffs[3]
+
Y
*
coeffs[4]
+
Z
*
coeffs[5]
;
float2
R
=
X
*
coeffs[6]
+
Y
*
coeffs[7]
+
Z
*
coeffs[8]
;
#
else
const
int2
xi
=
convert_int2
(
r0.s04
)
;
const
int2
yi
=
convert_int2
(
r0.s15
)
;
const
int2
zi
=
convert_int2
(
r0.s26
)
;
const
int2
bi
=
CV_DESCALE
(
xi
*
coeffs[0]
+
yi
*
coeffs[1]
+
zi
*
coeffs[2],
xyz_shift
)
;
const
int2
gi
=
CV_DESCALE
(
xi
*
coeffs[3]
+
yi
*
coeffs[4]
+
zi
*
coeffs[5],
xyz_shift
)
;
const
int2
ri
=
CV_DESCALE
(
xi
*
coeffs[6]
+
yi
*
coeffs[7]
+
zi
*
coeffs[8],
xyz_shift
)
;
const
VECTOR2
R
=
SAT_CAST2
(
ri
)
;
const
VECTOR2
G
=
SAT_CAST2
(
gi
)
;
const
VECTOR2
B
=
SAT_CAST2
(
bi
)
;
#
endif
#
if
dcn
==
4
vstore8
((
VECTOR8
)(
B.s0,
G.s0,
R.s0,
MAX_NUM,
B.s1,
G.s1,
R.s1,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore8
((
VECTOR8
)(
B.s0,
G.s0,
R.s0,
0
,
B.s1,
G.s1,
R.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
elif
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
xi
=
convert_int4
(
r0.s048c
)
;
const
int4
yi
=
convert_int4
(
r0.s159d
)
;
const
int4
zi
=
convert_int4
(
r0.s26ae
)
;
const
int4
bi
=
CV_DESCALE
(
xi
*
coeffs[0]
+
yi
*
coeffs[1]
+
zi
*
coeffs[2],
xyz_shift
)
;
const
int4
gi
=
CV_DESCALE
(
xi
*
coeffs[3]
+
yi
*
coeffs[4]
+
zi
*
coeffs[5],
xyz_shift
)
;
const
int4
ri
=
CV_DESCALE
(
xi
*
coeffs[6]
+
yi
*
coeffs[7]
+
zi
*
coeffs[8],
xyz_shift
)
;
const
VECTOR4
R
=
SAT_CAST4
(
ri
)
;
const
VECTOR4
G
=
SAT_CAST4
(
gi
)
;
const
VECTOR4
B
=
SAT_CAST4
(
bi
)
;
#
if
dcn
==
4
vstore16
((
VECTOR16
)(
B.s0,
G.s0,
R.s0,
MAX_NUM,
B.s1,
G.s1,
R.s1,
MAX_NUM,
B.s2,
G.s2,
R.s2,
MAX_NUM,
B.s3,
G.s3,
R.s3,
MAX_NUM
)
,
0
,
dst_ptr
)
;
#
else
vstore16
((
VECTOR16
)(
B.s0,
G.s0,
R.s0,
0
,
B.s1,
G.s1,
R.s1,
0
,
B.s2,
G.s2,
R.s2,
0
,
B.s3,
G.s3,
R.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
#
endif
}
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -427,6 +885,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
ifndef
INTEL_DEVICE
#
ifdef
REVERSE
dst[dst_idx]
=
src[src_idx
+
2]
;
dst[dst_idx
+
1]
=
src[src_idx
+
1]
;
...
...
@@ -444,12 +903,43 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
dst[dst_idx
+
3]
=
src[src_idx
+
3]
;
#
endif
#
endif
#
else
//INTEL_DEVICE
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
const
VECTOR4
r0
=
vload4
(
0
,
src_ptr
)
;
#
ifdef
REVERSE
if
(
3
==
dcn
)
{
vstore4
((
VECTOR4
)(
r0.s210,
0
)
,
0
,
dst_ptr
)
;
}
else
if
(
3
==
scn
)
{
vstore4
((
VECTOR4
)(
r0.s210,
MAX_NUM
)
,
0
,
dst_ptr
)
;
}
else
{
vstore4
((
VECTOR4
)(
r0.s2103
)
,
0
,
dst_ptr
)
;
}
#
elif
defined
ORDER
if
(
3
==
dcn
)
{
vstore4
((
VECTOR4
)(
r0.s012,
0
)
,
0
,
dst_ptr
)
;
}
else
if
(
3
==
scn
)
{
vstore4
((
VECTOR4
)(
r0.s012,
MAX_NUM
)
,
0
,
dst_ptr
)
;
}
else
{
vstore4
(
r0,
0
,
dst_ptr
)
;
}
#
endif
#
endif
//INTEL_DEVICE
}
}
/////////////////////////////////////
RGB5x5
<->
RGB
//////////////////////////////////////
__kernel
void
RGB5x52RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB5x52RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
ushort
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -482,7 +972,7 @@ __kernel void RGB5x52RGB(int cols, int rows, int src_step, int dst_step, int bid
}
}
__kernel
void
RGB2RGB5x5
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB2RGB5x5
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
ushort
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -507,7 +997,7 @@ __kernel void RGB2RGB5x5(int cols, int rows, int src_step, int dst_step, int bid
///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
__kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step,
int bidx,
__kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step,
__global const ushort * src, __global uchar * dst,
int src_offset, int dst_offset)
{
...
...
@@ -532,7 +1022,7 @@ __kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step, int bi
}
}
__kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step,
int bidx,
__kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step,
__global const uchar * src, __global ushort * dst,
int src_offset, int dst_offset)
{
...
...
@@ -560,7 +1050,7 @@ __constant int sector_data[][3] = { {1, 3, 0}, { 1, 0, 2 }, { 3, 0, 1 }, { 0, 2,
#
ifdef
DEPTH_0
__kernel
void
RGB2HSV
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB2HSV
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset,
__constant
int
*
sdiv_table,
__constant
int
*
hdiv_table
)
...
...
@@ -600,7 +1090,7 @@ __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
}
}
__kernel
void
HSV2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
HSV2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -656,7 +1146,7 @@ __kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
#
elif
defined
DEPTH_5
__kernel
void
RGB2HSV
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB2HSV
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
float
*
src,
__global
float
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -698,7 +1188,7 @@ __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
}
}
__kernel
void
HSV2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
HSV2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
float
*
src,
__global
float
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -758,7 +1248,7 @@ __kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
#
ifdef
DEPTH_0
__kernel
void
RGB2HLS
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB2HLS
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -805,7 +1295,7 @@ __kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
}
}
__kernel
void
HLS2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
HLS2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -860,7 +1350,7 @@ __kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
#
elif
defined
DEPTH_5
__kernel
void
RGB2HLS
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
RGB2HLS
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
float
*
src,
__global
float
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -907,7 +1397,7 @@ __kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
}
}
__kernel
void
HLS2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
HLS2RGB
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
float
*
src,
__global
float
*
dst,
int
src_offset,
int
dst_offset
)
{
...
...
@@ -968,10 +1458,10 @@ __kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
#
ifdef
DEPTH_0
__kernel
void
RGBA2mRGBA
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__global
const
uchar
*
src,
__global
uchar
*
dst,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -980,21 +1470,65 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
uchar
v0
=
src[src_idx],
v1
=
src[src_idx
+
1]
;
uchar
v2
=
src[src_idx
+
2],
v3
=
src[src_idx
+
3]
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
if
(
1
==
pixels_per_work_item
)
{
const
uchar4
r0
=
vload4
(
0
,
src_ptr
)
;
dst_ptr[0]
=
(
r0.s0
*
r0.s3
+
HALF_MAX
)
/
MAX_NUM
;
dst_ptr[1]
=
(
r0.s1
*
r0.s3
+
HALF_MAX
)
/
MAX_NUM
;
dst_ptr[2]
=
(
r0.s2
*
r0.s3
+
HALF_MAX
)
/
MAX_NUM
;
dst_ptr[3]
=
r0.s3
;
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
uchar8
r0
=
vload8
(
0
,
src_ptr
)
;
const
int2
v0
=
convert_int2
(
r0.s04
)
;
const
int2
v1
=
convert_int2
(
r0.s15
)
;
const
int2
v2
=
convert_int2
(
r0.s26
)
;
const
int2
v3
=
convert_int2
(
r0.s37
)
;
const
int2
ri
=
(
v0
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
int2
gi
=
(
v1
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
int2
bi
=
(
v2
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
uchar2
r
=
convert_uchar2
(
ri
)
;
const
uchar2
g
=
convert_uchar2
(
gi
)
;
const
uchar2
b
=
convert_uchar2
(
bi
)
;
vstore8
((
uchar8
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1
)
,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
const
uchar16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
v0
=
convert_int4
(
r0.s048c
)
;
const
int4
v1
=
convert_int4
(
r0.s159d
)
;
const
int4
v2
=
convert_int4
(
r0.s26ae
)
;
const
int4
v3
=
convert_int4
(
r0.s37bf
)
;
const
int4
ri
=
(
v0
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
int4
gi
=
(
v1
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
int4
bi
=
(
v2
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
const
uchar4
r
=
convert_uchar4
(
ri
)
;
const
uchar4
g
=
convert_uchar4
(
gi
)
;
const
uchar4
b
=
convert_uchar4
(
bi
)
;
dst[dst_idx]
=
(
v0
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
dst[dst_idx
+
1]
=
(
v1
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
dst[dst_idx
+
2]
=
(
v2
*
v3
+
HALF_MAX
)
/
MAX_NUM
;
dst[dst_idx
+
3]
=
v3
;
vstore16
((
uchar16
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1,
r.s2,
g.s2,
b.s2,
v3.s2,
r.s3,
g.s3,
b.s3,
v3.s3
)
,
0
,
dst_ptr
)
;
}
#
endif
//
pixels_per_work_item
}
}
__kernel
void
mRGBA2RGBA
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
int
bidx,
__kernel
void
mRGBA2RGBA
(
int
cols,
int
rows,
int
src_step,
int
dst_step,
__global
const
uchar
*
src,
__global
uchar
*
dst,
int
src_offset,
int
dst_offset
)
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
*
pixels_per_work_item
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
rows
&&
x
<
cols
)
...
...
@@ -1003,14 +1537,62 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, int bid
int
src_idx
=
mad24
(
y,
src_step,
src_offset
+
x
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
uchar
v0
=
src[src_idx],
v1
=
src[src_idx
+
1]
;
uchar
v2
=
src[src_idx
+
2],
v3
=
src[src_idx
+
3]
;
uchar
v3_half
=
v3
/
2
;
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
#
if
(
1
==
pixels_per_work_item
)
{
const
uchar4
r0
=
vload4
(
0
,
src_ptr
)
;
const
uchar
v3_half
=
r0.s3
/
2
;
dst[dst_idx]
=
v3
==
0
?
0
:
(
v0
*
MAX_NUM
+
v3_half
)
/
v3
;
dst[dst_idx
+
1]
=
v3
==
0
?
0
:
(
v1
*
MAX_NUM
+
v3_half
)
/
v3
;
dst[dst_idx
+
2]
=
v3
==
0
?
0
:
(
v2
*
MAX_NUM
+
v3_half
)
/
v3
;
dst[dst_idx
+
3]
=
v3
;
const
uchar
r
=
(
r0.s3
==
0
)
?
0
:
(
r0.s0
*
MAX_NUM
+
v3_half
)
/
r0.s3
;
const
uchar
g
=
(
r0.s3
==
0
)
?
0
:
(
r0.s1
*
MAX_NUM
+
v3_half
)
/
r0.s3
;
const
uchar
b
=
(
r0.s3
==
0
)
?
0
:
(
r0.s2
*
MAX_NUM
+
v3_half
)
/
r0.s3
;
vstore4
((
uchar4
)(
r,
g,
b,
r0.s3
)
,
0
,
dst_ptr
)
;
}
#
elif
(
2
==
pixels_per_work_item
)
{
const
uchar8
r0
=
vload8
(
0
,
src_ptr
)
;
const
int2
v0
=
convert_int2
(
r0.s04
)
;
const
int2
v1
=
convert_int2
(
r0.s15
)
;
const
int2
v2
=
convert_int2
(
r0.s26
)
;
const
int2
v3
=
convert_int2
(
r0.s37
)
;
const
int2
v3_half
=
v3
/
2
;
const
int2
ri
=
(
v3
==
0
)
?
0
:
(
v0
*
MAX_NUM
+
v3_half
)
/
v3
;
const
int2
gi
=
(
v3
==
0
)
?
0
:
(
v1
*
MAX_NUM
+
v3_half
)
/
v3
;
const
int2
bi
=
(
v3
==
0
)
?
0
:
(
v2
*
MAX_NUM
+
v3_half
)
/
v3
;
const
uchar2
r
=
convert_uchar2
(
ri
)
;
const
uchar2
g
=
convert_uchar2
(
gi
)
;
const
uchar2
b
=
convert_uchar2
(
bi
)
;
vstore8
((
uchar8
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1
)
,
0
,
dst_ptr
)
;
}
#
elif
(
4
==
pixels_per_work_item
)
{
const
uchar16
r0
=
vload16
(
0
,
src_ptr
)
;
const
int4
v0
=
convert_int4
(
r0.s048c
)
;
const
int4
v1
=
convert_int4
(
r0.s159d
)
;
const
int4
v2
=
convert_int4
(
r0.s26ae
)
;
const
int4
v3
=
convert_int4
(
r0.s37bf
)
;
const
int4
v3_half
=
v3
/
2
;
const
int4
ri
=
(
v3
==
0
)
?
0
:
(
v0
*
MAX_NUM
+
v3_half
)
/
v3
;
const
int4
gi
=
(
v3
==
0
)
?
0
:
(
v1
*
MAX_NUM
+
v3_half
)
/
v3
;
const
int4
bi
=
(
v3
==
0
)
?
0
:
(
v2
*
MAX_NUM
+
v3_half
)
/
v3
;
const
uchar4
r
=
convert_uchar4
(
ri
)
;
const
uchar4
g
=
convert_uchar4
(
gi
)
;
const
uchar4
b
=
convert_uchar4
(
bi
)
;
vstore16
((
uchar16
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1,
r.s2,
g.s2,
b.s2,
v3.s2,
r.s3,
g.s3,
b.s3,
v3.s3
)
,
0
,
dst_ptr
)
;
}
#
endif
//
pixels_per_work_item
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment