Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e8dd31aa
Commit
e8dd31aa
authored
Dec 20, 2013
by
krodyush
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change code according to reviewer suggestions
parent
27c1bd27
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
67 additions
and
113 deletions
+67
-113
perf_color.cpp
modules/ocl/perf/perf_color.cpp
+26
-67
cvt_color.cl
modules/ocl/src/opencl/cvt_color.cl
+41
-46
No files found.
modules/ocl/perf/perf_color.cpp
View file @
e8dd31aa
...
...
@@ -57,39 +57,9 @@ CV_ENUM(ConversionTypes, CV_RGB2GRAY, CV_RGB2BGR, CV_RGB2YUV, CV_YUV2RGB, CV_RGB
CV_HLS2RGB
,
CV_BGR5652BGR
,
CV_BGR2BGR565
,
CV_RGBA2mRGBA
,
CV_mRGBA2RGBA
,
CV_YUV2RGB_NV12
)
typedef
tuple
<
Size
,
tuple
<
ConversionTypes
,
int
,
int
>
>
cvtColorParams
;
typedef
TestBaseWithParam
<
cvtColorParams
>
cvtColorU8Fixture
;
typedef
TestBaseWithParam
<
cvtColorParams
>
cvtColorF32Fixture
;
typedef
TestBaseWithParam
<
cvtColorParams
>
cvtColorU16Fixture
;
typedef
TestBaseWithParam
<
cvtColorParams
>
cvtColorFixture
;
#define RUN_CVT_PERF_TEST \
cvtColorParams params = GetParam();\
const Size srcSize = get<0>(params);\
const tuple<int, int, int> conversionParams = get<1>(params);\
const int code = get<0>(conversionParams), scn = get<1>(conversionParams),\
dcn = get<2>(conversionParams);\
\
Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(scn));\
declare.in(src, WARMUP_RNG).out(dst);\
\
if (RUN_OCL_IMPL)\
{\
ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());\
\
OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);\
oclDst.download(dst);\
\
SANITY_CHECK(dst, 1);\
}\
else if (RUN_PLAIN_IMPL)\
{\
TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);\
\
SANITY_CHECK(dst);\
}\
else\
OCL_PERF_ELSE\
PERF_TEST_P
(
cvtColorU8Fixture
,
cvtColor
,
testing
::
Combine
(
PERF_TEST_P
(
cvtColorFixture
,
cvtColor
,
testing
::
Combine
(
testing
::
Values
(
Size
(
1000
,
1002
),
Size
(
2000
,
2004
),
Size
(
4000
,
4008
)),
testing
::
Values
(
make_tuple
(
ConversionTypes
(
CV_RGB2GRAY
),
3
,
1
),
...
...
@@ -111,41 +81,30 @@ PERF_TEST_P(cvtColorU8Fixture, cvtColor, testing::Combine(
make_tuple
(
ConversionTypes
(
CV_YUV2RGB_NV12
),
1
,
3
)
)))
{
RUN_CVT_PERF_TEST
}
cvtColorParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
tuple
<
int
,
int
,
int
>
conversionParams
=
get
<
1
>
(
params
);
const
int
code
=
get
<
0
>
(
conversionParams
),
scn
=
get
<
1
>
(
conversionParams
),
dcn
=
get
<
2
>
(
conversionParams
);
PERF_TEST_P
(
cvtColorF32Fixture
,
cvtColor
,
testing
::
Combine
(
testing
::
Values
(
Size
(
1000
,
1002
),
Size
(
2000
,
2004
),
Size
(
4000
,
4008
)),
testing
::
Values
(
make_tuple
(
ConversionTypes
(
CV_RGB2GRAY
),
3
,
1
),
make_tuple
(
ConversionTypes
(
CV_RGB2BGR
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2YUV
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_YUV2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2YCrCb
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_YCrCb2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2XYZ
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_XYZ2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2HSV
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_HSV2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2HLS
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_HLS2RGB
),
3
,
3
)
)))
{
RUN_CVT_PERF_TEST
}
Mat
src
(
srcSize
,
CV_8UC
(
scn
)),
dst
(
srcSize
,
CV_8UC
(
scn
));
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
PERF_TEST_P
(
cvtColorU16Fixture
,
cvtColor
,
testing
::
Combine
(
testing
::
Values
(
Size
(
1000
,
1002
),
Size
(
2000
,
2004
),
Size
(
4000
,
4008
)),
testing
::
Values
(
make_tuple
(
ConversionTypes
(
CV_RGB2GRAY
),
3
,
1
),
make_tuple
(
ConversionTypes
(
CV_RGB2BGR
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2YUV
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_YUV2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2YCrCb
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_YCrCb2RGB
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_RGB2XYZ
),
3
,
3
),
make_tuple
(
ConversionTypes
(
CV_XYZ2RGB
),
3
,
3
)
)))
{
RUN_CVT_PERF_TEST
if
(
RUN_OCL_IMPL
)
{
ocl
::
oclMat
oclSrc
(
src
),
oclDst
(
src
.
size
(),
dst
.
type
());
OCL_TEST_CYCLE
()
ocl
::
cvtColor
(
oclSrc
,
oclDst
,
code
,
dcn
);
oclDst
.
download
(
dst
);
SANITY_CHECK
(
dst
,
1
);
}
else
if
(
RUN_PLAIN_IMPL
)
{
TEST_CYCLE
()
cv
::
cvtColor
(
src
,
dst
,
code
,
dcn
);
SANITY_CHECK
(
dst
);
}
else
OCL_PERF_ELSE
}
modules/ocl/src/opencl/cvt_color.cl
View file @
e8dd31aa
...
...
@@ -133,12 +133,14 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
int
dst_idx
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
ifndef
INTEL_DEVICE
#
ifdef
DEPTH_5
dst[dst_idx]
=
src[src_idx
+
bidx]
*
0.114f
+
src[src_idx
+
1]
*
0.587f
+
src[src_idx
+
(
bidx^2
)
]
*
0.299f
;
#
else
dst[dst_idx]
=
(
DATA_TYPE
)
CV_DESCALE
((
src[src_idx
+
bidx]
*
B2Y
+
src[src_idx
+
1]
*
G2Y
+
src[src_idx
+
(
bidx^2
)
]
*
R2Y
)
,
yuv_shift
)
;
#
endif
#
else
#
else
//INTEL_DEVICE
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
...
...
@@ -148,7 +150,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
__constant
int
*
coeffs
=
c_RGB2GrayCoeffs_i
;
#
endif
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
#
ifdef
DEPTH_5
*dst_ptr
=
src_ptr[bidx]
*
coeffs[0]
+
src_ptr[1]
*
coeffs[1]
+
src_ptr[
(
bidx^2
)
]
*coeffs[2]
;
...
...
@@ -156,7 +158,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
*dst_ptr
=
(
DATA_TYPE
)
CV_DESCALE
((
src_ptr[bidx]
*
coeffs[0]
+
src_ptr[1]
*
coeffs[1]
+
src_ptr[
(
bidx^2
)
]
*
coeffs[2]
)
,
yuv_shift
)
;
#
endif
}
else
if
(
2
==
pixels_per_work_item
)
#
el
if
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -177,7 +179,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
vstore2
(
Y,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -190,6 +192,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
vstore4
(
SAT_CAST4
(
Y
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//pixels_per_work_item
#
endif
//INTEL_DEVICE
}
}
...
...
@@ -244,7 +247,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
#
endif
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
rgb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
...
...
@@ -262,8 +265,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
dst_ptr[1]
=
SAT_CAST
(
U
)
;
dst_ptr[2]
=
SAT_CAST
(
V
)
;
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -291,7 +293,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
Y.s0,
U.s0,
V.s0,
0
,
Y.s1,
U.s1,
V.s1,
0
)
,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -311,7 +313,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
vstore16
((
VECTOR16
)(
Y.s0,
U.s0,
V.s0,
0
,
Y.s1,
U.s1,
V.s1,
0
,
Y.s2,
U.s2,
V.s2,
0
,
Y.s3,
U.s3,
V.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -340,7 +342,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
__constant
int
*
coeffs
=
c_YUV2RGBCoeffs_i
;
#
endif
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
yuv[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
...
...
@@ -361,8 +363,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3]
=
MAX_NUM
;
#
endif
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -394,7 +395,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -418,7 +419,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
#
endif
#
endif
}
#
endif
//INTEL_DEVICE
#
endif
//pixels_per_work_item
}
}
...
...
@@ -509,7 +510,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
const
int
delta
=
HALF_MAX
*
(
1
<<
yuv_shift
)
;
#
endif
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
rgb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
...
...
@@ -527,8 +528,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
dst_ptr[1]
=
SAT_CAST
(
Cr
)
;
dst_ptr[2]
=
SAT_CAST
(
Cb
)
;
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -556,7 +556,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
Y.s0,
Cr.s0,
Cb.s0,
0
,
Y.s1,
Cr.s1,
Cb.s1,
0
)
,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -575,7 +575,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
vstore16
((
VECTOR16
)(
Y.s0,
Cr.s0,
Cb.s0,
0
,
Y.s1,
Cr.s1,
Cb.s1,
0
,
Y.s2,
Cr.s2,
Cb.s2,
0
,
Y.s3,
Cr.s3,
Cb.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -604,7 +604,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
__constant
int
*
coeffs
=
c_YCrCb2RGBCoeffs_i
;
#
endif
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
ycrcb[]
=
{src_ptr[0],
src_ptr[1],
src_ptr[2]}
;
...
...
@@ -625,8 +625,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3]
=
MAX_NUM
;
#
endif
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -658,7 +657,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
c0.s0,
c1.s0,
c2.s0,
0
,
c0.s1,
c1.s1,
c2.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -682,7 +681,7 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
#
endif
#
endif
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -704,7 +703,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
DATA_TYPE
R
=
src_ptr[0],
G
=
src_ptr[1],
B
=
src_ptr[2]
;
...
...
@@ -722,8 +721,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
dst_ptr[1]
=
SAT_CAST
(
Y
)
;
dst_ptr[2]
=
SAT_CAST
(
Z
)
;
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -751,7 +749,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
X.s0,
Y.s0,
Z.s0,
0
,
X.s1,
Y.s1,
Z.s1,
0
)
,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -771,7 +769,7 @@ __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
vstore16
((
VECTOR16
)(
X.s0,
Y.s0,
Z.s0,
0
,
X.s1,
Y.s1,
Z.s1,
0
,
X.s2,
Y.s2,
Z.s2,
0
,
X.s3,
Y.s3,
Z.s3,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -791,7 +789,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
DATA_TYPE
X
=
src_ptr[0],
Y
=
src_ptr[1],
Z
=
src_ptr[2]
;
...
...
@@ -812,8 +810,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
dst_ptr[3]
=
MAX_NUM
;
#
endif
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
VECTOR8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -845,7 +842,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
vstore8
((
VECTOR8
)(
B.s0,
G.s0,
R.s0,
0
,
B.s1,
G.s1,
R.s1,
0
)
,
0
,
dst_ptr
)
;
#
endif
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
#
ifndef
DEPTH_5
const
VECTOR16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -869,7 +866,7 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
#
endif
#
endif
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -906,7 +903,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
dst[dst_idx
+
3]
=
src[src_idx
+
3]
;
#
endif
#
endif
#
else
#
else
//INTEL_DEVICE
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
...
...
@@ -936,7 +933,7 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
vstore4
(
r0,
0
,
dst_ptr
)
;
}
#
endif
#
endif
#
endif
//INTEL_DEVICE
}
}
...
...
@@ -1476,7 +1473,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
uchar4
r0
=
vload4
(
0
,
src_ptr
)
;
...
...
@@ -1485,8 +1482,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
dst_ptr[2]
=
(
r0.s2
*
r0.s3
+
HALF_MAX
)
/
MAX_NUM
;
dst_ptr[3]
=
r0.s3
;
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
uchar8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -1505,7 +1501,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
vstore8
((
uchar8
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1
)
,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
const
uchar16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -1524,7 +1520,7 @@ __kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
vstore16
((
uchar16
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1,
r.s2,
g.s2,
b.s2,
v3.s2,
r.s3,
g.s3,
b.s3,
v3.s3
)
,
0
,
dst_ptr
)
;
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
@@ -1544,7 +1540,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
global
DATA_TYPE
*src_ptr
=
(
global
DATA_TYPE
*
)(
src
+
src_idx
)
;
global
DATA_TYPE
*dst_ptr
=
(
global
DATA_TYPE
*
)(
dst
+
dst_idx
)
;
if
(
1
==
pixels_per_work_item
)
#
if
(
1
==
pixels_per_work_item
)
{
const
uchar4
r0
=
vload4
(
0
,
src_ptr
)
;
const
uchar
v3_half
=
r0.s3
/
2
;
...
...
@@ -1555,8 +1551,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore4
((
uchar4
)(
r,
g,
b,
r0.s3
)
,
0
,
dst_ptr
)
;
}
#
ifdef
INTEL_DEVICE
else
if
(
2
==
pixels_per_work_item
)
#
elif
(
2
==
pixels_per_work_item
)
{
const
uchar8
r0
=
vload8
(
0
,
src_ptr
)
;
...
...
@@ -1576,7 +1571,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore8
((
uchar8
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1
)
,
0
,
dst_ptr
)
;
}
else
if
(
4
==
pixels_per_work_item
)
#
el
if
(
4
==
pixels_per_work_item
)
{
const
uchar16
r0
=
vload16
(
0
,
src_ptr
)
;
...
...
@@ -1597,7 +1592,7 @@ __kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step,
vstore16
((
uchar16
)(
r.s0,
g.s0,
b.s0,
v3.s0,
r.s1,
g.s1,
b.s1,
v3.s1,
r.s2,
g.s2,
b.s2,
v3.s2,
r.s3,
g.s3,
b.s3,
v3.s3
)
,
0
,
dst_ptr
)
;
}
#
endif
//
INTEL_DEVICE
#
endif
//
pixels_per_work_item
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment