Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
adca219f
Commit
adca219f
authored
Oct 02, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed convertC3C4 and convertC4C3 functions in case cols == 1
parent
88419f89
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
59 additions
and
136 deletions
+59
-136
matrix_operations.cpp
modules/ocl/src/matrix_operations.cpp
+30
-117
convertC3C4.cl
modules/ocl/src/opencl/convertC3C4.cl
+28
-18
test_matrix_operation.cpp
modules/ocl/test/test_matrix_operation.cpp
+1
-1
No files found.
modules/ocl/src/matrix_operations.cpp
View file @
adca219f
...
...
@@ -58,12 +58,13 @@ using namespace std;
//////////////////////////////// oclMat ////////////////////////////////
////////////////////////////////////////////////////////////////////////
//helper routines
//
helper routines
namespace
cv
{
namespace
ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
/////////////////////////// OpenCL kernel strings ///////////////////////////
extern
const
char
*
operator_copyToM
;
extern
const
char
*
operator_convertTo
;
extern
const
char
*
operator_setTo
;
...
...
@@ -74,42 +75,18 @@ namespace cv
}
}
////////////////////////////////////////////////////////////////////////
// convert_C3C4
static
void
convert_C3C4
(
const
cl_mem
&
src
,
oclMat
&
dst
)
{
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
oclchannels
();
int
pixel_end
=
dst
.
wholecols
*
dst
.
wholerows
-
1
;
Context
*
clCxt
=
dst
.
clCxt
;
string
kernelName
=
"convertC3C4"
;
char
compile_option
[
32
];
switch
(
dst
.
depth
())
{
case
0
:
sprintf
(
compile_option
,
"-D GENTYPE4=uchar4"
);
break
;
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE4=char4"
);
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE4=ushort4"
);
break
;
case
3
:
sprintf
(
compile_option
,
"-D GENTYPE4=short4"
);
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE4=int4"
);
break
;
case
5
:
sprintf
(
compile_option
,
"-D GENTYPE4=float4"
);
break
;
case
6
:
sprintf
(
compile_option
,
"-D GENTYPE4=double4"
);
break
;
default
:
CV_Error
(
CV_StsUnsupportedFormat
,
"unknown depth"
);
}
int
pixel_end
=
dst
.
wholecols
*
dst
.
wholerows
-
1
;
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
oclchannels
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
std
::
string
buildOptions
=
format
(
"-D GENTYPE4=%s4"
,
typeMap
[
dst
.
depth
()]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
...
...
@@ -118,46 +95,24 @@ static void convert_C3C4(const cl_mem &src, oclMat &dst)
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dstStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
pixel_end
));
size_t
globalThreads
[
3
]
=
{
((
dst
.
wholecols
*
dst
.
wholerows
+
3
)
/
4
+
255
)
/
256
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
dst
.
wholecols
*
dst
.
wholerows
,
4
),
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
"convertC3C4"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
static
void
convert_C4C3
(
const
oclMat
&
src
,
cl_mem
&
dst
)
{
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
oclchannels
();
int
pixel_end
=
src
.
wholecols
*
src
.
wholerows
-
1
;
Context
*
clCxt
=
src
.
clCxt
;
string
kernelName
=
"convertC4C3"
;
char
compile_option
[
32
];
switch
(
src
.
depth
())
{
case
0
:
sprintf
(
compile_option
,
"-D GENTYPE4=uchar4"
);
break
;
case
1
:
sprintf
(
compile_option
,
"-D GENTYPE4=char4"
);
break
;
case
2
:
sprintf
(
compile_option
,
"-D GENTYPE4=ushort4"
);
break
;
case
3
:
sprintf
(
compile_option
,
"-D GENTYPE4=short4"
);
break
;
case
4
:
sprintf
(
compile_option
,
"-D GENTYPE4=int4"
);
break
;
case
5
:
sprintf
(
compile_option
,
"-D GENTYPE4=float4"
);
break
;
case
6
:
sprintf
(
compile_option
,
"-D GENTYPE4=double4"
);
break
;
default
:
CV_Error
(
CV_StsUnsupportedFormat
,
"unknown depth"
);
}
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
std
::
string
buildOptions
=
format
(
"-D GENTYPE4=%s4"
,
typeMap
[
src
.
depth
()]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
...
...
@@ -167,10 +122,10 @@ static void convert_C4C3(const oclMat &src, cl_mem &dst)
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
srcStep_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
pixel_end
));
size_t
globalThreads
[
3
]
=
{
((
src
.
wholecols
*
src
.
wholerows
+
3
)
/
4
+
255
)
/
256
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
src
.
wholecols
*
src
.
wholerows
,
4
)
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
compile_option
);
openCLExecuteKernel
(
clCxt
,
&
convertC3C4
,
"convertC4C3"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
()
);
}
void
cv
::
ocl
::
oclMat
::
upload
(
const
Mat
&
m
)
...
...
@@ -179,14 +134,10 @@ void cv::ocl::oclMat::upload(const Mat &m)
Size
wholeSize
;
Point
ofs
;
m
.
locateROI
(
wholeSize
,
ofs
);
// int type = m.type();
// if(m.oclchannels() == 3)
//{
// type = CV_MAKETYPE(m.depth(), 4);
//}
create
(
wholeSize
,
m
.
type
());
if
(
m
.
channels
()
==
3
)
if
(
m
.
channels
()
==
3
)
{
int
pitch
=
wholeSize
.
width
*
3
*
m
.
elemSize1
();
int
tail_padding
=
m
.
elemSize1
()
*
3072
;
...
...
@@ -197,35 +148,15 @@ void cv::ocl::oclMat::upload(const Mat &m)
openCLMemcpy2D
(
clCxt
,
temp
,
pitch
,
m
.
datastart
,
m
.
step
,
wholeSize
.
width
*
m
.
elemSize
(),
wholeSize
.
height
,
clMemcpyHostToDevice
,
3
);
convert_C3C4
(
temp
,
*
this
);
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
// 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
//for(int i=0;i<wholeSize.height;i++)
//{
// int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
// for(int j=0;j<wholeSize.width;j++)
// {
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
// }
//}
//delete []cputemp;
//delete []cpudata;
openCLSafeCall
(
clReleaseMemObject
(
temp
));
}
else
{
openCLMemcpy2D
(
clCxt
,
data
,
step
,
m
.
datastart
,
m
.
step
,
wholeSize
.
width
*
elemSize
(),
wholeSize
.
height
,
clMemcpyHostToDevice
);
}
rows
=
m
.
rows
;
cols
=
m
.
cols
;
offset
=
ofs
.
y
*
step
+
ofs
.
x
*
elemSize
();
//download_channels = m.channels();
}
cv
::
ocl
::
oclMat
::
operator
cv
::
_InputArray
()
...
...
@@ -259,11 +190,6 @@ cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
void
cv
::
ocl
::
oclMat
::
download
(
cv
::
Mat
&
m
)
const
{
CV_DbgAssert
(
!
this
->
empty
());
// int t = type();
// if(download_channels == 3)
//{
// t = CV_MAKETYPE(depth(), 3);
//}
m
.
create
(
wholerows
,
wholecols
,
type
());
if
(
m
.
channels
()
==
3
)
...
...
@@ -277,30 +203,14 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
convert_C4C3
(
*
this
,
temp
);
openCLMemcpy2D
(
clCxt
,
m
.
data
,
m
.
step
,
temp
,
pitch
,
wholecols
*
m
.
elemSize
(),
wholerows
,
clMemcpyDeviceToHost
,
3
);
//int* cputemp=new int[wholecols*wholerows * 3];
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
// 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL));
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
//for(int i=0;i<wholerows;i++)
//{
// int *a = cputemp+i*wholecols * 3,*b = cpudata + i*this->step/sizeof(int);
// for(int j=0;j<wholecols;j++)
// {
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
// }
//}
//delete []cputemp;
//delete []cpudata;
openCLSafeCall
(
clReleaseMemObject
(
temp
));
}
else
{
openCLMemcpy2D
(
clCxt
,
m
.
data
,
m
.
step
,
data
,
step
,
wholecols
*
elemSize
(),
wholerows
,
clMemcpyDeviceToHost
);
}
Size
wholesize
;
Point
ofs
;
locateROI
(
wholesize
,
ofs
);
...
...
@@ -323,6 +233,7 @@ static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask
{
"uchar3"
,
"char3"
,
"ushort3"
,
"short3"
,
"int3"
,
"float3"
,
"double3"
},
{
"uchar4"
,
"char4"
,
"ushort4"
,
"short4"
,
"int4"
,
"float4"
,
"double4"
}
};
char
compile_option
[
32
];
sprintf
(
compile_option
,
"-D GENTYPE=%s"
,
string_types
[
dst
.
oclchannels
()
-
1
][
dst
.
depth
()].
c_str
());
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
...
...
@@ -366,6 +277,7 @@ void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
static
void
convert_run
(
const
oclMat
&
src
,
oclMat
&
dst
,
double
alpha
,
double
beta
)
{
string
kernelName
=
"convert_to"
;
...
...
@@ -404,6 +316,7 @@ static void convert_run(const oclMat &src, oclMat &dst, double alpha, double bet
openCLExecuteKernel
(
dst
.
clCxt
,
&
operator_convertTo
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
);
}
void
cv
::
ocl
::
oclMat
::
convertTo
(
oclMat
&
dst
,
int
rtype
,
double
alpha
,
double
beta
)
const
{
if
(
!
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
...
...
modules/ocl/src/opencl/convertC3C4.cl
View file @
adca219f
...
...
@@ -32,23 +32,23 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
;
//int
pixel_end
=
mul24
(
cols
-1
,
rows
-1
)
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
pixelid
=
clamp
(
pixelid,0,pixel_end
)
;
GENTYPE4
pixel0,
pixel1,
pixel2,
outpix0,outpix1,outpix2,outpix3
;
pixel0
=
src[pixelid.x]
;
pixel1
=
src[pixelid.y]
;
pixel2
=
src[pixelid.z]
;
outpix0
=
(
GENTYPE4
)(
pixel0.x,pixel0.y,pixel0.z,0
)
;
outpix1
=
(
GENTYPE4
)(
pixel0.w,pixel1.x,pixel1.y,0
)
;
outpix2
=
(
GENTYPE4
)(
pixel1.z,pixel1.w,pixel2.x,0
)
;
...
...
@@ -56,17 +56,19 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
int4
outy
=
(
id<<2
)
/cols
;
int4
outx
=
(
id<<2
)
%cols
;
outx.y++
;
outx.z+=2
;
outx.w+=3
;
outy
=
select
(
outy,outy+1,outx>=cols
)
;
outx
=
select
(
outx,outx-cols,outx>=cols
)
;
//outpix3
=
select
(
outpix3,
outpix0,
(
uchar4
)(
outy.w>=rows
))
;
//outpix2
=
select
(
outpix2,
outpix0,
(
uchar4
)(
outy.z>=rows
))
;
//outpix1
=
select
(
outpix1,
outpix0,
(
uchar4
)(
outy.y>=rows
))
;
//outx
=
select
(
outx,
(
int4
)
outx.x,outy>=rows
)
;
//outy
=
select
(
outy,
(
int4
)
outy.x,outy>=rows
)
;
outx
+=
(
int4
)(
0
,
1
,
2
,
3
)
;
outy
=
select
(
outy,
outy+1,
outx>=cols
)
;
outx
=
select
(
outx,
outx-cols,
outx>=cols
)
;
//
when
cols
==
1
outy
=
select
(
outy,
outy
+
1
,
outx
>=
cols
)
;
outx
=
select
(
outx,
outx-cols,
outx
>=
cols
)
;
outy
=
select
(
outy,
outy
+
1
,
outx
>=
cols
)
;
outx
=
select
(
outx,
outx-cols,
outx
>=
cols
)
;
int4
addr
=
mad24
(
outy,
(
int4
)
dstStep_in_piexl,outx
)
;
if
(
outx.w<cols
&&
outy.w<rows
)
{
dst[addr.x]
=
outpix0
;
...
...
@@ -91,20 +93,26 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
}
}
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep_in_pixel,int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int4
x4
=
(
int4
)(
x,x+1,x+2,x+3
)
;
int4
y4
=
select
((
int4
)
y,
(
int4
)(
y+1
)
,
x4>=
(
int4
)
cols
)
;
y4=clamp
(
y4,
(
int4
)
0
,
(
int4
)(
rows-1
))
;
x4
=
select
(
x4,x4-
(
int4
)
cols,x4>=
(
int4
)
cols
)
;
int4
addr
=
mad24
(
y4,
(
int4
)
srcStep_in_pixel,x4
)
;
//
when
cols
==
1
y4
=
select
(
y4,
y4
+
1
,
x4>=
(
int4
)
cols
)
;
x4
=
select
(
x4,
x4
-
(
int4
)
cols,x4>=
(
int4
)
cols
)
;
y4
=
select
(
y4,
y4
+
1
,
x4>=
(
int4
)
cols
)
;
x4
=
select
(
x4,
x4-
(
int4
)
cols,x4>=
(
int4
)
cols
)
;
y4=clamp
(
y4,
(
int4
)
0
,
(
int4
)(
rows-1
))
;
int4
addr
=
mad24
(
y4,
(
int4
)
srcStep_in_pixel,
x4
)
;
GENTYPE4
pixel0,pixel1,pixel2,pixel3,
outpixel1,
outpixel2
;
pixel0
=
src[addr.x]
;
pixel1
=
src[addr.y]
;
...
...
@@ -120,9 +128,11 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
outpixel2.y
=
pixel3.x
;
outpixel2.z
=
pixel3.y
;
outpixel2.w
=
pixel3.z
;
int4
outaddr
=
mul24
(
id>>2
,
3
)
;
outaddr.y++
;
outaddr.z+=2
;
if
(
outaddr.z
<=
pixel_end
)
{
dst[outaddr.x]
=
pixel0
;
...
...
modules/ocl/test/test_matrix_operation.cpp
View file @
adca219f
...
...
@@ -402,7 +402,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool)
int
type
=
CV_MAKE_TYPE
(
depth
,
3
);
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
src
=
randomMat
(
rng
,
randomSize
(
MIN_VALUE
,
MAX_VALUE
),
type
,
0
,
40
,
false
);
src
=
randomMat
(
rng
,
randomSize
(
1
,
MAX_VALUE
),
type
,
0
,
40
,
false
);
}
void
random_roi
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment