Commit 0a624ee6 authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #1901 from vpisarev:ocl_svm3

parents aed69cc4 cd97d789
......@@ -279,21 +279,21 @@ public:
//virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
// uchar*& datastart, uchar*& data, size_t* step) = 0;
//virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
virtual UMatData* allocate(int dims, const int* sizes,
int type, size_t* step) const = 0;
virtual UMatData* allocate(int dims, const int* sizes, int type,
void* data, size_t* step, int flags) const = 0;
virtual bool allocate(UMatData* data, int accessflags) const = 0;
virtual void deallocate(UMatData* data) const = 0;
virtual void map(UMatData* data, int accessflags) const = 0;
virtual void unmap(UMatData* data) const = 0;
virtual void map(UMatData* data, int accessflags) const;
virtual void unmap(UMatData* data) const;
virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dststep[]) const = 0;
const size_t dststep[]) const;
virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
const size_t dstofs[], const size_t dststep[],
const size_t srcstep[]) const = 0;
const size_t srcstep[]) const;
virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dstofs[], const size_t dststep[], bool sync) const = 0;
const size_t dstofs[], const size_t dststep[], bool sync) const;
};
......@@ -335,8 +335,10 @@ protected:
struct CV_EXPORTS UMatData
{
enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24 };
DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
USER_ALLOCATED=32 };
UMatData(const MatAllocator* allocator);
~UMatData();
// provide atomic access to the structure
void lock();
......
......@@ -3131,14 +3131,6 @@ cols(1), allocator(0), u(0), offset(0), size(&rows)
}
inline
UMat::~UMat()
{
release();
if( step.p != step.buf )
fastFree(step.p);
}
inline
UMat& UMat::operator = (const UMat& m)
{
......
......@@ -48,146 +48,161 @@
namespace cv {
class StdMatAllocator : public MatAllocator
void MatAllocator::map(UMatData*, int) const
{
public:
UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
{
size_t total = CV_ELEM_SIZE(type);
for( int i = dims-1; i >= 0; i-- )
{
if( step )
step[i] = total;
total *= sizes[i];
}
uchar* data = (uchar*)fastMalloc(total);
UMatData* u = new UMatData(this);
u->data = u->origdata = data;
u->size = total;
u->refcount = 1;
}
return u;
}
void MatAllocator::unmap(UMatData* u) const
{
if(u->urefcount == 0 && u->refcount == 0)
deallocate(u);
}
bool allocate(UMatData* u, int accessFlags) const
void MatAllocator::download(UMatData* u, void* dstptr,
int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dststep[]) const
{
if(!u)
return;
int isz[CV_MAX_DIM];
uchar* srcptr = u->data;
for( int i = 0; i < dims; i++ )
{
if(!u) return false;
if(u->handle != 0)
return true;
return UMat::getStdAllocator()->allocate(u, accessFlags);
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
isz[i] = (int)sz[i];
}
void deallocate(UMatData* u) const
{
if(u)
{
fastFree(u->origdata);
delete u;
}
}
Mat src(dims, isz, CV_8U, srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
void map(UMatData*, int) const
{
}
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
void unmap(UMatData* u) const
void MatAllocator::upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
const size_t dstofs[], const size_t dststep[],
const size_t srcstep[]) const
{
if(!u)
return;
int isz[CV_MAX_DIM];
uchar* dstptr = u->data;
for( int i = 0; i < dims; i++ )
{
if(u->urefcount == 0)
deallocate(u);
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
isz[i] = (int)sz[i];
}
void download(UMatData* u, void* dstptr,
int dims, const size_t sz[],
Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dststep[]) const
const size_t dstofs[], const size_t dststep[], bool /*sync*/) const
{
if(!usrc || !udst)
return;
int isz[CV_MAX_DIM];
uchar* srcptr = usrc->data;
uchar* dstptr = udst->data;
for( int i = 0; i < dims; i++ )
{
if(!u)
return;
int isz[CV_MAX_DIM];
uchar* srcptr = u->data;
for( int i = 0; i < dims; i++ )
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
isz[i] = (int)sz[i];
}
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
isz[i] = (int)sz[i];
}
Mat src(dims, isz, CV_8U, srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
Mat src(dims, isz, CV_8U, srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
const size_t dstofs[], const size_t dststep[],
const size_t srcstep[]) const
class StdMatAllocator : public MatAllocator
{
public:
UMatData* allocate(int dims, const int* sizes, int type,
void* data0, size_t* step, int /*flags*/) const
{
if(!u)
return;
int isz[CV_MAX_DIM];
uchar* dstptr = u->data;
for( int i = 0; i < dims; i++ )
size_t total = CV_ELEM_SIZE(type);
for( int i = dims-1; i >= 0; i-- )
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
isz[i] = (int)sz[i];
if( step )
{
if( data0 && step[i] != CV_AUTOSTEP )
{
CV_Assert(total <= step[i]);
total = step[i];
}
else
step[i] = total;
}
total *= sizes[i];
}
uchar* data = data0 ? (uchar*)data0 : (uchar*)fastMalloc(total);
UMatData* u = new UMatData(this);
u->data = u->origdata = data;
u->size = total;
u->refcount = data0 == 0;
if(data0)
u->flags |= UMatData::USER_ALLOCATED;
Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
return u;
}
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
bool allocate(UMatData* u, int /*accessFlags*/) const
{
if(!u) return false;
CV_XADD(&u->urefcount, 1);
return true;
}
void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dstofs[], const size_t dststep[], bool) const
void deallocate(UMatData* u) const
{
if(!usrc || !udst)
return;
int isz[CV_MAX_DIM];
uchar* srcptr = usrc->data;
uchar* dstptr = udst->data;
for( int i = 0; i < dims; i++ )
if(u && u->refcount == 0)
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
isz[i] = (int)sz[i];
if( !(u->flags & UMatData::USER_ALLOCATED) )
{
fastFree(u->origdata);
u->origdata = 0;
}
delete u;
}
Mat src(dims, isz, CV_8U, srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
};
......@@ -364,13 +379,13 @@ void Mat::create(int d, const int* _sizes, int _type)
a = a0;
try
{
u = a->allocate(dims, size, _type, step.p);
u = a->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
catch(...)
{
if(a != a0)
u = a0->allocate(dims, size, _type, step.p);
u = a0->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
......
This diff is collapsed.
......@@ -62,6 +62,17 @@ UMatData::UMatData(const MatAllocator* allocator)
userdata = 0;
}
UMatData::~UMatData()
{
prevAllocator = currAllocator = 0;
urefcount = refcount = 0;
data = origdata = 0;
size = 0;
flags = 0;
handle = 0;
userdata = 0;
}
void UMatData::lock()
{
umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
......@@ -75,7 +86,9 @@ void UMatData::unlock()
MatAllocator* UMat::getStdAllocator()
{
return ocl::getOpenCLAllocator();
if( ocl::haveOpenCL() )
return ocl::getOpenCLAllocator();
return Mat::getStdAllocator();
}
void swap( UMat& a, UMat& b )
......@@ -195,13 +208,21 @@ static void finalizeHdr(UMat& m)
UMat Mat::getUMat(int accessFlags) const
{
UMat hdr;
if(!u)
if(!data)
return hdr;
UMat::getStdAllocator()->allocate(u, accessFlags);
UMatData* temp_u = u;
if(!temp_u)
{
MatAllocator *a = allocator, *a0 = getStdAllocator();
if(!a)
a = a0;
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags);
}
UMat::getStdAllocator()->allocate(temp_u, accessFlags);
hdr.flags = flags;
setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr);
hdr.u = u;
hdr.u = temp_u;
hdr.offset = data - datastart;
return hdr;
}
......@@ -237,13 +258,13 @@ void UMat::create(int d, const int* _sizes, int _type)
a = a0;
try
{
u = a->allocate(dims, size, _type, step.p);
u = a->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
catch(...)
{
if(a != a0)
u = a0->allocate(dims, size, _type, step.p);
u = a0->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
......@@ -262,6 +283,14 @@ void UMat::copySize(const UMat& m)
}
}
UMat::~UMat()
{
release();
if( step.p != step.buf )
fastFree(step.p);
}
void UMat::deallocate()
{
u->currAllocator->deallocate(u);
......@@ -546,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
{
if(!u)
return Mat();
u->currAllocator->map(u, accessFlags);
u->currAllocator->map(u, accessFlags | ACCESS_READ);
CV_Assert(u->data != 0);
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
hdr.u = u;
......@@ -568,12 +597,6 @@ void* UMat::handle(int /*accessFlags*/) const
CV_Assert(u->refcount == 0);
u->currAllocator->unmap(u);
}
/*else if( u->refcount > 0 && (accessFlags & ACCESS_WRITE) )
{
CV_Error(Error::StsError,
"it's not allowed to access UMat handle for writing "
"while it's mapped; call Mat::release() first for all its mappings");
}*/
return u->handle;
}
......
......@@ -200,3 +200,40 @@ void CV_UMatTest::run( int /* start_from */)
}
TEST(Core_UMat, base) { CV_UMatTest test; test.safe_run(); }
TEST(Core_UMat, getUMat)
{
{
int a[3] = { 1, 2, 3 };
Mat m = Mat(1, 1, CV_32SC3, a);
UMat u = m.getUMat(ACCESS_READ);
EXPECT_NE((void*)NULL, u.u);
}
{
Mat m(10, 10, CV_8UC1), ref;
for (int y = 0; y < m.rows; ++y)
{
uchar * const ptr = m.ptr<uchar>(y);
for (int x = 0; x < m.cols; ++x)
ptr[x] = (uchar)(x + y * 2);
}
ref = m.clone();
Rect r(1, 1, 8, 8);
ref(r).setTo(17);
{
UMat um = m(r).getUMat(ACCESS_WRITE);
um.setTo(17);
}
double err = norm(m, ref, NORM_INF);
if(err > 0)
{
std::cout << "m: " << m << std::endl;
std::cout << "ref: " << ref << std::endl;
}
EXPECT_EQ(err, 0.);
}
}
......@@ -2695,6 +2695,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
UMat src = _src.getUMat(), dst;
Size sz = src.size(), dstSz = sz;
int scn = src.channels(), depth = src.depth(), bidx;
int dims = 2, stripeSize = 1;
size_t globalsize[] = { src.cols, src.rows };
ocl::Kernel k;
......@@ -2765,7 +2766,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
dcn = 1;
k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d", depth, scn, bidx));
format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d",
depth, scn, bidx, stripeSize));
globalsize[0] = (src.cols + stripeSize-1)/stripeSize;
break;
}
case COLOR_GRAY2BGR:
......@@ -3027,7 +3030,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
dst = _dst.getUMat();
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
ok = k.run(2, globalsize, 0, false);
ok = k.run(dims, globalsize, 0, false);
}
return ok;
}
......
......@@ -75,6 +75,10 @@
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
#ifndef STRIPE_SIZE
#define STRIPE_SIZE 1
#endif
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
enum
......@@ -105,6 +109,7 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols)
{
#if 1
const int x = get_global_id(0);
const int y = get_global_id(1);
......@@ -118,6 +123,26 @@ __kernel void RGB2Gray(__global const uchar* srcptr, int srcstep, int srcoffset,
dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift);
#endif
}
#else
const int x_min = get_global_id(0)*STRIPE_SIZE;
const int x_max = min(x_min + STRIPE_SIZE, cols);
const int y = get_global_id(1);
if( y < rows )
{
__global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr +
mad24(y, srcstep, srcoffset)) + x_min*scn;
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset));
int x;
for( x = x_min; x < x_max; x++, src += scn )
#ifdef DEPTH_5
dst[x] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f;
#else
dst[x] = (DATA_TYPE)(mad24(src[bidx], B2Y, mad24(src[1], G2Y,
mad24(src[(bidx^2)], R2Y, 1 << (yuv_shift-1)))) >> yuv_shift);
#endif
}
#endif
}
__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
......
......@@ -41,6 +41,7 @@
#include "test_precomp.hpp"
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem
#include "opencv2/core/ocl.hpp"
TEST(TestAPI, openCLExecuteKernelInterop)
{
......@@ -78,3 +79,51 @@ TEST(TestAPI, openCLExecuteKernelInterop)
EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
}
TEST(OCL_TestTAPI, performance)
{
cv::RNG rng;
cv::Mat src(1280,768,CV_8UC4), dst;
rng.fill(src, RNG::UNIFORM, 0, 255);
cv::UMat usrc, udst;
src.copyTo(usrc);
cv::ocl::oclMat osrc(src);
cv::ocl::oclMat odst;
int cvtcode = cv::COLOR_BGR2GRAY;
int i, niters = 10;
double t;
cv::ocl::cvtColor(osrc, odst, cvtcode);
cv::ocl::finish();
t = (double)cv::getTickCount();
for(i = 0; i < niters; i++)
{
cv::ocl::cvtColor(osrc, odst, cvtcode);
}
cv::ocl::finish();
t = (double)cv::getTickCount() - t;
printf("ocl exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
cv::cvtColor(usrc, udst, cvtcode);
cv::ocl::finish2();
t = (double)cv::getTickCount();
for(i = 0; i < niters; i++)
{
cv::cvtColor(usrc, udst, cvtcode);
}
cv::ocl::finish2();
t = (double)cv::getTickCount() - t;
printf("t-api exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
cv::cvtColor(src, dst, cvtcode);
t = (double)cv::getTickCount();
for(i = 0; i < niters; i++)
{
cv::cvtColor(src, dst, cvtcode);
}
t = (double)cv::getTickCount() - t;
printf("cpu exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
}
......@@ -195,8 +195,14 @@ public:
return u;
}
UMatData* allocate(int dims0, const int* sizes, int type, size_t* step) const
UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags) const
{
if( data != 0 )
{
CV_Error(Error::StsAssert, "The data should normally be NULL!");
// probably this is safe to do in such extreme case
return stdAllocator->allocate(dims0, sizes, type, data, step, flags);
}
PyEnsureGIL gil;
int depth = CV_MAT_DEPTH(type);
......@@ -229,43 +235,11 @@ public:
{
PyEnsureGIL gil;
PyObject* o = (PyObject*)u->userdata;
Py_DECREF(o);
Py_XDECREF(o);
delete u;
}
}
void map(UMatData*, int) const
{
}
void unmap(UMatData* u) const
{
if(u->urefcount == 0)
deallocate(u);
}
void download(UMatData* u, void* dstptr,
int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dststep[]) const
{
stdAllocator->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
}
void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
const size_t dstofs[], const size_t dststep[],
const size_t srcstep[]) const
{
stdAllocator->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
}
void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
const size_t dstofs[], const size_t dststep[], bool sync) const
{
stdAllocator->copy(usrc, udst, dims, sz, srcofs, srcstep, dstofs, dststep, sync);
}
const MatAllocator* stdAllocator;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment