Commit 485d36d3 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

changed InputArray's enumeration constant for UMat, since it may potentially…

changed InputArray's enumeration constant for UMat, since it may potentially conflict with existing OpenCL module. refactored Kernel's setArg API (now UMat is passed to a kernel as a structure, as Alexander A suggested). removed Kernel's cleanup callback from the external API; now each kernel keeps track of used matrices and they are dereferenced after it's complete.
parent 6416c330
...@@ -83,9 +83,9 @@ public: ...@@ -83,9 +83,9 @@ public:
CUDA_MEM = 8 << KIND_SHIFT, CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT, GPU_MAT = 9 << KIND_SHIFT,
OCL_MAT =10 << KIND_SHIFT, OCL_MAT =10 << KIND_SHIFT,
UMAT =OCL_MAT, UMAT =11 << KIND_SHIFT,
STD_VECTOR_UMAT =11 << KIND_SHIFT, STD_VECTOR_UMAT =12 << KIND_SHIFT,
UEXPR =12 << KIND_SHIFT UEXPR =13 << KIND_SHIFT
}; };
_InputArray(); _InputArray();
......
This diff is collapsed.
...@@ -1210,6 +1210,46 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event)) ...@@ -1210,6 +1210,46 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
namespace cv { namespace ocl { namespace cv { namespace ocl {
struct UMat2D
{
UMat2D(const UMat& m, int accessFlags)
{
CV_Assert(m.dims == 2);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step;
rows = m.rows;
cols = m.cols;
}
cl_mem data;
size_t offset;
size_t step;
int rows;
int cols;
};
struct UMat3D
{
UMat3D(const UMat& m, int accessFlags)
{
CV_Assert(m.dims == 3);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step.p[1];
slicestep = m.step.p[0];
slices = m.size.p[0];
rows = m.size.p[1];
cols = m.size.p[2];
}
cl_mem data;
size_t offset;
size_t slicestep;
size_t step;
int slices;
int rows;
int cols;
};
// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 ) static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
{ {
...@@ -1266,6 +1306,15 @@ bool useOpenCL() ...@@ -1266,6 +1306,15 @@ bool useOpenCL()
return data->useOpenCL > 0; return data->useOpenCL > 0;
} }
void setUseOpenCL(bool flag)
{
if( haveOpenCL() )
{
TLSData* data = TLSData::get();
data->useOpenCL = flag ? 1 : 0;
}
}
void finish() void finish()
{ {
Queue::getDefault().finish(); Queue::getDefault().finish();
...@@ -1980,10 +2029,33 @@ struct Kernel::Impl ...@@ -1980,10 +2029,33 @@ struct Kernel::Impl
cl_int retval = 0; cl_int retval = 0;
handle = ph != 0 ? handle = ph != 0 ?
clCreateKernel(ph, kname, &retval) : 0; clCreateKernel(ph, kname, &retval) : 0;
for( int i = 0; i < MAX_ARRS; i++ )
u[i] = 0;
} }
void cleanupUMats()
{
for( int i = 0; i < MAX_ARRS; i++ )
if( u[i] )
{
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
u[i]->currAllocator->deallocate(u[i]);
u[i] = 0;
}
nu = 0;
}
void addUMat(const UMat& m)
{
CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
u[nu] = m.u;
CV_XADD(&m.u->urefcount, 1);
nu++;
}
void finit() void finit()
{ {
if(!f.empty()) f->operator()(); cleanupUMats();
if(e) { clReleaseEvent(e); e = 0; } if(e) { clReleaseEvent(e); e = 0; }
release(); release();
} }
...@@ -1998,7 +2070,9 @@ struct Kernel::Impl ...@@ -1998,7 +2070,9 @@ struct Kernel::Impl
cl_kernel handle; cl_kernel handle;
cl_event e; cl_event e;
Ptr<Kernel::Callback> f; enum { MAX_ARRS = 16 };
UMatData* u[MAX_ARRS];
int nu;
}; };
}} }}
...@@ -2086,51 +2160,48 @@ void* Kernel::ptr() const ...@@ -2086,51 +2160,48 @@ void* Kernel::ptr() const
return p ? p->handle : 0; return p ? p->handle : 0;
} }
int Kernel::set(int i, const void* value, size_t sz) void Kernel::set(int i, const void* value, size_t sz)
{ {
CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 ); CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
return i+1; if( i == 0 )
p->cleanupUMats();
} }
int Kernel::set(int i, const UMat& m) void Kernel::set(int i, const UMat& m)
{ {
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
} }
int Kernel::set(int i, const KernelArg& arg) void Kernel::set(int i, const KernelArg& arg)
{ {
CV_Assert( p && p->handle ); CV_Assert( p && p->handle );
if( i == 0 )
p->cleanupUMats();
if( arg.m ) if( arg.m )
{ {
int dims = arg.m->dims; int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0)); if( arg.m->dims <= 2 )
clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
if( dims <= 2 )
{ {
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]); UMat2D u2d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows); clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
return i + 5;
} }
else else
{ {
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t)*(dims-1), &arg.m->step.p[0]); UMat3D u3d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(cl_int)*dims, &arg.m->size.p[0]); clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
return i + 4;
} }
p->addUMat(*arg.m);
} }
else else
{ {
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj); clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
return i+1;
} }
} }
void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[], void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q) bool sync, const Queue& q)
{ {
CV_Assert(p && p->handle && p->e == 0); CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q); cl_command_queue qq = getQueue(q);
...@@ -2140,18 +2211,16 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz ...@@ -2140,18 +2211,16 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz
if( sync ) if( sync )
{ {
clFinish(qq); clFinish(qq);
if( !cleanupCallback.empty() ) p->cleanupUMats();
cleanupCallback->operator()();
} }
else else
{ {
p->f = cleanupCallback;
p->addref(); p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
} }
} }
void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q) void Kernel::runTask(bool sync, const Queue& q)
{ {
CV_Assert(p && p->handle && p->e == 0); CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q); cl_command_queue qq = getQueue(q);
...@@ -2159,12 +2228,10 @@ void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queu ...@@ -2159,12 +2228,10 @@ void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queu
if( sync ) if( sync )
{ {
clFinish(qq); clFinish(qq);
if( !cleanupCallback.empty() ) p->cleanupUMats();
cleanupCallback->operator()();
} }
else else
{ {
p->f = cleanupCallback;
p->addref(); p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p); clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment