Commit 485d36d3 authored by Vadim Pisarevsky

Changed InputArray's enumeration constant for UMat, since it may potentially conflict with the existing OpenCL module. Refactored Kernel's setArg API (a UMat is now passed to a kernel as a structure, as Alexander A suggested). Removed Kernel's cleanup callback from the external API; each kernel now keeps track of the UMats it uses and dereferences them after the kernel completes.
parent 6416c330
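
The hunks below show only the implementation side of the new API. As a rough caller-side sketch (illustrative only: the helper function, the pre-built kernel `k` and the header path are assumptions; set(), run(), KernelArg and Queue::getDefault() are taken from the diff), argument setup and execution now look like this, with no cleanup callback:

#include "opencv2/core.hpp"
#include "opencv2/core/ocl.hpp"   // assumed header location for cv::ocl::Kernel in this branch

// Hedged caller-side sketch, not part of this commit. Each UMat argument is
// marshalled as one packed struct (UMat2D/UMat3D) instead of a sequence of
// cl_mem/offset/step/rows/cols arguments, and the kernel itself keeps the
// UMats alive until it has finished running.
static void runMyKernel(cv::ocl::Kernel& k, const cv::UMat& src, cv::UMat& dst)
{
    k.set(0, cv::ocl::KernelArg(cv::ocl::KernelArg::READ_ONLY, (cv::UMat*)&src, 0, 0));
    k.set(1, cv::ocl::KernelArg(cv::ocl::KernelArg::WRITE_ONLY, &dst, 0, 0));

    size_t globalsize[] = { (size_t)src.cols, (size_t)src.rows };

    // No cleanup-callback parameter any more: run() releases the referenced UMats
    // after clFinish() in the sync path, or from the CL_COMPLETE event callback
    // in the async path.
    k.run(2, 0, globalsize, 0, /*sync=*/false, cv::ocl::Queue::getDefault());
}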
@@ -83,9 +83,9 @@ public:
         CUDA_MEM          = 8 << KIND_SHIFT,
         GPU_MAT           = 9 << KIND_SHIFT,
         OCL_MAT           =10 << KIND_SHIFT,
-        UMAT              =OCL_MAT,
-        STD_VECTOR_UMAT   =11 << KIND_SHIFT,
-        UEXPR             =12 << KIND_SHIFT
+        UMAT              =11 << KIND_SHIFT,
+        STD_VECTOR_UMAT   =12 << KIND_SHIFT,
+        UEXPR             =13 << KIND_SHIFT
     };
     _InputArray();
@@ -1210,6 +1210,46 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
 namespace cv { namespace ocl {
 
+struct UMat2D
+{
+    UMat2D(const UMat& m, int accessFlags)
+    {
+        CV_Assert(m.dims == 2);
+        data = (cl_mem)m.handle(accessFlags);
+        offset = m.offset;
+        step = m.step;
+        rows = m.rows;
+        cols = m.cols;
+    }
+    cl_mem data;
+    size_t offset;
+    size_t step;
+    int rows;
+    int cols;
+};
+
+struct UMat3D
+{
+    UMat3D(const UMat& m, int accessFlags)
+    {
+        CV_Assert(m.dims == 3);
+        data = (cl_mem)m.handle(accessFlags);
+        offset = m.offset;
+        step = m.step.p[1];
+        slicestep = m.step.p[0];
+        slices = m.size.p[0];
+        rows = m.size.p[1];
+        cols = m.size.p[2];
+    }
+    cl_mem data;
+    size_t offset;
+    size_t slicestep;
+    size_t step;
+    int slices;
+    int rows;
+    int cols;
+};
+
 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
 {
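
UMat2D/UMat3D follow Mat's byte-based conventions: offset, step and slicestep are byte quantities, rows/cols/slices are extents, and the whole struct is later handed to clSetKernelArg as a single value. A hedged illustration of how a consumer of these fields is expected to locate an element (the element size esz is not carried in the struct and is an assumption here):

#include <cstddef>

// Illustrative only; mirrors the field semantics above, not code from this commit.
inline size_t elemOffset2D(size_t offset, size_t step, int r, int c, size_t esz)
{
    // byte offset of element (r, c): start of the ROI + r rows + c elements
    return offset + (size_t)r * step + (size_t)c * esz;
}

inline size_t elemOffset3D(size_t offset, size_t slicestep, size_t step,
                           int s, int r, int c, size_t esz)
{
    // byte offset of element (s, r, c): slice stride first, then row, then column
    return offset + (size_t)s * slicestep + (size_t)r * step + (size_t)c * esz;
}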
@@ -1266,6 +1306,15 @@ bool useOpenCL()
     return data->useOpenCL > 0;
 }
 
+void setUseOpenCL(bool flag)
+{
+    if( haveOpenCL() )
+    {
+        TLSData* data = TLSData::get();
+        data->useOpenCL = flag ? 1 : 0;
+    }
+}
+
 void finish()
 {
     Queue::getDefault().finish();
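
useOpenCL() and setUseOpenCL() go through TLSData, so the switch is per-thread. A minimal usage sketch (illustrative; the helper name and header path are assumptions):

#include "opencv2/core/ocl.hpp"   // assumed header location for cv::ocl in this branch

// Turn the OpenCL path off for the calling thread only; other threads keep
// their own thread-local flag.
static void disableOpenCLForThisThread()
{
    if (cv::ocl::haveOpenCL() && cv::ocl::useOpenCL())
        cv::ocl::setUseOpenCL(false);
}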
@@ -1980,10 +2029,33 @@ struct Kernel::Impl
         cl_int retval = 0;
         handle = ph != 0 ?
             clCreateKernel(ph, kname, &retval) : 0;
+        for( int i = 0; i < MAX_ARRS; i++ )
+            u[i] = 0;
     }
 
+    void cleanupUMats()
+    {
+        for( int i = 0; i < MAX_ARRS; i++ )
+            if( u[i] )
+            {
+                if( CV_XADD(&u[i]->urefcount, -1) == 1 )
+                    u[i]->currAllocator->deallocate(u[i]);
+                u[i] = 0;
+            }
+        nu = 0;
+    }
+
+    void addUMat(const UMat& m)
+    {
+        CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
+        u[nu] = m.u;
+        CV_XADD(&m.u->urefcount, 1);
+        nu++;
+    }
+
     void finit()
     {
         if(!f.empty()) f->operator()();
+        cleanupUMats();
         if(e) { clReleaseEvent(e); e = 0; }
         release();
     }
@@ -1998,7 +2070,9 @@ struct Kernel::Impl
     cl_kernel handle;
     cl_event e;
     Ptr<Kernel::Callback> f;
+    enum { MAX_ARRS = 16 };
+    UMatData* u[MAX_ARRS];
+    int nu;
 };
 
 }}
@@ -2086,51 +2160,48 @@ void* Kernel::ptr() const
     return p ? p->handle : 0;
 }
 
-int Kernel::set(int i, const void* value, size_t sz)
+void Kernel::set(int i, const void* value, size_t sz)
 {
     CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
-    return i+1;
+    if( i == 0 )
+        p->cleanupUMats();
 }
 
-int Kernel::set(int i, const UMat& m)
+void Kernel::set(int i, const UMat& m)
 {
-    return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
+    set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
 }
 
-int Kernel::set(int i, const KernelArg& arg)
+void Kernel::set(int i, const KernelArg& arg)
 {
     CV_Assert( p && p->handle );
+    if( i == 0 )
+        p->cleanupUMats();
     if( arg.m )
     {
-        int dims = arg.m->dims;
-        void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
-                                ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0));
-        clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
-        clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
-        if( dims <= 2 )
+        int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
+                          ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
+        if( arg.m->dims <= 2 )
         {
-            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]);
-            clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows);
-            clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
-            return i + 5;
+            UMat2D u2d(*arg.m, accessFlags);
+            clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
         }
         else
         {
-            clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t)*(dims-1), &arg.m->step.p[0]);
-            clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(cl_int)*dims, &arg.m->size.p[0]);
-            return i + 4;
+            UMat3D u3d(*arg.m, accessFlags);
+            clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
        }
+        p->addUMat(*arg.m);
     }
     else
     {
         clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
-        return i+1;
     }
 }
 
 void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
-                 bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
+                 bool sync, const Queue& q)
 {
     CV_Assert(p && p->handle && p->e == 0);
     cl_command_queue qq = getQueue(q);
@@ -2140,18 +2211,16 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
     if( sync )
     {
         clFinish(qq);
-        if( !cleanupCallback.empty() )
-            cleanupCallback->operator()();
+        p->cleanupUMats();
     }
     else
     {
-        p->f = cleanupCallback;
         p->addref();
         clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
     }
 }
 
-void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
+void Kernel::runTask(bool sync, const Queue& q)
 {
     CV_Assert(p && p->handle && p->e == 0);
     cl_command_queue qq = getQueue(q);
@@ -2159,12 +2228,10 @@ void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
     if( sync )
     {
         clFinish(qq);
-        if( !cleanupCallback.empty() )
-            cleanupCallback->operator()();
+        p->cleanupUMats();
     }
     else
    {
-        p->f = cleanupCallback;
         p->addref();
         clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
     }
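
Both asynchronous branches register oclCleanupCallback on the kernel's completion event, but that callback itself lies outside the hunks shown here. A plausible sketch, under the assumption that it simply forwards to Impl::finit() (which now runs cleanupUMats(), releases the event and drops the reference taken by p->addref()):

// Hedged sketch; the actual definition is elsewhere in ocl.cpp and may differ.
static void CL_CALLBACK oclCleanupCallback(cl_event /*e*/, cl_int /*status*/, void* p)
{
    ((cv::ocl::Kernel::Impl*)p)->finit();
}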