Commit 190d00ea authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #5317 from alalek:workaround

parents 31fbe8ca ad70ab40
...@@ -496,6 +496,8 @@ struct CV_EXPORTS UMatData ...@@ -496,6 +496,8 @@ struct CV_EXPORTS UMatData
void* handle; void* handle;
void* userdata; void* userdata;
int allocatorFlags_; int allocatorFlags_;
int mapcount;
UMatData* originalUMatData;
}; };
......
...@@ -208,17 +208,14 @@ public: ...@@ -208,17 +208,14 @@ public:
if(!u) if(!u)
return; return;
CV_Assert(u->urefcount >= 0); CV_Assert(u->urefcount == 0);
CV_Assert(u->refcount >= 0); CV_Assert(u->refcount == 0);
if(u->refcount == 0) if( !(u->flags & UMatData::USER_ALLOCATED) )
{ {
if( !(u->flags & UMatData::USER_ALLOCATED) ) fastFree(u->origdata);
{ u->origdata = 0;
fastFree(u->origdata);
u->origdata = 0;
}
delete u;
} }
delete u;
} }
}; };
......
...@@ -4453,8 +4453,11 @@ public: ...@@ -4453,8 +4453,11 @@ public:
#endif #endif
{ {
tempUMatFlags = UMatData::TEMP_UMAT; tempUMatFlags = UMatData::TEMP_UMAT;
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data
u->size, u->origdata, &retval); {
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
u->size, u->origdata, &retval);
}
if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST)) if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
{ {
handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
...@@ -4510,16 +4513,17 @@ public: ...@@ -4510,16 +4513,17 @@ public:
if(!u) if(!u)
return; return;
CV_Assert(u->urefcount >= 0); CV_Assert(u->urefcount == 0);
CV_Assert(u->refcount >= 0); CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
CV_Assert(u->handle != 0 && u->urefcount == 0); CV_Assert(u->handle != 0);
CV_Assert(u->mapcount == 0);
if(u->tempUMat()) if(u->tempUMat())
{ {
CV_Assert(u->origdata); CV_Assert(u->origdata);
// UMatDataAutoLock lock(u); // UMatDataAutoLock lock(u);
if( u->hostCopyObsolete() && u->refcount > 0 ) if (u->hostCopyObsolete())
{ {
#ifdef HAVE_OPENCL_SVM #ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
...@@ -4572,16 +4576,29 @@ public: ...@@ -4572,16 +4576,29 @@ public:
else else
{ {
cl_int retval = 0; cl_int retval = 0;
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, if (u->tempUMat())
(CL_MAP_READ | CL_MAP_WRITE), {
0, u->size, 0, 0, 0, &retval); CV_Assert(u->mapcount == 0);
CV_OclDbgAssert(retval == CL_SUCCESS); void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); (CL_MAP_READ | CL_MAP_WRITE),
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); 0, u->size, 0, 0, 0, &retval);
CV_Assert(u->origdata == data);
CV_OclDbgAssert(retval == CL_SUCCESS);
if (u->originalUMatData)
{
CV_Assert(u->originalUMatData->data == data);
}
CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
}
} }
} }
u->markHostCopyObsolete(false); u->markHostCopyObsolete(false);
} }
else
{
// nothing
}
#ifdef HAVE_OPENCL_SVM #ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{ {
...@@ -4607,16 +4624,12 @@ public: ...@@ -4607,16 +4624,12 @@ public:
if(u->data && u->copyOnMap() && u->data != u->origdata) if(u->data && u->copyOnMap() && u->data != u->origdata)
fastFree(u->data); fastFree(u->data);
u->data = u->origdata; u->data = u->origdata;
if(u->refcount == 0) u->currAllocator->deallocate(u);
{ u = NULL;
u->currAllocator->deallocate(u);
u = NULL;
}
} }
else else
{ {
CV_Assert(u->origdata == NULL); CV_Assert(u->origdata == NULL);
CV_Assert(u->refcount == 0);
if(u->data && u->copyOnMap() && u->data != u->origdata) if(u->data && u->copyOnMap() && u->data != u->origdata)
{ {
fastFree(u->data); fastFree(u->data);
...@@ -4665,17 +4678,13 @@ public: ...@@ -4665,17 +4678,13 @@ public:
delete u; delete u;
u = NULL; u = NULL;
} }
CV_Assert(u == NULL || u->refcount); CV_Assert(u == NULL);
} }
// synchronized call (external UMatDataAutoLock, see UMat::getMat)
void map(UMatData* u, int accessFlags) const void map(UMatData* u, int accessFlags) const
{ {
if(!u) CV_Assert(u && u->handle);
return;
CV_Assert( u->handle != 0 );
UMatDataAutoLock autolock(u);
if(accessFlags & ACCESS_WRITE) if(accessFlags & ACCESS_WRITE)
u->markDeviceCopyObsolete(true); u->markDeviceCopyObsolete(true);
...@@ -4715,11 +4724,16 @@ public: ...@@ -4715,11 +4724,16 @@ public:
} }
#endif #endif
cl_int retval = 0; cl_int retval = CL_SUCCESS;
u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, if (!u->deviceMemMapped())
(CL_MAP_READ | CL_MAP_WRITE), {
0, u->size, 0, 0, 0, &retval); CV_Assert(u->refcount == 1);
if(u->data && retval == CL_SUCCESS) CV_Assert(u->mapcount++ == 0);
u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
(CL_MAP_READ | CL_MAP_WRITE),
0, u->size, 0, 0, 0, &retval);
}
if (u->data && retval == CL_SUCCESS)
{ {
u->markHostCopyObsolete(false); u->markHostCopyObsolete(false);
u->markDeviceMemMapped(true); u->markDeviceMemMapped(true);
...@@ -4765,7 +4779,6 @@ public: ...@@ -4765,7 +4779,6 @@ public:
if( !u->copyOnMap() && u->deviceMemMapped() ) if( !u->copyOnMap() && u->deviceMemMapped() )
{ {
CV_Assert(u->data != NULL); CV_Assert(u->data != NULL);
u->markDeviceMemMapped(false);
#ifdef HAVE_OPENCL_SVM #ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{ {
...@@ -4792,16 +4805,21 @@ public: ...@@ -4792,16 +4805,21 @@ public:
return; return;
} }
#endif #endif
CV_Assert( (retval = clEnqueueUnmapMemObject(q,
(cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
if (Device::getDefault().isAMD())
{
// required for multithreaded applications (see stitching test)
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
}
if (u->refcount == 0) if (u->refcount == 0)
{
CV_Assert(u->mapcount-- == 1);
CV_Assert((retval = clEnqueueUnmapMemObject(q,
(cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS);
if (Device::getDefault().isAMD())
{
// required for multithreaded applications (see stitching test)
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
}
u->markDeviceMemMapped(false);
u->data = 0; u->data = 0;
u->markDeviceCopyObsolete(false);
u->markHostCopyObsolete(true);
}
} }
else if( u->copyOnMap() && u->deviceCopyObsolete() ) else if( u->copyOnMap() && u->deviceCopyObsolete() )
{ {
...@@ -4811,9 +4829,9 @@ public: ...@@ -4811,9 +4829,9 @@ public:
#endif #endif
CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS ); u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
u->markDeviceCopyObsolete(false);
u->markHostCopyObsolete(true);
} }
u->markDeviceCopyObsolete(false);
u->markHostCopyObsolete(true);
} }
bool checkContinuous(int dims, const size_t sz[], bool checkContinuous(int dims, const size_t sz[],
......
...@@ -60,25 +60,71 @@ static Mutex umatLocks[UMAT_NLOCKS]; ...@@ -60,25 +60,71 @@ static Mutex umatLocks[UMAT_NLOCKS];
UMatData::UMatData(const MatAllocator* allocator) UMatData::UMatData(const MatAllocator* allocator)
{ {
prevAllocator = currAllocator = allocator; prevAllocator = currAllocator = allocator;
urefcount = refcount = 0; urefcount = refcount = mapcount = 0;
data = origdata = 0; data = origdata = 0;
size = 0; size = 0;
flags = 0; flags = 0;
handle = 0; handle = 0;
userdata = 0; userdata = 0;
allocatorFlags_ = 0; allocatorFlags_ = 0;
originalUMatData = NULL;
} }
UMatData::~UMatData() UMatData::~UMatData()
{ {
prevAllocator = currAllocator = 0; prevAllocator = currAllocator = 0;
urefcount = refcount = 0; urefcount = refcount = 0;
CV_Assert(mapcount == 0);
data = origdata = 0; data = origdata = 0;
size = 0; size = 0;
flags = 0; flags = 0;
handle = 0; handle = 0;
userdata = 0; userdata = 0;
allocatorFlags_ = 0; allocatorFlags_ = 0;
if (originalUMatData)
{
UMatData* u = originalUMatData;
CV_XADD(&(u->urefcount), -1);
CV_XADD(&(u->refcount), -1);
bool showWarn = false;
if (u->refcount == 0)
{
if (u->urefcount > 0)
showWarn = true;
// simulate Mat::deallocate
if (u->mapcount != 0)
{
(u->currAllocator ? u->currAllocator : /* TODO allocator ? allocator :*/ Mat::getStdAllocator())->unmap(u);
}
else
{
// we don't do "map", so we can't do "unmap"
}
}
if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources
{
showWarn = true;
// simulate UMat::deallocate
u->currAllocator->deallocate(u);
}
#ifndef NDEBUG
if (showWarn)
{
static int warn_message_showed = 0;
if (warn_message_showed++ < 100)
{
fflush(stdout);
fprintf(stderr, "\n! OPENCV warning: getUMat()/getMat() call chain possible problem."
"\n! Base object is dead, while nested/derived object is still alive or processed."
"\n! Please check lifetime of UMat/Mat objects!\n");
fflush(stderr);
}
}
#else
(void)showWarn;
#endif
originalUMatData = NULL;
}
} }
void UMatData::lock() void UMatData::lock()
...@@ -221,19 +267,34 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const ...@@ -221,19 +267,34 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
UMat hdr; UMat hdr;
if(!data) if(!data)
return hdr; return hdr;
Size wholeSize;
Point ofs;
locateROI(wholeSize, ofs);
Size sz(cols, rows);
if (ofs.x != 0 || ofs.y != 0)
{
Mat src = *this;
int dtop = ofs.y;
int dbottom = wholeSize.height - src.rows - ofs.y;
int dleft = ofs.x;
int dright = wholeSize.width - src.cols - ofs.x;
src.adjustROI(dtop, dbottom, dleft, dright);
return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height));
}
CV_Assert(data == datastart);
accessFlags |= ACCESS_RW; accessFlags |= ACCESS_RW;
UMatData* temp_u = u; UMatData* new_u = NULL;
if(!temp_u)
{ {
MatAllocator *a = allocator, *a0 = getStdAllocator(); MatAllocator *a = allocator, *a0 = getStdAllocator();
if(!a) if(!a)
a = a0; a = a0;
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags); new_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
} }
bool allocated = false; bool allocated = false;
try try
{ {
allocated = UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); allocated = UMat::getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
} }
catch (const cv::Exception& e) catch (const cv::Exception& e)
{ {
...@@ -241,14 +302,26 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const ...@@ -241,14 +302,26 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
} }
if (!allocated) if (!allocated)
{ {
allocated = getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); allocated = getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
CV_Assert(allocated); CV_Assert(allocated);
} }
if (u != NULL)
{
#ifdef HAVE_OPENCL
if (ocl::useOpenCL() && new_u->currAllocator == ocl::getOpenCLAllocator())
{
CV_Assert(new_u->tempUMat());
}
#endif
new_u->originalUMatData = u;
CV_XADD(&(u->refcount), 1);
CV_XADD(&(u->urefcount), 1);
}
hdr.flags = flags; hdr.flags = flags;
setSize(hdr, dims, size.p, step.p); setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr); finalizeHdr(hdr);
hdr.u = temp_u; hdr.u = new_u;
hdr.offset = data - datastart; hdr.offset = 0; //data - datastart;
hdr.addref(); hdr.addref();
return hdr; return hdr;
} }
...@@ -639,16 +712,25 @@ Mat UMat::getMat(int accessFlags) const ...@@ -639,16 +712,25 @@ Mat UMat::getMat(int accessFlags) const
return Mat(); return Mat();
// TODO Support ACCESS_READ (ACCESS_WRITE) without unnecessary data transfers // TODO Support ACCESS_READ (ACCESS_WRITE) without unnecessary data transfers
accessFlags |= ACCESS_RW; accessFlags |= ACCESS_RW;
u->currAllocator->map(u, accessFlags); UMatDataAutoLock autolock(u);
CV_Assert(u->data != 0); if(CV_XADD(&u->refcount, 1) == 0)
Mat hdr(dims, size.p, type(), u->data + offset, step.p); u->currAllocator->map(u, accessFlags);
hdr.flags = flags; if (u->data != 0)
hdr.u = u; {
hdr.datastart = u->data; Mat hdr(dims, size.p, type(), u->data + offset, step.p);
hdr.data = u->data + offset; hdr.flags = flags;
hdr.datalimit = hdr.dataend = u->data + u->size; hdr.u = u;
CV_XADD(&hdr.u->refcount, 1); hdr.datastart = u->data;
return hdr; hdr.data = u->data + offset;
hdr.datalimit = hdr.dataend = u->data + u->size;
return hdr;
}
else
{
CV_XADD(&u->refcount, -1);
CV_Assert(u->data != 0 && "Error mapping of UMat to host memory.");
return Mat();
}
} }
void* UMat::handle(int accessFlags) const void* UMat::handle(int accessFlags) const
...@@ -656,10 +738,10 @@ void* UMat::handle(int accessFlags) const ...@@ -656,10 +738,10 @@ void* UMat::handle(int accessFlags) const
if( !u ) if( !u )
return 0; return 0;
// check flags: if CPU copy is newer, copy it back to GPU. CV_Assert(u->refcount == 0);
if( u->deviceCopyObsolete() ) CV_Assert(!u->deviceCopyObsolete() || u->copyOnMap());
if (u->deviceCopyObsolete())
{ {
CV_Assert(u->refcount == 0 || u->origdata);
u->currAllocator->unmap(u); u->currAllocator->unmap(u);
} }
......
...@@ -243,9 +243,11 @@ TEST_P(UMatBasicTests, GetUMat) ...@@ -243,9 +243,11 @@ TEST_P(UMatBasicTests, GetUMat)
EXPECT_MAT_NEAR(ub, ua, 0); EXPECT_MAT_NEAR(ub, ua, 0);
} }
{ {
Mat b; UMat u = a.getUMat(ACCESS_RW);
b = a.getUMat(ACCESS_RW).getMat(ACCESS_RW); {
EXPECT_MAT_NEAR(b, a, 0); Mat b = u.getMat(ACCESS_RW);
EXPECT_MAT_NEAR(b, a, 0);
}
} }
{ {
Mat b; Mat b;
...@@ -253,13 +255,15 @@ TEST_P(UMatBasicTests, GetUMat) ...@@ -253,13 +255,15 @@ TEST_P(UMatBasicTests, GetUMat)
EXPECT_MAT_NEAR(b, a, 0); EXPECT_MAT_NEAR(b, a, 0);
} }
{ {
UMat ub; Mat m = ua.getMat(ACCESS_RW);
ub = ua.getMat(ACCESS_RW).getUMat(ACCESS_RW); {
EXPECT_MAT_NEAR(ub, ua, 0); UMat ub = m.getUMat(ACCESS_RW);
EXPECT_MAT_NEAR(ub, ua, 0);
}
} }
} }
INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U), testing::Values(1, 2), INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U, CV_64F), testing::Values(1, 2),
testing::Values(cv::Size(1, 1), cv::Size(1, 128), cv::Size(128, 1), cv::Size(128, 128), cv::Size(640, 480)), Bool())); testing::Values(cv::Size(1, 1), cv::Size(1, 128), cv::Size(128, 1), cv::Size(128, 128), cv::Size(640, 480)), Bool()));
//////////////////////////////////////////////////////////////// Reshape //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// Reshape ////////////////////////////////////////////////////////////////////////
...@@ -1080,7 +1084,7 @@ TEST(UMat, unmap_in_class) ...@@ -1080,7 +1084,7 @@ TEST(UMat, unmap_in_class)
Mat dst; Mat dst;
m.convertTo(dst, CV_32FC1); m.convertTo(dst, CV_32FC1);
// some additional CPU-based per-pixel processing into dst // some additional CPU-based per-pixel processing into dst
intermediateResult = dst.getUMat(ACCESS_READ); intermediateResult = dst.getUMat(ACCESS_READ); // this violates lifetime of base(dst) / derived (intermediateResult) objects. Use copyTo?
std::cout << "data processed..." << std::endl; std::cout << "data processed..." << std::endl;
} // problem is here: dst::~Mat() } // problem is here: dst::~Mat()
std::cout << "leave ProcessData()" << std::endl; std::cout << "leave ProcessData()" << std::endl;
...@@ -1268,5 +1272,61 @@ TEST(UMat, DISABLED_Test_same_behaviour_write_and_write) ...@@ -1268,5 +1272,61 @@ TEST(UMat, DISABLED_Test_same_behaviour_write_and_write)
ASSERT_TRUE(exceptionDetected); // data race ASSERT_TRUE(exceptionDetected); // data race
} }
TEST(UMat, mat_umat_sync)
{
UMat u(10, 10, CV_8UC1, Scalar(1));
{
Mat m = u.getMat(ACCESS_RW).reshape(1);
m.setTo(Scalar(255));
}
UMat uDiff;
compare(u, 255, uDiff, CMP_NE);
ASSERT_EQ(0, countNonZero(uDiff));
}
TEST(UMat, testTempObjects_UMat)
{
UMat u(10, 10, CV_8UC1, Scalar(1));
{
UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
u2.setTo(Scalar(255));
}
UMat uDiff;
compare(u, 255, uDiff, CMP_NE);
ASSERT_EQ(0, countNonZero(uDiff));
}
TEST(UMat, testTempObjects_Mat)
{
Mat m(10, 10, CV_8UC1, Scalar(1));
{
Mat m2;
ASSERT_ANY_THROW(m2 = m.getUMat(ACCESS_RW).getMat(ACCESS_RW));
}
}
TEST(UMat, testWrongLifetime_UMat)
{
UMat u(10, 10, CV_8UC1, Scalar(1));
{
UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
u.release(); // base object
u2.release(); // derived object, should show warning message
}
}
TEST(UMat, testWrongLifetime_Mat)
{
Mat m(10, 10, CV_8UC1, Scalar(1));
{
UMat u = m.getUMat(ACCESS_RW);
Mat m2 = u.getMat(ACCESS_RW);
m.release(); // base object
m2.release(); // map of derived object
u.release(); // derived object, should show warning message
}
}
} } // namespace cvtest::ocl } } // namespace cvtest::ocl
...@@ -1009,7 +1009,7 @@ namespace cv ...@@ -1009,7 +1009,7 @@ namespace cv
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
idxArg = kernel.set(idxArg, (int)iters); // int c_iters idxArg = kernel.set(idxArg, (int)iters); // int c_iters
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
return kernel.run(2, globalThreads, localThreads, false); return kernel.run(2, globalThreads, localThreads, true); // sync=true because ocl::Image2D lifetime is not handled well for temp UMat
} }
private: private:
inline static bool isDeviceCPU() inline static bool isDeviceCPU()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment