Commit 5ce38e51 authored by Vadim Pisarevsky, committed by Alexander Alekhin

Merge pull request #10223 from vpisarev:ocl_mac_fixes

* fixed OpenCL functions on Mac, so that the tests pass

* fixed compile warnings; temporarily disabled the OCL branch of TV-L1 optical flow on macOS

* fixed a few other warnings on macOS
parent a3ec2ac3
...@@ -4172,13 +4172,13 @@ protected: ...@@ -4172,13 +4172,13 @@ protected:
size_t step_; size_t step_;
public: public:
AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment) AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
: size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step) : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
{ {
CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
if (((size_t)ptr_ & (alignment - 1)) != 0) if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
{ {
allocatedPtr_ = new uchar[size_ + alignment - 1]; allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1)); ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
if (readAccess) if (readAccess)
{ {
...@@ -4978,6 +4978,25 @@ public: ...@@ -4978,6 +4978,25 @@ public:
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0)); srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
} }
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_srcrawofs = srcrawofs & ~(padding-1);
size_t membuf_ofs = srcrawofs - new_srcrawofs;
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* ptr = alignedPtr.getAlignedPtr();
CV_Assert(new_srcstep[0] >= new_sz[0]);
total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
total = std::min(total, u->size - new_srcrawofs);
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_srcrawofs, total, ptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
}
#else
else else
{ {
AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
...@@ -4989,6 +5008,7 @@ public: ...@@ -4989,6 +5008,7 @@ public:
new_dststep[0], 0, new_dststep[0], 0,
ptr, 0, 0, 0)); ptr, 0, 0, 0));
} }
#endif
} }
} }
...@@ -5095,6 +5115,30 @@ public: ...@@ -5095,6 +5115,30 @@ public:
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0)); dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
} }
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_dstrawofs = dstrawofs & ~(padding-1);
size_t membuf_ofs = dstrawofs - new_dstrawofs;
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* ptr = alignedPtr.getAlignedPtr();
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
total = std::min(total, u->size - new_dstrawofs);
/*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
(int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
(int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_dstrawofs, total, ptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
new_dstrawofs, total, ptr, 0, 0, 0));
}
#else
else else
{ {
AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
...@@ -5106,6 +5150,7 @@ public: ...@@ -5106,6 +5150,7 @@ public:
new_srcstep[0], 0, new_srcstep[0], 0,
ptr, 0, 0, 0)); ptr, 0, 0, 0));
} }
#endif
} }
u->markHostCopyObsolete(true); u->markHostCopyObsolete(true);
#ifdef HAVE_OPENCL_SVM #ifdef HAVE_OPENCL_SVM
...@@ -5247,6 +5292,41 @@ public: ...@@ -5247,6 +5292,41 @@ public:
CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
srcrawofs, dstrawofs, total, 0, 0, 0)); srcrawofs, dstrawofs, total, 0, 0, 0));
} }
#ifdef __APPLE__
else
{
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
size_t new_srcrawofs = srcrawofs & ~(padding-1);
size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
size_t new_dstrawofs = dstrawofs & ~(padding-1);
size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
uchar* srcptr = srcBuf.getAlignedPtr();
uchar* dstptr = dstBuf.getAlignedPtr();
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
src_total = std::min(src_total, src->size - new_srcrawofs);
size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
dst_total = std::min(dst_total, dst->size - new_dstrawofs);
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
new_srcrawofs, src_total, srcptr, 0, 0, 0));
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
for( size_t i = 0; i < new_sz[1]; i++ )
memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
}
#else
else else
{ {
CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
...@@ -5255,6 +5335,7 @@ public: ...@@ -5255,6 +5335,7 @@ public:
new_dststep[0], 0, new_dststep[0], 0,
0, 0, 0)); 0, 0, 0));
} }
#endif
} }
if (retval == CL_SUCCESS) if (retval == CL_SUCCESS)
{ {
......
...@@ -3359,6 +3359,11 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr ...@@ -3359,6 +3359,11 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
normType &= ~NORM_RELATIVE; normType &= ~NORM_RELATIVE;
bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
#ifdef __APPLE__
if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
return false;
#endif
if (normsum) if (normsum)
{ {
if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ? if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
......
...@@ -1403,6 +1403,7 @@ void morph(int op, int src_type, int dst_type, ...@@ -1403,6 +1403,7 @@ void morph(int op, int src_type, int dst_type,
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n))) #define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
#ifndef __APPLE__
static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
int op, int actual_op = -1, InputArray _extraMat = noArray()) int op, int actual_op = -1, InputArray _extraMat = noArray())
{ {
...@@ -1628,16 +1629,15 @@ static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kerne ...@@ -1628,16 +1629,15 @@ static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kerne
} }
return kernel.run(2, globalsize, NULL, false); return kernel.run(2, globalsize, NULL, false);
} }
#endif
static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
Point anchor, int iterations, int op, int borderType, Point anchor, int iterations, int op, int borderType,
const Scalar &, int actual_op = -1, InputArray _extraMat = noArray()) const Scalar &, int actual_op = -1, InputArray _extraMat = noArray())
{ {
const ocl::Device & dev = ocl::Device::getDefault(); const ocl::Device & dev = ocl::Device::getDefault();
int type = _src.type(), depth = CV_MAT_DEPTH(type), int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
Mat kernel = _kernel.getMat(); Mat kernel = _kernel.getMat();
Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size(); Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size();
...@@ -1664,14 +1664,13 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, ...@@ -1664,14 +1664,13 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
iterations = 1; iterations = 1;
} }
#ifndef __APPLE__
int esz = CV_ELEM_SIZE(type);
// try to use OpenCL kernel adopted for small morph kernel // try to use OpenCL kernel adopted for small morph kernel
if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) && if (dev.isIntel() &&
((ksize.width < 5 && ksize.height < 5 && esz <= 4) || ((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
(ksize.width == 5 && ksize.height == 5 && cn == 1)) && (ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
(iterations == 1) (iterations == 1)
#if defined __APPLE__
&& cn == 1
#endif
) )
{ {
if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat)) if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
...@@ -1680,6 +1679,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, ...@@ -1680,6 +1679,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat)) if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
return true; return true;
} }
#endif
if (iterations == 0 || kernel.rows*kernel.cols == 1) if (iterations == 0 || kernel.rows*kernel.cols == 1)
{ {
......
...@@ -392,9 +392,11 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray ...@@ -392,9 +392,11 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray
{ {
CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION()
#ifndef __APPLE__
CV_OCL_RUN(_flow.isUMat() && CV_OCL_RUN(_flow.isUMat() &&
ocl::Image2D::isFormatSupported(CV_32F, 1, false), ocl::Image2D::isFormatSupported(CV_32F, 1, false),
calc_ocl(_I0, _I1, _flow)) calc_ocl(_I0, _I1, _flow))
#endif
Mat I0 = _I0.getMat(); Mat I0 = _I0.getMat();
Mat I1 = _I1.getMat(); Mat I1 = _I1.getMat();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment