Commit 57120c1a authored by Alexander Alekhin

ocl: remove support_image2d(): we target OpenCL 1.1 (with image2d support by default)

parent c0265c60
...@@ -55,20 +55,11 @@ namespace cv ...@@ -55,20 +55,11 @@ namespace cv
{ {
namespace ocl namespace ocl
{ {
static const char noImage2dOption[] = "-D DISABLE_IMAGE2D";
static bool use_image2d = false;
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth) size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{ {
char optBuf [100] = {0}; char optBuf [100] = {0};
char * optBufPtr = optBuf; char * optBufPtr = optBuf;
if( !use_image2d )
{
strcat(optBufPtr, noImage2dOption);
optBufPtr += strlen(noImage2dOption);
}
cl_kernel kernel; cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
size_t wave_size = queryWaveFrontSize(kernel); size_t wave_size = queryWaveFrontSize(kernel);
...@@ -149,13 +140,10 @@ public: ...@@ -149,13 +140,10 @@ public:
counters.setTo(Scalar::all(0)); counters.setTo(Scalar::all(0));
integral(img, surf_.sum); integral(img, surf_.sum);
use_image2d = support_image2d();
if(use_image2d) bindImgTex(img, imgTex);
{ bindImgTex(surf_.sum, sumTex);
bindImgTex(img, imgTex); finish();
bindImgTex(surf_.sum, sumTex);
finish();
}
maskSumTex = 0; maskSumTex = 0;
......
...@@ -165,15 +165,11 @@ private: ...@@ -165,15 +165,11 @@ private:
// for faster clamping, there is no buffer padding for the constructed texture // for faster clamping, there is no buffer padding for the constructed texture
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat); Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
bool CV_EXPORTS isCpuDevice(); bool CV_EXPORTS isCpuDevice();
size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel); size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
inline size_t divUp(size_t total, size_t grain) inline size_t divUp(size_t total, size_t grain)
{ {
return (total + grain - 1) / grain; return (total + grain - 1) / grain;
......
...@@ -448,26 +448,30 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn ...@@ -448,26 +448,30 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
{ {
stringstream src_sign; stringstream src_sign;
src_sign << source->name; if (source->name)
src_sign << getClContext(ctx);
if (NULL != build_options)
{ {
src_sign << "_" << build_options; src_sign << source->name;
} src_sign << getClContext(ctx);
if (NULL != build_options)
{
src_sign << "_" << build_options;
}
{
cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
{ {
clRetainProgram(program); cv::AutoLock lockCache(mutexCache);
return program; cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
{
clRetainProgram(program);
return program;
}
} }
} }
cv::AutoLock lockCache(mutexFiles); cv::AutoLock lockCache(mutexFiles);
// second check // second check
if (source->name)
{ {
cv::AutoLock lockCache(mutexCache); cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
...@@ -493,6 +497,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn ...@@ -493,6 +497,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options); cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
//Cache the binary for future use if build_options is null //Cache the binary for future use if build_options is null
if (source->name)
{ {
cv::AutoLock lockCache(mutexCache); cv::AutoLock lockCache(mutexCache);
this->addProgram(src_sign.str(), program); this->addProgram(src_sign.str(), program);
......
...@@ -202,8 +202,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, ...@@ -202,8 +202,6 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
CV_DbgAssert(support_image2d());
ensureSizeIsEnough(image.size(), CV_32F, eig_); ensureSizeIsEnough(image.size(), CV_32F, eig_);
if (useHarrisDetector) if (useHarrisDetector)
......
...@@ -216,41 +216,11 @@ namespace cv ...@@ -216,41 +216,11 @@ namespace cv
{ {
return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type())); return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
} }
void releaseTexture(cl_mem& texture) void releaseTexture(cl_mem& texture)
{ {
openCLFree(texture); openCLFree(texture);
} }
/**
 * Reports whether a kernel taking an image2d_t argument can be obtained for
 * the given OpenCL context.
 *
 * The answer is probed once by requesting a trivial kernel whose only
 * parameter is an image2d_t, and is then cached in function-local statics.
 * Every later call returns the cached value and ignores clCxt.
 *
 * @param clCxt context used for the probe on the first call that completes.
 * @return true if the probe kernel was obtained without a cv::Exception,
 *         false if the attempt failed with error code -217.
 * @throws cv::Exception any failure with a different error code is rethrown
 *         unchanged; nothing is cached in that case, so the next call
 *         probes again.
 */
bool support_image2d(Context *clCxt)
{
    // Cached probe result. NOTE(review): plain statics with no locking
    // visible here -- confirm callers never race on the first call.
    static bool _isTested = false;
    static bool _support = false;
    if (_isTested)
    {
        return _support;
    }

    // Minimal program whose kernel signature requires image2d_t support.
    const cv::ocl::ProgramEntry _kernel = {"test_func", "__kernel void test_func(image2d_t img) {}", NULL};
    try
    {
        // NOTE(review): the value returned by openCLGetKernelFromSource is
        // discarded here -- confirm whether it needs to be released.
        cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func");
        cv::ocl::finish();
        _support = true;
    }
    catch (const cv::Exception& e)
    {
        // NOTE(review): -217 is presumably the OpenCV error code raised when
        // the probe program fails to build -- confirm and use the named
        // constant if one is available.
        if (e.code != -217)
        {
            // Bare rethrow keeps the original exception object and its
            // dynamic type; `throw e;` would throw a sliced copy.
            throw;
        }
        _support = false;
    }
    _isTested = true;
    return _support;
}
}//namespace ocl }//namespace ocl
}//namespace cv }//namespace cv
This diff is collapsed.
...@@ -82,18 +82,16 @@ static void lkSparse_run(oclMat &I, oclMat &J, ...@@ -82,18 +82,16 @@ static void lkSparse_run(oclMat &I, oclMat &J,
int level, dim3 patch, Size winSize, int iters) int level, dim3 patch, Size winSize, int iters)
{ {
Context *clCxt = I.clCxt; Context *clCxt = I.clCxt;
int elemCntPerRow = I.step / I.elemSize();
string kernelName = "lkSparse"; string kernelName = "lkSparse";
bool isImageSupported = support_image2d(); size_t localThreads[3] = { 8, 8, 1 };
size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, 8, 1};
size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
int cn = I.oclchannels(); int cn = I.oclchannels();
char calcErr = level==0?1:0; char calcErr = level==0?1:0;
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
cl_mem ITex = isImageSupported ? bindTexture(I) : (cl_mem)I.data; cl_mem ITex = bindTexture(I);
cl_mem JTex = isImageSupported ? bindTexture(J) : (cl_mem)J.data; cl_mem JTex = bindTexture(J);
args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex )); args.push_back( make_pair( sizeof(cl_mem), (void *)&ITex ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex )); args.push_back( make_pair( sizeof(cl_mem), (void *)&JTex ));
...@@ -106,8 +104,6 @@ static void lkSparse_run(oclMat &I, oclMat &J, ...@@ -106,8 +104,6 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args.push_back( make_pair( sizeof(cl_int), (void *)&level )); args.push_back( make_pair( sizeof(cl_int), (void *)&level ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
if (!isImageSupported)
args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x )); args.push_back( make_pair( sizeof(cl_int), (void *)&patch.x ));
args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y )); args.push_back( make_pair( sizeof(cl_int), (void *)&patch.y ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cn )); args.push_back( make_pair( sizeof(cl_int), (void *)&cn ));
...@@ -120,32 +116,23 @@ static void lkSparse_run(oclMat &I, oclMat &J, ...@@ -120,32 +116,23 @@ static void lkSparse_run(oclMat &I, oclMat &J,
if (is_cpu) if (is_cpu)
{ {
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
releaseTexture(ITex);
releaseTexture(JTex);
} }
else else
{ {
if(isImageSupported) stringstream idxStr;
{ idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
stringstream idxStr; cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); int wave_size = (int)queryWaveFrontSize(kernel);
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); openCLSafeCall(clReleaseKernel(kernel));
int wave_size = (int)queryWaveFrontSize(kernel);
openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0};
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
static char opt[32] = {0};
sprintf(opt, "-D WAVE_SIZE=%d", wave_size); openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
args, I.oclchannels(), I.depth(), opt);
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
args, I.oclchannels(), I.depth(), opt);
releaseTexture(ITex);
releaseTexture(JTex);
}
else
{
openCLExecuteKernel(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
}
} }
releaseTexture(ITex);
releaseTexture(JTex);
} }
void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
...@@ -226,37 +213,19 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, ...@@ -226,37 +213,19 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters) oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{ {
Context *clCxt = I.clCxt; Context *clCxt = I.clCxt;
bool isImageSupported = support_image2d();
int elemCntPerRow = I.step / I.elemSize();
string kernelName = "lkDense"; string kernelName = "lkDense";
size_t localThreads[3] = { 16, 16, 1 }; size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { I.cols, I.rows, 1}; size_t globalThreads[3] = { I.cols, I.rows, 1};
bool calcErr; cl_char calcErr = err ? 1 : 0;
if (err)
{
calcErr = true;
}
else
{
calcErr = false;
}
cl_mem ITex; cl_mem ITex;
cl_mem JTex; cl_mem JTex;
if (isImageSupported) ITex = bindTexture(I);
{ JTex = bindTexture(J);
ITex = bindTexture(I);
JTex = bindTexture(J);
}
else
{
ITex = (cl_mem)I.data;
JTex = (cl_mem)J.data;
}
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
...@@ -273,28 +242,15 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, ...@@ -273,28 +242,15 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
args.push_back( make_pair( sizeof(cl_int), (void *)&prevV.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&prevV.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
if (!isImageSupported)
{
args.push_back( make_pair( sizeof(cl_int), (void *)&elemCntPerRow ) );
}
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
if (isImageSupported) openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
{
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
releaseTexture(ITex); releaseTexture(ITex);
releaseTexture(JTex); releaseTexture(JTex);
}
else
{
openCLExecuteKernel(clCxt, &pyrlk_no_image, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth());
}
} }
void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err) void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
......
...@@ -411,9 +411,6 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, ...@@ -411,9 +411,6 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho) void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho)
{ {
Context* clCxt = I0.clCxt; Context* clCxt = I0.clCxt;
const bool isImgSupported = support_image2d(clCxt);
CV_Assert(isImgSupported);
int u1ElementSize = u1.elemSize(); int u1ElementSize = u1.elemSize();
int u1Step = u1.step/u1ElementSize; int u1Step = u1.step/u1ElementSize;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment