Commit 7b8ad4cb authored by Andrey Kamaev

Refactor OpenCL initialization and allow using the ocl module without explicit setup

parent dd678121
...@@ -7,7 +7,7 @@ using namespace cv::gpu; ...@@ -7,7 +7,7 @@ using namespace cv::gpu;
using namespace cvtest; using namespace cvtest;
using namespace testing; using namespace testing;
int main(int argc, char** argv) int main(int argc, char **argv)
{ {
try try
{ {
...@@ -50,8 +50,8 @@ int main(int argc, char** argv) ...@@ -50,8 +50,8 @@ int main(int argc, char** argv)
TS::ptr()->init("cv"); TS::ptr()->init("cv");
InitGoogleTest(&argc, argv); InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
std::cerr << e.what() << std::endl; std::cerr << e.what() << std::endl;
......
...@@ -140,15 +140,23 @@ namespace cv ...@@ -140,15 +140,23 @@ namespace cv
protected: protected:
Context(); Context();
friend class auto_ptr<Context>; friend class auto_ptr<Context>;
static auto_ptr<Context> clCxt;
private:
static auto_ptr<Context> clCxt;
static int val;
public: public:
~Context(); ~Context();
static int val; void release();
static Context *getContext(); Info::Impl* impl;
static Context* getContext();
static void setContext(Info &oclinfo); static void setContext(Info &oclinfo);
struct Impl;
Impl *impl; enum {CL_DOUBLE, CL_UNIFIED_MEM};
bool supportsFeature(int ftype);
size_t computeUnits();
void* oclContext();
void* oclCommandQueue();
}; };
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
......
This diff is collapsed.
...@@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) ...@@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
{ {
openCLFree(counter); openCLFree(counter);
} }
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); counter = clCreateBuffer( (cl_context)getoclContext(), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err); openCLSafeCall(err);
} }
...@@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in ...@@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{ {
unsigned int count; unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt; Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisGlobal"; string kernelName = "edgesHysteresisGlobal";
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
...@@ -364,7 +364,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi ...@@ -364,7 +364,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
int count_i[1] = {0}; int count_i[1] = {0};
while(count > 0) while(count > 0)
{ {
openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear(); args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1}; size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
...@@ -379,7 +379,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi ...@@ -379,7 +379,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE); openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2); std::swap(st1, st2);
} }
#undef DIVUP #undef DIVUP
......
...@@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla ...@@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) ); openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
...@@ -220,7 +220,8 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla ...@@ -220,7 +220,8 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake //ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) ); cl_command_queue clq = (cl_command_queue)getoclCommandQueue();
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) );
} }
cv::ocl::FftPlan::~FftPlan() cv::ocl::FftPlan::~FftPlan()
{ {
...@@ -338,16 +339,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) ...@@ -338,16 +339,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
if (buffersize) if (buffersize)
{ {
cl_int medstatus; cl_int medstatus;
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus ); openCLSafeCall( medstatus );
} }
cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue();
openCLSafeCall( clAmdFftEnqueueTransform( plHandle, openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, 1,
&src.clCxt->impl->clCmdQueue, &clq,
0, NULL, NULL, 0, NULL, NULL,
(cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); openCLSafeCall( clFinish(clq) );
if(clMedBuffer) if(clMedBuffer)
{ {
openCLFree(clMedBuffer); openCLFree(clMedBuffer);
......
...@@ -1478,7 +1478,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, ...@@ -1478,7 +1478,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
{ {
if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return; return;
......
...@@ -87,7 +87,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -87,7 +87,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
int offb = src2.offset; int offb = src2.offset;
int offc = dst.offset; int offc = dst.offset;
cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue();
switch(src1.type()) switch(src1.type())
{ {
case CV_32FC1: case CV_32FC1:
...@@ -97,11 +97,12 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -97,11 +97,12 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
offa /= sizeof(float); offa /= sizeof(float);
offb /= sizeof(float); offb /= sizeof(float);
offc /= sizeof(float); offc /= sizeof(float);
openCLSafeCall openCLSafeCall
( (
clAmdBlasSgemmEx(order, transA, transB, M, N, K, clAmdBlasSgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
); );
break; break;
case CV_64FC1: case CV_64FC1:
...@@ -115,7 +116,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -115,7 +116,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
( (
clAmdBlasDgemmEx(order, transA, transB, M, N, K, clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
); );
break; break;
case CV_32FC2: case CV_32FC2:
...@@ -132,7 +133,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -132,7 +133,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
( (
clAmdBlasCgemmEx(order, transA, transB, M, N, K, clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
); );
} }
break; break;
...@@ -150,7 +151,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -150,7 +151,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
( (
clAmdBlasZgemmEx(order, transA, transB, M, N, K, clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
); );
} }
break; break;
......
...@@ -971,7 +971,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -971,7 +971,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
size_t blocksize = 8; size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 }; size_t localThreads[3] = { blocksize, blocksize , 1 };
size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0], size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->computeUnits()) *localThreads[0],
localThreads[1], 1 localThreads[1], 1
}; };
int outputsz = 256 * globalThreads[0] / localThreads[0]; int outputsz = 256 * globalThreads[0] / localThreads[0];
...@@ -1047,21 +1047,21 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1047,21 +1047,21 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status); //classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL); //status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode), nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL)); node, 0, NULL, NULL));
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz); candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
//openCLVerifyCall(status); //openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
//flag = 1; //flag = 1;
//} //}
...@@ -1186,7 +1186,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1186,7 +1186,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
int grp_per_CU = 12; int grp_per_CU = 12;
size_t blocksize = 8; size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 }; size_t localThreads[3] = { blocksize, blocksize , 1 };
size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->impl->maxComputeUnits *localThreads[0], size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
localThreads[1], 1 localThreads[1], 1
}; };
int outputsz = 256 * globalThreads[0] / localThreads[0]; int outputsz = 256 * globalThreads[0] / localThreads[0];
...@@ -1195,7 +1195,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1195,7 +1195,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
nodenum * sizeof(GpuHidHaarTreeNode)); nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode), nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL)); node, 0, NULL, NULL));
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE, cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
...@@ -1252,16 +1252,16 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1252,16 +1252,16 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
int splitnode = stage[0].count + stage[1].count + stage[2].count; int splitnode = stage[0].count + stage[1].count + stage[2].count;
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz); candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
//openCLVerifyCall(status); //openCLVerifyCall(status);
scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
//openCLVerifyCall(status); //openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount); pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount); correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
//int argcount = 0; //int argcount = 0;
vector<pair<size_t, const void *> > args; vector<pair<size_t, const void *> > args;
...@@ -1286,7 +1286,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1286,7 +1286,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL)); //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status); candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
for(int i = 0; i < outputsz; i++) for(int i = 0; i < outputsz; i++)
{ {
...@@ -1297,7 +1297,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -1297,7 +1297,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
free(scaleinfo); free(scaleinfo);
free(p); free(p);
free(correction); free(correction);
clEnqueueUnmapMemObject(gsum.clCxt->impl->clCmdQueue, candidatebuffer, candidate, 0, 0, 0); clEnqueueUnmapMemObject((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, candidate, 0, 0, 0);
openCLSafeCall(clReleaseMemObject(stagebuffer)); openCLSafeCall(clReleaseMemObject(stagebuffer));
openCLSafeCall(clReleaseMemObject(scaleinfobuffer)); openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer)); openCLSafeCall(clReleaseMemObject(nodebuffer));
......
...@@ -290,8 +290,8 @@ namespace cv ...@@ -290,8 +290,8 @@ namespace cv
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
if(src.clCxt -> impl -> double_support != 0) if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
} }
...@@ -319,7 +319,7 @@ namespace cv ...@@ -319,7 +319,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if(src.clCxt -> impl -> double_support != 0) if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
} }
...@@ -383,7 +383,7 @@ namespace cv ...@@ -383,7 +383,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
if(src.clCxt -> impl -> double_support != 0) if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
...@@ -824,12 +824,12 @@ namespace cv ...@@ -824,12 +824,12 @@ namespace cv
string kernelName = "warpAffine" + s[interpolation]; string kernelName = "warpAffine" + s[interpolation];
if(src.clCxt -> impl -> double_support != 0) if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
cl_int st; cl_int st;
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
openCLVerifyCall(st); openCLVerifyCall(st);
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
} }
else else
{ {
...@@ -839,8 +839,8 @@ namespace cv ...@@ -839,8 +839,8 @@ namespace cv
{ {
float_coeffs[m][n] = coeffs[m][n]; float_coeffs[m][n] = coeffs[m][n];
} }
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
} }
//TODO: improve this kernel //TODO: improve this kernel
...@@ -894,12 +894,12 @@ namespace cv ...@@ -894,12 +894,12 @@ namespace cv
string s[3] = {"NN", "Linear", "Cubic"}; string s[3] = {"NN", "Linear", "Cubic"};
string kernelName = "warpPerspective" + s[interpolation]; string kernelName = "warpPerspective" + s[interpolation];
if(src.clCxt -> impl -> double_support != 0) if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
cl_int st; cl_int st;
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
openCLVerifyCall(st); openCLVerifyCall(st);
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
} }
else else
{ {
...@@ -908,9 +908,9 @@ namespace cv ...@@ -908,9 +908,9 @@ namespace cv
for(int n = 0; n < 3; n++) for(int n = 0; n < 3; n++)
float_coeffs[m][n] = coeffs[m][n]; float_coeffs[m][n] = coeffs[m][n];
coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
openCLVerifyCall(st); openCLVerifyCall(st);
openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
} }
//TODO: improve this kernel //TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16; size_t blkSizeX = 16, blkSizeY = 16;
...@@ -1018,7 +1018,7 @@ namespace cv ...@@ -1018,7 +1018,7 @@ namespace cv
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{ {
CV_Assert(src.type() == CV_8UC1); CV_Assert(src.type() == CV_8UC1);
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "select device don't support double");
} }
...@@ -1192,7 +1192,7 @@ namespace cv ...@@ -1192,7 +1192,7 @@ namespace cv
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType) double k, int borderType)
{ {
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "select device don't support double");
} }
...@@ -1206,7 +1206,7 @@ namespace cv ...@@ -1206,7 +1206,7 @@ namespace cv
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{ {
if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "select device don't support double");
} }
...@@ -1260,7 +1260,7 @@ namespace cv ...@@ -1260,7 +1260,7 @@ namespace cv
if( src.depth() != CV_8U || src.oclchannels() != 4 ) if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
// if(src.clCxt->impl->double_support == 0) // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
// { // {
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n"); // CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// } // }
...@@ -1328,7 +1328,7 @@ namespace cv ...@@ -1328,7 +1328,7 @@ namespace cv
if( src.depth() != CV_8U || src.oclchannels() != 4 ) if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
// if(src.clCxt->impl->double_support == 0) // if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
// { // {
// CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n"); // CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// } // }
......
This diff is collapsed.
...@@ -190,7 +190,7 @@ void cv::ocl::oclMat::upload(const Mat &m) ...@@ -190,7 +190,7 @@ void cv::ocl::oclMat::upload(const Mat &m)
int pitch = wholeSize.width * 3 * m.elemSize1(); int pitch = wholeSize.width * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072; int tail_padding = m.elemSize1() * 3072;
int err; int err;
cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE, cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err); openCLVerifyCall(err);
...@@ -242,7 +242,7 @@ void cv::ocl::oclMat::download(cv::Mat &m) const ...@@ -242,7 +242,7 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
int pitch = wholecols * 3 * m.elemSize1(); int pitch = wholecols * 3 * m.elemSize1();
int tail_padding = m.elemSize1() * 3072; int tail_padding = m.elemSize1() * 3072;
int err; int err;
cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE, cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err); (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLVerifyCall(err); openCLVerifyCall(err);
...@@ -595,7 +595,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri ...@@ -595,7 +595,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
if(dst.offset == 0 && dst.cols == dst.wholecols) if(dst.offset == 0 && dst.cols == dst.wholecols)
{ {
clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
} }
else else
{ {
......
...@@ -94,15 +94,15 @@ namespace cv ...@@ -94,15 +94,15 @@ namespace cv
for(size_t i = 0; i < args.size(); i ++) for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL)); localThreads, 0, NULL, NULL));
switch(finish_mode) switch(finish_mode)
{ {
case CLFINISH: case CLFINISH:
clFinish(clCxt->impl->clCmdQueue); clFinish((cl_command_queue)clCxt->oclCommandQueue());
case CLFLUSH: case CLFLUSH:
clFlush(clCxt->impl->clCmdQueue); clFlush((cl_command_queue)clCxt->oclCommandQueue());
break; break;
case DISABLE: case DISABLE:
default: default:
...@@ -126,7 +126,7 @@ namespace cv ...@@ -126,7 +126,7 @@ namespace cv
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
build_options, finish_mode); build_options, finish_mode);
} }
cl_mem bindTexture(const oclMat &mat) cl_mem bindTexture(const oclMat &mat)
{ {
cl_mem texture; cl_mem texture;
...@@ -177,7 +177,7 @@ namespace cv ...@@ -177,7 +177,7 @@ namespace cv
desc.buffer = NULL; desc.buffer = NULL;
desc.num_mip_levels = 0; desc.num_mip_levels = 0;
desc.num_samples = 0; desc.num_samples = 0;
texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else #else
texture = clCreateImage2D( texture = clCreateImage2D(
mat.clCxt->impl->clContext, mat.clCxt->impl->clContext,
...@@ -195,10 +195,10 @@ namespace cv ...@@ -195,10 +195,10 @@ namespace cv
cl_mem devData; cl_mem devData;
if (mat.cols * mat.elemSize() != mat.step) if (mat.cols * mat.elemSize() != mat.step)
{ {
devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
* mat.elemSize(), NULL, NULL); * mat.elemSize(), NULL, NULL);
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
} }
else else
...@@ -206,10 +206,10 @@ namespace cv ...@@ -206,10 +206,10 @@ namespace cv
devData = (cl_mem)mat.data; devData = (cl_mem)mat.data;
} }
clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0); clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
if ((mat.cols * mat.elemSize() != mat.step)) if ((mat.cols * mat.elemSize() != mat.step))
{ {
clFinish(mat.clCxt->impl->clCmdQueue); clFinish((cl_command_queue)mat.clCxt->oclCommandQueue());
clReleaseMemObject(devData); clReleaseMemObject(devData);
} }
...@@ -223,7 +223,7 @@ namespace cv ...@@ -223,7 +223,7 @@ namespace cv
} }
bool support_image2d(Context *clCxt) bool support_image2d(Context *clCxt)
{return false; {
static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}"; static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
static bool _isTested = false; static bool _isTested = false;
static bool _support = false; static bool _support = false;
...@@ -234,7 +234,7 @@ namespace cv ...@@ -234,7 +234,7 @@ namespace cv
try try
{ {
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
_support = true; //_support = true;
} }
catch (const cv::Exception& e) catch (const cv::Exception& e)
{ {
......
...@@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) ...@@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
if (!cv::ocl::Context::getContext()->impl->double_support && is_float) if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float)
{ {
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!"); CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
} }
...@@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) ...@@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
cv::Mat dst(dst_a); cv::Mat dst(dst_a);
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0; a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
if (!cv::ocl::Context::getContext()->impl->double_support) if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
{ {
for (int i = 0; i < contour->total; ++i) for (int i = 0; i < contour->total; ++i)
{ {
......
...@@ -81,33 +81,6 @@ ...@@ -81,33 +81,6 @@
#include "opencv2/ocl/private/util.hpp" #include "opencv2/ocl/private/util.hpp"
#include "safe_call.hpp" #include "safe_call.hpp"
using namespace std;
namespace cv
{
namespace ocl
{
// Data members reached through Context::impl (e.g. clCxt->impl->clContext,
// clCxt->impl->clCmdQueue, clCxt->impl->double_support in the ocl sources).
// Member names, types and order are relied on by that code; do not rename.
struct Context::Impl
{
// Information about the OpenCL context.
// Context handle; callers pass it to clCreateBuffer / clCreateImage.
cl_context clContext;
// Command queue handle; callers pass it to clEnqueueNDRangeKernel, clFinish and clFlush.
cl_command_queue clCmdQueue;
// NOTE: plural name, but the type is a single cl_device_id handle.
cl_device_id devices;
// Presumably the human-readable device name - TODO confirm where it is filled in.
string devName;
// The next four fields presumably cache the device's work-item/work-group limits
// and compute-unit count as reported by the OpenCL runtime - TODO confirm
// against the initialization code, which is not part of this header.
cl_uint maxDimensions;
size_t maxWorkGroupSize;
size_t maxWorkItemSizes[4];
cl_uint maxComputeUnits;
// Callers treat 0 as "selected device does not support double" and reject
// CV_64F input in that case; any non-zero value is taken as supported.
int double_support;
// Extra options used to recognize vendor-specific fp64 extensions.
char extra_options[512];
// NOTE(review): looks like a filesystem path related to kernel binaries - confirm.
string Binpath;
int unified_memory; // 1 means integrated GPU, otherwise this value is 0
};
}
}
#else /* defined(HAVE_OPENCL) */ #else /* defined(HAVE_OPENCL) */
static inline void throw_nogpu() static inline void throw_nogpu()
...@@ -117,4 +90,6 @@ static inline void throw_nogpu() ...@@ -117,4 +90,6 @@ static inline void throw_nogpu()
#endif /* defined(HAVE_OPENCL) */ #endif /* defined(HAVE_OPENCL) */
using namespace std;
#endif /* __OPENCV_PRECOMP_H__ */ #endif /* __OPENCV_PRECOMP_H__ */
...@@ -357,7 +357,7 @@ static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, ...@@ -357,7 +357,7 @@ static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar,
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
if(dst.offset == 0 && dst.cols == dst.wholecols) if(dst.offset == 0 && dst.cols == dst.wholecols)
{ {
clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
} }
else else
{ {
...@@ -464,7 +464,7 @@ static void copyTo(const oclMat &src, oclMat &m ) ...@@ -464,7 +464,7 @@ static void copyTo(const oclMat &src, oclMat &m )
static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
{ {
if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return; return;
...@@ -712,7 +712,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next ...@@ -712,7 +712,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
level, /*block, */patch, winSize, iters); level, /*block, */patch, winSize, iters);
} }
clFinish(prevImg.clCxt->impl->clCmdQueue); clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
if(errMat) if(errMat)
delete err; delete err;
...@@ -851,5 +851,5 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI ...@@ -851,5 +851,5 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
copyTo(uPyr_[idx], u); copyTo(uPyr_[idx], u);
copyTo(vPyr_[idx], v); copyTo(vPyr_[idx], v);
clFinish(prevImg.clCxt->impl->clCmdQueue); clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
} }
...@@ -130,7 +130,7 @@ namespace cv ...@@ -130,7 +130,7 @@ namespace cv
static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{ {
if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F) if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return; return;
...@@ -279,7 +279,7 @@ namespace cv ...@@ -279,7 +279,7 @@ namespace cv
static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
{ {
if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F) if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return; return;
......
...@@ -90,10 +90,10 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC ...@@ -90,10 +90,10 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols)); openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap)); openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
globalThreads, localThreads, 0, NULL, NULL)); globalThreads, localThreads, 0, NULL, NULL));
clFinish(clCxt->impl->clCmdQueue); clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
} }
...@@ -150,11 +150,11 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, ...@@ -150,11 +150,11 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2)); openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL)); openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL, openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL)); globalThreads, localThreads, 0, NULL, NULL));
clFinish(clCxt->impl->clCmdQueue); clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
} }
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
...@@ -188,10 +188,10 @@ static void postfilter_textureness(oclMat &left, int winSize, ...@@ -188,10 +188,10 @@ static void postfilter_textureness(oclMat &left, int winSize,
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize)); openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL)); openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL, openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
globalThreads, localThreads, 0, NULL, NULL)); globalThreads, localThreads, 0, NULL, NULL));
clFinish(clCxt->impl->clCmdQueue); clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment