Refactor OpenCL initialization and allow to use ocl module witout explicit setup

7b8ad4cb · Andrey Kamaev · dd678121 · 7b8ad4cb · 7b8ad4cb · 7b8ad4cb
Commit 7b8ad4cb authored Mar 17, 2013 by Andrey Kamaev
17 changed files
--- a/modules/nonfree/test/test_main.cpp
+++ b/modules/nonfree/test/test_main.cpp
@@ -7,7 +7,7 @@ using namespace cv::gpu;
 using namespace cvtest;
 using namespace testing;

-int main(int argc, char** argv)
+int main(int argc, char **argv)
 {
    try
    {
@@ -50,8 +50,8 @@ int main(int argc, char** argv)
        TS::ptr()->init("cv");
        InitGoogleTest(&argc, argv);

-        return RUN_ALL_TESTS();
-    }
+    return RUN_ALL_TESTS();
+}
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;

--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -140,15 +140,23 @@ namespace cv
        protected:
            Context();
            friend class auto_ptr<Context>;
-            static auto_ptr<Context> clCxt;

+        private:
+            static auto_ptr<Context> clCxt;
+            static int val;
        public:
            ~Context();
-            static int val;
-            static Context *getContext();
+            void release();
+            Info::Impl* impl;
+
+            static Context* getContext();
            static void setContext(Info &oclinfo);
-            struct Impl;
-            Impl *impl;
+
+            enum {CL_DOUBLE, CL_UNIFIED_MEM};
+            bool supportsFeature(int ftype);
+            size_t computeUnits();
+            void* oclContext();
+            void* oclCommandQueue();
        };

        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.

--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
--- a/modules/ocl/src/canny.cpp
+++ b/modules/ocl/src/canny.cpp
@@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
    {
        openCLFree(counter);
    }
-    counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
+    counter = clCreateBuffer( (cl_context)getoclContext(), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
    openCLSafeCall(err);
 }

@@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
 void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
 {
    unsigned int count;
-    openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
+    openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
    Context *clCxt = map.clCxt;
    string kernelName = "edgesHysteresisGlobal";
    vector< pair<size_t, const void *> > args;
@@ -364,7 +364,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
    int count_i[1] = {0};
    while(count > 0)
    {
-        openCLSafeCall(clEnqueueWriteBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));

        args.clear();
        size_t globalThreads[3] = {std::min(count, 65535u) * 128, DIVUP(count, 65535), 1};
@@ -379,7 +379,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
        args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

        openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
-        openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueReadBuffer((cl_command_queue)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
        std::swap(st1, st2);
    }
 #undef DIVUP

--- a/modules/ocl/src/fft.cpp
+++ b/modules/ocl/src/fft.cpp
@@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
    clStridesIn[2]  = is_row_dft ? clStridesIn[1]  : dft_size.width * clStridesIn[1];
    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];

-    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn ) );
+    openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) );

    openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
    openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
@@ -220,7 +220,8 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
    openCLSafeCall( clAmdFftSetPlanScale  ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );

    //ready to bake
-    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) );
+    cl_command_queue clq = (cl_command_queue)getoclCommandQueue();
+    openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) );
 }
 cv::ocl::FftPlan::~FftPlan()
 {
@@ -338,16 +339,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
    if (buffersize)
    {
        cl_int medstatus;
-        clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+        clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
        openCLSafeCall( medstatus );
    }
+    cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue();
    openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
        is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
        1,
-        &src.clCxt->impl->clCmdQueue,
+        &clq,
        0, NULL, NULL,
        (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
-    openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
+    openCLSafeCall( clFinish(clq) );
    if(clMedBuffer)
    {
        openCLFree(clMedBuffer);

--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
@@ -1478,7 +1478,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,

 void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
 {
-    if (src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
+    if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
    {
        CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
        return;

--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -87,7 +87,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
    int offb    = src2.offset;
    int offc    = dst.offset;

-
+    cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue();
    switch(src1.type())
    {
    case CV_32FC1:
@@ -97,11 +97,12 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        offa /= sizeof(float);
        offb /= sizeof(float);
        offc /= sizeof(float);
+
        openCLSafeCall
        (
            clAmdBlasSgemmEx(order, transA, transB, M, N, K,
                             alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                             beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
        );
        break;
    case CV_64FC1:
@@ -115,7 +116,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        (
            clAmdBlasDgemmEx(order, transA, transB, M, N, K,
                             alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                             beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
        );
        break;
    case CV_32FC2:
@@ -132,7 +133,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        (
            clAmdBlasCgemmEx(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
        );
    }
    break;
@@ -150,7 +151,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
        (
            clAmdBlasZgemmEx(order, transA, transB, M, N, K,
                             alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
-                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+                             beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL)
        );
    }
    break;

--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -971,7 +971,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS

        size_t blocksize = 8;
        size_t localThreads[3] = { blocksize, blocksize , 1 };
-        size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
+        size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->computeUnits()) *localThreads[0],
                                    localThreads[1], 1
                                  };
        int outputsz = 256 * globalThreads[0] / localThreads[0];
@@ -1047,21 +1047,21 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS

        stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));

        //classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
        //status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);

        nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
                                            nodenum * sizeof(GpuHidHaarTreeNode),
                                            node, 0, NULL, NULL));
        candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY, 4 * sizeof(int) * outputsz);
        //openCLVerifyCall(status);
        scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
        //flag  = 1;
        //}

@@ -1186,7 +1186,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
        int grp_per_CU = 12;
        size_t blocksize = 8;
        size_t localThreads[3] = { blocksize, blocksize , 1 };
-        size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->impl->maxComputeUnits *localThreads[0],
+        size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
                                    localThreads[1], 1
                                  };
        int outputsz = 256 * globalThreads[0] / localThreads[0];
@@ -1195,7 +1195,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
        nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
                                        nodenum * sizeof(GpuHidHaarTreeNode));
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), nodebuffer, 1, 0,
                                            nodenum * sizeof(GpuHidHaarTreeNode),
                                            node, 0, NULL, NULL));
        cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
@@ -1252,16 +1252,16 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
        int splitnode = stage[0].count + stage[1].count + stage[2].count;
        stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
        candidatebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, 4 * sizeof(int) * outputsz);
        //openCLVerifyCall(status);
        scaleinfobuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
        //openCLVerifyCall(status);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL));
        pbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_int4) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), pbuffer, 1, 0, sizeof(cl_int4)*loopcount, p, 0, NULL, NULL));
        correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount);
-        openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
+        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL));
        //int argcount = 0;

        vector<pair<size_t, const void *> > args;
@@ -1286,7 +1286,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
        openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);

        //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL));
-        candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);
+        candidate = (int *)clEnqueueMapBuffer((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status);

        for(int i = 0; i < outputsz; i++)
        {
@@ -1297,7 +1297,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
        free(scaleinfo);
        free(p);
        free(correction);
-        clEnqueueUnmapMemObject(gsum.clCxt->impl->clCmdQueue, candidatebuffer, candidate, 0, 0, 0);
+        clEnqueueUnmapMemObject((cl_command_queue)gsum.clCxt->oclCommandQueue(), candidatebuffer, candidate, 0, 0, 0);
        openCLSafeCall(clReleaseMemObject(stagebuffer));
        openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
        openCLSafeCall(clReleaseMemObject(nodebuffer));

--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -290,8 +290,8 @@ namespace cv
                args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
                args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
                float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
- 
-               if(src.clCxt -> impl -> double_support != 0)
+
+               if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                {
                    args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
                }
@@ -319,7 +319,7 @@ namespace cv
                args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
                args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
                args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
-                if(src.clCxt -> impl -> double_support != 0)
+                if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                {
                    args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
                }
@@ -383,7 +383,7 @@ namespace cv
                args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
-                if(src.clCxt -> impl -> double_support != 0)
+                if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                {
                    args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
                    args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
@@ -824,12 +824,12 @@ namespace cv
                string kernelName = "warpAffine" + s[interpolation];


-                if(src.clCxt -> impl -> double_support != 0)
+                if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                {
                    cl_int st;
-                    coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
+                    coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
+                    openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
                }
                else
                {
@@ -839,8 +839,8 @@ namespace cv
                        {
                            float_coeffs[m][n] = coeffs[m][n];
                        }
-                    coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
-                    openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
+                        coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
+                        openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));

                }
                //TODO: improve this kernel
@@ -894,12 +894,12 @@ namespace cv
                string s[3] = {"NN", "Linear", "Cubic"};
                string kernelName = "warpPerspective" + s[interpolation];

-                if(src.clCxt -> impl -> double_support != 0)
+                if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
                {
                    cl_int st;
-                    coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
+                    coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
+                    openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
                }
                else
                {
@@ -908,9 +908,9 @@ namespace cv
                        for(int n = 0; n < 3; n++)
                            float_coeffs[m][n] = coeffs[m][n];

-                    coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
+                    coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
                    openCLVerifyCall(st);
-                    openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
+                    openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
                }
                //TODO: improve this kernel
                size_t blkSizeX = 16, blkSizeY = 16;
@@ -1018,7 +1018,7 @@ namespace cv
        void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
        {
            CV_Assert(src.type() == CV_8UC1);
-            if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+            if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
            {
                CV_Error(CV_GpuNotSupported, "select device don't support double");
            }
@@ -1192,7 +1192,7 @@ namespace cv
        void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
                          double k, int borderType)
        {
-            if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+            if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
            {
                CV_Error(CV_GpuNotSupported, "select device don't support double");
            }
@@ -1206,7 +1206,7 @@ namespace cv

        void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
        {
-            if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+            if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
            {
                CV_Error(CV_GpuNotSupported, "select device don't support double");
            }
@@ -1260,7 +1260,7 @@ namespace cv
            if( src.depth() != CV_8U || src.oclchannels() != 4 )
                CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );

-            //            if(src.clCxt->impl->double_support == 0)
+            //            if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
            //            {
            //                CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
            //            }
@@ -1328,7 +1328,7 @@ namespace cv
            if( src.depth() != CV_8U || src.oclchannels() != 4 )
                CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );

-            //            if(src.clCxt->impl->double_support == 0)
+            //            if(!src.clCxt->supportsFeature(Context::CL_DOUBLE))
            //            {
            //                CV_Error( CV_GpuNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
            //            }

--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -190,7 +190,7 @@ void cv::ocl::oclMat::upload(const Mat &m)
        int pitch = wholeSize.width * 3 * m.elemSize1();
        int tail_padding = m.elemSize1() * 3072;
        int err;
-        cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+        cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
                                     (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
        openCLVerifyCall(err);

@@ -242,7 +242,7 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
        int pitch = wholecols * 3 * m.elemSize1();
        int tail_padding = m.elemSize1() * 3072;
        int err;
-        cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+        cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
                                     (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
        openCLVerifyCall(err);

@@ -595,7 +595,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri
 #ifdef CL_VERSION_1_2
    if(dst.offset == 0 && dst.cols == dst.wholecols)
    {
-        clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
    {

--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@ -94,15 +94,15 @@ namespace cv
            for(size_t i = 0; i < args.size(); i ++)
                openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));

-            openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
+            openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
                                                  localThreads, 0, NULL, NULL));

            switch(finish_mode)
            {
            case CLFINISH:
-                clFinish(clCxt->impl->clCmdQueue);
+                clFinish((cl_command_queue)clCxt->oclCommandQueue());
            case CLFLUSH:
-                clFlush(clCxt->impl->clCmdQueue);
+                clFlush((cl_command_queue)clCxt->oclCommandQueue());
                break;
            case DISABLE:
            default:
@@ -126,7 +126,7 @@ namespace cv
            openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
                                  build_options, finish_mode);
        }
- 
+
       cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
@@ -177,7 +177,7 @@ namespace cv
            desc.buffer           = NULL;
            desc.num_mip_levels   = 0;
            desc.num_samples      = 0;
-            texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+            texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
 #else
            texture = clCreateImage2D(
                mat.clCxt->impl->clContext,
@@ -195,10 +195,10 @@ namespace cv
            cl_mem devData;
            if (mat.cols * mat.elemSize() != mat.step)
            {
-                devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
+                devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, NULL);
                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
-                clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, 
+                clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
            }
            else
@@ -206,10 +206,10 @@ namespace cv
                devData = (cl_mem)mat.data;
            }

-            clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
+            clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
            if ((mat.cols * mat.elemSize() != mat.step))
            {
-                clFinish(mat.clCxt->impl->clCmdQueue);
+                clFinish((cl_command_queue)mat.clCxt->oclCommandQueue());
                clReleaseMemObject(devData);
            }

@@ -223,7 +223,7 @@ namespace cv
        }

        bool support_image2d(Context *clCxt)
-        {return false;
+        {
            static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
            static bool _isTested = false;
            static bool _support = false;
@@ -234,7 +234,7 @@ namespace cv
            try
            {
                cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
-                _support = true;
+                //_support = true;
            }
            catch (const cv::Exception& e)
            {

--- a/modules/ocl/src/moments.cpp
+++ b/modules/ocl/src/moments.cpp
@@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )

        bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;

-        if (!cv::ocl::Context::getContext()->impl->double_support && is_float)
+        if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float)
        {
            CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
        }
@@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )

        cv::Mat dst(dst_a);
        a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
-        if (!cv::ocl::Context::getContext()->impl->double_support)
+        if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
        {
            for (int i = 0; i < contour->total; ++i)
            {

--- a/modules/ocl/src/precomp.hpp
+++ b/modules/ocl/src/precomp.hpp
@@ -81,33 +81,6 @@
 #include "opencv2/ocl/private/util.hpp"
 #include "safe_call.hpp"

-using namespace std;
-
-namespace cv
-{
-    namespace ocl
-    {
-        struct Context::Impl
-        {
-            //Information of the OpenCL context
-            cl_context clContext;
-            cl_command_queue clCmdQueue;
-            cl_device_id devices;
-            string devName;
-            cl_uint maxDimensions;
-            size_t maxWorkGroupSize;
-            size_t maxWorkItemSizes[4];
-            cl_uint maxComputeUnits;
-            int double_support;
-            //extra options to recognize vendor specific fp64 extensions
-            char extra_options[512];
-            string Binpath;
-            int unified_memory; //1 means integrated GPU, otherwise this value is 0
-        };
-    }
-}
-
-
 #else /* defined(HAVE_OPENCL) */

 static inline void throw_nogpu()
@@ -117,4 +90,6 @@ static inline void throw_nogpu()

 #endif /* defined(HAVE_OPENCL) */

+using namespace std;
+
 #endif /* __OPENCV_PRECOMP_H__ */
--- a/modules/ocl/src/pyrlk.cpp
+++ b/modules/ocl/src/pyrlk.cpp
@@ -357,7 +357,7 @@ static void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar,
 #ifdef CL_VERSION_1_2
    if(dst.offset == 0 && dst.cols == dst.wholecols)
    {
-        clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+        clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
    }
    else
    {
@@ -464,7 +464,7 @@ static void copyTo(const oclMat &src, oclMat &m )

 static void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
 {
-    if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
+    if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
    {
        CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
        return;
@@ -712,7 +712,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
                     level, /*block, */patch, winSize, iters);
    }

-    clFinish(prevImg.clCxt->impl->clCmdQueue);
+    clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());

    if(errMat)
        delete err;
@@ -851,5 +851,5 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI
    copyTo(uPyr_[idx], u);
    copyTo(vPyr_[idx], v);

-    clFinish(prevImg.clCxt->impl->clCmdQueue);
+    clFinish((cl_command_queue)prevImg.clCxt->oclCommandQueue());
 }
--- a/modules/ocl/src/split_merge.cpp
+++ b/modules/ocl/src/split_merge.cpp
@@ -130,7 +130,7 @@ namespace cv

            static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
            {
-                if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F)
+                if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
                {
                    CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
                    return;
@@ -279,7 +279,7 @@ namespace cv
            static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
            {

-                if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F)
+                if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F)
                {
                    CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
                    return;

--- a/modules/ocl/src/stereobm.cpp
+++ b/modules/ocl/src/stereobm.cpp
@@ -90,10 +90,10 @@ static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterC
    openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols));
    openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap));

-    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL,
+    openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

-    clFinish(clCxt->impl->clCmdQueue);
+    clFinish((cl_command_queue)clCxt->oclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));

 }
@@ -150,11 +150,11 @@ static void stereo_bm(const oclMat &left, const oclMat &right,  oclMat &disp,
    openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2));
    openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));

-    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+    openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));


-    clFinish(clCxt->impl->clCmdQueue);
+    clFinish((cl_command_queue)clCxt->oclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));
 }
 ////////////////////////////////////////////////////////////////////////////
@@ -188,10 +188,10 @@ static void postfilter_textureness(oclMat &left, int winSize,
    openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize));
    openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold));
    openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL));
-    openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 2, NULL,
+    openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL,
                                          globalThreads, localThreads, 0, NULL, NULL));

-    clFinish(clCxt->impl->clCmdQueue);
+    clFinish((cl_command_queue)clCxt->oclCommandQueue());
    openCLSafeCall(clReleaseKernel(kernel));
 }
 //////////////////////////////////////////////////////////////////////////////