ocl: file-based ProgramCache refactoring

dd9ff587 · Alexander Alekhin · b00f79ac · dd9ff587 · dd9ff587 · dd9ff587
Commit dd9ff587 authored Sep 25, 2013 by Alexander Alekhin
15 changed files
--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -445,6 +445,8 @@ macro(ocv_glob_module_sources)
    source_group("Src\\Cuda"      FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
  endif()
+  source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
  file(GLOB cl_kernels "src/opencl/*.cl")
  if(HAVE_OPENCL AND cl_kernels)
@@ -457,7 +459,6 @@ macro(ocv_glob_module_sources)
    list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
  endif()
-  source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
  source_group("Include" FILES ${lib_hdrs})
  source_group("Include\\detail" FILES ${lib_hdrs_detail})

--- a/cmake/cl2cpp.cmake
+++ b/cmake/cl2cpp.cmake
@@ -20,6 +20,7 @@ namespace cv
 {
 namespace ocl
 {
 ")
 foreach(cl ${cl_list})
@@ -43,12 +44,22 @@ foreach(cl ${cl_list})
  string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
-  set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n")
+  string(MD5 hash "${lines}")
-  set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n")
+  set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
+  set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n")
 endforeach()
 set(STR_CPP "${STR_CPP}}\n}\n")
 set(STR_HPP "${STR_HPP}}\n}\n")
-file(WRITE ${OUTPUT} "${STR_CPP}")
+file(WRITE "${OUTPUT}" "${STR_CPP}")
-file(WRITE ${OUTPUT_HPP} "${STR_HPP}")
+if(EXISTS "${OUTPUT_HPP}")
+  file(READ "${OUTPUT_HPP}" hpp_lines)
+endif()
+if("${hpp_lines}" STREQUAL "${STR_HPP}")
+  message(STATUS "${OUTPUT_HPP} contains same content")
+else()
+  file(WRITE "${OUTPUT_HPP}" "${STR_HPP}")
+endif()
--- a/modules/nonfree/src/surf.ocl.cpp
+++ b/modules/nonfree/src/surf.ocl.cpp
@@ -55,11 +55,11 @@ namespace cv
 {
    namespace ocl
    {
-        const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
+        static const char noImage2dOption[] = "-D DISABLE_IMAGE2D";
        static bool use_image2d = false;
-        static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
+        static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
            size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
        {
            char optBuf [100] = {0};

--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -199,24 +199,6 @@ namespace cv
        void CV_EXPORTS finish();
-        //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
-        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
-                                                        const char **source, string kernelName,
-                                                        size_t globalThreads[3], size_t localThreads[3],
-                                                        std::vector< std::pair<size_t, const void *> > &args,
-                                                        int channels, int depth, const char *build_options,
-                                                        bool finish = true, bool measureKernelTime = false,
-                                                        bool cleanUp = true);
-        //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
-        CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
-                                                        const char **fileName, const int numFiles, string kernelName,
-                                                        size_t globalThreads[3], size_t localThreads[3],
-                                                        std::vector< std::pair<size_t, const void *> > &args,
-                                                        int channels, int depth, const char *build_options,
-                                                        bool finish = true, bool measureKernelTime = false,
-                                                        bool cleanUp = true);
        //! Enable or disable OpenCL program binary caching onto local disk
        // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
        // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
@@ -233,12 +215,11 @@ namespace cv
            CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only works with MSVC)
            CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only works with MSVC)
            CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary
-            CACHE_UPDATE  = 0x1 << 2  // if the binary cache file with the same name is already on the disk, it will be updated.
        };
        CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
        //! set the path where the binary cache is saved
-        CV_EXPORTS void setBinpath(const char *path);
+        CV_EXPORTS void setBinaryPath(const char *path);
        class CV_EXPORTS oclMatExpr;
        //////////////////////////////// oclMat ////////////////////////////////

--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@ -55,6 +55,13 @@ namespace cv
 namespace ocl
 {
+struct ProgramEntry
+{
+    const char* name;
+    const char* programStr;
+    const char* programHash;
+};
 inline cl_device_id getClDeviceID(const Context *ctx)
 {
    return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
@@ -91,18 +98,18 @@ void CV_EXPORTS openCLFree(void *devPtr);
 cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
 void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
 cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
-                                               const char **source, std::string kernelName);
+        const cv::ocl::ProgramEntry* source, std::string kernelName);
 cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
-                                               const char **source, std::string kernelName, const char *build_options);
+        const cv::ocl::ProgramEntry* source, std::string kernelName, const char *build_options);
 void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
-void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
        int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
-void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
+void CV_EXPORTS openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName,
        size_t globalThreads[3], size_t localThreads[3],
        std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
-void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
-void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
        int depth, const char *build_options);
@@ -111,8 +118,6 @@ cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_que
 cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
-int CV_EXPORTS savetofile(const Context *clcxt,  cl_program &program, const char *fileName);
 enum FLUSH_MODE
 {
    CLFINISH = 0,
@@ -120,11 +125,12 @@ enum FLUSH_MODE
    DISABLE
 };
-void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
-void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
+void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
        size_t localThreads[3],  std::vector< std::pair<size_t, const void *> > &args, int channels,
        int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
 // bind oclMat to OpenCL image textures
 // note:
 //   1. there is no memory management. The user needs to explicitly release the resource
@@ -183,6 +189,24 @@ inline size_t roundUp(size_t sz, size_t n)
    return result;
 }
+//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
+        const cv::ocl::ProgramEntry* source, string kernelName,
+        size_t globalThreads[3], size_t localThreads[3],
+        std::vector< std::pair<size_t, const void *> > &args,
+        int channels, int depth, const char *build_options,
+        bool finish = true, bool measureKernelTime = false,
+        bool cleanUp = true);
+//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
+CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
+        const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName,
+        size_t globalThreads[3], size_t localThreads[3],
+        std::vector< std::pair<size_t, const void *> > &args,
+        int channels, int depth, const char *build_options,
+        bool finish = true, bool measureKernelTime = false,
+        bool cleanUp = true);
 }//namespace ocl
 }//namespace cv

--- a/modules/ocl/perf/main.cpp
+++ b/modules/ocl/perf/main.cpp
@@ -91,7 +91,6 @@ int main(int argc, char ** argv)
    }
    cv::ocl::setDevice(devicesInfo[device]);
-    cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
    cout << "Device type:" << type << endl
            << "Platform name:" << devicesInfo[device]->platform->platformName << endl

--- a/modules/ocl/src/brute_force_matcher.cpp
+++ b/modules/ocl/src/brute_force_matcher.cpp
@@ -45,10 +45,14 @@
 //M*/
 #include "precomp.hpp"
+#include <functional>
+#include <iterator>
+#include <vector>
 #include "opencl_kernels.hpp"
 using namespace cv;
 using namespace cv::ocl;
+using namespace std;
 static const int OPT_SIZE = 100;

--- a/modules/ocl/src/cl_context.cpp
+++ b/modules/ocl/src/cl_context.cpp
@@ -48,15 +48,16 @@
 #include "precomp.hpp"
 #include <iomanip>
 #include <fstream>
-#include "binarycaching.hpp"
+#include "cl_programcache.hpp"
+#if defined _MSC_VER && _MSC_VER >= 1200
+#  pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610)
+#endif
 #undef __CL_ENABLE_EXCEPTIONS
 #include <CL/cl.hpp>
-namespace cv { namespace ocl {
+namespace cv {
+namespace ocl {
-extern void fft_teardown();
-extern void clBlasTeardown();
 struct PlatformInfoImpl
 {
@@ -174,7 +175,7 @@ static int initializeOpenCLDevices()
                deviceInfo.info.platform = &platformInfo.info;
                platformInfo.deviceIDs[j] = deviceInfo.info._id;
-                cl_device_type type = -1;
+                cl_device_type type = cl_device_type(-1);
                openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type));
                deviceInfo.info.deviceType = DeviceType(type);
@@ -182,7 +183,7 @@ static int initializeOpenCLDevices()
                openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion));
                openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName));
                openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor));
-                cl_uint vendorID = -1;
+                cl_uint vendorID = 0;
                openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID));
                deviceInfo.info.deviceVendorId = vendorID;
                openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion));
@@ -347,9 +348,6 @@ static bool __termination = false;
 ContextImpl::~ContextImpl()
 {
-    fft_teardown();
-    clBlasTeardown();
 #ifdef WIN32
    // if process is on termination stage (ExitProcess was called and other threads were terminated)
    // then disable command queue release because it may cause program hang
@@ -370,8 +368,14 @@ ContextImpl::~ContextImpl()
    clContext = NULL;
 }
+void fft_teardown();
+void clBlasTeardown();
 void ContextImpl::cleanupContext(void)
 {
+    fft_teardown();
+    clBlasTeardown();
    cv::AutoLock lock(currentContextMutex);
    if (currentContext)
        delete currentContext;
@@ -382,6 +386,15 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
 {
    CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
+    {
+        cv::AutoLock lock(currentContextMutex);
+        if (currentContext)
+        {
+            if (currentContext->deviceInfo._id == deviceInfo->_id)
+                return;
+        }
+    }
    DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
    CV_Assert(deviceInfo == &infoImpl.info);
@@ -466,6 +479,30 @@ int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, co
        }
    }
+    if (currentContext == NULL)
+    {
+        // select default device
+        const DeviceInfo* selectedDevice = NULL;
+        for (size_t i = 0; i < devices.size(); i++)
+        {
+            const DeviceInfo* dev = devices[i];
+            if (dev->deviceType == CL_DEVICE_TYPE_GPU)
+            {
+                selectedDevice = dev;
+                break;
+            }
+            else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL))
+            {
+                selectedDevice = dev;
+            }
+        }
+        if (selectedDevice)
+        {
+            setDevice(selectedDevice);
+        }
+    }
    return (int)devices.size();
 }

--- a/modules/ocl/src/cl_operations.cpp
+++ b/modules/ocl/src/cl_operations.cpp
@@ -48,10 +48,7 @@
 #include "precomp.hpp"
 #include <iomanip>
 #include <fstream>
-#include "binarycaching.hpp"
+#include "cl_programcache.hpp"
-#undef __CL_ENABLE_EXCEPTIONS
-#include <CL/cl.hpp>
 //#define PRINT_KERNEL_RUN_TIME
 #define RUN_TIMES 100
@@ -60,7 +57,8 @@
 #endif
 //#define AMD_DOUBLE_DIFFER
-namespace cv { namespace ocl {
+namespace cv {
+namespace ocl {
 DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
 DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
@@ -179,21 +177,22 @@ void openCLFree(void *devPtr)
    openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
 }
-cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName)
+cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName)
 {
    return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
 }
-cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName,
+cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                                    const char *build_options)
 {
    cl_kernel kernel;
    cl_int status = 0;
    CV_Assert(ProgramCache::getProgramCache() != NULL);
-    cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options);
+    cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options);
    CV_Assert(program != NULL);
    kernel = clCreateKernel(program, kernelName.c_str(), &status);
    openCLVerifyCall(status);
+    openCLVerifyCall(clReleaseProgram(program));
    return kernel;
 }
@@ -213,7 +212,7 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea
 static double total_execute_time = 0;
 static double total_kernel_time = 0;
 #endif
-void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3],
+void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
                          size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                          int depth, const char *build_options)
 {
@@ -275,14 +274,14 @@ void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName,
    openCLSafeCall(clReleaseKernel(kernel));
 }
-void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
+void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth)
 {
    openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
                        channels, depth, NULL);
 }
-void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
+void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
@@ -316,7 +315,7 @@ void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
 #endif
 }
-double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName,
+double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
                         size_t globalThreads[3], size_t localThreads[3],
                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
                         bool finish, bool measureKernelTime, bool cleanUp)
@@ -391,29 +390,6 @@ double openCLExecuteKernelInterop(Context *ctx , const char **source, string ker
    return kernelTime;
 }
-//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName,
-//                         size_t globalThreads[3], size_t localThreads[3],
-//                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
-//                         bool finish, bool measureKernelTime, bool cleanUp)
-//
-//{
-//    std::vector<std::string> fsource;
-//    for (int i = 0 ; i < numFiles ; i++)
-//    {
-//        std::string str;
-//        if (convertToString(fileName[i], str) >= 0)
-//            fsource.push_back(str);
-//    }
-//    const char **source = new const char *[numFiles];
-//    for (int i = 0 ; i < numFiles ; i++)
-//        source[i] = fsource[i].c_str();
-//    double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads,
-//                         args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
-//    fsource.clear();
-//    delete []source;
-//    return kernelTime;
-//}
 cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
                     const size_t size)
 {
@@ -427,7 +403,6 @@ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const v
                                        value, 0, 0, 0));
    return con_struct;
 }
 }//namespace ocl

--- a/modules/ocl/src/cl_programcache.cpp
+++ b/modules/ocl/src/cl_programcache.cpp
--- a/modules/ocl/src/binarycaching.hpp
+++ b/modules/ocl/src/binarycaching.hpp
@@ -44,13 +44,8 @@
 #include "precomp.hpp"
-using namespace cv;
+namespace cv {
-using namespace cv::ocl;
+namespace ocl {
-using namespace std;
-using std::cout;
-using std::endl;
-namespace cv { namespace ocl {
 class ProgramCache
 {
@@ -61,16 +56,18 @@ protected:
 public:
    static ProgramCache *getProgramCache();
-	cl_program getProgram(const Context *ctx, const char **source, string kernelName,
+    cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
                          const char *build_options);
    void releaseProgram();
 protected:
    //lookup the binary given the file name
-	cl_program progLookup(string srcsign);
+    // (with acquired mutexCache)
+    cl_program progLookup(const string& srcsign);
    //add program to the cache
-	void addProgram(string srcsign, cl_program program);
+    // (with acquired mutexCache)
+    void addProgram(const string& srcsign, cl_program program);
    map <string, cl_program> codeCache;
    unsigned int cacheSize;
@@ -79,6 +76,10 @@ protected:
    //We may need more delicate algorithms when necessary later.
    //Right now, let's just leave it alone.
    static const unsigned MAX_PROG_CACHE_SIZE = 1024;
+    // acquire both mutexes in this order: 1) mutexFiles 2) mutexCache
+    static cv::Mutex mutexFiles;
+    static cv::Mutex mutexCache;
 };
 }//namespace ocl

--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -1108,7 +1108,7 @@ namespace cv
            CV_Assert(Dx.offset == 0 && Dy.offset == 0);
        }
-        static void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy,
+        static void corner_ocl(const cv::ocl::ProgramEntry* source, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy,
                        oclMat &dst, int border_type)
        {
            char borderType[30];
@@ -1160,7 +1160,7 @@ namespace cv
            args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
            args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
            args.push_back( make_pair( sizeof(cl_float) , (void *)&k));
-            openCLExecuteKernel(dst.clCxt, &src_str, kernelName, gt, lt, args, -1, -1, build_options);
+            openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, build_options);
        }
        void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
@@ -1181,7 +1181,7 @@ namespace cv
            CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
            extractCovData(src, dx, dy, blockSize, ksize, borderType);
            dst.create(src.size(), CV_32F);
-            corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
+            corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
        }
        void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
@@ -1200,7 +1200,7 @@ namespace cv
            CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
            extractCovData(src, dx, dy, blockSize, ksize, borderType);
            dst.create(src.size(), CV_32F);
-            corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
+            corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
        }
        /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
        static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
@@ -1749,7 +1749,7 @@ namespace cv
 }
 //////////////////////////////////convolve////////////////////////////////////////////////////
-static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString)
+static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
 {
    CV_Assert(src.depth() == CV_32FC1);
    CV_Assert(temp1.depth() == CV_32F);
@@ -1784,7 +1784,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
    args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows ));
    args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols ));
-    openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
 }
 void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
 {

--- a/modules/ocl/src/mcwutil.cpp
+++ b/modules/ocl/src/mcwutil.cpp
@@ -72,7 +72,7 @@ namespace cv
    namespace ocl
    {
        // provide additional methods for the user to interact with the command queue after a task is fired
-        static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
+        static void openCLExecuteKernel_2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
                                   size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                                   int depth, char *build_options, FLUSH_MODE finish_mode)
        {
@@ -118,14 +118,14 @@ namespace cv
            openCLSafeCall(clReleaseKernel(kernel));
        }
-        void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
+        void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName,
                                  size_t globalThreads[3], size_t localThreads[3],
                                  vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
        {
            openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
                                 channels, depth, NULL, finish_mode);
        }
-        void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
+        void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName,
                                  size_t globalThreads[3], size_t localThreads[3],
                                  vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
@@ -249,7 +249,7 @@ namespace cv
        bool support_image2d(Context *clCxt)
        {
-            static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
+            const cv::ocl::ProgramEntry _kernel = {NULL, "__kernel void test_func(image2d_t img) {}", NULL};
            static bool _isTested = false;
            static bool _support = false;
            if(_isTested)
@@ -258,7 +258,7 @@ namespace cv
            }
            try
            {
-                cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
+                cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func");
                cv::ocl::finish();
                _support = true;
            }

--- a/modules/ocl/src/moments.cpp
+++ b/modules/ocl/src/moments.cpp
@@ -229,7 +229,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
            CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
    }
-    if( !moments )
+    if( !mom )
        CV_Error( CV_StsNullPtr, "" );
    memset( mom, 0, sizeof(*mom));

--- a/modules/ocl/test/main.cpp
+++ b/modules/ocl/test/main.cpp
@@ -118,7 +118,6 @@ int main(int argc, char **argv)
    }
    cv::ocl::setDevice(devicesInfo[device]);
-    setBinaryDiskCache(CACHE_UPDATE);
    cout << "Device type: " << type << endl
            << "Platform name: " << devicesInfo[device]->platform->platformName << endl