Commit dd9ff587 authored by Alexander Alekhin's avatar Alexander Alekhin

ocl: file-based ProgramCache refactoring

parent b00f79ac
...@@ -445,6 +445,8 @@ macro(ocv_glob_module_sources) ...@@ -445,6 +445,8 @@ macro(ocv_glob_module_sources)
source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs}) source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
endif() endif()
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
file(GLOB cl_kernels "src/opencl/*.cl") file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_OPENCL AND cl_kernels) if(HAVE_OPENCL AND cl_kernels)
...@@ -457,7 +459,6 @@ macro(ocv_glob_module_sources) ...@@ -457,7 +459,6 @@ macro(ocv_glob_module_sources)
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
endif() endif()
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
source_group("Include" FILES ${lib_hdrs}) source_group("Include" FILES ${lib_hdrs})
source_group("Include\\detail" FILES ${lib_hdrs_detail}) source_group("Include\\detail" FILES ${lib_hdrs_detail})
......
...@@ -20,6 +20,7 @@ namespace cv ...@@ -20,6 +20,7 @@ namespace cv
{ {
namespace ocl namespace ocl
{ {
") ")
foreach(cl ${cl_list}) foreach(cl ${cl_list})
...@@ -43,12 +44,22 @@ foreach(cl ${cl_list}) ...@@ -43,12 +44,22 @@ foreach(cl ${cl_list})
string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n") string(MD5 hash "${lines}")
set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n")
set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n")
endforeach() endforeach()
set(STR_CPP "${STR_CPP}}\n}\n") set(STR_CPP "${STR_CPP}}\n}\n")
set(STR_HPP "${STR_HPP}}\n}\n") set(STR_HPP "${STR_HPP}}\n}\n")
file(WRITE ${OUTPUT} "${STR_CPP}") file(WRITE "${OUTPUT}" "${STR_CPP}")
file(WRITE ${OUTPUT_HPP} "${STR_HPP}")
if(EXISTS "${OUTPUT_HPP}")
file(READ "${OUTPUT_HPP}" hpp_lines)
endif()
if("${hpp_lines}" STREQUAL "${STR_HPP}")
message(STATUS "${OUTPUT_HPP} contains same content")
else()
file(WRITE "${OUTPUT_HPP}" "${STR_HPP}")
endif()
...@@ -55,11 +55,11 @@ namespace cv ...@@ -55,11 +55,11 @@ namespace cv
{ {
namespace ocl namespace ocl
{ {
const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; static const char noImage2dOption[] = "-D DISABLE_IMAGE2D";
static bool use_image2d = false; static bool use_image2d = false;
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth) size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{ {
char optBuf [100] = {0}; char optBuf [100] = {0};
......
...@@ -199,24 +199,6 @@ namespace cv ...@@ -199,24 +199,6 @@ namespace cv
void CV_EXPORTS finish(); void CV_EXPORTS finish();
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **source, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **fileName, const int numFiles, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
//! Enable or disable OpenCL program binary caching onto local disk //! Enable or disable OpenCL program binary caching onto local disk
// After a program (*.cl files in opencl/ folder) is built at runtime, we allow the // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
// compiled OpenCL program to be cached to the path automatically as "path/*.clb" // compiled OpenCL program to be cached to the path automatically as "path/*.clb"
...@@ -233,12 +215,11 @@ namespace cv ...@@ -233,12 +215,11 @@ namespace cv
CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC)
CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC)
CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary
CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated.
}; };
CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");
//! set where binary cache to be saved to //! set where binary cache to be saved to
CV_EXPORTS void setBinpath(const char *path); CV_EXPORTS void setBinaryPath(const char *path);
class CV_EXPORTS oclMatExpr; class CV_EXPORTS oclMatExpr;
//////////////////////////////// oclMat //////////////////////////////// //////////////////////////////// oclMat ////////////////////////////////
......
...@@ -55,6 +55,13 @@ namespace cv ...@@ -55,6 +55,13 @@ namespace cv
namespace ocl namespace ocl
{ {
struct ProgramEntry
{
const char* name;
const char* programStr;
const char* programHash;
};
inline cl_device_id getClDeviceID(const Context *ctx) inline cl_device_id getClDeviceID(const Context *ctx)
{ {
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
...@@ -91,18 +98,18 @@ void CV_EXPORTS openCLFree(void *devPtr); ...@@ -91,18 +98,18 @@ void CV_EXPORTS openCLFree(void *devPtr);
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName); const cv::ocl::ProgramEntry* source, std::string kernelName);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName, const char *build_options); const cv::ocl::ProgramEntry* source, std::string kernelName, const char *build_options);
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args, void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, void CV_EXPORTS openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options); std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth); size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options); int depth, const char *build_options);
...@@ -111,8 +118,6 @@ cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_que ...@@ -111,8 +118,6 @@ cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_que
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
enum FLUSH_MODE enum FLUSH_MODE
{ {
CLFINISH = 0, CLFINISH = 0,
...@@ -120,11 +125,12 @@ enum FLUSH_MODE ...@@ -120,11 +125,12 @@ enum FLUSH_MODE
DISABLE DISABLE
}; };
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures // bind oclMat to OpenCL image textures
// note: // note:
// 1. there is no memory management. User need to explicitly release the resource // 1. there is no memory management. User need to explicitly release the resource
...@@ -183,6 +189,24 @@ inline size_t roundUp(size_t sz, size_t n) ...@@ -183,6 +189,24 @@ inline size_t roundUp(size_t sz, size_t n)
return result; return result;
} }
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args,
int channels, int depth, const char *build_options,
bool finish = true, bool measureKernelTime = false,
bool cleanUp = true);
}//namespace ocl }//namespace ocl
}//namespace cv }//namespace cv
......
...@@ -91,7 +91,6 @@ int main(int argc, char ** argv) ...@@ -91,7 +91,6 @@ int main(int argc, char ** argv)
} }
cv::ocl::setDevice(devicesInfo[device]); cv::ocl::setDevice(devicesInfo[device]);
cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
cout << "Device type:" << type << endl cout << "Device type:" << type << endl
<< "Platform name:" << devicesInfo[device]->platform->platformName << endl << "Platform name:" << devicesInfo[device]->platform->platformName << endl
......
...@@ -45,10 +45,14 @@ ...@@ -45,10 +45,14 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include <functional>
#include <iterator>
#include <vector>
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
using namespace std;
static const int OPT_SIZE = 100; static const int OPT_SIZE = 100;
......
...@@ -48,15 +48,16 @@ ...@@ -48,15 +48,16 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#include "binarycaching.hpp" #include "cl_programcache.hpp"
#if defined _MSC_VER && _MSC_VER >= 1200
# pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610)
#endif
#undef __CL_ENABLE_EXCEPTIONS #undef __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp> #include <CL/cl.hpp>
namespace cv { namespace ocl { namespace cv {
namespace ocl {
extern void fft_teardown();
extern void clBlasTeardown();
struct PlatformInfoImpl struct PlatformInfoImpl
{ {
...@@ -174,7 +175,7 @@ static int initializeOpenCLDevices() ...@@ -174,7 +175,7 @@ static int initializeOpenCLDevices()
deviceInfo.info.platform = &platformInfo.info; deviceInfo.info.platform = &platformInfo.info;
platformInfo.deviceIDs[j] = deviceInfo.info._id; platformInfo.deviceIDs[j] = deviceInfo.info._id;
cl_device_type type = -1; cl_device_type type = cl_device_type(-1);
openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type));
deviceInfo.info.deviceType = DeviceType(type); deviceInfo.info.deviceType = DeviceType(type);
...@@ -182,7 +183,7 @@ static int initializeOpenCLDevices() ...@@ -182,7 +183,7 @@ static int initializeOpenCLDevices()
openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion));
openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName));
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor));
cl_uint vendorID = -1; cl_uint vendorID = 0;
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID));
deviceInfo.info.deviceVendorId = vendorID; deviceInfo.info.deviceVendorId = vendorID;
openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion));
...@@ -347,9 +348,6 @@ static bool __termination = false; ...@@ -347,9 +348,6 @@ static bool __termination = false;
ContextImpl::~ContextImpl() ContextImpl::~ContextImpl()
{ {
fft_teardown();
clBlasTeardown();
#ifdef WIN32 #ifdef WIN32
// if process is on termination stage (ExitProcess was called and other threads were terminated) // if process is on termination stage (ExitProcess was called and other threads were terminated)
// then disable command queue release because it may cause program hang // then disable command queue release because it may cause program hang
...@@ -370,8 +368,14 @@ ContextImpl::~ContextImpl() ...@@ -370,8 +368,14 @@ ContextImpl::~ContextImpl()
clContext = NULL; clContext = NULL;
} }
void fft_teardown();
void clBlasTeardown();
void ContextImpl::cleanupContext(void) void ContextImpl::cleanupContext(void)
{ {
fft_teardown();
clBlasTeardown();
cv::AutoLock lock(currentContextMutex); cv::AutoLock lock(currentContextMutex);
if (currentContext) if (currentContext)
delete currentContext; delete currentContext;
...@@ -382,6 +386,15 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) ...@@ -382,6 +386,15 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo)
{ {
CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
{
cv::AutoLock lock(currentContextMutex);
if (currentContext)
{
if (currentContext->deviceInfo._id == deviceInfo->_id)
return;
}
}
DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
CV_Assert(deviceInfo == &infoImpl.info); CV_Assert(deviceInfo == &infoImpl.info);
...@@ -466,6 +479,30 @@ int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, co ...@@ -466,6 +479,30 @@ int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, co
} }
} }
if (currentContext == NULL)
{
// select default device
const DeviceInfo* selectedDevice = NULL;
for (size_t i = 0; i < devices.size(); i++)
{
const DeviceInfo* dev = devices[i];
if (dev->deviceType == CL_DEVICE_TYPE_GPU)
{
selectedDevice = dev;
break;
}
else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL))
{
selectedDevice = dev;
}
}
if (selectedDevice)
{
setDevice(selectedDevice);
}
}
return (int)devices.size(); return (int)devices.size();
} }
......
...@@ -48,10 +48,7 @@ ...@@ -48,10 +48,7 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#include "binarycaching.hpp" #include "cl_programcache.hpp"
#undef __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
//#define PRINT_KERNEL_RUN_TIME //#define PRINT_KERNEL_RUN_TIME
#define RUN_TIMES 100 #define RUN_TIMES 100
...@@ -60,7 +57,8 @@ ...@@ -60,7 +57,8 @@
#endif #endif
//#define AMD_DOUBLE_DIFFER //#define AMD_DOUBLE_DIFFER
namespace cv { namespace ocl { namespace cv {
namespace ocl {
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
...@@ -179,21 +177,22 @@ void openCLFree(void *devPtr) ...@@ -179,21 +177,22 @@ void openCLFree(void *devPtr)
openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
} }
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName) cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName)
{ {
return openCLGetKernelFromSource(ctx, source, kernelName, NULL); return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
} }
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName, cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
const char *build_options) const char *build_options)
{ {
cl_kernel kernel; cl_kernel kernel;
cl_int status = 0; cl_int status = 0;
CV_Assert(ProgramCache::getProgramCache() != NULL); CV_Assert(ProgramCache::getProgramCache() != NULL);
cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options); cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options);
CV_Assert(program != NULL); CV_Assert(program != NULL);
kernel = clCreateKernel(program, kernelName.c_str(), &status); kernel = clCreateKernel(program, kernelName.c_str(), &status);
openCLVerifyCall(status); openCLVerifyCall(status);
openCLVerifyCall(clReleaseProgram(program));
return kernel; return kernel;
} }
...@@ -213,7 +212,7 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea ...@@ -213,7 +212,7 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea
static double total_execute_time = 0; static double total_execute_time = 0;
static double total_kernel_time = 0; static double total_kernel_time = 0;
#endif #endif
void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3], void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options) int depth, const char *build_options)
{ {
...@@ -275,14 +274,14 @@ void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, ...@@ -275,14 +274,14 @@ void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName,
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
} }
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth) vector< pair<size_t, const void *> > &args, int channels, int depth)
{ {
openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args, openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
channels, depth, NULL); channels, depth, NULL);
} }
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options) vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
...@@ -316,7 +315,7 @@ void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, ...@@ -316,7 +315,7 @@ void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
#endif #endif
} }
double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName, double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
bool finish, bool measureKernelTime, bool cleanUp) bool finish, bool measureKernelTime, bool cleanUp)
...@@ -391,29 +390,6 @@ double openCLExecuteKernelInterop(Context *ctx , const char **source, string ker ...@@ -391,29 +390,6 @@ double openCLExecuteKernelInterop(Context *ctx , const char **source, string ker
return kernelTime; return kernelTime;
} }
//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName,
// size_t globalThreads[3], size_t localThreads[3],
// vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
// bool finish, bool measureKernelTime, bool cleanUp)
//
//{
// std::vector<std::string> fsource;
// for (int i = 0 ; i < numFiles ; i++)
// {
// std::string str;
// if (convertToString(fileName[i], str) >= 0)
// fsource.push_back(str);
// }
// const char **source = new const char *[numFiles];
// for (int i = 0 ; i < numFiles ; i++)
// source[i] = fsource[i].c_str();
// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads,
// args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
// fsource.clear();
// delete []source;
// return kernelTime;
//}
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size) const size_t size)
{ {
...@@ -427,7 +403,6 @@ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const v ...@@ -427,7 +403,6 @@ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const v
value, 0, 0, 0)); value, 0, 0, 0));
return con_struct; return con_struct;
} }
}//namespace ocl }//namespace ocl
......
...@@ -48,18 +48,26 @@ ...@@ -48,18 +48,26 @@
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> #include <iomanip>
#include <fstream> #include <fstream>
#include "binarycaching.hpp" #include "cl_programcache.hpp"
#if defined _MSC_VER && _MSC_VER >= 1200
# pragma warning( disable: 4100 4244 4267 4510 4512 4610)
#endif
#undef __CL_ENABLE_EXCEPTIONS #undef __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp> #include <CL/cl.hpp>
namespace cv { namespace ocl { namespace cv { namespace ocl {
#define MAX_PROG_CACHE_SIZE 1024
/* /*
* The binary caching system to eliminate redundant program source compilation. * The binary caching system to eliminate redundant program source compilation.
* Strictly, this is not a cache because we do not implement evictions right now. * Strictly, this is not a cache because we do not implement evictions right now.
* We shall add such features to trade-off memory consumption and performance when necessary. * We shall add such features to trade-off memory consumption and performance when necessary.
*/ */
cv::Mutex ProgramCache::mutexFiles;
cv::Mutex ProgramCache::mutexCache;
std::auto_ptr<ProgramCache> _programCache; std::auto_ptr<ProgramCache> _programCache;
ProgramCache* ProgramCache::getProgramCache() ProgramCache* ProgramCache::getProgramCache()
{ {
...@@ -79,7 +87,7 @@ ProgramCache::~ProgramCache() ...@@ -79,7 +87,7 @@ ProgramCache::~ProgramCache()
releaseProgram(); releaseProgram();
} }
cl_program ProgramCache::progLookup(string srcsign) cl_program ProgramCache::progLookup(const string& srcsign)
{ {
map<string, cl_program>::iterator iter; map<string, cl_program>::iterator iter;
iter = codeCache.find(srcsign); iter = codeCache.find(srcsign);
...@@ -89,10 +97,11 @@ cl_program ProgramCache::progLookup(string srcsign) ...@@ -89,10 +97,11 @@ cl_program ProgramCache::progLookup(string srcsign)
return NULL; return NULL;
} }
void ProgramCache::addProgram(string srcsign , cl_program program) void ProgramCache::addProgram(const string& srcsign, cl_program program)
{ {
if(!progLookup(srcsign)) if (!progLookup(srcsign))
{ {
clRetainProgram(program);
codeCache.insert(map<string, cl_program>::value_type(srcsign, program)); codeCache.insert(map<string, cl_program>::value_type(srcsign, program));
} }
} }
...@@ -108,25 +117,24 @@ void ProgramCache::releaseProgram() ...@@ -108,25 +117,24 @@ void ProgramCache::releaseProgram()
cacheSize = 0; cacheSize = 0;
} }
static int enable_disk_cache = static int enable_disk_cache = true ||
#ifdef _DEBUG #ifdef _DEBUG
false; false;
#else #else
true; true;
#endif #endif
static int update_disk_cache = false;
static String binpath = ""; static String binpath = "";
void setBinaryDiskCache(int mode, String path) void setBinaryDiskCache(int mode, String path)
{ {
enable_disk_cache = 0;
binpath = "";
if(mode == CACHE_NONE) if(mode == CACHE_NONE)
{ {
update_disk_cache = 0;
enable_disk_cache = 0;
return; return;
} }
update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; enable_disk_cache =
enable_disk_cache |=
#ifdef _DEBUG #ifdef _DEBUG
(mode & CACHE_DEBUG) == CACHE_DEBUG; (mode & CACHE_DEBUG) == CACHE_DEBUG;
#else #else
...@@ -138,108 +146,286 @@ void setBinaryDiskCache(int mode, String path) ...@@ -138,108 +146,286 @@ void setBinaryDiskCache(int mode, String path)
} }
} }
void setBinpath(const char *path) void setBinaryPath(const char *path)
{ {
binpath = path; binpath = path;
} }
int savetofile(const Context*, cl_program &program, const char *fileName) static const int MAX_ENTRIES = 64;
struct ProgramFileCache
{ {
size_t binarySize; struct CV_DECL_ALIGNED(1) ProgramFileHeader
openCLSafeCall(clGetProgramInfo(program, {
CL_PROGRAM_BINARY_SIZES, int hashLength;
sizeof(size_t), //char hash[];
&binarySize, NULL)); };
char* binary = (char*)malloc(binarySize);
if(binary == NULL) struct CV_DECL_ALIGNED(1) ProgramFileTable
{ {
CV_Error(CV_StsNoMem, "Failed to allocate host memory."); int numberOfEntries;
//int firstEntryOffset[];
};
struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry
{
int nextEntry;
int dataSize;
int optionsLength;
//char options[];
// char data[];
};
string fileName_;
const char* hash_;
std::fstream f;
ProgramFileCache(const string& fileName, const char* hash)
: fileName_(fileName), hash_(hash)
{
if (hash_ != NULL)
{
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary);
if(f.is_open())
{
int hashLength = 0;
f.read((char*)&hashLength, sizeof(int));
std::vector<char> fhash(hashLength + 1);
f.read(&fhash[0], hashLength);
if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0)
{
f.close();
remove(fileName_.c_str());
return;
}
}
}
} }
openCLSafeCall(clGetProgramInfo(program,
CL_PROGRAM_BINARIES,
sizeof(char *),
&binary,
NULL));
FILE *fp = fopen(fileName, "wb+"); int getHash(const string& options)
if(fp != NULL)
{ {
fwrite(binary, binarySize, 1, fp); int hash = 0;
free(binary); for (size_t i = 0; i < options.length(); i++)
fclose(fp); {
hash = (hash << 2) ^ (hash >> 17) ^ options[i];
}
return (hash + (hash >> 16)) & (MAX_ENTRIES - 1);
} }
return 1;
}
cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName, bool readConfigurationFromFile(const string& options, std::vector<char>& buf)
const char *build_options) {
{ if (hash_ == NULL)
cl_program program; return false;
cl_int status = 0;
stringstream src_sign;
string srcsign;
string filename;
if (NULL != build_options) if (!f.is_open())
return false;
f.seekg(0, std::fstream::end);
size_t fileSize = (size_t)f.tellg();
if (fileSize == 0)
{ {
src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options; std::cerr << "Invalid file (empty): " << fileName_ << std::endl;
f.close();
remove(fileName_.c_str());
return false;
} }
else f.seekg(0, std::fstream::beg);
int hashLength = 0;
f.read((char*)&hashLength, sizeof(int));
CV_Assert(hashLength > 0);
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
int numberOfEntries = 0;
f.read((char*)&numberOfEntries, sizeof(int));
CV_Assert(numberOfEntries > 0);
if (numberOfEntries != MAX_ENTRIES)
{ {
src_sign << (int64)(*source) << getClContext(ctx); std::cerr << "Invalid file: " << fileName_ << std::endl;
f.close();
remove(fileName_.c_str());
return false;
} }
srcsign = src_sign.str();
program = NULL; std::vector<int> firstEntryOffset(numberOfEntries);
program = ProgramCache::getProgramCache()->progLookup(srcsign); f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
if (!program) int entryNum = getHash(options);
int entryOffset = firstEntryOffset[entryNum];
ProgramFileConfigurationEntry entry;
while (entryOffset > 0)
{ {
//config build programs f.seekg(entryOffset, std::fstream::beg);
std::string all_build_options; assert(sizeof(entry) == sizeof(int)*3);
if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) f.read((char*)&entry, sizeof(entry));
all_build_options += ctx->getDeviceInfo().compilationExtraOptions; std::vector<char> foptions(entry.optionsLength);
if (build_options != NULL) if ((int)options.length() == entry.optionsLength)
{ {
all_build_options += " "; if (entry.optionsLength > 0)
all_build_options += build_options; f.read(&foptions[0], entry.optionsLength);
if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0)
{
buf.resize(entry.dataSize);
f.read(&buf[0], entry.dataSize);
f.seekg(0, std::fstream::beg);
return true;
}
}
if (entry.nextEntry <= 0)
break;
entryOffset = entry.nextEntry;
}
return false;
} }
filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb";
FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL; bool writeConfigurationToFile(const string& options, std::vector<char>& buf)
if(fp == NULL || update_disk_cache) {
if (hash_ == NULL)
return true; // don't save dynamic kernels
if (!f.is_open())
{
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary);
if (!f.is_open())
{
f.open(fileName_.c_str(), ios::out|ios::binary);
if (!f.is_open())
return false;
}
}
f.seekg(0, std::fstream::end);
size_t fileSize = (size_t)f.tellg();
if (fileSize == 0)
{
f.seekp(0, std::fstream::beg);
int hashLength = strlen(hash_);
f.write((char*)&hashLength, sizeof(int));
f.write(hash_, hashLength);
int numberOfEntries = MAX_ENTRIES;
f.write((char*)&numberOfEntries, sizeof(int));
std::vector<int> firstEntryOffset(MAX_ENTRIES, 0);
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
f.close();
f.open(fileName_.c_str(), ios::in|ios::out|ios::binary);
CV_Assert(f.is_open());
f.seekg(0, std::fstream::end);
fileSize = (size_t)f.tellg();
}
f.seekg(0, std::fstream::beg);
int hashLength = 0;
f.read((char*)&hashLength, sizeof(int));
CV_Assert(hashLength > 0);
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
int numberOfEntries = 0;
f.read((char*)&numberOfEntries, sizeof(int));
CV_Assert(numberOfEntries > 0);
if (numberOfEntries != MAX_ENTRIES)
{
std::cerr << "Invalid file: " << fileName_ << std::endl;
f.close();
remove(fileName_.c_str());
return false;
}
size_t tableEntriesOffset = (size_t)f.tellg();
std::vector<int> firstEntryOffset(numberOfEntries);
f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
int entryNum = getHash(options);
int entryOffset = firstEntryOffset[entryNum];
ProgramFileConfigurationEntry entry;
while (entryOffset > 0)
{
f.seekg(entryOffset, std::fstream::beg);
assert(sizeof(entry) == sizeof(int)*3);
f.read((char*)&entry, sizeof(entry));
std::vector<char> foptions(entry.optionsLength);
if ((int)options.length() == entry.optionsLength)
{ {
if(fp != NULL) if (entry.optionsLength > 0)
fclose(fp); f.read(&foptions[0], entry.optionsLength);
CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0);
}
if (entry.nextEntry <= 0)
break;
entryOffset = entry.nextEntry;
}
if (entryOffset > 0)
{
f.seekp(entryOffset, std::fstream::beg);
entry.nextEntry = fileSize;
f.write((char*)&entry, sizeof(entry));
}
else
{
firstEntryOffset[entryNum] = fileSize;
f.seekp(tableEntriesOffset, std::fstream::beg);
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
}
f.seekp(fileSize, std::fstream::beg);
entry.nextEntry = 0;
entry.dataSize = buf.size();
entry.optionsLength = options.length();
f.write((char*)&entry, sizeof(entry));
f.write(options.c_str(), entry.optionsLength);
f.write(&buf[0], entry.dataSize);
return true;
}
program = clCreateProgramWithSource( cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const string& options)
getClContext(ctx), 1, source, NULL, &status); {
cl_int status = 0;
cl_program program = NULL;
std::vector<char> binary;
if (!enable_disk_cache || !readConfigurationFromFile(options, binary))
{
program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status);
openCLVerifyCall(status); openCLVerifyCall(status);
cl_device_id device = getClDeviceID(ctx); cl_device_id device = getClDeviceID(ctx);
status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
if(status == CL_SUCCESS && enable_disk_cache) if(status == CL_SUCCESS)
savetofile(ctx, program, filename.c_str()); {
if (enable_disk_cache)
{
size_t binarySize;
openCLSafeCall(clGetProgramInfo(program,
CL_PROGRAM_BINARY_SIZES,
sizeof(size_t),
&binarySize, NULL));
std::vector<char> binary(binarySize);
char* ptr = &binary[0];
openCLSafeCall(clGetProgramInfo(program,
CL_PROGRAM_BINARIES,
sizeof(char*),
&ptr,
NULL));
if (!writeConfigurationToFile(options, binary))
{
std::cerr << "Can't write data to file: " << fileName_ << std::endl;
}
}
}
} }
else else
{ {
fseek(fp, 0, SEEK_END);
size_t binarySize = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *binary = new char[binarySize];
CV_Assert(1 == fread(binary, binarySize, 1, fp));
fclose(fp);
cl_int status = 0;
cl_device_id device = getClDeviceID(ctx); cl_device_id device = getClDeviceID(ctx);
size_t size = binary.size();
const char* ptr = &binary[0];
program = clCreateProgramWithBinary(getClContext(ctx), program = clCreateProgramWithBinary(getClContext(ctx),
1, 1, &device,
&device, (const size_t *)&size, (const unsigned char **)&ptr,
(const size_t *)&binarySize, NULL, &status);
(const unsigned char **)&binary,
NULL,
&status);
openCLVerifyCall(status); openCLVerifyCall(status);
status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
delete[] binary;
} }
if(status != CL_SUCCESS) if(status != CL_SUCCESS)
...@@ -259,53 +445,77 @@ cl_program ProgramCache::getProgram(const Context *ctx, const char **source, str ...@@ -259,53 +445,77 @@ cl_program ProgramCache::getProgram(const Context *ctx, const char **source, str
memset(buildLog, 0, buildLogSize); memset(buildLog, 0, buildLogSize);
openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
std::cout << "\n\t\t\tBUILD LOG\n"; std::cout << "\nBUILD LOG: " << options << "\n";
std::cout << buildLog << endl; std::cout << buildLog << endl;
delete [] buildLog; delete [] buildLog;
} }
openCLVerifyCall(status); openCLVerifyCall(status);
} }
return program;
}
};
cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
const char *build_options)
{
stringstream src_sign;
src_sign << (int64)(source->programStr);
src_sign << getClContext(ctx);
if (NULL != build_options)
{
src_sign << "_" << build_options;
}
{
cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
{
clRetainProgram(program);
return program;
}
}
cv::AutoLock lockCache(mutexFiles);
// second check
{
cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
{
clRetainProgram(program);
return program;
}
}
string all_build_options;
if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
if (build_options != NULL)
{
all_build_options += " ";
all_build_options += build_options;
}
const DeviceInfo& devInfo = ctx->getDeviceInfo();
string filename = binpath + (source->name ? source->name : "NULL") + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb";
ProgramFileCache programFileCache(filename, source->programHash);
cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
//Cache the binary for future use if build_options is null //Cache the binary for future use if build_options is null
if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
this->addProgram(srcsign, program); {
cv::AutoLock lockCache(mutexCache);
this->addProgram(src_sign.str(), program);
}
else else
{
cout << "Warning: code cache has been full.\n"; cout << "Warning: code cache has been full.\n";
} }
return program; return program;
} }
//// Converts the contents of a file into a string
//static int convertToString(const char *filename, std::string& s)
//{
// size_t size;
// char* str;
//
// std::fstream f(filename, (std::fstream::in | std::fstream::binary));
// if(f.is_open())
// {
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
//
// str = new char[size+1];
// if(!str)
// {
// f.close();
// return -1;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
//
// s = str;
// delete[] str;
// return 0;
// }
// printf("Error: Failed to open file %s\n", filename);
// return -1;
//}
} // namespace ocl } // namespace ocl
} // namespace cv } // namespace cv
...@@ -44,13 +44,8 @@ ...@@ -44,13 +44,8 @@
#include "precomp.hpp" #include "precomp.hpp"
using namespace cv; namespace cv {
using namespace cv::ocl; namespace ocl {
using namespace std;
using std::cout;
using std::endl;
namespace cv { namespace ocl {
class ProgramCache class ProgramCache
{ {
...@@ -61,16 +56,18 @@ protected: ...@@ -61,16 +56,18 @@ protected:
public: public:
static ProgramCache *getProgramCache(); static ProgramCache *getProgramCache();
cl_program getProgram(const Context *ctx, const char **source, string kernelName, cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
const char *build_options); const char *build_options);
void releaseProgram(); void releaseProgram();
protected: protected:
//lookup the binary given the file name //lookup the binary given the file name
cl_program progLookup(string srcsign); // (with acquired mutexCache)
cl_program progLookup(const string& srcsign);
//add program to the cache //add program to the cache
void addProgram(string srcsign, cl_program program); // (with acquired mutexCache)
void addProgram(const string& srcsign, cl_program program);
map <string, cl_program> codeCache; map <string, cl_program> codeCache;
unsigned int cacheSize; unsigned int cacheSize;
...@@ -79,6 +76,10 @@ protected: ...@@ -79,6 +76,10 @@ protected:
//We may need more delicate algorithms when necessary later. //We may need more delicate algorithms when necessary later.
//Right now, let's just leave it along. //Right now, let's just leave it along.
static const unsigned MAX_PROG_CACHE_SIZE = 1024; static const unsigned MAX_PROG_CACHE_SIZE = 1024;
// acquire both mutexes in this order: 1) mutexFiles 2) mutexCache
static cv::Mutex mutexFiles;
static cv::Mutex mutexCache;
}; };
}//namespace ocl }//namespace ocl
......
...@@ -1108,7 +1108,7 @@ namespace cv ...@@ -1108,7 +1108,7 @@ namespace cv
CV_Assert(Dx.offset == 0 && Dy.offset == 0); CV_Assert(Dx.offset == 0 && Dy.offset == 0);
} }
static void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, static void corner_ocl(const cv::ocl::ProgramEntry* source, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy,
oclMat &dst, int border_type) oclMat &dst, int border_type)
{ {
char borderType[30]; char borderType[30];
...@@ -1160,7 +1160,7 @@ namespace cv ...@@ -1160,7 +1160,7 @@ namespace cv
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair( sizeof(cl_float) , (void *)&k)); args.push_back( make_pair( sizeof(cl_float) , (void *)&k));
openCLExecuteKernel(dst.clCxt, &src_str, kernelName, gt, lt, args, -1, -1, build_options); openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, build_options);
} }
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
...@@ -1181,7 +1181,7 @@ namespace cv ...@@ -1181,7 +1181,7 @@ namespace cv
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
extractCovData(src, dx, dy, blockSize, ksize, borderType); extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F); dst.create(src.size(), CV_32F);
corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType); corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
} }
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
...@@ -1200,7 +1200,7 @@ namespace cv ...@@ -1200,7 +1200,7 @@ namespace cv
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
extractCovData(src, dx, dy, blockSize, ksize, borderType); extractCovData(src, dx, dy, blockSize, ksize, borderType);
dst.create(src.size(), CV_32F); dst.create(src.size(), CV_32F);
corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
} }
/////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
...@@ -1749,7 +1749,7 @@ namespace cv ...@@ -1749,7 +1749,7 @@ namespace cv
} }
//////////////////////////////////convolve//////////////////////////////////////////////////// //////////////////////////////////convolve////////////////////////////////////////////////////
static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
{ {
CV_Assert(src.depth() == CV_32FC1); CV_Assert(src.depth() == CV_32FC1);
CV_Assert(temp1.depth() == CV_32F); CV_Assert(temp1.depth() == CV_32F);
...@@ -1784,7 +1784,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st ...@@ -1784,7 +1784,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
} }
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
{ {
......
...@@ -72,7 +72,7 @@ namespace cv ...@@ -72,7 +72,7 @@ namespace cv
namespace ocl namespace ocl
{ {
// provide additional methods for the user to interact with the command queue after a task is fired // provide additional methods for the user to interact with the command queue after a task is fired
static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], static void openCLExecuteKernel_2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode) int depth, char *build_options, FLUSH_MODE finish_mode)
{ {
...@@ -118,14 +118,14 @@ namespace cv ...@@ -118,14 +118,14 @@ namespace cv
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
} }
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode) vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
{ {
openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args, openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
channels, depth, NULL, finish_mode); channels, depth, NULL, finish_mode);
} }
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName,
size_t globalThreads[3], size_t localThreads[3], size_t globalThreads[3], size_t localThreads[3],
vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
...@@ -249,7 +249,7 @@ namespace cv ...@@ -249,7 +249,7 @@ namespace cv
bool support_image2d(Context *clCxt) bool support_image2d(Context *clCxt)
{ {
static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}"; const cv::ocl::ProgramEntry _kernel = {NULL, "__kernel void test_func(image2d_t img) {}", NULL};
static bool _isTested = false; static bool _isTested = false;
static bool _support = false; static bool _support = false;
if(_isTested) if(_isTested)
...@@ -258,7 +258,7 @@ namespace cv ...@@ -258,7 +258,7 @@ namespace cv
} }
try try
{ {
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func");
cv::ocl::finish(); cv::ocl::finish();
_support = true; _support = true;
} }
......
...@@ -229,7 +229,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -229,7 +229,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" ); CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
} }
if( !moments ) if( !mom )
CV_Error( CV_StsNullPtr, "" ); CV_Error( CV_StsNullPtr, "" );
memset( mom, 0, sizeof(*mom)); memset( mom, 0, sizeof(*mom));
......
...@@ -118,7 +118,6 @@ int main(int argc, char **argv) ...@@ -118,7 +118,6 @@ int main(int argc, char **argv)
} }
cv::ocl::setDevice(devicesInfo[device]); cv::ocl::setDevice(devicesInfo[device]);
setBinaryDiskCache(CACHE_UPDATE);
cout << "Device type: " << type << endl cout << "Device type: " << type << endl
<< "Platform name: " << devicesInfo[device]->platform->platformName << endl << "Platform name: " << devicesInfo[device]->platform->platformName << endl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment