Commit 59ed7d06 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #4072 from vladimir-dudnik:opencl-opencv-interop

parents 6ab9df52 217dd63e
......@@ -71,9 +71,28 @@ using namespace cv::ocl;
//! @{
// TODO static functions in the Context class
//! @brief Creates OpenCL context from D3D11 device
//
//! @param pD3D11Device - pointer to D3D11 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
//! @brief Creates OpenCL context from D3D10 device
//
//! @param pD3D10Device - pointer to D3D10 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
//! @brief Creates OpenCL context from Direct3DDevice9Ex device
//
//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
//! @brief Creates OpenCL context from Direct3DDevice9 device
//
//! @param pDirect3DDevice9 - pointer to Direct3Device9 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
//! @}
......@@ -83,19 +102,70 @@ CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDire
//! @addtogroup core_directx
//! @{
//! @brief Converts InputArray to ID3D11Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D11Texture2D
//
//! @param src - source InputArray
//! @param pD3D11Texture2D - destination D3D11 texture
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
//! @brief Converts ID3D11Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D11Texture2D
//! to dst
//
//! @param pD3D11Texture2D - source D3D11 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
//! @brief Converts InputArray to ID3D10Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D10Texture2D
//
//! @param src - source InputArray
//! @param pD3D10Texture2D - destination D3D10 texture
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
//! @brief Converts ID3D10Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D10Texture2D
//! to dst
//
//! @param pD3D10Texture2D - source D3D10 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
//! @brief Converts InputArray to IDirect3DSurface9
//
//! @note Note: function does memory copy from src to
//! pDirect3DSurface9
//
//! @param src - source InputArray
//! @param pDirect3DSurface9 - destination D3D10 texture
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
//! @brief Converts IDirect3DSurface9 to OutputArray
//
//! @note Note: function does memory copy from pDirect3DSurface9
//! to dst
//
//! @param pDirect3DSurface9 - source D3D10 texture
//! @param dst - destination OutputArray
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
//! @brief Get OpenCV type from DirectX type
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
//! @brief Get OpenCV type from DirectX type
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
//! @}
......
......@@ -276,6 +276,58 @@ protected:
Impl* p;
};
/*
//! @brief Attaches OpenCL context to OpenCV
//
//! @note Note:
// OpenCV will check if available OpenCL platform has platformName name,
// then assign context to OpenCV and call clRetainContext function.
// The deviceID device will be used as target device and new command queue
// will be created.
//
// Params:
//! @param platformName - name of OpenCL platform to attach,
//! this string is used to check if platform is available
//! to OpenCV at runtime
//! @param platfromID - ID of platform attached context was created for
//! @param context - OpenCL context to be attached to OpenCV
//! @param deviceID - ID of device, must be created from attached context
*/
CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
/*
//! @brief Convert OpenCL buffer to UMat
//
//! @note Note:
// OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV.
// Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned
// to UMat and clRetainMemObject is called.
//
// Params:
//! @param cl_mem_buffer - source clBuffer handle
//! @param step - num of bytes in single row
//! @param rows - number of rows
//! @param cols - number of cols
//! @param type - OpenCV type of image
//! @param dst - destination UMat
*/
CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
/*
//! @brief Convert OpenCL image2d_t to UMat
//
//! @note Note:
// OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV
// UMat formats.
// Memory content is copied from image to UMat with
// clEnqueueCopyImageToBuffer function.
//
// Params:
//! @param cl_mem_image - source image2d_t handle
//! @param dst - destination UMat
*/
CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
// TODO Move to internal header
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
......
......@@ -858,9 +858,9 @@ OCL_FUNC_P(cl_context, clCreateContext,
OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
/*
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
/*
OCL_FUNC_P(cl_context, clCreateContextFromType,
(const cl_context_properties * properties,
cl_device_type device_type,
......@@ -945,7 +945,6 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
(context, flags, image_type, num_entries, image_formats, num_image_formats))
/*
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
cl_mem_info param_name,
......@@ -962,6 +961,7 @@ OCL_FUNC(cl_int, clGetImageInfo,
size_t * param_value_size_ret),
(image, param_name, param_value_size, param_value, param_value_size_ret))
/*
OCL_FUNC(cl_int, clCreateKernelsInProgram,
(cl_program program,
cl_uint num_kernels,
......@@ -1038,20 +1038,20 @@ OCL_FUNC(cl_int, clEnqueueCopyImage,
cl_event * event),
(command_queue, src_image, dst_image, src_origin, dst_origin,
region, num_events_in_wait_list, event_wait_list, event))
*/
OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
(cl_command_queue command_queue,
cl_mem src_image,
cl_mem dst_buffer,
const size_t * src_origin[3],
const size_t * region[3],
const size_t * src_origin,
const size_t * region,
size_t dst_offset,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
num_events_in_wait_list, event_wait_list, event))
*/
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
(cl_command_queue command_queue,
......@@ -1100,10 +1100,10 @@ OCL_FUNC(cl_int, clGetKernelInfo,
size_t * param_value_size_ret),
(kernel, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
*/
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
......@@ -1348,7 +1348,7 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
#define CL_VERSION_1_2
#endif
#endif
#endif // HAVE_OPENCL
#ifdef _DEBUG
#define CV_OclDbgAssert CV_DbgAssert
......@@ -2925,6 +2925,83 @@ CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
#endif // HAVE_OPENCL_SVM
static void get_platform_name(cl_platform_id id, String& name)
{
// get platform name string length
size_t sz = 0;
if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
// get platform name string
AutoBuffer<char> buf(sz + 1);
if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
// just in case, ensure trailing zero for ASCIIZ string
buf[sz] = 0;
name = (const char*)buf;
}
/*
// Attaches OpenCL context to OpenCV
*/
void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
{
cl_uint cnt = 0;
if(CL_SUCCESS != clGetPlatformIDs(0, 0, &cnt))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
if (cnt == 0)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
std::vector<cl_platform_id> platforms(cnt);
if(CL_SUCCESS != clGetPlatformIDs(cnt, &platforms[0], 0))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
bool platformAvailable = false;
// check if external platformName contained in list of available platforms in OpenCV
for (unsigned int i = 0; i < cnt; i++)
{
String availablePlatformName;
get_platform_name(platforms[i], availablePlatformName);
// external platform is found in the list of available platforms
if (platformName == availablePlatformName)
{
platformAvailable = true;
break;
}
}
if (!platformAvailable)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
// check if platformID corresponds to platformName
String actualPlatformName;
get_platform_name((cl_platform_id)platformID, actualPlatformName);
if (platformName != actualPlatformName)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
// do not initialize OpenCL context
Context ctx = Context::getDefault(false);
// attach supplied context to OpenCV
initializeContextFromHandle(ctx, platformID, context, deviceID);
if(CL_SUCCESS != clRetainContext((cl_context)context))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clRetainContext failed!");
// clear command queue, if any
getCoreTlsData().get()->oclQueue.finish();
Queue q;
getCoreTlsData().get()->oclQueue = q;
return;
} // attachContext()
void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
{
......@@ -3150,10 +3227,10 @@ struct Kernel::Impl
bool haveTempDstUMats;
};
}}
}} // namespace cv::ocl
extern "C" {
extern "C"
{
static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
{
((cv::ocl::Kernel::Impl*)p)->finit();
......@@ -5166,6 +5243,167 @@ MatAllocator* getOpenCLAllocator()
return allocator;
}
}} // namespace cv::ocl
namespace cv {
// three funcs below are implemented in umatrix.cpp
void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
bool autoSteps = false );
void updateContinuityFlag(UMat& m);
void finalizeHdr(UMat& m);
} // namespace cv
namespace cv { namespace ocl {
/*
// Convert OpenCL buffer memory to UMat
*/
void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
{
int d = 2;
int sizes[] = { rows, cols };
CV_Assert(0 <= d && d <= CV_MAX_DIM);
dst.release();
dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
dst.usageFlags = USAGE_DEFAULT;
setSize(dst, d, sizes, 0, true);
dst.offset = 0;
cl_mem memobj = (cl_mem)cl_mem_buffer;
cl_mem_object_type mem_type = 0;
CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
size_t total = 0;
CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0) == CL_SUCCESS);
CV_Assert(clRetainMemObject(memobj) == CL_SUCCESS);
CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
CV_Assert(total >= rows * step);
// attach clBuffer to UMatData
dst.u = new UMatData(getOpenCLAllocator());
dst.u->data = 0;
dst.u->allocatorFlags_ = 0; // not allocated from any OpenCV buffer pool
dst.u->flags = 0;
dst.u->handle = cl_mem_buffer;
dst.u->origdata = 0;
dst.u->prevAllocator = 0;
dst.u->size = total;
finalizeHdr(dst);
dst.addref();
return;
} // convertFromBuffer()
/*
// Convert OpenCL image2d_t memory to UMat
*/
void convertFromImage(void* cl_mem_image, UMat& dst)
{
cl_mem clImage = (cl_mem)cl_mem_image;
cl_mem_object_type mem_type = 0;
CV_Assert(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
cl_image_format fmt = { 0, 0 };
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0) == CL_SUCCESS);
int depth = CV_8U;
switch (fmt.image_channel_data_type)
{
case CL_UNORM_INT8:
case CL_UNSIGNED_INT8:
depth = CV_8U;
break;
case CL_SNORM_INT8:
case CL_SIGNED_INT8:
depth = CV_8S;
break;
case CL_UNORM_INT16:
case CL_UNSIGNED_INT16:
depth = CV_16U;
break;
case CL_SNORM_INT16:
case CL_SIGNED_INT16:
depth = CV_16S;
break;
case CL_SIGNED_INT32:
depth = CV_32S;
break;
case CL_FLOAT:
depth = CV_32F;
break;
default:
CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
}
int type = CV_8UC1;
switch (fmt.image_channel_order)
{
case CL_R:
type = CV_MAKE_TYPE(depth, 1);
break;
case CL_RGBA:
case CL_BGRA:
case CL_ARGB:
type = CV_MAKE_TYPE(depth, 4);
break;
default:
CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
break;
}
size_t step = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0) == CL_SUCCESS);
size_t w = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0) == CL_SUCCESS);
size_t h = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0) == CL_SUCCESS);
dst.create((int)h, (int)w, type);
cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
size_t offset = 0;
size_t src_origin[3] = { 0, 0, 0 };
size_t region[3] = { w, h, 1 };
CV_Assert(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL) == CL_SUCCESS);
CV_Assert(clFinish(q) == CL_SUCCESS);
return;
} // convertFromImage()
///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
......
......@@ -46,6 +46,13 @@
namespace cv {
// forward decls, implementation is below in this file
void setSize(UMat& m, int _dims, const int* _sz, const size_t* _steps,
bool autoSteps = false);
void updateContinuityFlag(UMat& m);
void finalizeHdr(UMat& m);
// it should be a prime number for the best hash function
enum { UMAT_NLOCKS = 31 };
static Mutex umatLocks[UMAT_NLOCKS];
......@@ -123,8 +130,8 @@ void swap( UMat& a, UMat& b )
}
static inline void setSize( UMat& m, int _dims, const int* _sz,
const size_t* _steps, bool autoSteps=false )
void setSize( UMat& m, int _dims, const int* _sz,
const size_t* _steps, bool autoSteps )
{
CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
if( m.dims != _dims )
......@@ -176,7 +183,8 @@ static inline void setSize( UMat& m, int _dims, const int* _sz,
}
}
static void updateContinuityFlag(UMat& m)
void updateContinuityFlag(UMat& m)
{
int i, j;
for( i = 0; i < m.dims; i++ )
......@@ -199,7 +207,7 @@ static void updateContinuityFlag(UMat& m)
}
static void finalizeHdr(UMat& m)
void finalizeHdr(UMat& m)
{
updateContinuityFlag(m);
int d = m.dims;
......@@ -207,6 +215,7 @@ static void finalizeHdr(UMat& m)
m.rows = m.cols = -1;
}
UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
{
UMat hdr;
......
......@@ -66,6 +66,8 @@ endif()
add_subdirectory(cpp)
# FIXIT: can't use cvconfig.h in samples: add_subdirectory(gpu)
add_subdirectory(opencl)
if(WIN32)
add_subdirectory(directx)
endif()
......
# cmake 3.1 needed for find_package(OpenCL)
if(CMAKE_VERSION VERSION_LESS "3.1")
message(STATUS "OpenCL samples require CMakes 3.1+")
return()
endif()
set(
OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS
opencv_core
opencv_imgproc
opencv_video
opencv_imgcodecs
opencv_videoio
opencv_highgui)
ocv_check_dependencies(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS})
if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
find_package(OpenCL 1.2 REQUIRED)
set(project "opencl")
string(TOUPPER "${project}" project_upper)
project("${project}_samples")
ocv_include_modules_recurse(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS})
include_directories(${OpenCL_INCLUDE_DIR})
# ---------------------------------------------
# Define executable targets
# ---------------------------------------------
MACRO(OPENCV_DEFINE_OPENCL_EXAMPLE name srcs)
set(the_target "example_${project}_${name}")
add_executable(${the_target} ${srcs})
ocv_target_link_libraries(
${the_target}
${OPENCV_LINKER_LIBS}
${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS}
${OpenCL_LIBRARY})
set_target_properties(${the_target} PROPERTIES
OUTPUT_NAME "${project}-example-${name}"
PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "samples//${project}")
endif()
if(WIN32)
if(MSVC AND NOT BUILD_SHARED_LIBS)
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG")
endif()
install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT main)
endif()
ENDMACRO()
file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
foreach(sample_filename ${all_samples})
get_filename_component(sample ${sample_filename} NAME_WE)
file(GLOB sample_srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${sample}.*)
OPENCV_DEFINE_OPENCL_EXAMPLE(${sample} ${sample_srcs})
endforeach()
endif()
/*
// The example of interoperability between OpenCL and OpenCV.
// This will loop through frames of video either from input media file
// or camera device and do processing of these data in OpenCL and then
// in OpenCV. In OpenCL it does inversion of pixels in half of frame and
// in OpenCV it does bluring the whole frame.
*/
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#if __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <opencv2/core/ocl.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/video.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
using namespace std;
using namespace cv;
namespace opencl {
class PlatformInfo
{
public:
PlatformInfo()
{}
~PlatformInfo()
{}
cl_int QueryInfo(cl_platform_id id)
{
query_param(id, CL_PLATFORM_PROFILE, m_profile);
query_param(id, CL_PLATFORM_VERSION, m_version);
query_param(id, CL_PLATFORM_NAME, m_name);
query_param(id, CL_PLATFORM_VENDOR, m_vendor);
query_param(id, CL_PLATFORM_EXTENSIONS, m_extensions);
return CL_SUCCESS;
}
std::string Profile() { return m_profile; }
std::string Version() { return m_version; }
std::string Name() { return m_name; }
std::string Vendor() { return m_vendor; }
std::string Extensions() { return m_extensions; }
private:
cl_int query_param(cl_platform_id id, cl_platform_info param, std::string& paramStr)
{
cl_int res;
size_t psize;
cv::AutoBuffer<char> buf;
res = clGetPlatformInfo(id, param, 0, 0, &psize);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetPlatformInfo failed"));
buf.resize(psize);
res = clGetPlatformInfo(id, param, psize, buf, 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetPlatformInfo failed"));
// just in case, ensure trailing zero for ASCIIZ string
buf[psize] = 0;
paramStr = buf;
return CL_SUCCESS;
}
private:
std::string m_profile;
std::string m_version;
std::string m_name;
std::string m_vendor;
std::string m_extensions;
};
class DeviceInfo
{
public:
DeviceInfo()
{}
~DeviceInfo()
{}
cl_int QueryInfo(cl_device_id id)
{
query_param(id, CL_DEVICE_TYPE, m_type);
query_param(id, CL_DEVICE_VENDOR_ID, m_vendor_id);
query_param(id, CL_DEVICE_MAX_COMPUTE_UNITS, m_max_compute_units);
query_param(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, m_max_work_item_dimensions);
query_param(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, m_max_work_item_sizes);
query_param(id, CL_DEVICE_MAX_WORK_GROUP_SIZE, m_max_work_group_size);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, m_preferred_vector_width_char);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, m_preferred_vector_width_short);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, m_preferred_vector_width_int);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, m_preferred_vector_width_long);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, m_preferred_vector_width_float);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, m_preferred_vector_width_double);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, m_preferred_vector_width_half);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, m_native_vector_width_char);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, m_native_vector_width_short);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, m_native_vector_width_int);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, m_native_vector_width_long);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, m_native_vector_width_float);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, m_native_vector_width_double);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, m_native_vector_width_half);
#endif
query_param(id, CL_DEVICE_MAX_CLOCK_FREQUENCY, m_max_clock_frequency);
query_param(id, CL_DEVICE_ADDRESS_BITS, m_address_bits);
query_param(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, m_max_mem_alloc_size);
query_param(id, CL_DEVICE_IMAGE_SUPPORT, m_image_support);
query_param(id, CL_DEVICE_MAX_READ_IMAGE_ARGS, m_max_read_image_args);
query_param(id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, m_max_write_image_args);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, m_max_read_write_image_args);
#endif
query_param(id, CL_DEVICE_IMAGE2D_MAX_WIDTH, m_image2d_max_width);
query_param(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, m_image2d_max_height);
query_param(id, CL_DEVICE_IMAGE3D_MAX_WIDTH, m_image3d_max_width);
query_param(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT, m_image3d_max_height);
query_param(id, CL_DEVICE_IMAGE3D_MAX_DEPTH, m_image3d_max_depth);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, m_image_max_buffer_size);
query_param(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, m_image_max_array_size);
#endif
query_param(id, CL_DEVICE_MAX_SAMPLERS, m_max_samplers);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, m_image_pitch_alignment);
query_param(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, m_image_base_address_alignment);
#endif
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_PIPE_ARGS, m_max_pipe_args);
query_param(id, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, m_pipe_max_active_reservations);
query_param(id, CL_DEVICE_PIPE_MAX_PACKET_SIZE, m_pipe_max_packet_size);
#endif
query_param(id, CL_DEVICE_MAX_PARAMETER_SIZE, m_max_parameter_size);
query_param(id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, m_mem_base_addr_align);
query_param(id, CL_DEVICE_SINGLE_FP_CONFIG, m_single_fp_config);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_DOUBLE_FP_CONFIG, m_double_fp_config);
#endif
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, m_global_mem_cache_type);
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, m_global_mem_cacheline_size);
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, m_global_mem_cache_size);
query_param(id, CL_DEVICE_GLOBAL_MEM_SIZE, m_global_mem_size);
query_param(id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, m_max_constant_buffer_size);
query_param(id, CL_DEVICE_MAX_CONSTANT_ARGS, m_max_constant_args);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, m_max_global_variable_size);
query_param(id, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, m_global_variable_preferred_total_size);
#endif
query_param(id, CL_DEVICE_LOCAL_MEM_TYPE, m_local_mem_type);
query_param(id, CL_DEVICE_LOCAL_MEM_SIZE, m_local_mem_size);
query_param(id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, m_error_correction_support);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_HOST_UNIFIED_MEMORY, m_host_unified_memory);
#endif
query_param(id, CL_DEVICE_PROFILING_TIMER_RESOLUTION, m_profiling_timer_resolution);
query_param(id, CL_DEVICE_ENDIAN_LITTLE, m_endian_little);
query_param(id, CL_DEVICE_AVAILABLE, m_available);
query_param(id, CL_DEVICE_COMPILER_AVAILABLE, m_compiler_available);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_LINKER_AVAILABLE, m_linker_available);
#endif
query_param(id, CL_DEVICE_EXECUTION_CAPABILITIES, m_execution_capabilities);
query_param(id, CL_DEVICE_QUEUE_PROPERTIES, m_queue_properties);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, m_queue_on_host_properties);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, m_queue_on_device_properties);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, m_queue_on_device_preferred_size);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, m_queue_on_device_max_size);
query_param(id, CL_DEVICE_MAX_ON_DEVICE_QUEUES, m_max_on_device_queues);
query_param(id, CL_DEVICE_MAX_ON_DEVICE_EVENTS, m_max_on_device_events);
#endif
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_BUILT_IN_KERNELS, m_built_in_kernels);
#endif
query_param(id, CL_DEVICE_PLATFORM, m_platform);
query_param(id, CL_DEVICE_NAME, m_name);
query_param(id, CL_DEVICE_VENDOR, m_vendor);
query_param(id, CL_DRIVER_VERSION, m_driver_version);
query_param(id, CL_DEVICE_PROFILE, m_profile);
query_param(id, CL_DEVICE_VERSION, m_version);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_OPENCL_C_VERSION, m_opencl_c_version);
#endif
query_param(id, CL_DEVICE_EXTENSIONS, m_extensions);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_PRINTF_BUFFER_SIZE, m_printf_buffer_size);
query_param(id, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, m_preferred_interop_user_sync);
query_param(id, CL_DEVICE_PARENT_DEVICE, m_parent_device);
query_param(id, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, m_partition_max_sub_devices);
query_param(id, CL_DEVICE_PARTITION_PROPERTIES, m_partition_properties);
query_param(id, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, m_partition_affinity_domain);
query_param(id, CL_DEVICE_PARTITION_TYPE, m_partition_type);
query_param(id, CL_DEVICE_REFERENCE_COUNT, m_reference_count);
#endif
return CL_SUCCESS;
}
std::string Name() { return m_name; }
private:
template<typename T>
cl_int query_param(cl_device_id id, cl_device_info param, T& value)
{
cl_int res;
size_t size = 0;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res && size != 0)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
if (0 == size)
return CL_SUCCESS;
if (sizeof(T) != size)
throw std::runtime_error(std::string("clGetDeviceInfo: param size mismatch"));
res = clGetDeviceInfo(id, param, size, &value, 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
return CL_SUCCESS;
}
template<typename T>
cl_int query_param(cl_device_id id, cl_device_info param, std::vector<T>& value)
{
cl_int res;
size_t size;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
if (0 == size)
return CL_SUCCESS;
value.resize(size / sizeof(T));
res = clGetDeviceInfo(id, param, size, &value[0], 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
return CL_SUCCESS;
}
cl_int query_param(cl_device_id id, cl_device_info param, std::string& value)
{
cl_int res;
size_t size;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
value.resize(size + 1);
res = clGetDeviceInfo(id, param, size, &value[0], 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
// just in case, ensure trailing zero for ASCIIZ string
value[size] = 0;
return CL_SUCCESS;
}
private:
cl_device_type m_type;
cl_uint m_vendor_id;
cl_uint m_max_compute_units;
cl_uint m_max_work_item_dimensions;
std::vector<size_t> m_max_work_item_sizes;
size_t m_max_work_group_size;
cl_uint m_preferred_vector_width_char;
cl_uint m_preferred_vector_width_short;
cl_uint m_preferred_vector_width_int;
cl_uint m_preferred_vector_width_long;
cl_uint m_preferred_vector_width_float;
cl_uint m_preferred_vector_width_double;
#if defined(CL_VERSION_1_1)
cl_uint m_preferred_vector_width_half;
cl_uint m_native_vector_width_char;
cl_uint m_native_vector_width_short;
cl_uint m_native_vector_width_int;
cl_uint m_native_vector_width_long;
cl_uint m_native_vector_width_float;
cl_uint m_native_vector_width_double;
cl_uint m_native_vector_width_half;
#endif
cl_uint m_max_clock_frequency;
cl_uint m_address_bits;
cl_ulong m_max_mem_alloc_size;
cl_bool m_image_support;
cl_uint m_max_read_image_args;
cl_uint m_max_write_image_args;
#if defined(CL_VERSION_2_0)
cl_uint m_max_read_write_image_args;
#endif
size_t m_image2d_max_width;
size_t m_image2d_max_height;
size_t m_image3d_max_width;
size_t m_image3d_max_height;
size_t m_image3d_max_depth;
#if defined(CL_VERSION_1_2)
size_t m_image_max_buffer_size;
size_t m_image_max_array_size;
#endif
cl_uint m_max_samplers;
#if defined(CL_VERSION_1_2)
cl_uint m_image_pitch_alignment;
cl_uint m_image_base_address_alignment;
#endif
#if defined(CL_VERSION_2_0)
cl_uint m_max_pipe_args;
cl_uint m_pipe_max_active_reservations;
cl_uint m_pipe_max_packet_size;
#endif
size_t m_max_parameter_size;
cl_uint m_mem_base_addr_align;
cl_device_fp_config m_single_fp_config;
#if defined(CL_VERSION_1_2)
cl_device_fp_config m_double_fp_config;
#endif
cl_device_mem_cache_type m_global_mem_cache_type;
cl_uint m_global_mem_cacheline_size;
cl_ulong m_global_mem_cache_size;
cl_ulong m_global_mem_size;
cl_ulong m_max_constant_buffer_size;
cl_uint m_max_constant_args;
#if defined(CL_VERSION_2_0)
size_t m_max_global_variable_size;
size_t m_global_variable_preferred_total_size;
#endif
cl_device_local_mem_type m_local_mem_type;
cl_ulong m_local_mem_size;
cl_bool m_error_correction_support;
#if defined(CL_VERSION_1_1)
cl_bool m_host_unified_memory;
#endif
size_t m_profiling_timer_resolution;
cl_bool m_endian_little;
cl_bool m_available;
cl_bool m_compiler_available;
#if defined(CL_VERSION_1_2)
cl_bool m_linker_available;
#endif
cl_device_exec_capabilities m_execution_capabilities;
cl_command_queue_properties m_queue_properties;
#if defined(CL_VERSION_2_0)
cl_command_queue_properties m_queue_on_host_properties;
cl_command_queue_properties m_queue_on_device_properties;
cl_uint m_queue_on_device_preferred_size;
cl_uint m_queue_on_device_max_size;
cl_uint m_max_on_device_queues;
cl_uint m_max_on_device_events;
#endif
#if defined(CL_VERSION_1_2)
std::string m_built_in_kernels;
#endif
cl_platform_id m_platform;
std::string m_name;
std::string m_vendor;
std::string m_driver_version;
std::string m_profile;
std::string m_version;
#if defined(CL_VERSION_1_1)
std::string m_opencl_c_version;
#endif
std::string m_extensions;
#if defined(CL_VERSION_1_2)
size_t m_printf_buffer_size;
cl_bool m_preferred_interop_user_sync;
cl_device_id m_parent_device;
cl_uint m_partition_max_sub_devices;
std::vector<cl_device_partition_property> m_partition_properties;
cl_device_affinity_domain m_partition_affinity_domain;
std::vector<cl_device_partition_property> m_partition_type;
cl_uint m_reference_count;
#endif
};
} // namespace opencl
class App
{
public:
App(CommandLineParser& cmd);
~App();
int initOpenCL();
int initVideoSource();
int process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* cl_buffer);
int process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u);
int process_cl_image_with_opencv(cl_mem image, cv::UMat& u);
int run();
bool isRunning() { return m_running; }
bool doProcess() { return m_process; }
bool useBuffer() { return m_use_buffer; }
void setRunning(bool running) { m_running = running; }
void setDoProcess(bool process) { m_process = process; }
void setUseBuffer(bool use_buffer) { m_use_buffer = use_buffer; }
protected:
bool nextFrame(cv::Mat& frame) { return m_cap.read(frame); }
void handleKey(char key);
void timerStart();
void timerEnd();
std::string fpsStr() const;
std::string message() const;
private:
bool m_running;
bool m_process;
bool m_use_buffer;
int64 m_t0;
int64 m_t1;
double m_fps;
string m_file_name;
int m_camera_id;
cv::VideoCapture m_cap;
cv::Mat m_frame;
cv::Mat m_frameGray;
opencl::PlatformInfo m_platformInfo;
opencl::DeviceInfo m_deviceInfo;
std::vector<cl_platform_id> m_platform_ids;
cl_context m_context;
cl_device_id m_device_id;
cl_command_queue m_queue;
cl_program m_program;
cl_kernel m_kernelBuf;
cl_kernel m_kernelImg;
cl_mem m_mem_obj;
cl_event m_event;
};
App::App(CommandLineParser& cmd)
{
cout << "\nPress ESC to exit\n" << endl;
cout << "\n 'p' to toggle ON/OFF processing\n" << endl;
cout << "\n SPACE to switch between OpenCL buffer/image\n" << endl;
m_camera_id = cmd.get<int>("camera");
m_file_name = cmd.get<string>("video");
m_running = false;
m_process = false;
m_use_buffer = false;
m_context = 0;
m_device_id = 0;
m_queue = 0;
m_program = 0;
m_kernelBuf = 0;
m_kernelImg = 0;
m_mem_obj = 0;
m_event = 0;
} // ctor
App::~App()
{
if (m_queue)
{
clFinish(m_queue);
clReleaseCommandQueue(m_queue);
m_queue = 0;
}
if (m_program)
{
clReleaseProgram(m_program);
m_program = 0;
}
if (m_mem_obj)
{
clReleaseMemObject(m_mem_obj);
m_mem_obj = 0;
}
if (m_event)
{
clReleaseEvent(m_event);
}
if (m_kernelBuf)
{
clReleaseKernel(m_kernelBuf);
m_kernelBuf = 0;
}
if (m_kernelImg)
{
clReleaseKernel(m_kernelImg);
m_kernelImg = 0;
}
if (m_device_id)
{
clReleaseDevice(m_device_id);
m_device_id = 0;
}
if (m_context)
{
clReleaseContext(m_context);
m_context = 0;
}
} // dtor
int App::initOpenCL()
{
cl_int res = CL_SUCCESS;
cl_uint num_entries = 0;
res = clGetPlatformIDs(0, 0, &num_entries);
if (CL_SUCCESS != res)
return -1;
m_platform_ids.resize(num_entries);
res = clGetPlatformIDs(num_entries, &m_platform_ids[0], 0);
if (CL_SUCCESS != res)
return -1;
unsigned int i;
// create context from first platform with GPU device
for (i = 0; i < m_platform_ids.size(); i++)
{
cl_context_properties props[] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)(m_platform_ids[i]),
0
};
m_context = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, 0, 0, &res);
if (0 == m_context || CL_SUCCESS != res)
continue;
res = clGetContextInfo(m_context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), &m_device_id, 0);
if (CL_SUCCESS != res)
return -1;
m_queue = clCreateCommandQueue(m_context, m_device_id, 0, &res);
if (0 == m_queue || CL_SUCCESS != res)
return -1;
const char* kernelSrc =
"__kernel "
"void bitwise_inv_buf_8uC1("
" __global unsigned char* pSrcDst,"
" int srcDstStep,"
" int rows,"
" int cols)"
"{"
" int x = get_global_id(0);"
" int y = get_global_id(1);"
" int idx = mad24(y, srcDstStep, x);"
" pSrcDst[idx] = ~pSrcDst[idx];"
"}"
"__kernel "
"void bitwise_inv_img_8uC1("
" read_only image2d_t srcImg,"
" write_only image2d_t dstImg)"
"{"
" int x = get_global_id(0);"
" int y = get_global_id(1);"
" int2 coord = (int2)(x, y);"
" uint4 val = read_imageui(srcImg, coord);"
" val.x = (~val.x) & 0x000000FF;"
" write_imageui(dstImg, coord, val);"
"}";
size_t len = strlen(kernelSrc);
m_program = clCreateProgramWithSource(m_context, 1, &kernelSrc, &len, &res);
if (0 == m_program || CL_SUCCESS != res)
return -1;
res = clBuildProgram(m_program, 1, &m_device_id, 0, 0, 0);
if (CL_SUCCESS != res)
return -1;
m_kernelBuf = clCreateKernel(m_program, "bitwise_inv_buf_8uC1", &res);
if (0 == m_kernelBuf || CL_SUCCESS != res)
return -1;
m_kernelImg = clCreateKernel(m_program, "bitwise_inv_img_8uC1", &res);
if (0 == m_kernelImg || CL_SUCCESS != res)
return -1;
m_platformInfo.QueryInfo(m_platform_ids[i]);
m_deviceInfo.QueryInfo(m_device_id);
// attach OpenCL context to OpenCV
cv::ocl::attachContext(m_platformInfo.Name(), m_platform_ids[i], m_context, m_device_id);
break;
}
return m_context != 0 ? CL_SUCCESS : -1;
} // initOpenCL()
int App::initVideoSource()
{
try
{
if (!m_file_name.empty() && m_camera_id == -1)
{
m_cap.open(m_file_name.c_str());
if (!m_cap.isOpened())
throw std::runtime_error(std::string("can't open video file: " + m_file_name));
}
else if (m_camera_id != -1)
{
m_cap.open(m_camera_id);
if (!m_cap.isOpened())
{
std::stringstream msg;
msg << "can't open camera: " << m_camera_id;
throw std::runtime_error(msg.str());
}
}
else
throw std::runtime_error(std::string("specify video source"));
}
catch (std::exception e)
{
cerr << "ERROR: " << e.what() << std::endl;
return -1;
}
return 0;
} // initVideoSource()
// this function is an example of "typical" OpenCL processing pipeline
// It creates OpenCL buffer or image, depending on use_buffer flag,
// from input media frame and process these data
// (inverts each pixel value in half of frame) with OpenCL kernel
int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem_obj)
{
cl_int res = CL_SUCCESS;
CV_Assert(mem_obj);
cl_kernel kernel = 0;
cl_mem mem = mem_obj[0];
if (0 == mem)
{
// first time initialization
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
if (use_buffer)
{
// allocate OpenCL memory to keep single frame,
// reuse this memory for subsecuent frames
// memory will be deallocated at dtor
mem = clCreateBuffer(m_context, flags, frame.total(), frame.ptr(), &res);
if (0 == mem || CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 0, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 1, sizeof(int), &frame.step[0]);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 2, sizeof(int), &frame.rows);
if (CL_SUCCESS != res)
return -1;
int cols2 = frame.cols / 2;
res = clSetKernelArg(m_kernelBuf, 3, sizeof(int), &cols2);
if (CL_SUCCESS != res)
return -1;
kernel = m_kernelBuf;
}
else
{
cl_image_format fmt;
fmt.image_channel_order = CL_R;
fmt.image_channel_data_type = CL_UNSIGNED_INT8;
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = frame.cols;
desc.image_height = frame.rows;
desc.image_depth = 0;
desc.image_array_size = 0;
desc.image_row_pitch = frame.step[0];
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = 0;
mem = clCreateImage(m_context, flags, &fmt, &desc, frame.ptr(), &res);
if (0 == mem || CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelImg, 0, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelImg, 1, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
kernel = m_kernelImg;
}
}
m_event = clCreateUserEvent(m_context, &res);
if (0 == m_event || CL_SUCCESS != res)
return -1;
// process left half of frame in OpenCL
size_t size[] = { frame.cols / 2, frame.rows };
res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &m_event);
if (CL_SUCCESS != res)
return -1;
res = clWaitForEvents(1, &m_event);
if (CL_SUCCESS != res)
return - 1;
mem_obj[0] = mem;
return 0;
}
// this function is an example of interoperability between OpenCL buffer
// and OpenCV UMat objects. It converts (without copying data) OpenCL buffer
// to OpenCV UMat and then do blur on these data
int App::process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u)
{
cv::ocl::convertFromBuffer(buffer, step, rows, cols, type, u);
// process right half of frame in OpenCV
cv::Point pt(u.cols / 2, 0);
cv::Size sz(u.cols / 2, u.rows);
cv::Rect roi(pt, sz);
cv::UMat uroi(u, roi);
cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3));
if (buffer)
clReleaseMemObject(buffer);
m_mem_obj = 0;
return 0;
}
// this function is an example of interoperability between OpenCL image
// and OpenCV UMat objects. It converts OpenCL image
// to OpenCV UMat and then do blur on these data
int App::process_cl_image_with_opencv(cl_mem image, cv::UMat& u)
{
cv::ocl::convertFromImage(image, u);
// process right half of frame in OpenCV
cv::Point pt(u.cols / 2, 0);
cv::Size sz(u.cols / 2, u.rows);
cv::Rect roi(pt, sz);
cv::UMat uroi(u, roi);
cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3));
if (image)
clReleaseMemObject(image);
m_mem_obj = 0;
return 0;
}
int App::run()
{
if (0 != initOpenCL())
return -1;
if (0 != initVideoSource())
return -1;
Mat img_to_show;
// set running state until ESC pressed
setRunning(true);
// set process flag to show some data processing
// can be toggled on/off by 'p' button
setDoProcess(true);
// set use buffer flag,
// when it is set to true, will demo interop opencl buffer and cv::Umat,
// otherwise demo interop opencl image and cv::UMat
// can be switched on/of by SPACE button
setUseBuffer(true);
// Iterate over all frames
while (isRunning() && nextFrame(m_frame))
{
cv::cvtColor(m_frame, m_frameGray, COLOR_BGR2GRAY);
UMat uframe;
// work
timerStart();
if (doProcess())
{
process_frame_with_open_cl(m_frameGray, useBuffer(), &m_mem_obj);
if (useBuffer())
process_cl_buffer_with_opencv(
m_mem_obj, m_frameGray.step[0], m_frameGray.rows, m_frameGray.cols, m_frameGray.type(), uframe);
else
process_cl_image_with_opencv(m_mem_obj, uframe);
}
else
{
m_frameGray.copyTo(uframe);
}
timerEnd();
uframe.copyTo(img_to_show);
putText(img_to_show, "Version : " + m_platformInfo.Version(), Point(5, 30), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "Name : " + m_platformInfo.Name(), Point(5, 60), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "Device : " + m_deviceInfo.Name(), Point(5, 90), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
cv::String memtype = useBuffer() ? "buffer" : "image";
putText(img_to_show, "interop with OpenCL " + memtype, Point(5, 120), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS : " + fpsStr(), Point(5, 150), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencl_interop", img_to_show);
handleKey((char)waitKey(3));
}
return 0;
}
void App::handleKey(char key)
{
switch (key)
{
case 27:
setRunning(false);
break;
case ' ':
setUseBuffer(!useBuffer());
break;
case 'p':
case 'P':
setDoProcess( !doProcess() );
break;
default:
break;
}
}
inline void App::timerStart()
{
m_t0 = getTickCount();
}
inline void App::timerEnd()
{
m_t1 = getTickCount();
int64 delta = m_t1 - m_t0;
double freq = getTickFrequency();
m_fps = freq / delta;
}
inline string App::fpsStr() const
{
stringstream ss;
ss << std::fixed << std::setprecision(1) << m_fps;
return ss.str();
}
int main(int argc, char** argv)
{
const char* keys =
"{ help h ? | | print help message }"
"{ camera c | -1 | use camera as input }"
"{ video v | | use video as input }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help"))
{
cmd.printMessage();
return EXIT_SUCCESS;
}
App app(cmd);
try
{
app.run();
}
catch (const cv::Exception& e)
{
cout << "error: " << e.what() << endl;
return 1;
}
catch (const std::exception& e)
{
cout << "error: " << e.what() << endl;
return 1;
}
catch (...)
{
cout << "unknown exception" << endl;
return 1;
}
return EXIT_SUCCESS;
} // main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment