Commit dfb698dc authored by Roman Donchenko's avatar Roman Donchenko Committed by OpenCV Buildbot

Merge pull request #1581 from SpecLad:merge-2.4

parents 685eceb6 076eeffd
......@@ -138,6 +138,7 @@ OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF
OCV_OPTION(WITH_TIFF "Include TIFF support" ON IF (NOT IOS) )
OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_LIBV4L "Use libv4l for Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_DSHOW "Build HighGUI with DirectShow support" ON IF (WIN32 AND NOT ARM) )
OCV_OPTION(WITH_MSMF "Build HighGUI with Media Foundation support" OFF IF WIN32 )
OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) )
......@@ -799,7 +800,14 @@ endif()
if(HAVE_OPENCL)
status("")
status(" OpenCL")
status(" OpenCL:")
set(__opencl_ver "invalid")
if(HAVE_OPENCL12)
set(__opencl_ver "1.2")
elseif(HAVE_OPENCL11)
set(__opencl_ver "1.1")
endif()
status(" Version:" ${__opencl_ver})
if(OPENCL_INCLUDE_DIR)
status(" Include path:" ${OPENCL_INCLUDE_DIRS})
endif()
......
......@@ -6,7 +6,7 @@ if(APPLE)
else(APPLE)
#find_package(OpenCL QUIET)
if (NOT OPENCL_FOUND)
if(NOT OPENCL_FOUND)
find_path(OPENCL_ROOT_DIR
NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h
PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT
......@@ -20,32 +20,7 @@ else(APPLE)
DOC "OpenCL include directory"
NO_DEFAULT_PATH)
if(WIN32)
if(X86_64)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
elseif(X86)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
else()
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
endif()
elseif(UNIX)
if(X86_64)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
elseif(X86)
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
else()
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
endif()
else()
set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
endif()
find_library(OPENCL_LIBRARY
NAMES OpenCL
HINTS ${OPENCL_ROOT_DIR}
PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
DOC "OpenCL library"
NO_DEFAULT_PATH)
set(OPENCL_LIBRARY "OPENCL_DYNAMIC_LOAD")
mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
include(FindPackageHandleStandardArgs)
......@@ -54,20 +29,30 @@ else(APPLE)
endif(APPLE)
if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if(WIN32 AND X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif(WIN32)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
try_compile(HAVE_OPENCL11
"${OpenCV_BINARY_DIR}"
"${OpenCV_SOURCE_DIR}/cmake/checks/opencl11.cpp"
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIR}"
)
if(NOT HAVE_OPENCL11)
message(STATUS "OpenCL 1.1 not found, ignore OpenCL SDK")
return()
endif()
try_compile(HAVE_OPENCL12
"${OpenCV_BINARY_DIR}"
"${OpenCV_SOURCE_DIR}/cmake/checks/opencl12.cpp"
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIR}"
)
if(NOT HAVE_OPENCL12)
message(STATUS "OpenCL 1.2 not found, will use OpenCL 1.1")
endif()
if(X86_64 AND UNIX)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64)
elseif(X86 AND UNIX)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32)
set(HAVE_OPENCL 1)
set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
if(OPENCL_LIBRARY MATCHES "OPENCL_DYNAMIC_LOAD")
unset(OPENCL_LIBRARIES)
else()
set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")
endif()
if(WITH_OPENCLAMDFFT)
......@@ -84,16 +69,9 @@ if(OPENCL_FOUND)
PATH_SUFFIXES include
DOC "clAmdFft include directory")
find_library(CLAMDFFT_LIBRARY
NAMES clAmdFft.Runtime
HINTS ${CLAMDFFT_ROOT_DIR}
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES}
DOC "clAmdFft library")
if(CLAMDFFT_LIBRARY AND CLAMDFFT_INCLUDE_DIR)
if(CLAMDFFT_INCLUDE_DIR)
set(HAVE_CLAMDFFT 1)
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDFFT_INCLUDE_DIR}")
list(APPEND OPENCL_LIBRARIES "${CLAMDFFT_LIBRARY}")
endif()
endif()
......@@ -111,16 +89,9 @@ if(OPENCL_FOUND)
PATH_SUFFIXES include
DOC "clAmdFft include directory")
find_library(CLAMDBLAS_LIBRARY
NAMES clAmdBlas
HINTS ${CLAMDBLAS_ROOT_DIR}
PATH_SUFFIXES ${CLAMD_POSSIBLE_LIB_SUFFIXES}
DOC "clAmdBlas library")
if(CLAMDBLAS_LIBRARY AND CLAMDBLAS_INCLUDE_DIR)
if(CLAMDBLAS_INCLUDE_DIR)
set(HAVE_CLAMDBLAS 1)
list(APPEND OPENCL_INCLUDE_DIRS "${CLAMDBLAS_INCLUDE_DIR}")
list(APPEND OPENCL_LIBRARIES "${CLAMDBLAS_LIBRARY}")
endif()
endif()
endif()
......@@ -154,7 +154,9 @@ endif(WITH_XINE)
# --- V4L ---
ocv_clear_vars(HAVE_LIBV4L HAVE_CAMV4L HAVE_CAMV4L2 HAVE_VIDEOIO)
if(WITH_V4L)
CHECK_MODULE(libv4l1 HAVE_LIBV4L)
if(WITH_LIBV4L)
CHECK_MODULE(libv4l1 HAVE_LIBV4L)
endif()
CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L)
CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2)
CHECK_INCLUDE_FILE(sys/videoio.h HAVE_VIDEOIO)
......
......@@ -428,8 +428,8 @@ endmacro()
# Usage:
# ocv_glob_module_sources(<extra sources&headers in the same format as used in ocv_set_module_sources>)
macro(ocv_glob_module_sources)
file(GLOB lib_srcs "src/*.cpp")
file(GLOB lib_int_hdrs "src/*.hpp" "src/*.h")
file(GLOB_RECURSE lib_srcs "src/*.cpp")
file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h")
file(GLOB lib_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
......@@ -445,22 +445,23 @@ macro(ocv_glob_module_sources)
source_group("Src\\Cuda" FILES ${lib_cuda_srcs} ${lib_cuda_hdrs})
endif()
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_OPENCL AND cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
endif()
ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_srcs} ${lib_cuda_hdrs})
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
source_group("Include" FILES ${lib_hdrs})
source_group("Include\\detail" FILES ${lib_hdrs_detail})
endmacro()
......
#if defined __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Header-version probe: this translation unit fails to compile with an
// #error unless the CL_VERSION_1_1 macro is defined. When it does compile,
// the resulting program does nothing and exits with status 0.
int main(int argc, char** argv)
{
#if !defined(CL_VERSION_1_1)
#error OpenCL 1.1 not found
#endif
    return 0;
}
#if defined __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
// Header-version probe: this translation unit fails to compile with an
// #error unless the CL_VERSION_1_2 macro is defined. When it does compile,
// the resulting program does nothing and exits with status 0.
int main(int argc, char** argv)
{
#if !defined(CL_VERSION_1_2)
#error OpenCL 1.2 not found
#endif
    return 0;
}
file(GLOB cl_list "${CL_DIR}/*.cl" )
list(SORT cl_list)
file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit!
string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}")
get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME)
set(STR_CPP "// This file is auto-generated. Do not edit!
#include \"${OUTPUT_HPP_NAME}\"
namespace cv
{
......@@ -8,6 +14,15 @@ namespace ocl
{
")
set(STR_HPP "// This file is auto-generated. Do not edit!
namespace cv
{
namespace ocl
{
")
foreach(cl ${cl_list})
get_filename_component(cl_filename "${cl}" NAME_WE)
#message("${cl_filename}")
......@@ -29,7 +44,22 @@ foreach(cl ${cl_list})
string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
file(APPEND ${OUTPUT} "const char* ${cl_filename}=\"${lines};\n")
string(MD5 hash "${lines}")
set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n")
set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n")
endforeach()
file(APPEND ${OUTPUT} "}\n}\n")
set(STR_CPP "${STR_CPP}}\n}\n")
set(STR_HPP "${STR_HPP}}\n}\n")
file(WRITE "${OUTPUT}" "${STR_CPP}")
if(EXISTS "${OUTPUT_HPP}")
file(READ "${OUTPUT_HPP}" hpp_lines)
endif()
if("${hpp_lines}" STREQUAL "${STR_HPP}")
message(STATUS "${OUTPUT_HPP} contains same content")
else()
file(WRITE "${OUTPUT_HPP}" "${STR_HPP}")
endif()
......@@ -232,7 +232,16 @@ foreach(__opttype OPT DBG)
endif()
endif()
list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_LIBRARIES})
if(${CUDA_VERSION} VERSION_LESS "5.5")
list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_npp_LIBRARY})
else()
find_cuda_helper_libs(nppc)
find_cuda_helper_libs(nppi)
find_cuda_helper_libs(npps)
list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
endif()
if(OpenCV_USE_CUBLAS)
list(APPEND OpenCV_EXTRA_LIBS_${__opttype} ${CUDA_CUBLAS_LIBRARIES})
......
......@@ -105,6 +105,8 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL11
#cmakedefine HAVE_OPENCL12
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
......
......@@ -50,16 +50,9 @@
#ifdef HAVE_OPENCV_OCL
#define NOT_IMPLEMENTED CV_Error(cv::Error::StsNotImplemented, "Not implemented")
#include "opencl_kernels.hpp"
namespace cv
{
namespace ocl
{
//OpenCL kernel file string pointer
extern const char * retina_kernel;
}
}
#define NOT_IMPLEMENTED CV_Error(cv::Error::StsNotImplemented, "Not implemented")
namespace cv
{
......
......@@ -72,7 +72,7 @@ PARAM_TEST_CASE(Retina_OCL, bool, int, bool, double, double)
double reductionFactor;
double samplingStrength;
std::vector<cv::ocl::Info> infos;
cv::ocl::DevicesInfo infos;
virtual void SetUp()
{
......@@ -84,8 +84,8 @@ PARAM_TEST_CASE(Retina_OCL, bool, int, bool, double, double)
if(!oclInit)
{
cv::ocl::getDevice(infos);
std::cout << "Device name:" << infos[0].DeviceName[0] << std::endl;
cv::ocl::getOpenCLDevices(infos);
std::cout << "Device name:" << infos[0]->deviceName << std::endl;
oclInit = true;
}
}
......
......@@ -856,8 +856,7 @@ static int _capture_V4L (CvCaptureCAM_V4L *capture, char *deviceName)
detect_v4l = try_init_v4l(capture, deviceName);
if ((detect_v4l == -1)
)
if (detect_v4l == -1)
{
fprintf (stderr, "HIGHGUI ERROR: V4L"
": device %s: Unable to open for READ ONLY\n", deviceName);
......@@ -865,8 +864,7 @@ static int _capture_V4L (CvCaptureCAM_V4L *capture, char *deviceName)
return -1;
}
if ((detect_v4l <= 0)
)
if (detect_v4l <= 0)
{
fprintf (stderr, "HIGHGUI ERROR: V4L"
": device %s: Unable to query number of channels\n", deviceName);
......
......@@ -309,15 +309,15 @@ private:
class TBBApproximateSynchronizer: public ApproximateSynchronizerBase
{
public:
TBBApproximateSynchronizer( ApproximateSyncGrabber& approxSyncGrabber ) :
ApproximateSynchronizerBase(approxSyncGrabber)
TBBApproximateSynchronizer( ApproximateSyncGrabber& _approxSyncGrabber ) :
ApproximateSynchronizerBase(_approxSyncGrabber)
{
setMaxBufferSize();
}
void setMaxBufferSize()
{
int maxBufferSize = ApproximateSynchronizerBase::approxSyncGrabber.getMaxBufferSize();
int maxBufferSize = approxSyncGrabber.getMaxBufferSize();
if( maxBufferSize >= 0 )
{
depthQueue.set_capacity( maxBufferSize );
......
......@@ -43,9 +43,10 @@
//
//M*/
#include "precomp.hpp"
#include <cstdio>
#ifdef HAVE_OPENCV_OCL
#include <cstdio>
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
......@@ -54,14 +55,11 @@ namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *surf;
const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
static const char noImage2dOption[] = "-D DISABLE_IMAGE2D";
static bool use_image2d = false;
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
char optBuf [100] = {0};
......@@ -73,7 +71,7 @@ namespace cv
}
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
......@@ -596,7 +594,7 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
if(sumTex)
{
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex));
}
else
{
......
......@@ -3,6 +3,16 @@ Matrix Reductions
.. highlight:: cpp
ocl::absSum
---------------
Returns the sum of absolute values for matrix elements.
.. ocv:function:: Scalar ocl::absSum(const oclMat &m)
:param m: The Source image of all depth.
Counts the abs sum of matrix elements for each channel. Supports all data types.
ocl::countNonZero
---------------------
Returns the number of non-zero elements in src
......@@ -11,7 +21,7 @@ Returns the number of non-zero elements in src
:param src: Single-channel array
Counts non-zero array elements.
Counts non-zero array elements. Supports all data types.
ocl::minMax
------------------
......@@ -49,32 +59,22 @@ Returns void
The functions minMaxLoc find minimum and maximum element values and their positions. The extremums are searched across the whole array, or, if mask is not an empty array, in the specified array region. The functions do not work with multi-channel arrays.
ocl::Sum
ocl::sqrSum
------------------
Returns the sum of matrix elements for each channel
.. ocv:function:: Scalar ocl::sum(const oclMat &m)
:param m: The Source image of all depth.
Counts the sum of matrix elements for each channel.
ocl::absSum
---------------
Returns the sum of absolute values for matrix elements.
Returns the squared sum of matrix elements for each channel
.. ocv:function:: Scalar ocl::absSum(const oclMat &m)
.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m)
:param m: The Source image of all depth.
Counts the abs sum of matrix elements for each channel.
Counts the squared sum of matrix elements for each channel. Supports all data types.
ocl::sqrSum
ocl::sum
------------------
Returns the squared sum of matrix elements for each channel
Returns the sum of matrix elements for each channel
.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m)
.. ocv:function:: Scalar ocl::sum(const oclMat &m)
:param m: The Source image of all depth.
Counts the squared sum of matrix elements for each channel.
Counts the sum of matrix elements for each channel.
This diff is collapsed.
......@@ -3,56 +3,40 @@ Data Structures and Utility Functions
.. highlight:: cpp
ocl::Info
-------------
.. ocv:class:: ocl::Info
ocl::getOpenCLPlatforms
-----------------------
Returns the list of OpenCL platforms
This class should be maintained by the user and passed to getDevice.
.. ocv:function:: int ocl::getOpenCLPlatforms( PlatformsInfo& platforms )
ocl::getDevice
------------------
:param platforms: Output variable
ocl::getOpenCLDevices
---------------------
Returns the list of devices
.. ocv:function:: int ocl::getDevice( std::vector<Info> & oclinfo, int devicetype=CVCL_DEVICE_TYPE_GPU )
.. ocv:function:: int ocl::getOpenCLDevices( DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, const PlatformInfo* platform = NULL )
:param oclinfo: Output vector of ``ocl::Info`` structures
:param devices: Output variable
:param devicetype: One of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``.
:param deviceType: Bitmask of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``.
the function must be called before any other ``cv::ocl`` functions; it initializes ocl runtime.
:param platform: Specifies the preferred platform
ocl::setDevice
------------------
--------------
Returns void
.. ocv:function:: void ocl::setDevice( Info &oclinfo, int devnum = 0 )
.. ocv:function:: void ocl::setDevice( const DeviceInfo* info )
:param oclinfo: Output vector of ``ocl::Info`` structures
:param info: device info
:param devnum: the selected OpenCL device under this platform.
ocl::setBinpath
ocl::setBinaryPath
------------------
Returns void
.. ocv:function:: void ocl::setBinpath(const char *path)
.. ocv:function:: void ocl::setBinaryPath(const char *path)
:param path: the path of OpenCL kernel binaries
If you call this function with a valid path, the OCL module saves the compiled kernel binaries to that path the first time they are built and reloads them from there afterwards. This saves kernel compilation time at runtime.
ocl::getoclContext
----------------------
Returns the pointer to the opencl context
.. ocv:function:: void* ocl::getoclContext()
This function is used to get the OpenCL context so that OpenCV can interact with other OpenCL programs.
ocl::getoclCommandQueue
--------------------------
Returns the pointer to the opencl command queue
.. ocv:function:: void* ocl::getoclCommandQueue()
This function is used to get the OpenCL command queue so that OpenCV can interact with other OpenCL programs.
This diff is collapsed.
// Dispatch header: pulls in the OpenCL runtime wrapper header that matches
// the OpenCL version macros defined for this build.
//  - HAVE_OPENCL not defined: nothing is included.
//  - HAVE_OPENCL12 defined:   cl_runtime_opencl12.hpp (checked first).
//  - HAVE_OPENCL11 defined:   cl_runtime_opencl11.hpp.
//  - neither version macro:   compilation stops with an #error.
#ifndef __OPENCV_OCL_CL_RUNTIME_HPP__
#define __OPENCV_OCL_CL_RUNTIME_HPP__
#ifdef HAVE_OPENCL
// HAVE_OPENCL12 takes precedence when both version macros are defined.
#if defined(HAVE_OPENCL12)
#include "cl_runtime_opencl12.hpp"
#elif defined(HAVE_OPENCL11)
#include "cl_runtime_opencl11.hpp"
#else
// HAVE_OPENCL without a known version macro is treated as a broken configuration.
#error Invalid OpenCL configuration
#endif
#endif
#endif // __OPENCV_OCL_CL_RUNTIME_HPP__
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__
#define __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__
#ifdef HAVE_CLAMDFFT
// generated by parser_clamdfft.py
#define clAmdFftSetup clAmdFftSetup_
#define clAmdFftTeardown clAmdFftTeardown_
#define clAmdFftGetVersion clAmdFftGetVersion_
#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_
#define clAmdFftCopyPlan clAmdFftCopyPlan_
#define clAmdFftBakePlan clAmdFftBakePlan_
#define clAmdFftDestroyPlan clAmdFftDestroyPlan_
#define clAmdFftGetPlanContext clAmdFftGetPlanContext_
#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_
#define clAmdFftGetPlanScale clAmdFftGetPlanScale_
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_
#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_
#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_
#define clAmdFftGetPlanDim clAmdFftGetPlanDim_
#define clAmdFftSetPlanDim clAmdFftSetPlanDim_
#define clAmdFftGetPlanLength clAmdFftGetPlanLength_
#define clAmdFftSetPlanLength clAmdFftSetPlanLength_
#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_
#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_
#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_
#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_
#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_
#define clAmdFftGetLayout clAmdFftGetLayout_
#define clAmdFftSetLayout clAmdFftSetLayout_
#define clAmdFftGetResultLocation clAmdFftGetResultLocation_
#define clAmdFftSetResultLocation clAmdFftSetResultLocation_
#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_
#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_
#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_
#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_
#include <clAmdFft.h>
// generated by parser_clamdfft.py
#undef clAmdFftSetup
#define clAmdFftSetup clAmdFftSetup_pfn
#undef clAmdFftTeardown
#define clAmdFftTeardown clAmdFftTeardown_pfn
#undef clAmdFftGetVersion
#define clAmdFftGetVersion clAmdFftGetVersion_pfn
#undef clAmdFftCreateDefaultPlan
#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn
#undef clAmdFftCopyPlan
#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn
#undef clAmdFftBakePlan
#define clAmdFftBakePlan clAmdFftBakePlan_pfn
#undef clAmdFftDestroyPlan
#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn
#undef clAmdFftGetPlanContext
#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn
#undef clAmdFftGetPlanPrecision
#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn
#undef clAmdFftSetPlanPrecision
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#undef clAmdFftGetPlanScale
#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn
#undef clAmdFftSetPlanScale
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
#undef clAmdFftGetPlanBatchSize
#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn
#undef clAmdFftSetPlanBatchSize
#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn
#undef clAmdFftGetPlanDim
#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn
#undef clAmdFftSetPlanDim
#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn
#undef clAmdFftGetPlanLength
#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn
#undef clAmdFftSetPlanLength
#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn
#undef clAmdFftGetPlanInStride
#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn
#undef clAmdFftSetPlanInStride
#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn
#undef clAmdFftGetPlanOutStride
#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn
#undef clAmdFftSetPlanOutStride
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
#undef clAmdFftGetPlanDistance
#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn
#undef clAmdFftSetPlanDistance
#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn
#undef clAmdFftGetLayout
#define clAmdFftGetLayout clAmdFftGetLayout_pfn
#undef clAmdFftSetLayout
#define clAmdFftSetLayout clAmdFftSetLayout_pfn
#undef clAmdFftGetResultLocation
#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn
#undef clAmdFftSetResultLocation
#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn
#undef clAmdFftGetPlanTransposeResult
#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn
#undef clAmdFftSetPlanTransposeResult
#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn
#undef clAmdFftGetTmpBufSize
#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn
#undef clAmdFftEnqueueTransform
#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn
// CL_RUNTIME_EXPORT decorates the extern function-pointer declarations that
// follow. A definition supplied before this point is kept as-is; otherwise it
// expands to __declspec(dllimport) when BUILD_SHARED_LIBS or OPENCV_OCL_SHARED
// is defined together with one of WIN32 / _WIN32 / WINCE, and to nothing in
// every other case.
#ifndef CL_RUNTIME_EXPORT
#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_OCL_SHARED)) && (defined WIN32 || defined _WIN32 || defined WINCE)
#define CL_RUNTIME_EXPORT __declspec(dllimport)
#else
#define CL_RUNTIME_EXPORT
#endif
#endif
// generated by parser_clamdfft.py
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)();
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
#endif
#endif // __OPENCV_OCL_CLAMDFFT_RUNTIME_HPP__
......@@ -51,45 +51,59 @@ const char * impls[] =
#endif
};
using namespace cv::ocl;
int main(int argc, char ** argv)
{
const char * keys =
"{ h help | false | print help message }"
"{ t type | gpu | set device type:cpu or gpu}"
"{ p platform | 0 | set platform id }"
"{ p platform | -1 | set platform id }"
"{ d device | 0 | set device id }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help"))
if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates
{
cout << "Available options besides google test option:" << endl;
cmd.printMessage();
return 0;
}
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help"))
{
cout << "Available options besides google test option:" << endl;
cmd.printMessage();
return 0;
}
string type = cmd.get<string>("type");
unsigned int pid = cmd.get<unsigned int>("platform");
int device = cmd.get<int>("device");
string type = cmd.get<string>("type");
int pid = cmd.get<int>("platform");
int device = cmd.get<int>("device");
int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
cv::ocl::CVCL_DEVICE_TYPE_GPU;
int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
cv::ocl::CVCL_DEVICE_TYPE_GPU;
std::vector<cv::ocl::Info> oclinfo;
int devnums = cv::ocl::getDevice(oclinfo, flag);
if (devnums <= device || device < 0)
{
std::cout << "device invalid\n";
return -1;
}
cv::ocl::PlatformsInfo platformsInfo;
cv::ocl::getOpenCLPlatforms(platformsInfo);
if (pid >= (int)platformsInfo.size())
{
std::cout << "platform is invalid\n";
return 1;
}
if (pid >= oclinfo.size())
{
std::cout << "platform invalid\n";
return -1;
cv::ocl::DevicesInfo devicesInfo;
int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]);
if (device < 0 || device >= devnums)
{
std::cout << "device/platform invalid\n";
return 1;
}
cv::ocl::setDevice(devicesInfo[device]);
}
cv::ocl::setDevice(oclinfo[pid], device);
cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo();
cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ?
"CPU" :
(deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown")) << endl
<< "Platform name: " << deviceInfo.platform->platformName << endl
<< "Device name: " << deviceInfo.deviceName << endl;
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls)
}
......@@ -167,7 +167,7 @@ PERF_TEST_P(VideoMOGFixture, MOG,
typedef tuple<string, int> VideoMOG2ParamType;
typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
PERF_TEST_P(VideoMOG2Fixture, MOG2,
PERF_TEST_P(VideoMOG2Fixture, DISABLED_MOG2, // TODO Disabled: random hungs on buildslave
::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
::testing::Values(1, 3)))
{
......
......@@ -198,7 +198,7 @@ PERF_TEST_P(cornerHarrisFixture, cornerHarris,
typedef TestBaseWithParam<Size> integralFixture;
PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO does not work properly
PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
{
const Size srcSize = GetParam();
......
......@@ -161,7 +161,7 @@ PERF_TEST_P(setToFixture, setTo,
typedef tuple<Size, int, int> uploadParams;
typedef TestBaseWithParam<uploadParams> uploadFixture;
PERF_TEST_P(uploadFixture, DISABLED_upload,
PERF_TEST_P(uploadFixture, upload,
testing::Combine(
OCL_TYPICAL_MAT_SIZES,
testing::Range(CV_8U, CV_64F),
......@@ -190,15 +190,14 @@ PERF_TEST_P(uploadFixture, DISABLED_upload,
else
OCL_PERF_ELSE
int value = 0;
SANITY_CHECK(value);
SANITY_CHECK_NOTHING();
}
/////////////////// download ///////////////////////////
typedef TestBaseWithParam<uploadParams> downloadFixture;
PERF_TEST_P(downloadFixture, DISABLED_download,
PERF_TEST_P(downloadFixture, download,
testing::Combine(
OCL_TYPICAL_MAT_SIZES,
testing::Range(CV_8U, CV_64F),
......@@ -227,6 +226,5 @@ PERF_TEST_P(downloadFixture, DISABLED_download,
else
OCL_PERF_ELSE
int value = 0;
SANITY_CHECK(value);
SANITY_CHECK_NOTHING();
}
......@@ -51,18 +51,21 @@ using std::tr1::get;
///////////// norm////////////////////////
typedef TestBaseWithParam<Size> normFixture;
typedef tuple<Size, MatType> normParams;
typedef TestBaseWithParam<normParams> normFixture;
PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't work properly
PERF_TEST_P(normFixture, norm, testing::Combine(
OCL_TYPICAL_MAT_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
{
const Size srcSize = GetParam();
const std::string impl = getSelectedImpl();
const normParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
double value = 0.0;
const double eps = CV_MAT_DEPTH(type) == CV_8U ? DBL_EPSILON : 1e-3;
Mat src1(srcSize, CV_8UC1), src2(srcSize, CV_8UC1);
declare.in(src1, src2);
randu(src1, 0, 1);
randu(src2, 0, 1);
Mat src1(srcSize, type), src2(srcSize, type);
declare.in(src1, src2, WARMUP_RNG);
if (RUN_OCL_IMPL)
{
......@@ -70,7 +73,7 @@ PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't w
OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF);
SANITY_CHECK(value);
SANITY_CHECK(value, eps);
}
else if (RUN_PLAIN_IMPL)
{
......
This diff is collapsed.
......@@ -44,14 +44,15 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
{
namespace ocl
{
extern const char* bgfg_mog;
typedef struct _contant_struct
{
cl_float c_Tb;
......@@ -392,7 +393,7 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var
constants->c_tau = tau;
constants->c_shadowVal = shadowVal;
cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()),
(void *)constants, sizeof(_contant_struct));
}
......
......@@ -44,20 +44,11 @@
//M*/
#include "precomp.hpp"
#include <iomanip>
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
{
namespace ocl
{
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern const char *blend_linear;
}
}
void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
oclMat &result)
{
......
......@@ -45,22 +45,14 @@
//M*/
#include "precomp.hpp"
#include <functional>
#include <iterator>
#include <vector>
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
{
namespace ocl
{
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern const char *brute_force_match;
}
}
static const int OPT_SIZE = 100;
static const char * T_ARR [] = {
......@@ -244,7 +236,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
......@@ -264,7 +256,7 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
......@@ -285,7 +277,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD
{
const oclMat zeroMask;
const oclMat &tempMask = mask.data ? mask : zeroMask;
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
......@@ -468,7 +460,7 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
const oclMat &trainIdx, const oclMat &distance, int distType)
{
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
bool is_cpu = isCpuDevice();
if (query.cols <= 64)
{
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
......
......@@ -44,19 +44,11 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *build_warps;
}
}
//////////////////////////////////////////////////////////////////////////////
// buildWarpPlaneMaps
......
......@@ -44,19 +44,11 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
{
namespace ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern const char *imgproc_canny;
}
}
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL)
{
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
......@@ -98,7 +90,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
{
openCLFree(counter);
}
counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
openCLSafeCall(err);
}
......@@ -351,7 +343,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
Context *clCxt = map.clCxt;
String kernelName = "edgesHysteresisGlobal";
std::vector< std::pair<size_t, const void *> > args;
......@@ -360,7 +352,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
int count_i[1] = {0};
while(count > 0)
{
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
args.clear();
size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
......@@ -375,7 +367,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
std::swap(st1, st2);
}
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -44,44 +44,43 @@
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv {
namespace ocl {
namespace cv
class ProgramCache
{
namespace ocl
{
class ProgramCache
{
protected:
ProgramCache();
friend class std::auto_ptr<ProgramCache>;
static std::auto_ptr<ProgramCache> programCache;
protected:
ProgramCache();
~ProgramCache();
friend class std::auto_ptr<ProgramCache>;
public:
static ProgramCache *getProgramCache();
public:
~ProgramCache();
static ProgramCache *getProgramCache()
{
if( NULL == programCache.get())
programCache.reset(new ProgramCache());
return programCache.get();
}
cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
const char *build_options);
//lookup the binary given the file name
cl_program progLookup(String srcsign);
void releaseProgram();
protected:
//lookup the binary given the file name
// (with acquired mutexCache)
cl_program progLookup(const String& srcsign);
//add program to the cache
void addProgram(String srcsign, cl_program program);
void releaseProgram();
//add program to the cache
// (with acquired mutexCache)
void addProgram(const String& srcsign, cl_program program);
std::map <String, cl_program> codeCache;
unsigned int cacheSize;
//The presumed watermark for the cache volume (256MB). Is it enough?
//We may need more delicate algorithms when necessary later.
//Right now, let's just leave it alone.
static const unsigned MAX_PROG_CACHE_SIZE = 1024;
};
std::map <String, cl_program> codeCache;
unsigned int cacheSize;
}//namespace ocl
//The presumed watermark for the cache volume (256MB). Is it enough?
//We may need more delicate algorithms when necessary later.
//Right now, let's just leave it alone.
static const unsigned MAX_PROG_CACHE_SIZE = 1024;
// acquire both mutexes in this order: 1) mutexFiles 2) mutexCache
static cv::Mutex mutexFiles;
static cv::Mutex mutexCache;
};
}//namespace ocl
}//namespace cv
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#!/bin/bash -e
#
# Runs the CL runtime generator scripts (parser_cl.py, parser_clamdblas.py,
# parser_clamdfft.py) over the headers stored under sources/.
# Each header is fed to its parser on stdin; parser_cl.py additionally takes
# the output base name as its only argument.
# NOTE(review): where the parsers write their output is decided inside the
# Python scripts and is not visible here.

# The "-e" on the shebang line is dropped when the script is started as
# "bash <script>", so request fail-fast behaviour explicitly as well.
set -e

# Headers and parser scripts are addressed relative to this script, so make
# the script's own directory the working directory regardless of the caller's.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "Generate files for CL runtime..."

# Use stdin redirection instead of "cat file | python ...": with a pipeline
# only the exit status of the last command is checked, so a missing header
# would let the parser run on empty input. With redirection the shell fails
# before the parser is started.
python parser_cl.py cl_runtime_opencl11 < sources/opencl11/cl.h
python parser_cl.py cl_runtime_opencl12 < sources/opencl12/cl.h
python parser_clamdblas.py < sources/clAmdBlas.h
python parser_clamdfft.py < sources/clAmdFft.h

echo "Generate files for CL runtime... Done"
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
@CL_FN_ENUMS@
@CL_FN_NAMES@
namespace {
@CL_FN_SWITCH@
}
@CL_FN_DEFINITIONS@
@CL_FN_PTRS@
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment