Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
97aa8d33
Commit
97aa8d33
authored
Jan 29, 2015
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3603 from alalek:ocl_svm
parents
bdebf4ce
0a07d780
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
1537 additions
and
155 deletions
+1537
-155
CMakeLists.txt
CMakeLists.txt
+1
-0
OpenCVDetectOpenCL.cmake
cmake/OpenCVDetectOpenCL.cmake
+4
-0
cvconfig.h.in
cmake/templates/cvconfig.h.in
+1
-0
mat.hpp
modules/core/include/opencv2/core/mat.hpp
+4
-3
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+17
-3
opencl_svm.hpp
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
+81
-0
opencl_core.hpp
.../core/include/opencv2/core/opencl/runtime/opencl_core.hpp
+12
-0
opencl_svm_20.hpp
...ore/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
+52
-0
opencl_svm_definitions.hpp
...de/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
+42
-0
opencl_svm_hsa_extension.hpp
.../opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
+166
-0
matmul.cpp
modules/core/src/matmul.cpp
+10
-0
matrix.cpp
modules/core/src/matrix.cpp
+2
-1
ocl.cpp
modules/core/src/ocl.cpp
+1074
-144
opencl_core.cpp
modules/core/src/opencl/runtime/opencl_core.cpp
+67
-0
umatrix.cpp
modules/core/src/umatrix.cpp
+4
-4
No files found.
CMakeLists.txt
View file @
97aa8d33
...
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
...
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION
(
WITH_XINE
"Include Xine support (GPL)"
OFF
IF
(
UNIX AND NOT APPLE AND NOT ANDROID
)
)
OCV_OPTION
(
WITH_XINE
"Include Xine support (GPL)"
OFF
IF
(
UNIX AND NOT APPLE AND NOT ANDROID
)
)
OCV_OPTION
(
WITH_CLP
"Include Clp support (EPL)"
OFF
)
OCV_OPTION
(
WITH_CLP
"Include Clp support (EPL)"
OFF
)
OCV_OPTION
(
WITH_OPENCL
"Include OpenCL Runtime support"
ON
IF
(
NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCL
"Include OpenCL Runtime support"
ON
IF
(
NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCL_SVM
"Include OpenCL Shared Virtual Memory support"
OFF
)
# experimental
OCV_OPTION
(
WITH_OPENCLAMDFFT
"Include AMD OpenCL FFT library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDFFT
"Include AMD OpenCL FFT library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDBLAS
"Include AMD OpenCL BLAS library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDBLAS
"Include AMD OpenCL BLAS library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON IF WIN32
)
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON IF WIN32
)
...
...
cmake/OpenCVDetectOpenCL.cmake
View file @
97aa8d33
...
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
...
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
set
(
HAVE_OPENCL 1
)
set
(
HAVE_OPENCL 1
)
if
(
WITH_OPENCL_SVM
)
set
(
HAVE_OPENCL_SVM 1
)
endif
()
if
(
HAVE_OPENCL_STATIC
)
if
(
HAVE_OPENCL_STATIC
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
else
()
else
()
...
...
cmake/templates/cvconfig.h.in
View file @
97aa8d33
...
@@ -122,6 +122,7 @@
...
@@ -122,6 +122,7 @@
/* OpenCL Support */
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_SVM
/* OpenEXR codec */
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
#cmakedefine HAVE_OPENEXR
...
...
modules/core/include/opencv2/core/mat.hpp
View file @
97aa8d33
...
@@ -376,9 +376,10 @@ enum UMatUsageFlags
...
@@ -376,9 +376,10 @@ enum UMatUsageFlags
{
{
USAGE_DEFAULT
=
0
,
USAGE_DEFAULT
=
0
,
//
default
allocation policy is platform and usage specific
//
buffer
allocation policy is platform and usage specific
USAGE_ALLOCATE_HOST_MEMORY
=
1
<<
0
,
USAGE_ALLOCATE_HOST_MEMORY
=
1
<<
0
,
USAGE_ALLOCATE_DEVICE_MEMORY
=
1
<<
1
,
USAGE_ALLOCATE_DEVICE_MEMORY
=
1
<<
1
,
USAGE_ALLOCATE_SHARED_MEMORY
=
1
<<
2
,
// It is not equal to: USAGE_ALLOCATE_HOST_MEMORY | USAGE_ALLOCATE_DEVICE_MEMORY
__UMAT_USAGE_FLAGS_32BIT
=
0x7fffffff
// Binary compatibility hint
__UMAT_USAGE_FLAGS_32BIT
=
0x7fffffff
// Binary compatibility hint
};
};
...
@@ -414,7 +415,7 @@ public:
...
@@ -414,7 +415,7 @@ public:
const
size_t
dstofs
[],
const
size_t
dststep
[],
bool
sync
)
const
;
const
size_t
dstofs
[],
const
size_t
dststep
[],
bool
sync
)
const
;
// default implementation returns DummyBufferPoolController
// default implementation returns DummyBufferPoolController
virtual
BufferPoolController
*
getBufferPoolController
()
const
;
virtual
BufferPoolController
*
getBufferPoolController
(
const
char
*
id
=
NULL
)
const
;
};
};
...
@@ -480,7 +481,7 @@ struct CV_EXPORTS UMatData
...
@@ -480,7 +481,7 @@ struct CV_EXPORTS UMatData
int
refcount
;
int
refcount
;
uchar
*
data
;
uchar
*
data
;
uchar
*
origdata
;
uchar
*
origdata
;
size_t
size
,
capacity
;
size_t
size
;
int
flags
;
int
flags
;
void
*
handle
;
void
*
handle
;
...
...
modules/core/include/opencv2/core/ocl.hpp
View file @
97aa8d33
...
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
...
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W
void
setUseOpenCL
(
bool
flag
);
CV_EXPORTS_W
void
setUseOpenCL
(
bool
flag
);
CV_EXPORTS_W
void
finish
();
CV_EXPORTS_W
void
finish
();
CV_EXPORTS
bool
haveSVM
();
class
CV_EXPORTS
Context
;
class
CV_EXPORTS
Context
;
class
CV_EXPORTS
Device
;
class
CV_EXPORTS
Device
;
class
CV_EXPORTS
Kernel
;
class
CV_EXPORTS
Kernel
;
...
@@ -248,7 +250,10 @@ public:
...
@@ -248,7 +250,10 @@ public:
void
*
ptr
()
const
;
void
*
ptr
()
const
;
friend
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
context
,
void
*
device
);
friend
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
context
,
void
*
device
);
protected
:
bool
useSVM
()
const
;
void
setUseSVM
(
bool
enabled
);
struct
Impl
;
struct
Impl
;
Impl
*
p
;
Impl
*
p
;
};
};
...
@@ -666,8 +671,17 @@ protected:
...
@@ -666,8 +671,17 @@ protected:
CV_EXPORTS
MatAllocator
*
getOpenCLAllocator
();
CV_EXPORTS
MatAllocator
*
getOpenCLAllocator
();
CV_EXPORTS_W
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
#ifdef __OPENCV_BUILD
namespace
internal
{
CV_EXPORTS
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS
bool
isCLBuffer
(
UMat
&
u
);
}
// namespace internal
#endif
//! @}
//! @}
...
...
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
0 → 100644
View file @
97aa8d33
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace
cv
{
namespace
ocl
{
namespace
svm
{
/**
 * Bit set describing which Shared Virtual Memory features are reported.
 *
 * The raw bits are kept in value_ and the struct converts implicitly to and
 * from int, so it can be used wherever a plain capability mask is expected.
 */
struct SVMCapabilities
{
    // One bit per capability; the values may be OR-ed together.
    enum Value
    {
        SVM_COARSE_GRAIN_BUFFER = 0x1,
        SVM_FINE_GRAIN_BUFFER = 0x2,
        SVM_FINE_GRAIN_SYSTEM = 0x4,
        SVM_ATOMICS = 0x8
    };

    // Combination of Value bits; 0 means "no capability set".
    int value_;

    // Wraps a raw capability mask (defaults to an empty set).
    SVMCapabilities(int capabilities = 0)
        : value_(capabilities)
    {
    }

    // Exposes the raw capability mask.
    operator int() const
    {
        return value_;
    }

    // True when no capability bit is set at all.
    bool isNoSVMSupport() const
    {
        return !value_;
    }

    // Each query below tests exactly one bit of value_.
    bool isSupportCoarseGrainBuffer() const
    {
        return (value_ & SVM_COARSE_GRAIN_BUFFER) == SVM_COARSE_GRAIN_BUFFER;
    }

    bool isSupportFineGrainBuffer() const
    {
        return (value_ & SVM_FINE_GRAIN_BUFFER) == SVM_FINE_GRAIN_BUFFER;
    }

    bool isSupportFineGrainSystem() const
    {
        return (value_ & SVM_FINE_GRAIN_SYSTEM) == SVM_FINE_GRAIN_SYSTEM;
    }

    bool isSupportAtomics() const
    {
        return (value_ & SVM_ATOMICS) == SVM_ATOMICS;
    }
};
CV_EXPORTS
const
SVMCapabilities
getSVMCapabilitites
(
const
ocl
::
Context
&
context
);
/**
 * Table of SVM entry points, stored using the AMD extension function-pointer
 * typedefs. All entries start out as NULL and are filled in elsewhere.
 */
struct SVMFunctions
{
    clSVMAllocAMD_fn fn_clSVMAlloc;
    clSVMFreeAMD_fn fn_clSVMFree;
    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;

    // Starts with every entry point cleared.
    inline SVMFunctions()
        : fn_clSVMAlloc(NULL),
          fn_clSVMFree(NULL),
          fn_clSetKernelArgSVMPointer(NULL),
          /*fn_clSetKernelExecInfo(NULL),*/
          /*fn_clEnqueueSVMFree(NULL),*/
          fn_clEnqueueSVMMemcpy(NULL),
          fn_clEnqueueSVMMemFill(NULL),
          fn_clEnqueueSVMMap(NULL),
          fn_clEnqueueSVMUnmap(NULL)
    {
    }

    // Returns true only when every (non-commented-out) entry point is set.
    inline bool isValid() const
    {
        if (fn_clSVMAlloc == NULL)
            return false;
        if (!fn_clSVMFree)
            return false;
        if (!fn_clSetKernelArgSVMPointer)
            return false;
        /* fn_clSetKernelExecInfo and fn_clEnqueueSVMFree are not tracked */
        if (!fn_clEnqueueSVMMemcpy)
            return false;
        if (!fn_clEnqueueSVMMemFill)
            return false;
        if (!fn_clEnqueueSVMMap)
            return false;
        if (!fn_clEnqueueSVMUnmap)
            return false;
        return true;
    }
};
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
CV_EXPORTS
const
SVMFunctions
*
getSVMFunctions
(
const
ocl
::
Context
&
context
);
CV_EXPORTS
bool
useSVM
(
UMatUsageFlags
usageFlags
);
}}}
//namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */
modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
View file @
97aa8d33
...
@@ -62,6 +62,18 @@
...
@@ -62,6 +62,18 @@
#endif
#endif
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#include "autogenerated/opencl_core.hpp"
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_STATIC
...
...
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
0 → 100644
View file @
97aa8d33
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern
CL_RUNTIME_EXPORT
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
);
extern
CL_RUNTIME_EXPORT
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
0 → 100644
View file @
97aa8d33
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef
cl_bitfield
cl_device_svm_capabilities
;
typedef
cl_bitfield
cl_svm_mem_flags
;
typedef
cl_uint
cl_kernel_exec_info
;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
0 → 100644
View file @
97aa8d33
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef
cl_bitfield
cl_device_svm_capabilities_amd
;
typedef
cl_bitfield
cl_svm_mem_flags_amd
;
typedef
cl_uint
cl_kernel_exec_info_amd
;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef
CL_API_ENTRY
void
*
(
CL_API_CALL
*
clSVMAllocAMD_fn
)(
cl_context
/* context */
,
cl_svm_mem_flags_amd
/* flags */
,
size_t
/* size */
,
unsigned
int
/* alignment */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
void
(
CL_API_CALL
*
clSVMFreeAMD_fn
)(
cl_context
/* context */
,
void
*
/* svm_pointer */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMFreeAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
(
CL_CALLBACK
*
)(
/*pfn_free_func*/
cl_command_queue
/* queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
*
/* user_data */
),
void
*
/* user_data */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpyAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_copy */
,
void
*
/* dst_ptr */
,
const
void
*
/* src_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFillAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
const
void
*
/* pattern */
,
size_t
/* pattern_size */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMapAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_map */
,
cl_map_flags
/* map_flags */
,
void
*
/* svm_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmapAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointerAMD_fn
)(
cl_kernel
/* kernel */
,
cl_uint
/* arg_index */
,
const
void
*
/* arg_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelExecInfoAMD_fn
)(
cl_kernel
/* kernel */
,
cl_kernel_exec_info_amd
/* param_name */
,
size_t
/* param_value_size */
,
const
void
*
/* param_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
modules/core/src/matmul.cpp
View file @
97aa8d33
...
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
...
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
return
false
;
return
false
;
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
A
)
||
!
ocl
::
internal
::
isCLBuffer
(
B
)
||
!
ocl
::
internal
::
isCLBuffer
(
D
))
{
return
false
;
}
if
(
haveC
)
{
UMat
C
=
matC
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
C
))
return
false
;
}
if
(
haveC
)
if
(
haveC
)
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
else
else
...
...
modules/core/src/matrix.cpp
View file @
97aa8d33
...
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
...
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
}
BufferPoolController
*
MatAllocator
::
getBufferPoolController
()
const
BufferPoolController
*
MatAllocator
::
getBufferPoolController
(
const
char
*
id
)
const
{
{
(
void
)
id
;
static
DummyBufferPoolController
dummy
;
static
DummyBufferPoolController
dummy
;
return
&
dummy
;
return
&
dummy
;
}
}
...
...
modules/core/src/ocl.cpp
View file @
97aa8d33
...
@@ -48,6 +48,8 @@
...
@@ -48,6 +48,8 @@
#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
#define CV_OPENCL_SHOW_RUN_ERRORS 0
#define CV_OPENCL_SHOW_RUN_ERRORS 0
#define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
#define CV_OPENCL_SHOW_SVM_LOG 0
#include "opencv2/core/bufferpool.hpp"
#include "opencv2/core/bufferpool.hpp"
#ifndef LOG_BUFFER_POOL
#ifndef LOG_BUFFER_POOL
...
@@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV
...
@@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
cv
::
format
(
"Invalid value for %s parameter: %s"
,
name
,
value
.
c_str
()));
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
cv
::
format
(
"Invalid value for %s parameter: %s"
,
name
,
value
.
c_str
()));
}
}
// SVM trace helpers. Both macros take printf-style arguments and compile to
// nothing when the corresponding CV_OPENCL_SHOW_SVM_* switch is 0.
//
// The enabled forms are wrapped in do { } while so that each invocation is a
// single statement: the two printf calls stay together inside an unbraced
// if/else, and the trailing ';' at the call site does not break an 'else'.
#if CV_OPENCL_SHOW_SVM_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_P(...) \
    do { printf("line %d (ocl.cpp): ", __LINE__); printf(__VA_ARGS__); } while ((void)0, 0)
#else
#define CV_OPENCL_SVM_TRACE_P(...)
#endif
#if CV_OPENCL_SHOW_SVM_ERROR_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_ERROR_P(...) \
    do { printf("Error on line %d (ocl.cpp): ", __LINE__); printf(__VA_ARGS__); } while ((void)0, 0)
#else
#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
#endif
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
...
@@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
...
@@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
cl_uint
*
num_image_formats
),
cl_uint
*
num_image_formats
),
(
context
,
flags
,
image_type
,
num_entries
,
image_formats
,
num_image_formats
))
(
context
,
flags
,
image_type
,
num_entries
,
image_formats
,
num_image_formats
))
/*
/*
OCL_FUNC(cl_int, clGetMemObjectInfo,
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
(cl_mem memobj,
...
@@ -1342,6 +1359,12 @@ static bool isRaiseError()
...
@@ -1342,6 +1359,12 @@ static bool isRaiseError()
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
#include "opencv2/core/opencl/opencl_svm.hpp"
#endif
namespace
cv
{
namespace
ocl
{
namespace
cv
{
namespace
ocl
{
struct
UMat2D
struct
UMat2D
...
@@ -1627,6 +1650,15 @@ bool haveAmdFft()
...
@@ -1627,6 +1650,15 @@ bool haveAmdFft()
#endif
#endif
// Reports whether this build was compiled with HAVE_OPENCL_SVM defined.
// This is a compile-time answer only; no device or context is queried here.
bool haveSVM()
{
#ifdef HAVE_OPENCL_SVM
    const bool builtWithSVM = true;
#else
    const bool builtWithSVM = false;
#endif
    return builtWithSVM;
}
void
finish
()
void
finish
()
{
{
Queue
::
getDefault
().
finish
();
Queue
::
getDefault
().
finish
();
...
@@ -2357,12 +2389,86 @@ not_found:
...
@@ -2357,12 +2389,86 @@ not_found:
}
}
#endif
#endif
#ifdef HAVE_OPENCL_SVM
namespace
svm
{
// Allocator flag values for SVM-backed buffers.
// The low 16 bits are deliberately left unused; every value starts at bit 16.
enum AllocatorFlags
{
    OPENCL_SVM_COARSE_GRAIN_BUFFER = 0x10000, // clSVMAlloc + SVM map/unmap
    OPENCL_SVM_FINE_GRAIN_BUFFER   = 0x20000, // clSVMAlloc
    OPENCL_SVM_FINE_GRAIN_SYSTEM   = 0x30000, // direct access
    OPENCL_SVM_BUFFER_MASK         = 0x30000, // bits 16-17: covers the three buffer kinds above
    OPENCL_SVM_BUFFER_MAP          = 0x40000
};
// Returns the boolean configuration parameter
// OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE (default: false).
// The parameter is looked up on the first call and the answer is reused after.
static bool checkForceSVMUmatUsage()
{
    static bool cachedValue = false;
    static bool isCached = false;
    if (isCached)
        return cachedValue;

    cachedValue = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
    isCached = true;
    return cachedValue;
}
// Returns the boolean configuration parameter
// OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE (default: false).
// The parameter is looked up on the first call and the answer is reused after.
static bool checkDisableSVMUMatUsage()
{
    static bool cachedValue = false;
    static bool isCached = false;
    if (isCached)
        return cachedValue;

    cachedValue = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
    isCached = true;
    return cachedValue;
}
// Returns the boolean configuration parameter OPENCV_OPENCL_SVM_DISABLE
// (default: false).
// The parameter is looked up on the first call and the answer is reused after.
static bool checkDisableSVM()
{
    static bool cachedValue = false;
    static bool isCached = false;
    if (isCached)
        return cachedValue;

    cachedValue = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false);
    isCached = true;
    return cachedValue;
}
// see SVMCapabilities
//
// Returns the bit mask that is AND-ed onto the detected SVM capabilities.
//
// The mask comes from the OPENCV_OPENCL_SVM_CAPABILITIES_MASK environment
// variable, parsed as a decimal integer. When the variable is not set, all
// bits are 1, i.e. nothing is masked out.
//
// The environment is consulted only on the first call, and the result is
// cached in both the "set" and the "unset" case, matching the other
// check*() helpers in this namespace. strtoul is used instead of atoi so that
// values outside the range of int (e.g. 4294967295) are well defined.
static unsigned int getSVMCapabilitiesMask()
{
    static bool initialized = false;
    static unsigned int mask = ~0U; // all bits 1 unless the variable says otherwise
    if (!initialized)
    {
        const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
        if (envValue != NULL)
        {
            // A leading '-' is negated in unsigned arithmetic, so "-1" still
            // yields an all-ones mask as it did with atoi.
            mask = (unsigned int)strtoul(envValue, NULL, 10);
        }
        initialized = true;
    }
    return mask;
}
}
// namespace svm
#endif
struct
Context
::
Impl
struct
Context
::
Impl
{
{
Impl
()
static
Context
::
Impl
*
get
(
Context
&
context
)
{
return
context
.
p
;
}
void
__init
()
{
{
refcount
=
1
;
refcount
=
1
;
handle
=
0
;
handle
=
0
;
#ifdef HAVE_OPENCL_SVM
svmInitialized
=
false
;
#endif
}
Impl
()
{
__init
();
}
}
void
setDefault
()
void
setDefault
()
...
@@ -2401,8 +2507,7 @@ struct Context::Impl
...
@@ -2401,8 +2507,7 @@ struct Context::Impl
Impl
(
int
dtype0
)
Impl
(
int
dtype0
)
{
{
refcount
=
1
;
__init
();
handle
=
0
;
cl_int
retval
=
0
;
cl_int
retval
=
0
;
cl_platform_id
pl
=
(
cl_platform_id
)
Platform
::
getDefault
().
ptr
();
cl_platform_id
pl
=
(
cl_platform_id
)
Platform
::
getDefault
().
ptr
();
...
@@ -2419,7 +2524,7 @@ struct Context::Impl
...
@@ -2419,7 +2524,7 @@ struct Context::Impl
AutoBuffer
<
void
*>
dlistbuf
(
nd0
*
2
+
1
);
AutoBuffer
<
void
*>
dlistbuf
(
nd0
*
2
+
1
);
cl_device_id
*
dlist
=
(
cl_device_id
*
)(
void
**
)
dlistbuf
;
cl_device_id
*
dlist
=
(
cl_device_id
*
)(
void
**
)
dlistbuf
;
cl_device_id
*
dlist_new
=
dlist
+
nd0
;
cl_device_id
*
dlist_new
=
dlist
+
nd0
;
CV_OclDbgAssert
(
clGetDeviceIDs
(
pl
,
dtype
,
nd0
,
dlist
,
&
nd0
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clGetDeviceIDs
(
pl
,
dtype
,
nd0
,
dlist
,
&
nd0
)
==
CL_SUCCESS
);
String
name0
;
String
name0
;
for
(
i
=
0
;
i
<
nd0
;
i
++
)
for
(
i
=
0
;
i
<
nd0
;
i
++
)
...
@@ -2496,6 +2601,144 @@ struct Context::Impl
...
@@ -2496,6 +2601,144 @@ struct Context::Impl
};
};
typedef
std
::
map
<
HashKey
,
Program
>
phash_t
;
typedef
std
::
map
<
HashKey
,
Program
>
phash_t
;
phash_t
phash
;
phash_t
phash
;
#ifdef HAVE_OPENCL_SVM
bool
svmInitialized
;
bool
svmAvailable
;
bool
svmEnabled
;
svm
::
SVMCapabilities
svmCapabilities
;
svm
::
SVMFunctions
svmFunctions
;
void
svmInit
()
{
CV_Assert
(
handle
!=
NULL
);
const
Device
&
device
=
devices
[
0
];
cl_device_svm_capabilities
deviceCaps
=
0
;
CV_Assert
(((
void
)
0
,
CL_DEVICE_SVM_CAPABILITIES
==
CL_DEVICE_SVM_CAPABILITIES_AMD
));
// Check assumption
cl_int
status
=
clGetDeviceInfo
((
cl_device_id
)
device
.
ptr
(),
CL_DEVICE_SVM_CAPABILITIES
,
sizeof
(
deviceCaps
),
&
deviceCaps
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d
\n
"
,
status
);
goto
noSVM
;
}
CV_OPENCL_SVM_TRACE_P
(
"CL_DEVICE_SVM_CAPABILITIES returned: 0x%x
\n
"
,
(
int
)
deviceCaps
);
CV_Assert
(((
void
)
0
,
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
==
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD
));
// Check assumption
svmCapabilities
.
value_
=
((
deviceCaps
&
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
)
?
svm
::
SVMCapabilities
::
SVM_COARSE_GRAIN_BUFFER
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
)
?
svm
::
SVMCapabilities
::
SVM_FINE_GRAIN_BUFFER
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
)
?
svm
::
SVMCapabilities
::
SVM_FINE_GRAIN_SYSTEM
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_ATOMICS
)
?
svm
::
SVMCapabilities
::
SVM_ATOMICS
:
0
);
svmCapabilities
.
value_
&=
svm
::
getSVMCapabilitiesMask
();
if
(
svmCapabilities
.
value_
==
0
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"svmCapabilities is empty
\n
"
);
goto
noSVM
;
}
try
{
// Try OpenCL 2.0
CV_OPENCL_SVM_TRACE_P
(
"Try SVM from OpenCL 2.0 ...
\n
"
);
void
*
ptr
=
clSVMAlloc
(
handle
,
CL_MEM_READ_WRITE
,
100
,
0
);
if
(
!
ptr
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clSVMAlloc returned NULL...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clSVMAlloc returned NULL"
);
}
try
{
bool
error
=
false
;
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
(
CL_SUCCESS
!=
clEnqueueSVMMap
(
q
,
CL_TRUE
,
CL_MAP_WRITE
,
ptr
,
100
,
0
,
NULL
,
NULL
))
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clEnqueueSVMMap failed...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clEnqueueSVMMap FAILED"
);
}
clFinish
(
q
);
try
{
((
int
*
)
ptr
)[
0
]
=
100
;
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"SVM buffer access test FAILED
\n
"
);
error
=
true
;
}
if
(
CL_SUCCESS
!=
clEnqueueSVMUnmap
(
q
,
ptr
,
0
,
NULL
,
NULL
))
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clEnqueueSVMUnmap failed...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clEnqueueSVMUnmap FAILED"
);
}
clFinish
(
q
);
if
(
error
)
{
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"OpenCL SVM buffer access test was FAILED"
);
}
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"OpenCL SVM buffer access test was FAILED
\n
"
);
clSVMFree
(
handle
,
ptr
);
throw
;
}
clSVMFree
(
handle
,
ptr
);
svmFunctions
.
fn_clSVMAlloc
=
clSVMAlloc
;
svmFunctions
.
fn_clSVMFree
=
clSVMFree
;
svmFunctions
.
fn_clSetKernelArgSVMPointer
=
clSetKernelArgSVMPointer
;
//svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
//svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
svmFunctions
.
fn_clEnqueueSVMMemcpy
=
clEnqueueSVMMemcpy
;
svmFunctions
.
fn_clEnqueueSVMMemFill
=
clEnqueueSVMMemFill
;
svmFunctions
.
fn_clEnqueueSVMMap
=
clEnqueueSVMMap
;
svmFunctions
.
fn_clEnqueueSVMUnmap
=
clEnqueueSVMUnmap
;
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_P
(
"clSVMAlloc failed, trying HSA extension...
\n
"
);
try
{
// Try HSA extension
String
extensions
=
device
.
extensions
();
if
(
extensions
.
find
(
"cl_amd_svm"
)
==
String
::
npos
)
{
CV_OPENCL_SVM_TRACE_P
(
"Device extension doesn't have cl_amd_svm: %s
\n
"
,
extensions
.
c_str
());
goto
noSVM
;
}
cl_platform_id
p
=
NULL
;
status
=
clGetDeviceInfo
((
cl_device_id
)
device
.
ptr
(),
CL_DEVICE_PLATFORM
,
sizeof
(
cl_platform_id
),
&
p
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
svmFunctions
.
fn_clSVMAlloc
=
(
clSVMAllocAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSVMAllocAMD"
);
svmFunctions
.
fn_clSVMFree
=
(
clSVMFreeAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSVMFreeAMD"
);
svmFunctions
.
fn_clSetKernelArgSVMPointer
=
(
clSetKernelArgSVMPointerAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSetKernelArgSVMPointerAMD"
);
//svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
//svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
svmFunctions
.
fn_clEnqueueSVMMemcpy
=
(
clEnqueueSVMMemcpyAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMemcpyAMD"
);
svmFunctions
.
fn_clEnqueueSVMMemFill
=
(
clEnqueueSVMMemFillAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMemFillAMD"
);
svmFunctions
.
fn_clEnqueueSVMMap
=
(
clEnqueueSVMMapAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMapAMD"
);
svmFunctions
.
fn_clEnqueueSVMUnmap
=
(
clEnqueueSVMUnmapAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMUnmapAMD"
);
CV_Assert
(
svmFunctions
.
isValid
());
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_P
(
"Something is totally wrong
\n
"
);
goto
noSVM
;
}
}
svmAvailable
=
true
;
svmEnabled
=
!
svm
::
checkDisableSVM
();
svmInitialized
=
true
;
CV_OPENCL_SVM_TRACE_P
(
"OpenCV OpenCL SVM support initialized
\n
"
);
return
;
noSVM:
CV_OPENCL_SVM_TRACE_P
(
"OpenCL SVM is not detected
\n
"
);
svmAvailable
=
false
;
svmEnabled
=
false
;
svmCapabilities
.
value_
=
0
;
svmInitialized
=
true
;
svmFunctions
.
fn_clSVMAlloc
=
NULL
;
return
;
}
#endif
};
};
...
@@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog,
...
@@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog,
return
p
?
p
->
getProg
(
prog
,
buildopts
,
errmsg
)
:
Program
();
return
p
?
p
->
getProg
(
prog
,
buildopts
,
errmsg
)
:
Program
();
}
}
#ifdef HAVE_OPENCL_SVM
bool
Context
::
useSVM
()
const
{
Context
::
Impl
*
i
=
p
;
CV_Assert
(
i
);
if
(
!
i
->
svmInitialized
)
i
->
svmInit
();
return
i
->
svmEnabled
;
}
void
Context
::
setUseSVM
(
bool
enabled
)
{
Context
::
Impl
*
i
=
p
;
CV_Assert
(
i
);
if
(
!
i
->
svmInitialized
)
i
->
svmInit
();
if
(
enabled
&&
!
i
->
svmAvailable
)
{
CV_ErrorNoReturn
(
Error
::
StsError
,
"OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device"
);
}
i
->
svmEnabled
=
enabled
;
}
#else
// Variant compiled when HAVE_OPENCL_SVM is not defined: SVM is never in use,
// so this always reports false.
bool
Context
::
useSVM
()
const
{
return
false
;
}
// Variant compiled when HAVE_OPENCL_SVM is not defined: there is no state to
// change. The only check is that the caller is not asking to enable SVM;
// a request to disable it passes the assertion and does nothing else.
void
Context
::
setUseSVM
(
bool
enabled
)
{
CV_Assert
(
!
enabled
);
}
#endif
#ifdef HAVE_OPENCL_SVM
namespace
svm
{
/**
 * Returns a copy of the SVM capability descriptor stored in the context.
 *
 * The context must have an implementation object. Its SVM initialisation is
 * triggered here when it has not happened yet, which is why
 * getSVMFunctions() expects this function to have been called beforehand.
 */
const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
{
    Context::Impl* const i = context.p;
    CV_Assert(i);

    if (!i->svmInitialized)
    {
        i->svmInit();
    }

    return i->svmCapabilities;
}
/**
 * Gives access to the table of SVM entry points held by the context.
 *
 * Unlike getSVMCapabilitites(), this accessor does not initialise anything:
 * it asserts that SVM initialisation already ran and that an allocation
 * entry point was resolved, then returns a pointer into the context's
 * implementation object (no copy is made).
 */
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
{
    Context::Impl* const i = context.p;
    CV_Assert(i);

    // getSVMCapabilitites() must be called first
    CV_Assert(i->svmInitialized);

    // A NULL allocator means SVM was not detected for this context.
    CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);

    const SVMFunctions* const table = &i->svmFunctions;
    return table;
}
/**
 * Decides whether a UMat with the given usage flags should be backed by SVM.
 *
 * The checks run in a fixed order: the "force" override wins, then the
 * "disable" override, and only then the usage flags are inspected.
 * Without USAGE_ALLOCATE_SHARED_MEMORY the answer is false, i.e. SVM is
 * not used by default.
 */
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
{
    if (checkForceSVMUmatUsage())
    {
        return true;
    }

    if (checkDisableSVMUMatUsage())
    {
        return false;
    }

    // don't use SVM by default: it has to be requested through the usage flags
    return (usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0;
}
}
// namespace cv::ocl::svm
#endif // HAVE_OPENCL_SVM
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
_context
,
void
*
_device
)
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
_context
,
void
*
_device
)
{
{
cl_context
context
=
(
cl_context
)
_context
;
cl_context
context
=
(
cl_context
)
_context
;
...
@@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg)
...
@@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg)
return
-
1
;
return
-
1
;
}
}
#ifdef HAVE_OPENCL_SVM
if
((
arg
.
m
->
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
const
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
uchar
*&
svmDataPtr
=
(
uchar
*&
)
arg
.
m
->
u
->
handle
;
CV_OPENCL_SVM_TRACE_P
(
"clSetKernelArgSVMPointer: %p
\n
"
,
svmDataPtr
);
#if 1 // TODO
cl_int
status
=
svmFns
->
fn_clSetKernelArgSVMPointer
(
p
->
handle
,
(
cl_uint
)
i
,
svmDataPtr
);
#else
cl_int
status
=
svmFns
->
fn_clSetKernelArgSVMPointer
(
p
->
handle
,
(
cl_uint
)
i
,
&
svmDataPtr
);
#endif
CV_Assert
(
status
==
CL_SUCCESS
);
}
else
#endif
{
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
}
if
(
ptronly
)
if
(
ptronly
)
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
++
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
{
i
++
;
}
else
if
(
arg
.
m
->
dims
<=
2
)
else
if
(
arg
.
m
->
dims
<=
2
)
{
{
UMat2D
u2d
(
*
arg
.
m
);
UMat2D
u2d
(
*
arg
.
m
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u2d
.
step
),
&
u2d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u2d
.
step
),
&
u2d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u2d
.
offset
),
&
u2d
.
offset
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u2d
.
offset
),
&
u2d
.
offset
)
==
CL_SUCCESS
);
i
+=
3
;
i
+=
3
;
...
@@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg)
...
@@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg)
else
else
{
{
UMat3D
u3d
(
*
arg
.
m
);
UMat3D
u3d
(
*
arg
.
m
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u3d
.
slicestep
),
&
u3d
.
slicestep
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u3d
.
slicestep
),
&
u3d
.
slicestep
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u3d
.
step
),
&
u3d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u3d
.
step
),
&
u3d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
3
),
sizeof
(
u3d
.
offset
),
&
u3d
.
offset
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
3
),
sizeof
(
u3d
.
offset
),
&
u3d
.
offset
)
==
CL_SUCCESS
);
...
@@ -3433,39 +3761,55 @@ ProgramSource::hash_t ProgramSource::hash() const
...
@@ -3433,39 +3761,55 @@ ProgramSource::hash_t ProgramSource::hash() const
//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
template
<
typename
T
>
class
OpenCLBufferPool
class
OpenCLBufferPool
{
{
protected
:
protected
:
~
OpenCLBufferPool
()
{
}
~
OpenCLBufferPool
()
{
}
public
:
public
:
virtual
cl_mem
allocate
(
size_t
size
,
CV_OUT
size_t
&
capacity
)
=
0
;
virtual
T
allocate
(
size_t
size
)
=
0
;
virtual
void
release
(
cl_mem
handle
,
size_t
capacity
)
=
0
;
virtual
void
release
(
T
buffer
)
=
0
;
};
};
class
OpenCLBufferPoolImpl
:
public
BufferPoolController
,
public
OpenCLBufferPool
template
<
typename
Derived
,
typename
BufferEntry
,
typename
T
>
class
OpenCLBufferPoolBaseImpl
:
public
BufferPoolController
,
public
OpenCLBufferPool
<
T
>
{
{
public
:
private
:
struct
BufferEntry
inline
Derived
&
derived
()
{
return
*
static_cast
<
Derived
*>
(
this
);
}
{
cl_mem
clBuffer_
;
size_t
capacity_
;
};
protected
:
protected
:
Mutex
mutex_
;
Mutex
mutex_
;
size_t
currentReservedSize
;
size_t
currentReservedSize
;
size_t
maxReservedSize
;
size_t
maxReservedSize
;
std
::
list
<
BufferEntry
>
reservedEntries_
;
// LRU order
std
::
list
<
BufferEntry
>
allocatedEntries_
;
// Allocated and used entries
std
::
list
<
BufferEntry
>
reservedEntries_
;
// LRU order. Allocated, but not used entries
// synchronized
// Looks up the in-use entry whose clBuffer_ equals `buffer`.
// On a match the entry is copied into `entry`, unlinked from
// allocatedEntries_, and true is returned; otherwise `entry` is left
// untouched and the result is false.
// No locking happens here; the caller is expected to hold mutex_.
bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
{
    typedef typename std::list<BufferEntry>::iterator EntryIterator;

    for (EntryIterator it = allocatedEntries_.begin(); it != allocatedEntries_.end(); ++it)
    {
        if (!(it->clBuffer_ == buffer))
            continue;

        // Copy out before erasing: erase() invalidates `it`.
        entry = *it;
        allocatedEntries_.erase(it);
        return true;
    }

    return false;
}
// synchronized
// synchronized
bool
_findAndRemoveEntryFromReservedList
(
CV_OUT
BufferEntry
&
entry
,
const
size_t
size
)
bool
_findAndRemoveEntryFromReservedList
(
CV_OUT
BufferEntry
&
entry
,
const
size_t
size
)
{
{
if
(
reservedEntries_
.
empty
())
if
(
reservedEntries_
.
empty
())
return
false
;
return
false
;
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
std
::
list
<
BufferEntry
>::
iterator
result_pos
=
reservedEntries_
.
end
();
typename
std
::
list
<
BufferEntry
>::
iterator
result_pos
=
reservedEntries_
.
end
();
BufferEntry
result
=
{
NULL
,
0
}
;
BufferEntry
result
;
size_t
minDiff
=
(
size_t
)(
-
1
);
size_t
minDiff
=
(
size_t
)(
-
1
);
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
{
{
...
@@ -3489,6 +3833,7 @@ protected:
...
@@ -3489,6 +3833,7 @@ protected:
reservedEntries_
.
erase
(
result_pos
);
reservedEntries_
.
erase
(
result_pos
);
entry
=
result
;
entry
=
result
;
currentReservedSize
-=
entry
.
capacity_
;
currentReservedSize
-=
entry
.
capacity_
;
allocatedEntries_
.
push_back
(
entry
);
return
true
;
return
true
;
}
}
return
false
;
return
false
;
...
@@ -3503,7 +3848,7 @@ protected:
...
@@ -3503,7 +3848,7 @@ protected:
const
BufferEntry
&
entry
=
reservedEntries_
.
back
();
const
BufferEntry
&
entry
=
reservedEntries_
.
back
();
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
currentReservedSize
-=
entry
.
capacity_
;
currentReservedSize
-=
entry
.
capacity_
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
reservedEntries_
.
pop_back
();
reservedEntries_
.
pop_back
();
}
}
}
}
...
@@ -3523,72 +3868,45 @@ protected:
...
@@ -3523,72 +3868,45 @@ protected:
return
1024
*
1024
;
return
1024
*
1024
;
}
}
void
_allocateBufferEntry
(
BufferEntry
&
entry
,
size_t
size
)
{
CV_DbgAssert
(
entry
.
clBuffer_
==
NULL
);
entry
.
capacity_
=
alignSize
(
size
,
(
int
)
_allocationGranularity
(
size
));
Context
&
ctx
=
Context
::
getDefault
();
cl_int
retval
=
CL_SUCCESS
;
entry
.
clBuffer_
=
clCreateBuffer
((
cl_context
)
ctx
.
ptr
(),
CL_MEM_READ_WRITE
,
entry
.
capacity_
,
0
,
&
retval
);
CV_Assert
(
retval
==
CL_SUCCESS
);
CV_Assert
(
entry
.
clBuffer_
!=
NULL
);
if
(
retval
==
CL_SUCCESS
)
{
CV_IMPL_ADD
(
CV_IMPL_OCL
);
}
LOG_BUFFER_POOL
(
"OpenCL allocate %lld (0x%llx) bytes: %p
\n
"
,
(
long
long
)
entry
.
capacity_
,
(
long
long
)
entry
.
capacity_
,
entry
.
clBuffer_
);
}
void
_releaseBufferEntry
(
const
BufferEntry
&
entry
)
{
CV_Assert
(
entry
.
capacity_
!=
0
);
CV_Assert
(
entry
.
clBuffer_
!=
NULL
);
LOG_BUFFER_POOL
(
"OpenCL release buffer: %p, %lld (0x%llx) bytes
\n
"
,
entry
.
clBuffer_
,
(
long
long
)
entry
.
capacity_
,
(
long
long
)
entry
.
capacity_
);
clReleaseMemObject
(
entry
.
clBuffer_
);
}
public
:
public
:
OpenCLBufferPoolImpl
()
OpenCLBufferPoolBaseImpl
()
:
currentReservedSize
(
0
),
maxReservedSize
(
0
)
:
currentReservedSize
(
0
),
maxReservedSize
(
0
)
{
{
int
poolSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
1
<<
27
:
0
;
// nothing
maxReservedSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_BUFFERPOOL_LIMIT"
,
poolSize
);
}
}
virtual
~
OpenCLBufferPoolImpl
()
virtual
~
OpenCLBufferPool
Base
Impl
()
{
{
freeAllReservedBuffers
();
freeAllReservedBuffers
();
CV_Assert
(
reservedEntries_
.
empty
());
CV_Assert
(
reservedEntries_
.
empty
());
}
}
public
:
public
:
virtual
cl_mem
allocate
(
size_t
size
,
CV_OUT
size_t
&
capacity
)
virtual
T
allocate
(
size_t
size
)
{
{
BufferEntry
entry
=
{
NULL
,
0
};
AutoLock
locker
(
mutex_
);
if
(
maxReservedSize
>
0
)
BufferEntry
entry
;
if
(
maxReservedSize
>
0
&&
_findAndRemoveEntryFromReservedList
(
entry
,
size
))
{
{
AutoLock
locker
(
mutex_
);
CV_DbgAssert
(
size
<=
entry
.
capacity_
);
if
(
_findAndRemoveEntryFromReservedList
(
entry
,
size
))
LOG_BUFFER_POOL
(
"Reuse reserved buffer: %p
\n
"
,
entry
.
clBuffer_
);
{
}
CV_DbgAssert
(
size
<=
entry
.
capacity_
);
else
LOG_BUFFER_POOL
(
"Reuse reserved buffer: %p
\n
"
,
entry
.
clBuffer_
);
{
capacity
=
entry
.
capacity_
;
derived
().
_allocateBufferEntry
(
entry
,
size
);
return
entry
.
clBuffer_
;
}
}
}
_allocateBufferEntry
(
entry
,
size
);
capacity
=
entry
.
capacity_
;
return
entry
.
clBuffer_
;
return
entry
.
clBuffer_
;
}
}
virtual
void
release
(
cl_mem
handle
,
size_t
capacity
)
virtual
void
release
(
T
buffer
)
{
{
BufferEntry
entry
=
{
handle
,
capacity
};
AutoLock
locker
(
mutex_
);
BufferEntry
entry
;
CV_Assert
(
_findAndRemoveEntryFromAllocatedList
(
entry
,
buffer
));
if
(
maxReservedSize
==
0
||
entry
.
capacity_
>
maxReservedSize
/
8
)
if
(
maxReservedSize
==
0
||
entry
.
capacity_
>
maxReservedSize
/
8
)
{
{
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
}
}
else
else
{
{
AutoLock
locker
(
mutex_
);
reservedEntries_
.
push_front
(
entry
);
reservedEntries_
.
push_front
(
entry
);
currentReservedSize
+=
entry
.
capacity_
;
currentReservedSize
+=
entry
.
capacity_
;
_checkSizeOfReservedEntries
();
_checkSizeOfReservedEntries
();
...
@@ -3604,7 +3922,7 @@ public:
...
@@ -3604,7 +3922,7 @@ public:
maxReservedSize
=
size
;
maxReservedSize
=
size
;
if
(
maxReservedSize
<
oldMaxReservedSize
)
if
(
maxReservedSize
<
oldMaxReservedSize
)
{
{
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
for
(;
i
!=
reservedEntries_
.
end
();)
for
(;
i
!=
reservedEntries_
.
end
();)
{
{
const
BufferEntry
&
entry
=
*
i
;
const
BufferEntry
&
entry
=
*
i
;
...
@@ -3612,7 +3930,7 @@ public:
...
@@ -3612,7 +3930,7 @@ public:
{
{
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
currentReservedSize
-=
entry
.
capacity_
;
currentReservedSize
-=
entry
.
capacity_
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
i
=
reservedEntries_
.
erase
(
i
);
i
=
reservedEntries_
.
erase
(
i
);
continue
;
continue
;
}
}
...
@@ -3624,16 +3942,123 @@ public:
...
@@ -3624,16 +3942,123 @@ public:
virtual
void
freeAllReservedBuffers
()
virtual
void
freeAllReservedBuffers
()
{
{
AutoLock
locker
(
mutex_
);
AutoLock
locker
(
mutex_
);
std
::
list
<
BufferEntry
>::
const_iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
const_iterator
i
=
reservedEntries_
.
begin
();
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
{
{
const
BufferEntry
&
entry
=
*
i
;
const
BufferEntry
&
entry
=
*
i
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
}
}
reservedEntries_
.
clear
();
reservedEntries_
.
clear
();
}
}
};
};
// Bookkeeping record used by the cl_mem buffer pool: a buffer handle
// together with the capacity recorded for it.
struct CLBufferEntry
{
    cl_mem clBuffer_;   // buffer handle; NULL until an allocation fills it in
    size_t capacity_;   // capacity recorded for the buffer; 0 for an empty entry

    // Creates an empty entry (no buffer, zero capacity).
    CLBufferEntry()
        : clBuffer_((cl_mem)NULL),
          capacity_(0)
    {
    }
};
// Buffer pool specialisation for regular OpenCL buffers (cl_mem).
// The base template supplies the pooling logic and calls back into
// _allocateBufferEntry() / _releaseBufferEntry() for the actual OpenCL work.
class OpenCLBufferPoolImpl
    : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
{
public:
    typedef CLBufferEntry BufferEntry;

protected:
    // Extra flags OR-ed into CL_MEM_READ_WRITE for every buffer this pool creates.
    int createFlags_;

public:
    // createFlags: additional clCreateBuffer flags for this pool (none by default).
    OpenCLBufferPoolImpl(int createFlags = 0)
        : createFlags_(createFlags)
    {
    }

    // Creates a new OpenCL buffer for `entry`.
    // The requested size is rounded up with alignSize() to the granularity
    // chosen by _allocationGranularity(); the rounded value is stored as the
    // entry capacity. A failed clCreateBuffer raises via CV_Assert.
    // On success the entry is appended to allocatedEntries_.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        CV_DbgAssert(entry.clBuffer_ == NULL);

        const int granularity = (int)_allocationGranularity(size);
        entry.capacity_ = alignSize(size, granularity);

        Context& ctx = Context::getDefault();
        cl_context clContext = (cl_context)ctx.ptr();

        cl_int retval = CL_SUCCESS;
        entry.clBuffer_ = clCreateBuffer(clContext, CL_MEM_READ_WRITE|createFlags_,
                                         entry.capacity_, 0, &retval);
        CV_Assert(retval == CL_SUCCESS);
        CV_Assert(entry.clBuffer_ != NULL);

        if (retval == CL_SUCCESS)
        {
            CV_IMPL_ADD(CV_IMPL_OCL);
        }

        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);

        allocatedEntries_.push_back(entry);
    }

    // Releases the OpenCL buffer referenced by `entry`.
    // The entry must describe a real buffer (non-zero capacity, non-NULL
    // handle). The entry itself is not modified and no list is touched here.
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);

        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);

        clReleaseMemObject(entry.clBuffer_);
    }
};
#ifdef HAVE_OPENCL_SVM
// Bookkeeping record used by the SVM buffer pool: an SVM pointer together
// with the capacity recorded for it.
struct CLSVMBufferEntry
{
    void* clBuffer_;    // SVM pointer; NULL until an allocation fills it in
    size_t capacity_;   // capacity recorded for the buffer; 0 for an empty entry

    // Creates an empty entry (no buffer, zero capacity).
    CLSVMBufferEntry()
        : clBuffer_(NULL),
          capacity_(0)
    {
    }
};
// Buffer pool specialisation for SVM allocations (plain void* handles).
// The base template supplies the pooling logic and calls back into
// _allocateBufferEntry() / _releaseBufferEntry(), which go through the SVM
// function table of the default context.
class OpenCLSVMBufferPoolImpl
    : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
{
public:
    typedef CLSVMBufferEntry BufferEntry;

public:
    OpenCLSVMBufferPoolImpl()
    {
    }

    // Allocates SVM memory for `entry`.
    // The requested size is rounded up with alignSize() to the granularity
    // chosen by _allocationGranularity(). The fine-grain buffer flag is added
    // when the default context reports support for it. A NULL result from the
    // allocator raises via CV_Assert. On success the entry is appended to
    // allocatedEntries_.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        CV_DbgAssert(entry.clBuffer_ == NULL);

        const int granularity = (int)_allocationGranularity(size);
        entry.capacity_ = alignSize(size, granularity);

        Context& ctx = Context::getDefault();

        const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
        cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE;
        if (svmCaps.isSupportFineGrainBuffer())
        {
            memFlags |= CL_MEM_SVM_FINE_GRAIN_BUFFER;
        }

        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
        CV_DbgAssert(svmFns->isValid());

        CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
        void* buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
        CV_Assert(buf);
        entry.clBuffer_ = buf;

        {
            CV_IMPL_ADD(CV_IMPL_OCL);
        }

        LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);

        allocatedEntries_.push_back(entry);
    }

    // Frees the SVM memory referenced by `entry` through the default
    // context's SVM function table. The entry must describe a real buffer
    // (non-zero capacity, non-NULL pointer); it is not modified here.
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);

        LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);

        Context& ctx = Context::getDefault();
        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
        CV_DbgAssert(svmFns->isValid());

        CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n",  entry.clBuffer_);
        svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
    }
};
#endif
#if defined _MSC_VER
#if defined _MSC_VER
#pragma warning(disable:4127) // conditional expression is constant
#pragma warning(disable:4127) // conditional expression is constant
#endif
#endif
...
@@ -3697,12 +4122,37 @@ private:
...
@@ -3697,12 +4122,37 @@ private:
class
OpenCLAllocator
:
public
MatAllocator
class
OpenCLAllocator
:
public
MatAllocator
{
{
mutable
OpenCLBufferPoolImpl
bufferPool
;
mutable
OpenCLBufferPoolImpl
bufferPool
;
mutable
OpenCLBufferPoolImpl
bufferPoolHostPtr
;
#ifdef HAVE_OPENCL_SVM
mutable
OpenCLSVMBufferPoolImpl
bufferPoolSVM
;
#endif
enum
AllocatorFlags
enum
AllocatorFlags
{
{
ALLOCATOR_FLAGS_BUFFER_POOL_USED
=
1
<<
0
ALLOCATOR_FLAGS_BUFFER_POOL_USED
=
1
<<
0
,
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
=
1
<<
1
#ifdef HAVE_OPENCL_SVM
,
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
=
1
<<
2
#endif
};
};
public
:
public
:
OpenCLAllocator
()
{
matStdAllocator
=
Mat
::
getStdAllocator
();
}
OpenCLAllocator
()
:
bufferPool
(
0
),
bufferPoolHostPtr
(
CL_MEM_ALLOC_HOST_PTR
)
{
size_t
defaultPoolSize
,
poolSize
;
defaultPoolSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
1
<<
27
:
0
;
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPool
.
setMaxReservedSize
(
poolSize
);
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPoolHostPtr
.
setMaxReservedSize
(
poolSize
);
#ifdef HAVE_OPENCL_SVM
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPoolSVM
.
setMaxReservedSize
(
poolSize
);
#endif
matStdAllocator
=
Mat
::
getStdAllocator
();
}
UMatData
*
defaultAllocate
(
int
dims
,
const
int
*
sizes
,
int
type
,
void
*
data
,
size_t
*
step
,
UMatData
*
defaultAllocate
(
int
dims
,
const
int
*
sizes
,
int
type
,
void
*
data
,
size_t
*
step
,
int
flags
,
UMatUsageFlags
usageFlags
)
const
int
flags
,
UMatUsageFlags
usageFlags
)
const
...
@@ -3739,33 +4189,47 @@ public:
...
@@ -3739,33 +4189,47 @@ public:
}
}
Context
&
ctx
=
Context
::
getDefault
();
Context
&
ctx
=
Context
::
getDefault
();
int
createFlags
=
0
,
flags0
=
0
;
int
createFlags
=
0
,
flags0
=
0
;
getBestFlags
(
ctx
,
flags
,
usageFlags
,
createFlags
,
flags0
);
getBestFlags
(
ctx
,
flags
,
usageFlags
,
createFlags
,
flags0
);
size_t
capacity
=
0
;
void
*
handle
=
NULL
;
void
*
handle
=
NULL
;
int
allocatorFlags
=
0
;
int
allocatorFlags
=
0
;
#ifdef HAVE_OPENCL_SVM
const
svm
::
SVMCapabilities
svmCaps
=
svm
::
getSVMCapabilitites
(
ctx
);
if
(
ctx
.
useSVM
()
&&
svm
::
useSVM
(
usageFlags
)
&&
!
svmCaps
.
isNoSVMSupport
())
{
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
;
handle
=
bufferPoolSVM
.
allocate
(
total
);
// this property is constant, so single buffer pool can be used here
bool
isFineGrainBuffer
=
svmCaps
.
isSupportFineGrainBuffer
();
allocatorFlags
|=
isFineGrainBuffer
?
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
:
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
;
}
else
#endif
if
(
createFlags
==
0
)
if
(
createFlags
==
0
)
{
{
handle
=
bufferPool
.
allocate
(
total
,
capacity
);
if
(
!
handle
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_USED
;
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_USED
;
handle
=
bufferPool
.
allocate
(
total
);
}
else
if
(
createFlags
==
CL_MEM_ALLOC_HOST_PTR
)
{
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
;
handle
=
bufferPoolHostPtr
.
allocate
(
total
);
}
}
else
else
{
{
capacity
=
total
;
CV_Assert
(
handle
!=
NULL
);
// Unsupported, throw
cl_int
retval
=
0
;
handle
=
clCreateBuffer
((
cl_context
)
ctx
.
ptr
(),
CL_MEM_READ_WRITE
|
createFlags
,
total
,
0
,
&
retval
);
if
(
!
handle
||
retval
!=
CL_SUCCESS
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
CV_IMPL_ADD
(
CV_IMPL_OCL
)
}
}
if
(
!
handle
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
UMatData
*
u
=
new
UMatData
(
this
);
UMatData
*
u
=
new
UMatData
(
this
);
u
->
data
=
0
;
u
->
data
=
0
;
u
->
size
=
total
;
u
->
size
=
total
;
u
->
capacity
=
capacity
;
u
->
handle
=
handle
;
u
->
handle
=
handle
;
u
->
flags
=
flags0
;
u
->
flags
=
flags0
;
u
->
allocatorFlags_
=
allocatorFlags
;
u
->
allocatorFlags_
=
allocatorFlags
;
...
@@ -3788,22 +4252,81 @@ public:
...
@@ -3788,22 +4252,81 @@ public:
getBestFlags
(
ctx
,
accessFlags
,
usageFlags
,
createFlags
,
flags0
);
getBestFlags
(
ctx
,
accessFlags
,
usageFlags
,
createFlags
,
flags0
);
cl_context
ctx_handle
=
(
cl_context
)
ctx
.
ptr
();
cl_context
ctx_handle
=
(
cl_context
)
ctx
.
ptr
();
cl_int
retval
=
0
;
int
allocatorFlags
=
0
;
int
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
int
tempUMatFlags
=
0
;
u
->
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_USE_HOST_PTR
|
CL_MEM_READ_WRITE
,
void
*
handle
=
NULL
;
u
->
size
,
u
->
origdata
,
&
retval
);
cl_int
retval
=
CL_SUCCESS
;
if
((
!
u
->
handle
||
retval
!=
CL_SUCCESS
)
&&
!
(
accessFlags
&
ACCESS_FAST
))
#ifdef HAVE_OPENCL_SVM
svm
::
SVMCapabilities
svmCaps
=
svm
::
getSVMCapabilitites
(
ctx
);
bool
useSVM
=
ctx
.
useSVM
()
&&
svm
::
useSVM
(
usageFlags
);
if
(
useSVM
&&
svmCaps
.
isSupportFineGrainSystem
())
{
{
u
->
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_COPY_HOST_PTR
|
CL_MEM_READ_WRITE
|
createFlags
,
allocatorFlags
=
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
;
u
->
size
,
u
->
origdata
,
&
retval
);
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
tempUMatFlags
=
UMatData
::
TEMP_COPIED_UMAT
;
handle
=
u
->
origdata
;
CV_OPENCL_SVM_TRACE_P
(
"Use fine grain system: %d (%p)
\n
"
,
(
int
)
u
->
size
,
handle
);
}
else
if
(
useSVM
&&
(
svmCaps
.
isSupportFineGrainBuffer
()
||
svmCaps
.
isSupportCoarseGrainBuffer
()))
{
if
(
!
(
accessFlags
&
ACCESS_FAST
))
// memcpy used
{
bool
isFineGrainBuffer
=
svmCaps
.
isSupportFineGrainBuffer
();
cl_svm_mem_flags
memFlags
=
createFlags
|
(
isFineGrainBuffer
?
CL_MEM_SVM_FINE_GRAIN_BUFFER
:
0
);
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_OPENCL_SVM_TRACE_P
(
"clSVMAlloc + copy: %d
\n
"
,
(
int
)
u
->
size
);
handle
=
svmFns
->
fn_clSVMAlloc
((
cl_context
)
ctx
.
ptr
(),
memFlags
,
u
->
size
,
0
);
CV_Assert
(
handle
);
cl_command_queue
q
=
NULL
;
if
(
!
isFineGrainBuffer
)
{
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_TRUE
,
CL_MAP_WRITE
,
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
memcpy
(
handle
,
u
->
origdata
,
u
->
size
);
if
(
!
isFineGrainBuffer
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
tempUMatFlags
=
UMatData
::
TEMP_UMAT
|
UMatData
::
TEMP_COPIED_UMAT
;
allocatorFlags
|=
isFineGrainBuffer
?
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
:
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
;
}
}
else
#endif
{
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_USE_HOST_PTR
|
createFlags
,
u
->
size
,
u
->
origdata
,
&
retval
);
if
((
!
handle
||
retval
<
0
)
&&
!
(
accessFlags
&
ACCESS_FAST
))
{
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_COPY_HOST_PTR
|
CL_MEM_READ_WRITE
|
createFlags
,
u
->
size
,
u
->
origdata
,
&
retval
);
tempUMatFlags
|=
UMatData
::
TEMP_COPIED_UMAT
;
}
}
}
if
(
!
u
->
handle
||
retval
!=
CL_SUCCESS
)
if
(
!
handle
||
retval
!=
CL_SUCCESS
)
return
false
;
return
false
;
u
->
handle
=
handle
;
u
->
prevAllocator
=
u
->
currAllocator
;
u
->
prevAllocator
=
u
->
currAllocator
;
u
->
currAllocator
=
this
;
u
->
currAllocator
=
this
;
u
->
flags
|=
tempUMatFlags
;
u
->
flags
|=
tempUMatFlags
;
u
->
allocatorFlags_
=
allocatorFlags
;
}
}
if
(
accessFlags
&
ACCESS_WRITE
)
if
(
accessFlags
&
ACCESS_WRITE
)
u
->
markHostCopyObsolete
(
true
);
u
->
markHostCopyObsolete
(
true
);
...
@@ -3848,34 +4371,93 @@ public:
...
@@ -3848,34 +4371,93 @@ public:
CV_Assert
(
u
->
urefcount
>=
0
);
CV_Assert
(
u
->
urefcount
>=
0
);
CV_Assert
(
u
->
refcount
>=
0
);
CV_Assert
(
u
->
refcount
>=
0
);
// TODO: !!! when we add Shared Virtual Memory Support,
// this function (as well as the others) should be corrected
CV_Assert
(
u
->
handle
!=
0
&&
u
->
urefcount
==
0
);
CV_Assert
(
u
->
handle
!=
0
&&
u
->
urefcount
==
0
);
if
(
u
->
tempUMat
())
if
(
u
->
tempUMat
())
{
{
// UMatDataAutoLock lock(u);
// UMatDataAutoLock lock(u);
if
(
u
->
hostCopyObsolete
()
&&
u
->
refcount
>
0
)
if
(
u
->
hostCopyObsolete
()
&&
u
->
refcount
>
0
)
{
{
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
#ifdef HAVE_OPENCL_SVM
if
(
u
->
tempCopiedUMat
()
)
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
origdata
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
Context
&
ctx
=
Context
::
getDefault
();
CV_OclDbgAssert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
(
u
->
tempCopiedUMat
()
)
{
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
);
bool
isFineGrainBuffer
=
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
;
cl_command_queue
q
=
NULL
;
if
(
!
isFineGrainBuffer
)
{
CV_DbgAssert
(((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
));
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
memcpy
(
u
->
origdata
,
u
->
handle
,
u
->
size
);
if
(
!
isFineGrainBuffer
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
}
else
{
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
);
// nothing
}
}
}
else
else
#endif
{
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
(
u
->
tempCopiedUMat
()
)
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
origdata
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
CV_OclDbgAssert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
}
else
{
// TODO Is it really needed for clCreateBuffer with CL_MEM_USE_HOST_PTR?
cl_int
retval
=
0
;
void
*
data
=
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
(
CL_MAP_READ
|
CL_MAP_WRITE
),
0
,
u
->
size
,
0
,
0
,
0
,
&
retval
);
CV_OclDbgAssert
(
retval
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
data
,
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clFinish
(
q
)
==
CL_SUCCESS
);
}
}
u
->
markHostCopyObsolete
(
false
);
}
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
(
u
->
tempCopiedUMat
()
)
{
{
cl_int
retval
=
0
;
Context
&
ctx
=
Context
::
getDefault
();
void
*
data
=
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
(
CL_MAP_READ
|
CL_MAP_WRITE
),
CV_DbgAssert
(
svmFns
->
isValid
());
0
,
u
->
size
,
0
,
0
,
0
,
&
retval
);
CV_OclDbgAssert
(
retval
==
CL_SUCCESS
);
CV_OPENCL_SVM_TRACE_P
(
"clSVMFree: %p
\n
"
,
u
->
handle
);
CV_OclDbgAssert
(
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
data
,
0
,
0
,
0
)
==
CL_SUCCESS
);
svmFns
->
fn_clSVMFree
((
cl_context
)
ctx
.
ptr
(),
u
->
handle
);
CV_OclDbgAssert
(
clFinish
(
q
)
==
CL_SUCCESS
);
}
}
}
}
u
->
markHostCopyObsolete
(
false
);
else
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
#endif
{
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
}
u
->
handle
=
0
;
u
->
handle
=
0
;
u
->
currAllocator
=
u
->
prevAllocator
;
u
->
currAllocator
=
u
->
prevAllocator
;
if
(
u
->
data
&&
u
->
copyOnMap
()
&&
!
(
u
->
flags
&
UMatData
::
USER_ALLOCATED
))
if
(
u
->
data
&&
u
->
copyOnMap
()
&&
!
(
u
->
flags
&
UMatData
::
USER_ALLOCATED
))
...
@@ -3894,14 +4476,42 @@ public:
...
@@ -3894,14 +4476,42 @@ public:
}
}
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_USED
)
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_USED
)
{
{
bufferPool
.
release
((
cl_mem
)
u
->
handle
,
u
->
capacity
);
bufferPool
.
release
((
cl_mem
)
u
->
handle
);
}
else
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
)
{
bufferPoolHostPtr
.
release
((
cl_mem
)
u
->
handle
);
}
#ifdef HAVE_OPENCL_SVM
else
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
//nothing
}
else
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
!=
0
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
}
bufferPoolSVM
.
release
((
void
*
)
u
->
handle
);
}
}
#endif
else
else
{
{
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
}
}
u
->
handle
=
0
;
u
->
handle
=
0
;
u
->
capacity
=
0
;
delete
u
;
delete
u
;
}
}
}
}
...
@@ -3925,13 +4535,41 @@ public:
...
@@ -3925,13 +4535,41 @@ public:
{
{
if
(
!
u
->
copyOnMap
()
)
if
(
!
u
->
copyOnMap
()
)
{
{
// TODO
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
u
->
allocatorFlags_
|=
svm
::
OPENCL_SVM_BUFFER_MAP
;
}
}
clFinish
(
q
);
u
->
data
=
(
uchar
*
)
u
->
handle
;
u
->
markHostCopyObsolete
(
false
);
u
->
markDeviceMemMapped
(
true
);
return
;
}
#endif
if
(
u
->
data
)
// FIXIT Workaround for UMat synchronization issue
if
(
u
->
data
)
// FIXIT Workaround for UMat synchronization issue
{
{
//CV_Assert(u->hostCopyObsolete() == false);
//CV_Assert(u->hostCopyObsolete() == false);
return
;
return
;
}
}
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
cl_int
retval
=
0
;
cl_int
retval
=
0
;
u
->
data
=
(
uchar
*
)
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
u
->
data
=
(
uchar
*
)
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
(
CL_MAP_READ
|
CL_MAP_WRITE
),
(
CL_MAP_READ
|
CL_MAP_WRITE
),
...
@@ -3943,6 +4581,7 @@ public:
...
@@ -3943,6 +4581,7 @@ public:
return
;
return
;
}
}
// TODO Is it really a good idea and was it tested well?
// if map failed, switch to copy-on-map mode for the particular buffer
// if map failed, switch to copy-on-map mode for the particular buffer
u
->
flags
|=
UMatData
::
COPY_ON_MAP
;
u
->
flags
|=
UMatData
::
COPY_ON_MAP
;
}
}
...
@@ -3957,6 +4596,9 @@ public:
...
@@ -3957,6 +4596,9 @@ public:
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
{
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
0
);
#endif
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
u
->
markHostCopyObsolete
(
false
);
u
->
markHostCopyObsolete
(
false
);
...
@@ -3983,6 +4625,31 @@ public:
...
@@ -3983,6 +4625,31 @@ public:
{
{
CV_Assert
(
u
->
data
!=
NULL
);
CV_Assert
(
u
->
data
!=
NULL
);
u
->
markDeviceMemMapped
(
false
);
u
->
markDeviceMemMapped
(
false
);
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
!=
0
);
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
u
->
allocatorFlags_
&=
~
svm
::
OPENCL_SVM_BUFFER_MAP
;
}
}
u
->
data
=
0
;
u
->
markDeviceCopyObsolete
(
false
);
u
->
markHostCopyObsolete
(
false
);
return
;
}
#endif
CV_Assert
(
(
retval
=
clEnqueueUnmapMemObject
(
q
,
CV_Assert
(
(
retval
=
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
u
->
data
,
0
,
0
,
0
))
==
CL_SUCCESS
);
(
cl_mem
)
u
->
handle
,
u
->
data
,
0
,
0
,
0
))
==
CL_SUCCESS
);
if
(
Device
::
getDefault
().
isAMD
())
if
(
Device
::
getDefault
().
isAMD
())
...
@@ -3995,6 +4662,9 @@ public:
...
@@ -3995,6 +4662,9 @@ public:
else
if
(
u
->
copyOnMap
()
&&
u
->
deviceCopyObsolete
()
)
else
if
(
u
->
copyOnMap
()
&&
u
->
deviceCopyObsolete
()
)
{
{
AlignedDataPtr
<
true
,
false
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
AlignedDataPtr
<
true
,
false
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
0
);
#endif
CV_Assert
(
(
retval
=
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
CV_Assert
(
(
retval
=
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
))
==
CL_SUCCESS
);
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
))
==
CL_SUCCESS
);
}
}
...
@@ -4102,17 +4772,78 @@ public:
...
@@ -4102,17 +4772,78 @@ public:
srcrawofs
,
new_srcofs
,
new_srcstep
,
srcrawofs
,
new_srcofs
,
new_srcstep
,
dstrawofs
,
new_dstofs
,
new_dststep
);
dstrawofs
,
new_dstofs
,
new_dststep
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
((
uchar
*
)
dstptr
,
sz
[
0
]
*
dststep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
if
(
iscontinuous
)
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
{
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
CV_DbgAssert
(
u
->
data
==
NULL
||
u
->
data
==
u
->
handle
);
srcrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
);
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
if
(
iscontinuous
)
{
memcpy
(
dstptr
,
(
uchar
*
)
u
->
handle
+
srcrawofs
,
total
);
}
else
{
// This code is from MatAllocator::download()
int
isz
[
CV_MAX_DIM
];
uchar
*
srcptr
=
(
uchar
*
)
u
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
srcofs
)
srcptr
+=
srcofs
[
i
]
*
(
i
<=
dims
-
2
?
srcstep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
src
(
dims
,
isz
,
CV_8U
,
srcptr
,
srcstep
);
Mat
dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
}
}
}
else
else
#endif
{
{
CV_Assert
(
clEnqueueReadBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
AlignedDataPtr
<
false
,
true
>
alignedPtr
((
uchar
*
)
dstptr
,
sz
[
0
]
*
dststep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
if
(
iscontinuous
)
new_dststep
[
0
],
new_dststep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
{
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
srcrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
else
{
CV_Assert
(
clEnqueueReadBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
}
}
}
}
...
@@ -4153,20 +4884,91 @@ public:
...
@@ -4153,20 +4884,91 @@ public:
CV_Assert
(
u
->
handle
!=
0
);
CV_Assert
(
u
->
handle
!=
0
);
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
AlignedDataPtr
<
true
,
false
>
alignedPtr
((
uchar
*
)
srcptr
,
sz
[
0
]
*
srcstep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
if
(
iscontinuous
)
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
{
CV_Assert
(
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CV_DbgAssert
(
u
->
data
==
NULL
||
u
->
data
==
u
->
handle
);
CL_TRUE
,
dstrawofs
,
total
,
srcptr
,
0
,
0
,
0
)
==
CL_SUCCESS
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
);
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_WRITE
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
if
(
iscontinuous
)
{
memcpy
((
uchar
*
)
u
->
handle
+
dstrawofs
,
srcptr
,
total
);
}
else
{
// This code is from MatAllocator::upload()
int
isz
[
CV_MAX_DIM
];
uchar
*
dstptr
=
(
uchar
*
)
u
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
dstofs
)
dstptr
+=
dstofs
[
i
]
*
(
i
<=
dims
-
2
?
dststep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
src
(
dims
,
isz
,
CV_8U
,
(
void
*
)
srcptr
,
srcstep
);
Mat
dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
}
}
}
else
else
#endif
{
{
CV_Assert
(
clEnqueueWriteBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
AlignedDataPtr
<
true
,
false
>
alignedPtr
((
uchar
*
)
srcptr
,
sz
[
0
]
*
srcstep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
new_dstofs
,
new_srcofs
,
new_sz
,
new_dststep
[
0
],
new_dststep
[
1
],
if
(
iscontinuous
)
new_srcstep
[
0
],
new_srcstep
[
1
],
srcptr
,
0
,
0
,
0
)
==
CL_SUCCESS
);
{
CV_Assert
(
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
dstrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
else
{
CV_Assert
(
clEnqueueWriteBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_dstofs
,
new_srcofs
,
new_sz
,
new_dststep
[
0
],
new_dststep
[
1
],
new_srcstep
[
0
],
new_srcstep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
}
}
u
->
markHostCopyObsolete
(
true
);
u
->
markHostCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
u
->
markHostCopyObsolete
(
true
);
}
u
->
markDeviceCopyObsolete
(
false
);
u
->
markDeviceCopyObsolete
(
false
);
}
}
...
@@ -4198,7 +5000,17 @@ public:
...
@@ -4198,7 +5000,17 @@ public:
{
{
download
(
src
,
dst
->
data
+
dstrawofs
,
dims
,
sz
,
srcofs
,
srcstep
,
dststep
);
download
(
src
,
dst
->
data
+
dstrawofs
,
dims
,
sz
,
srcofs
,
srcstep
,
dststep
);
dst
->
markHostCopyObsolete
(
false
);
dst
->
markHostCopyObsolete
(
false
);
dst
->
markDeviceCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
dst
->
markDeviceCopyObsolete
(
true
);
}
return
;
return
;
}
}
...
@@ -4206,26 +5018,110 @@ public:
...
@@ -4206,26 +5018,110 @@ public:
CV_Assert
(
dst
->
refcount
==
0
);
CV_Assert
(
dst
->
refcount
==
0
);
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
cl_int
retval
;
cl_int
retval
=
CL_SUCCESS
;
if
(
iscontinuous
)
#ifdef HAVE_OPENCL_SVM
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
{
CV_Assert
(
(
retval
=
clEnqueueCopyBuffer
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
&&
srcrawofs
,
dstrawofs
,
total
,
0
,
0
,
0
))
==
CL_SUCCESS
);
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
(
iscontinuous
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMemcpy: %p <-- %p (%d)
\n
"
,
(
uchar
*
)
dst
->
handle
+
dstrawofs
,
(
uchar
*
)
src
->
handle
+
srcrawofs
,
(
int
)
total
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMemcpy
(
q
,
CL_TRUE
,
(
uchar
*
)
dst
->
handle
+
dstrawofs
,
(
uchar
*
)
src
->
handle
+
srcrawofs
,
total
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
else
{
clFinish
(
q
);
// This code is from MatAllocator::download()/upload()
int
isz
[
CV_MAX_DIM
];
uchar
*
srcptr
=
(
uchar
*
)
src
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
srcofs
)
srcptr
+=
srcofs
[
i
]
*
(
i
<=
dims
-
2
?
srcstep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
m_src
(
dims
,
isz
,
CV_8U
,
srcptr
,
srcstep
);
uchar
*
dstptr
=
(
uchar
*
)
dst
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
if
(
dstofs
)
dstptr
+=
dstofs
[
i
]
*
(
i
<=
dims
-
2
?
dststep
[
i
]
:
1
);
}
Mat
m_dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
m_src
,
&
m_dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
}
else
{
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
map
(
src
,
ACCESS_READ
);
upload
(
dst
,
src
->
data
+
srcrawofs
,
dims
,
sz
,
dstofs
,
dststep
,
srcstep
);
unmap
(
src
);
}
else
{
map
(
dst
,
ACCESS_WRITE
);
download
(
src
,
dst
->
data
+
dstrawofs
,
dims
,
sz
,
srcofs
,
srcstep
,
dststep
);
unmap
(
dst
);
}
}
}
}
else
else
#endif
{
{
CV_Assert
(
(
retval
=
clEnqueueCopyBufferRect
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
if
(
iscontinuous
)
new_srcofs
,
new_dstofs
,
new_sz
,
{
new_srcstep
[
0
],
new_srcstep
[
1
],
CV_Assert
(
(
retval
=
clEnqueueCopyBuffer
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
new_dststep
[
0
],
new_dststep
[
1
],
srcrawofs
,
dstrawofs
,
total
,
0
,
0
,
0
))
==
CL_SUCCESS
);
0
,
0
,
0
))
==
CL_SUCCESS
);
}
else
{
CV_Assert
(
(
retval
=
clEnqueueCopyBufferRect
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
0
,
0
,
0
))
==
CL_SUCCESS
);
}
}
}
if
(
retval
==
CL_SUCCESS
)
if
(
retval
==
CL_SUCCESS
)
{
{
CV_IMPL_ADD
(
CV_IMPL_OCL
)
CV_IMPL_ADD
(
CV_IMPL_OCL
)
}
}
dst
->
markHostCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
dst
->
markHostCopyObsolete
(
true
);
}
dst
->
markDeviceCopyObsolete
(
false
);
dst
->
markDeviceCopyObsolete
(
false
);
if
(
_sync
)
if
(
_sync
)
...
@@ -4234,7 +5130,23 @@ public:
...
@@ -4234,7 +5130,23 @@ public:
}
}
}
}
BufferPoolController
*
getBufferPoolController
()
const
{
return
&
bufferPool
;
}
BufferPoolController
*
getBufferPoolController
(
const
char
*
id
)
const
{
#ifdef HAVE_OPENCL_SVM
if
((
svm
::
checkForceSVMUmatUsage
()
&&
(
id
==
NULL
||
strcmp
(
id
,
"OCL"
)
==
0
))
||
(
id
!=
NULL
&&
strcmp
(
id
,
"SVM"
)
==
0
))
{
return
&
bufferPoolSVM
;
}
#endif
if
(
id
!=
NULL
&&
strcmp
(
id
,
"HOST_ALLOC"
)
==
0
)
{
return
&
bufferPoolHostPtr
;
}
if
(
id
!=
NULL
&&
strcmp
(
id
,
"OCL"
)
!=
0
)
{
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"getBufferPoolController(): unknown BufferPool ID
\n
"
);
}
return
&
bufferPool
;
}
MatAllocator
*
matStdAllocator
;
MatAllocator
*
matStdAllocator
;
};
};
...
@@ -4818,7 +5730,7 @@ void* Image2D::ptr() const
...
@@ -4818,7 +5730,7 @@ void* Image2D::ptr() const
return
p
?
p
->
handle
:
0
;
return
p
?
p
->
handle
:
0
;
}
}
bool
isPerformanceCheckBypassed
()
bool
i
nternal
::
i
sPerformanceCheckBypassed
()
{
{
static
bool
initialized
=
false
;
static
bool
initialized
=
false
;
static
bool
value
=
false
;
static
bool
value
=
false
;
...
@@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed()
...
@@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed()
return
value
;
return
value
;
}
}
bool
internal
::
isCLBuffer
(
UMat
&
u
)
{
void
*
h
=
u
.
handle
(
ACCESS_RW
);
if
(
!
h
)
return
true
;
CV_DbgAssert
(
u
.
u
->
currAllocator
==
getOpenCLAllocator
());
#if 1
if
((
u
.
u
->
allocatorFlags_
&
0xffff0000
)
!=
0
)
// OpenCL SVM flags are stored here
return
false
;
#else
cl_mem_object_type
type
=
0
;
cl_int
ret
=
clGetMemObjectInfo
((
cl_mem
)
h
,
CL_MEM_TYPE
,
sizeof
(
type
),
&
type
,
NULL
);
if
(
ret
!=
CL_SUCCESS
||
type
!=
CL_MEM_OBJECT_BUFFER
)
return
false
;
#endif
return
true
;
}
}}
}}
modules/core/src/opencl/runtime/opencl_core.cpp
View file @
97aa8d33
...
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
...
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
#define CUSTOM_FUNCTION_ID 1000
#define CUSTOM_FUNCTION_ID 1000
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100
enum
OPENCL_FN_SVM_ID
{
OPENCL_FN_clSVMAlloc
=
SVM_FUNCTION_ID_START
,
OPENCL_FN_clSVMFree
,
OPENCL_FN_clSetKernelArgSVMPointer
,
OPENCL_FN_clSetKernelExecInfo
,
OPENCL_FN_clEnqueueSVMFree
,
OPENCL_FN_clEnqueueSVMMemcpy
,
OPENCL_FN_clEnqueueSVMMemFill
,
OPENCL_FN_clEnqueueSVMMap
,
OPENCL_FN_clEnqueueSVMUnmap
,
};
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
)
=
opencl_fn4
<
OPENCL_FN_clSVMAlloc
,
void
*
,
cl_context
,
cl_svm_mem_flags
,
size_t
,
unsigned
int
>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMAlloc_definition
=
{
"clSVMAlloc"
,
(
void
**
)
&
clSVMAlloc
};
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
)
=
opencl_fn2
<
OPENCL_FN_clSVMFree
,
void
,
cl_context
,
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMFree_definition
=
{
"clSVMFree"
,
(
void
**
)
&
clSVMFree
};
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
)
=
opencl_fn3
<
OPENCL_FN_clSetKernelArgSVMPointer
,
cl_int
,
cl_kernel
,
cl_uint
,
const
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSetKernelArgSVMPointer_definition
=
{
"clSetKernelArgSVMPointer"
,
(
void
**
)
&
clSetKernelArgSVMPointer
};
//void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
// opencl_fn4<OPENCL_FN_clSetKernelExecInfo, void*, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
// opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemcpy
,
cl_int
,
cl_command_queue
,
cl_bool
,
void
*
,
const
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemcpy_definition
=
{
"clEnqueueSVMMemcpy"
,
(
void
**
)
&
clEnqueueSVMMemcpy
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemFill
,
cl_int
,
cl_command_queue
,
void
*
,
const
void
*
,
size_t
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemFill_definition
=
{
"clEnqueueSVMMemFill"
,
(
void
**
)
&
clEnqueueSVMMemFill
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMap
,
cl_int
,
cl_command_queue
,
cl_bool
,
cl_map_flags
,
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMap_definition
=
{
"clEnqueueSVMMap"
,
(
void
**
)
&
clEnqueueSVMMap
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn5
<
OPENCL_FN_clEnqueueSVMUnmap
,
cl_int
,
cl_command_queue
,
void
*
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMUnmap_definition
=
{
"clEnqueueSVMUnmap"
,
(
void
**
)
&
clEnqueueSVMUnmap
};
static
const
struct
DynamicFnEntry
*
opencl_svm_fn_list
[]
=
{
&
_clSVMAlloc_definition
,
&
_clSVMFree_definition
,
&
_clSetKernelArgSVMPointer_definition
,
NULL
/*&_clSetKernelExecInfo_definition*/
,
NULL
/*&_clEnqueueSVMFree_definition*/
,
&
_clEnqueueSVMMemcpy_definition
,
&
_clEnqueueSVMMemFill_definition
,
&
_clEnqueueSVMMap_definition
,
&
_clEnqueueSVMUnmap_definition
,
};
#endif // HAVE_OPENCL_SVM
//
//
// END OF CUSTOM FUNCTIONS HERE
// END OF CUSTOM FUNCTIONS HERE
//
//
...
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
...
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_fn_list
)
/
sizeof
(
opencl_fn_list
[
0
])));
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_fn_list
)
/
sizeof
(
opencl_fn_list
[
0
])));
e
=
opencl_fn_list
[
ID
];
e
=
opencl_fn_list
[
ID
];
}
}
#ifdef HAVE_OPENCL_SVM
else
if
(
ID
>=
SVM_FUNCTION_ID_START
&&
ID
<
SVM_FUNCTION_ID_END
)
{
ID
=
ID
-
SVM_FUNCTION_ID_START
;
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_svm_fn_list
)
/
sizeof
(
opencl_svm_fn_list
[
0
])));
e
=
opencl_svm_fn_list
[
ID
];
}
#endif
else
else
{
{
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"Invalid function ID"
);
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"Invalid function ID"
);
...
...
modules/core/src/umatrix.cpp
View file @
97aa8d33
...
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
...
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
prevAllocator
=
currAllocator
=
allocator
;
prevAllocator
=
currAllocator
=
allocator
;
urefcount
=
refcount
=
0
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
flags
=
0
;
handle
=
0
;
handle
=
0
;
userdata
=
0
;
userdata
=
0
;
...
@@ -67,7 +67,7 @@ UMatData::~UMatData()
...
@@ -67,7 +67,7 @@ UMatData::~UMatData()
prevAllocator
=
currAllocator
=
0
;
prevAllocator
=
currAllocator
=
0
;
urefcount
=
refcount
=
0
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
flags
=
0
;
handle
=
0
;
handle
=
0
;
userdata
=
0
;
userdata
=
0
;
...
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
...
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
temp_u
=
a
->
allocate
(
dims
,
size
.
p
,
type
(),
data
,
step
.
p
,
accessFlags
,
usageFlags
);
temp_u
=
a
->
allocate
(
dims
,
size
.
p
,
type
(),
data
,
step
.
p
,
accessFlags
,
usageFlags
);
temp_u
->
refcount
=
1
;
temp_u
->
refcount
=
1
;
}
}
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
// TODO result is not checked
hdr
.
flags
=
flags
;
hdr
.
flags
=
flags
;
setSize
(
hdr
,
dims
,
size
.
p
,
step
.
p
);
setSize
(
hdr
,
dims
,
size
.
p
,
step
.
p
);
finalizeHdr
(
hdr
);
finalizeHdr
(
hdr
);
...
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
...
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
{
{
if
(
!
u
)
if
(
!
u
)
return
Mat
();
return
Mat
();
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
// TODO Support ACCESS_WRITE without unnecessary data transfers
CV_Assert
(
u
->
data
!=
0
);
CV_Assert
(
u
->
data
!=
0
);
Mat
hdr
(
dims
,
size
.
p
,
type
(),
u
->
data
+
offset
,
step
.
p
);
Mat
hdr
(
dims
,
size
.
p
,
type
(),
u
->
data
+
offset
,
step
.
p
);
hdr
.
flags
=
flags
;
hdr
.
flags
=
flags
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment