Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
90df5e00
Commit
90df5e00
authored
Mar 26, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
parents
8c25a8eb
9340fc0c
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
744 additions
and
111 deletions
+744
-111
cl_d3d11_ext.h
3rdparty/include/opencl/1.2/CL/cl_d3d11_ext.h
+122
-0
CMakeLists.txt
CMakeLists.txt
+8
-0
OpenCVDetectOpenCL.cmake
cmake/OpenCVDetectOpenCL.cmake
+7
-2
cvconfig.h.in
cmake/templates/cvconfig.h.in
+3
-0
opencl_core.hpp
...opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
+1
-0
directx.cpp
modules/core/src/directx.cpp
+467
-84
directx.inc.hpp
modules/core/src/directx.inc.hpp
+3
-0
opencl_core.hpp.in
.../src/opencl/runtime/generator/template/opencl_core.hpp.in
+1
-0
opencl_core.cpp
modules/core/src/opencl/runtime/opencl_core.cpp
+1
-0
lrn_layer.cpp
modules/dnn/src/layers/lrn_layer.cpp
+1
-1
onnx_importer.cpp
modules/dnn/src/onnx/onnx_importer.cpp
+31
-0
test_halide_layers.cpp
modules/dnn/test/test_halide_layers.cpp
+4
-0
test_onnx_importer.cpp
modules/dnn/test/test_onnx_importer.cpp
+1
-0
sumpixels.avx512_skx.cpp
modules/imgproc/src/sumpixels.avx512_skx.cpp
+92
-23
sumpixels.cpp
modules/imgproc/src/sumpixels.cpp
+1
-1
opencl-opencv-interop.cpp
samples/opencl/opencl-opencv-interop.cpp
+1
-0
No files found.
3rdparty/include/opencl/1.2/CL/cl_d3d11_ext.h
0 → 100644
View file @
90df5e00
/**********************************************************************************
* Copyright (c) 2008-2009 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_D3D11_EXT_H
#define __OPENCL_CL_D3D11_EXT_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern
"C"
{
#endif
/******************************************************************************
* cl_nv_d3d11_sharing */
typedef
cl_uint
cl_d3d11_device_source_nv
;
typedef
cl_uint
cl_d3d11_device_set_nv
;
/******************************************************************************/
// Error Codes
#define CL_INVALID_D3D11_DEVICE_NV -1006
#define CL_INVALID_D3D11_RESOURCE_NV -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_NV -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_NV -1009
// cl_d3d11_device_source_nv
#define CL_D3D11_DEVICE_NV 0x4019
#define CL_D3D11_DXGI_ADAPTER_NV 0x401A
// cl_d3d11_device_set_nv
#define CL_PREFERRED_DEVICES_FOR_D3D11_NV 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_NV 0x401C
// cl_context_info
#define CL_CONTEXT_D3D11_DEVICE_NV 0x401D
// cl_mem_info
#define CL_MEM_D3D11_RESOURCE_NV 0x401E
// cl_image_info
#define CL_IMAGE_D3D11_SUBRESOURCE_NV 0x401F
// cl_command_type
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_NV 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_NV 0x4021
/******************************************************************************/
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clGetDeviceIDsFromD3D11NV_fn
)(
cl_platform_id
platform
,
cl_d3d11_device_source_nv
d3d_device_source
,
void
*
d3d_object
,
cl_d3d11_device_set_nv
d3d_device_set
,
cl_uint
num_entries
,
cl_device_id
*
devices
,
cl_uint
*
num_devices
)
CL_API_SUFFIX__VERSION_1_0
;
typedef
CL_API_ENTRY
cl_mem
(
CL_API_CALL
*
clCreateFromD3D11BufferNV_fn
)(
cl_context
context
,
cl_mem_flags
flags
,
ID3D11Buffer
*
resource
,
cl_int
*
errcode_ret
)
CL_API_SUFFIX__VERSION_1_0
;
typedef
CL_API_ENTRY
cl_mem
(
CL_API_CALL
*
clCreateFromD3D11Texture2DNV_fn
)(
cl_context
context
,
cl_mem_flags
flags
,
ID3D11Texture2D
*
resource
,
UINT
subresource
,
cl_int
*
errcode_ret
)
CL_API_SUFFIX__VERSION_1_0
;
typedef
CL_API_ENTRY
cl_mem
(
CL_API_CALL
*
clCreateFromD3D11Texture3DNV_fn
)(
cl_context
context
,
cl_mem_flags
flags
,
ID3D11Texture3D
*
resource
,
UINT
subresource
,
cl_int
*
errcode_ret
)
CL_API_SUFFIX__VERSION_1_0
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueAcquireD3D11ObjectsNV_fn
)(
cl_command_queue
command_queue
,
cl_uint
num_objects
,
const
cl_mem
*
mem_objects
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
CL_API_SUFFIX__VERSION_1_0
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueReleaseD3D11ObjectsNV_fn
)(
cl_command_queue
command_queue
,
cl_uint
num_objects
,
cl_mem
*
mem_objects
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
CL_API_SUFFIX__VERSION_1_0
;
#ifdef __cplusplus
}
#endif
#endif // __OPENCL_CL_D3D11_H
CMakeLists.txt
View file @
90df5e00
...
@@ -108,6 +108,10 @@ if(POLICY CMP0067)
...
@@ -108,6 +108,10 @@ if(POLICY CMP0067)
cmake_policy
(
SET CMP0067 NEW
)
cmake_policy
(
SET CMP0067 NEW
)
endif
()
endif
()
if
(
POLICY CMP0068
)
cmake_policy
(
SET CMP0068 NEW
)
# CMake 3.9+: `RPATH` settings on macOS do not affect `install_name`.
endif
()
include
(
cmake/OpenCVUtils.cmake
)
include
(
cmake/OpenCVUtils.cmake
)
ocv_cmake_reset_hooks
()
ocv_cmake_reset_hooks
()
ocv_check_environment_variables
(
OPENCV_CMAKE_HOOKS_DIR
)
ocv_check_environment_variables
(
OPENCV_CMAKE_HOOKS_DIR
)
...
@@ -368,6 +372,9 @@ OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
...
@@ -368,6 +372,9 @@ OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON
VISIBLE_IF WIN32 AND NOT WINRT
VISIBLE_IF WIN32 AND NOT WINRT
VERIFY HAVE_DIRECTX
)
VERIFY HAVE_DIRECTX
)
OCV_OPTION
(
WITH_OPENCL_D3D11_NV
"Include NVIDIA OpenCL D3D11 support"
WITH_DIRECTX
VISIBLE_IF WIN32 AND NOT WINRT
VERIFY HAVE_OPENCL_D3D11_NV
)
OCV_OPTION
(
WITH_LIBREALSENSE
"Include Intel librealsense support"
OFF
OCV_OPTION
(
WITH_LIBREALSENSE
"Include Intel librealsense support"
OFF
VISIBLE_IF NOT WITH_INTELPERC
VISIBLE_IF NOT WITH_INTELPERC
VERIFY HAVE_LIBREALSENSE
)
VERIFY HAVE_LIBREALSENSE
)
...
@@ -1570,6 +1577,7 @@ if(WITH_OPENCL OR HAVE_OPENCL)
...
@@ -1570,6 +1577,7 @@ if(WITH_OPENCL OR HAVE_OPENCL)
IF HAVE_OPENCL_SVM THEN
"SVM"
IF HAVE_OPENCL_SVM THEN
"SVM"
IF HAVE_CLAMDFFT THEN
"AMDFFT"
IF HAVE_CLAMDFFT THEN
"AMDFFT"
IF HAVE_CLAMDBLAS THEN
"AMDBLAS"
IF HAVE_CLAMDBLAS THEN
"AMDBLAS"
IF HAVE_OPENCL_D3D11_NV THEN
"NVD3D11"
ELSE
"no extra features"
)
ELSE
"no extra features"
)
status
(
""
)
status
(
""
)
status
(
" OpenCL:"
HAVE_OPENCL THEN
"YES (
${
opencl_features
}
)"
ELSE
"NO"
)
status
(
" OpenCL:"
HAVE_OPENCL THEN
"YES (
${
opencl_features
}
)"
ELSE
"NO"
)
...
...
cmake/OpenCVDetectOpenCL.cmake
View file @
90df5e00
...
@@ -2,14 +2,19 @@ set(OPENCL_FOUND ON CACHE BOOL "OpenCL library is found")
...
@@ -2,14 +2,19 @@ set(OPENCL_FOUND ON CACHE BOOL "OpenCL library is found")
if
(
APPLE
)
if
(
APPLE
)
set
(
OPENCL_LIBRARY
"-framework OpenCL"
CACHE STRING
"OpenCL library"
)
set
(
OPENCL_LIBRARY
"-framework OpenCL"
CACHE STRING
"OpenCL library"
)
set
(
OPENCL_INCLUDE_DIR
""
CACHE PATH
"OpenCL include directory"
)
set
(
OPENCL_INCLUDE_DIR
""
CACHE PATH
"OpenCL include directory"
)
else
(
APPLE
)
else
()
set
(
OPENCL_LIBRARY
""
CACHE STRING
"OpenCL library"
)
set
(
OPENCL_LIBRARY
""
CACHE STRING
"OpenCL library"
)
set
(
OPENCL_INCLUDE_DIR
"
${
OpenCV_SOURCE_DIR
}
/3rdparty/include/opencl/1.2"
CACHE PATH
"OpenCL include directory"
)
set
(
OPENCL_INCLUDE_DIR
"
${
OpenCV_SOURCE_DIR
}
/3rdparty/include/opencl/1.2"
CACHE PATH
"OpenCL include directory"
)
ocv_install_3rdparty_licenses
(
opencl-headers
"
${
OpenCV_SOURCE_DIR
}
/3rdparty/include/opencl/LICENSE.txt"
)
ocv_install_3rdparty_licenses
(
opencl-headers
"
${
OpenCV_SOURCE_DIR
}
/3rdparty/include/opencl/LICENSE.txt"
)
endif
(
APPLE
)
endif
()
mark_as_advanced
(
OPENCL_INCLUDE_DIR OPENCL_LIBRARY
)
mark_as_advanced
(
OPENCL_INCLUDE_DIR OPENCL_LIBRARY
)
if
(
OPENCL_FOUND
)
if
(
OPENCL_FOUND
)
if
(
WITH_OPENCL_D3D11_NV AND EXISTS
"
${
OPENCL_INCLUDE_DIR
}
/CL/cl_d3d11_ext.h"
)
set
(
HAVE_OPENCL_D3D11_NV ON
)
endif
()
if
(
OPENCL_LIBRARY
)
if
(
OPENCL_LIBRARY
)
set
(
HAVE_OPENCL_STATIC ON
)
set
(
HAVE_OPENCL_STATIC ON
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
...
...
cmake/templates/cvconfig.h.in
View file @
90df5e00
...
@@ -100,6 +100,9 @@
...
@@ -100,6 +100,9 @@
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_SVM
#cmakedefine HAVE_OPENCL_SVM
/* NVIDIA OpenCL D3D Extensions support */
#cmakedefine HAVE_OPENCL_D3D11_NV
/* OpenEXR codec */
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
#cmakedefine HAVE_OPENEXR
...
...
modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
View file @
90df5e00
...
@@ -96,6 +96,7 @@
...
@@ -96,6 +96,7 @@
#define clWaitForEvents clWaitForEvents_
#define clWaitForEvents clWaitForEvents_
#if defined __APPLE__
#if defined __APPLE__
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.h>
#include <OpenCL/cl.h>
#else
#else
#include <CL/cl.h>
#include <CL/cl.h>
...
...
modules/core/src/directx.cpp
View file @
90df5e00
...
@@ -48,13 +48,13 @@
...
@@ -48,13 +48,13 @@
#ifdef HAVE_DIRECTX
#ifdef HAVE_DIRECTX
#include <vector>
#include <vector>
#
include "directx.inc.hpp"
#include "directx.inc.hpp"
#else // HAVE_DIRECTX
#else // HAVE_DIRECTX
#define NO_DIRECTX_SUPPORT_ERROR CV_Error(cv::Error::StsBadFunc, "OpenCV was build without DirectX support")
#define NO_DIRECTX_SUPPORT_ERROR CV_Error(cv::Error::StsBadFunc, "OpenCV was build without DirectX support")
#endif
#endif
#ifndef HAVE_OPENCL
#ifndef HAVE_OPENCL
#
define NO_OPENCL_SUPPORT_ERROR CV_Error(cv::Error::StsBadFunc, "OpenCV was build without OpenCL support")
#define NO_OPENCL_SUPPORT_ERROR CV_Error(cv::Error::StsBadFunc, "OpenCV was build without OpenCL support")
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
namespace
cv
{
namespace
directx
{
namespace
cv
{
namespace
directx
{
...
@@ -168,7 +168,7 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
...
@@ -168,7 +168,7 @@ int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT)
//case DXGI_FORMAT_BC7_TYPELESS:
//case DXGI_FORMAT_BC7_TYPELESS:
//case DXGI_FORMAT_BC7_UNORM:
//case DXGI_FORMAT_BC7_UNORM:
//case DXGI_FORMAT_BC7_UNORM_SRGB:
//case DXGI_FORMAT_BC7_UNORM_SRGB:
#ifdef HAVE_DIRECTX_NV12
#ifdef HAVE_DIRECTX_NV12
//D3DX11 should support DXGI_FORMAT_NV12.
case
DXGI_FORMAT_NV12
:
return
CV_8UC3
;
case
DXGI_FORMAT_NV12
:
return
CV_8UC3
;
#endif
#endif
default
:
break
;
default
:
break
;
...
@@ -256,75 +256,70 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
...
@@ -256,75 +256,70 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No available platforms"
);
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No available platforms"
);
std
::
vector
<
cl_platform_id
>
platforms
(
numPlatforms
);
std
::
vector
<
cl_platform_id
>
platforms
(
numPlatforms
);
status
=
clGetPlatformIDs
(
numPlatforms
,
&
platforms
[
0
],
NULL
);
size_t
exts_len
;
if
(
status
!=
CL_SUCCESS
)
cv
::
AutoBuffer
<
char
>
extensions
;
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't get number of platforms"
);
bool
is_support_cl_khr_d3d11_sharing
=
false
;
#ifdef HAVE_OPENCL_D3D11_NV
// TODO Filter platforms by name from OPENCV_OPENCL_DEVICE
bool
is_support_cl_nv_d3d11_sharing
=
false
;
#endif
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
{
status
=
clGetPlatformIDs
(
numPlatforms
,
&
platforms
[
i
],
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't get number of platforms"
);
status
=
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_EXTENSIONS
,
0
,
NULL
,
&
exts_len
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS"
);
extensions
.
resize
(
exts_len
);
status
=
clGetPlatformInfo
(
platforms
[
i
],
CL_PLATFORM_EXTENSIONS
,
exts_len
,
static_cast
<
void
*>
(
extensions
.
data
()),
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No available CL_PLATFORM_EXTENSIONS"
);
if
(
strstr
(
extensions
.
data
(),
"cl_khr_d3d11_sharing"
))
is_support_cl_khr_d3d11_sharing
=
true
;
#ifdef HAVE_OPENCL_D3D11_NV
if
(
strstr
(
extensions
.
data
(),
"cl_nv_d3d11_sharing"
))
is_support_cl_nv_d3d11_sharing
=
true
;
#endif
}
#ifdef HAVE_OPENCL_D3D11_NV
if
(
!
is_support_cl_nv_d3d11_sharing
&&
!
is_support_cl_khr_d3d11_sharing
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No supported extensions"
);
#else
if
(
!
is_support_cl_khr_d3d11_sharing
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No supported extensions"
);
#endif
int
found
=
-
1
;
int
found
=
-
1
;
cl_device_id
device
=
NULL
;
cl_device_id
device
=
NULL
;
cl_uint
numDevices
=
0
;
cl_uint
numDevices
=
0
;
cl_context
context
=
NULL
;
cl_context
context
=
NULL
;
// try with CL_PREFERRED_DEVICES_FOR_D3D11_KHR
#ifdef HAVE_OPENCL_D3D11_NV
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
if
(
is_support_cl_nv_d3d11_sharing
)
{
clGetDeviceIDsFromD3D11KHR_fn
clGetDeviceIDsFromD3D11KHR
=
(
clGetDeviceIDsFromD3D11KHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11KHR"
);
if
(
!
clGetDeviceIDsFromD3D11KHR
)
continue
;
device
=
NULL
;
numDevices
=
0
;
status
=
clGetDeviceIDsFromD3D11KHR
(
platforms
[
i
],
CL_D3D11_DEVICE_KHR
,
pD3D11Device
,
CL_PREFERRED_DEVICES_FOR_D3D11_KHR
,
1
,
&
device
,
&
numDevices
);
if
(
status
!=
CL_SUCCESS
)
continue
;
if
(
numDevices
>
0
)
{
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_D3D11_DEVICE_KHR
,
(
cl_context_properties
)(
pD3D11Device
),
CL_CONTEXT_INTEROP_USER_SYNC
,
CL_FALSE
,
NULL
,
NULL
};
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
{
clReleaseDevice
(
device
);
}
else
{
found
=
i
;
break
;
}
}
}
if
(
found
<
0
)
{
{
// try with CL_
ALL_DEVICES_FOR_D3D11_KHR
// try with CL_
PREFERRED_DEVICES_FOR_D3D11_NV
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
{
{
clGetDeviceIDsFromD3D11
KHR_fn
clGetDeviceIDsFromD3D11KHR
=
(
clGetDeviceIDsFromD3D11KHR
_fn
)
clGetDeviceIDsFromD3D11
NV_fn
clGetDeviceIDsFromD3D11NV
=
(
clGetDeviceIDsFromD3D11NV
_fn
)
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11
KHR
"
);
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11
NV
"
);
if
(
!
clGetDeviceIDsFromD3D11
KHR
)
if
(
!
clGetDeviceIDsFromD3D11
NV
)
continue
;
continue
;
device
=
NULL
;
device
=
NULL
;
numDevices
=
0
;
numDevices
=
0
;
status
=
clGetDeviceIDsFromD3D11
KHR
(
platforms
[
i
],
CL_D3D11_DEVICE_KHR
,
pD3D11Device
,
status
=
clGetDeviceIDsFromD3D11
NV
(
platforms
[
i
],
CL_D3D11_DEVICE_NV
,
pD3D11Device
,
CL_
ALL_DEVICES_FOR_D3D11_KHR
,
1
,
&
device
,
&
numDevices
);
CL_
PREFERRED_DEVICES_FOR_D3D11_NV
,
1
,
&
device
,
&
numDevices
);
if
(
status
!=
CL_SUCCESS
)
if
(
status
!=
CL_SUCCESS
)
continue
;
continue
;
if
(
numDevices
>
0
)
if
(
numDevices
>
0
)
{
{
cl_context_properties
properties
[]
=
{
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_D3D11_DEVICE_
KHR
,
(
cl_context_properties
)(
pD3D11Device
),
CL_CONTEXT_D3D11_DEVICE_
NV
,
(
cl_context_properties
)(
pD3D11Device
),
CL_CONTEXT_INTEROP_USER_SYNC
,
CL_FALSE
,
//
CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE,
NULL
,
NULL
0
};
};
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
if
(
status
!=
CL_SUCCESS
)
{
{
...
@@ -338,9 +333,127 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
...
@@ -338,9 +333,127 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
}
}
}
}
if
(
found
<
0
)
if
(
found
<
0
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't create context for DirectX interop"
);
{
// try with CL_ALL_DEVICES_FOR_D3D11_NV
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
{
clGetDeviceIDsFromD3D11NV_fn
clGetDeviceIDsFromD3D11NV
=
(
clGetDeviceIDsFromD3D11NV_fn
)
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11NV"
);
if
(
!
clGetDeviceIDsFromD3D11NV
)
continue
;
device
=
NULL
;
numDevices
=
0
;
status
=
clGetDeviceIDsFromD3D11NV
(
platforms
[
i
],
CL_D3D11_DEVICE_NV
,
pD3D11Device
,
CL_ALL_DEVICES_FOR_D3D11_NV
,
1
,
&
device
,
&
numDevices
);
if
(
status
!=
CL_SUCCESS
)
continue
;
if
(
numDevices
>
0
)
{
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_D3D11_DEVICE_NV
,
(
cl_context_properties
)(
pD3D11Device
),
//CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE,
0
};
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
{
clReleaseDevice
(
device
);
}
else
{
found
=
i
;
break
;
}
}
}
}
}
}
#endif
if
(
is_support_cl_khr_d3d11_sharing
)
{
if
(
found
<
0
)
{
// try with CL_PREFERRED_DEVICES_FOR_D3D11_KHR
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
{
clGetDeviceIDsFromD3D11KHR_fn
clGetDeviceIDsFromD3D11KHR
=
(
clGetDeviceIDsFromD3D11KHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11KHR"
);
if
(
!
clGetDeviceIDsFromD3D11KHR
)
continue
;
device
=
NULL
;
numDevices
=
0
;
status
=
clGetDeviceIDsFromD3D11KHR
(
platforms
[
i
],
CL_D3D11_DEVICE_KHR
,
pD3D11Device
,
CL_PREFERRED_DEVICES_FOR_D3D11_KHR
,
1
,
&
device
,
&
numDevices
);
if
(
status
!=
CL_SUCCESS
)
continue
;
if
(
numDevices
>
0
)
{
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_D3D11_DEVICE_KHR
,
(
cl_context_properties
)(
pD3D11Device
),
CL_CONTEXT_INTEROP_USER_SYNC
,
CL_FALSE
,
NULL
,
NULL
};
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
{
clReleaseDevice
(
device
);
}
else
{
found
=
i
;
break
;
}
}
}
}
if
(
found
<
0
)
{
// try with CL_ALL_DEVICES_FOR_D3D11_KHR
for
(
int
i
=
0
;
i
<
(
int
)
numPlatforms
;
i
++
)
{
clGetDeviceIDsFromD3D11KHR_fn
clGetDeviceIDsFromD3D11KHR
=
(
clGetDeviceIDsFromD3D11KHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platforms
[
i
],
"clGetDeviceIDsFromD3D11KHR"
);
if
(
!
clGetDeviceIDsFromD3D11KHR
)
continue
;
device
=
NULL
;
numDevices
=
0
;
status
=
clGetDeviceIDsFromD3D11KHR
(
platforms
[
i
],
CL_D3D11_DEVICE_KHR
,
pD3D11Device
,
CL_ALL_DEVICES_FOR_D3D11_KHR
,
1
,
&
device
,
&
numDevices
);
if
(
status
!=
CL_SUCCESS
)
continue
;
if
(
numDevices
>
0
)
{
cl_context_properties
properties
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
i
],
CL_CONTEXT_D3D11_DEVICE_KHR
,
(
cl_context_properties
)(
pD3D11Device
),
CL_CONTEXT_INTEROP_USER_SYNC
,
CL_FALSE
,
NULL
,
NULL
};
context
=
clCreateContext
(
properties
,
1
,
&
device
,
NULL
,
NULL
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
{
clReleaseDevice
(
device
);
}
else
{
found
=
i
;
break
;
}
}
}
}
}
if
(
found
<
0
)
{
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't create context for DirectX interop"
);
}
Context
&
ctx
=
Context
::
getDefault
(
false
);
Context
&
ctx
=
Context
::
getDefault
(
false
);
initializeContextFromHandle
(
ctx
,
platforms
[
found
],
context
,
device
);
initializeContextFromHandle
(
ctx
,
platforms
[
found
],
context
,
device
);
...
@@ -679,29 +792,85 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
...
@@ -679,29 +792,85 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
}
// namespace cv::ocl
}
// namespace cv::ocl
#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
#ifdef HAVE_OPENCL_D3D11_NV
clCreateFromD3D11Texture2DNV_fn
clCreateFromD3D11Texture2DNV
=
NULL
;
clEnqueueAcquireD3D11ObjectsNV_fn
clEnqueueAcquireD3D11ObjectsNV
=
NULL
;
clEnqueueReleaseD3D11ObjectsNV_fn
clEnqueueReleaseD3D11ObjectsNV
=
NULL
;
#endif
clCreateFromD3D11Texture2DKHR_fn
clCreateFromD3D11Texture2DKHR
=
NULL
;
clCreateFromD3D11Texture2DKHR_fn
clCreateFromD3D11Texture2DKHR
=
NULL
;
clEnqueueAcquireD3D11ObjectsKHR_fn
clEnqueueAcquireD3D11ObjectsKHR
=
NULL
;
clEnqueueAcquireD3D11ObjectsKHR_fn
clEnqueueAcquireD3D11ObjectsKHR
=
NULL
;
clEnqueueReleaseD3D11ObjectsKHR_fn
clEnqueueReleaseD3D11ObjectsKHR
=
NULL
;
clEnqueueReleaseD3D11ObjectsKHR_fn
clEnqueueReleaseD3D11ObjectsKHR
=
NULL
;
static
void
__OpenCLinitializeD3D11
()
static
bool
__OpenCLinitializeD3D11
()
{
{
using
namespace
cv
::
ocl
;
using
namespace
cv
::
ocl
;
static
cl_platform_id
initializedPlatform
=
NULL
;
static
cl_platform_id
initializedPlatform
=
NULL
;
cl_platform_id
platform
=
(
cl_platform_id
)
Platform
::
getDefault
().
ptr
();
cl_platform_id
platform
=
(
cl_platform_id
)
Platform
::
getDefault
().
ptr
();
if
(
initializedPlatform
!=
platform
)
bool
useCLNVEXT
=
false
;
size_t
exts_len
;
cl_int
status
=
clGetPlatformInfo
(
platform
,
CL_PLATFORM_EXTENSIONS
,
0
,
NULL
,
&
exts_len
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't get length of CL_PLATFORM_EXTENSIONS"
);
cv
::
AutoBuffer
<
char
>
extensions
(
exts_len
);
status
=
clGetPlatformInfo
(
platform
,
CL_PLATFORM_EXTENSIONS
,
exts_len
,
static_cast
<
void
*>
(
extensions
.
data
()),
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No available CL_PLATFORM_EXTENSIONS"
);
bool
is_support_cl_khr_d3d11_sharing
=
false
;
if
(
strstr
(
extensions
.
data
(),
"cl_khr_d3d11_sharing"
))
is_support_cl_khr_d3d11_sharing
=
true
;
#ifdef HAVE_OPENCL_D3D11_NV
bool
is_support_cl_nv_d3d11_sharing
=
false
;
if
(
strstr
(
extensions
.
data
(),
"cl_nv_d3d11_sharing"
))
is_support_cl_nv_d3d11_sharing
=
true
;
if
(
!
is_support_cl_nv_d3d11_sharing
&&
!
is_support_cl_khr_d3d11_sharing
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No supported extensions"
);
#else
if
(
!
is_support_cl_khr_d3d11_sharing
)
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: No supported extensions"
);
#endif
#ifdef HAVE_OPENCL_D3D11_NV
if
(
is_support_cl_nv_d3d11_sharing
)
{
{
clCreateFromD3D11Texture2DKHR
=
(
clCreateFromD3D11Texture2DKHR_fn
)
if
(
initializedPlatform
!=
platform
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clCreateFromD3D11Texture2DKHR"
);
{
clEnqueueAcquireD3D11ObjectsKHR
=
(
clEnqueueAcquireD3D11ObjectsKHR_fn
)
clCreateFromD3D11Texture2DNV
=
(
clCreateFromD3D11Texture2DNV_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueAcquireD3D11ObjectsKHR"
);
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clCreateFromD3D11Texture2DNV"
);
clEnqueueReleaseD3D11ObjectsKHR
=
(
clEnqueueReleaseD3D11ObjectsKHR_fn
)
clEnqueueAcquireD3D11ObjectsNV
=
(
clEnqueueAcquireD3D11ObjectsNV_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueReleaseD3D11ObjectsKHR"
);
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueAcquireD3D11ObjectsNV"
);
initializedPlatform
=
platform
;
clEnqueueReleaseD3D11ObjectsNV
=
(
clEnqueueReleaseD3D11ObjectsNV_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueReleaseD3D11ObjectsNV"
);
initializedPlatform
=
platform
;
}
if
(
clCreateFromD3D11Texture2DNV
&&
clEnqueueAcquireD3D11ObjectsNV
&&
clEnqueueReleaseD3D11ObjectsNV
)
{
useCLNVEXT
=
true
;
}
}
}
if
(
!
clCreateFromD3D11Texture2DKHR
||
!
clEnqueueAcquireD3D11ObjectsKHR
||
!
clEnqueueReleaseD3D11ObjectsKHR
)
else
#endif
{
{
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't find functions for D3D11"
);
if
(
is_support_cl_khr_d3d11_sharing
)
{
if
(
initializedPlatform
!=
platform
)
{
clCreateFromD3D11Texture2DKHR
=
(
clCreateFromD3D11Texture2DKHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clCreateFromD3D11Texture2DKHR"
);
clEnqueueAcquireD3D11ObjectsKHR
=
(
clEnqueueAcquireD3D11ObjectsKHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueAcquireD3D11ObjectsKHR"
);
clEnqueueReleaseD3D11ObjectsKHR
=
(
clEnqueueReleaseD3D11ObjectsKHR_fn
)
clGetExtensionFunctionAddressForPlatform
(
platform
,
"clEnqueueReleaseD3D11ObjectsKHR"
);
initializedPlatform
=
platform
;
}
if
(
!
clCreateFromD3D11Texture2DKHR
||
!
clEnqueueAcquireD3D11ObjectsKHR
||
!
clEnqueueReleaseD3D11ObjectsKHR
)
{
CV_Error
(
cv
::
Error
::
OpenCLInitError
,
"OpenCL: Can't find functions for D3D11"
);
}
}
}
}
return
useCLNVEXT
;
}
}
#endif // defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
#endif // defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
...
@@ -762,14 +931,9 @@ bool ocl_convert_bgr_to_nv12(
...
@@ -762,14 +931,9 @@ bool ocl_convert_bgr_to_nv12(
namespace
directx
{
namespace
directx
{
void
convertToD3D11Texture2D
(
InputArray
src
,
ID3D11Texture2D
*
pD3D11Texture2D
)
#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
static
void
__convertToD3D11Texture2DKHR
(
InputArray
src
,
ID3D11Texture2D
*
pD3D11Texture2D
)
{
{
CV_UNUSED
(
src
);
CV_UNUSED
(
pD3D11Texture2D
);
#if !defined(HAVE_DIRECTX)
NO_DIRECTX_SUPPORT_ERROR
;
#elif defined(HAVE_OPENCL)
__OpenCLinitializeD3D11
();
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
pD3D11Texture2D
->
GetDesc
(
&
desc
);
pD3D11Texture2D
->
GetDesc
(
&
desc
);
...
@@ -797,7 +961,6 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D)
...
@@ -797,7 +961,6 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D)
#ifdef HAVE_DIRECTX_NV12
#ifdef HAVE_DIRECTX_NV12
cl_mem
clImageUV
=
0
;
cl_mem
clImageUV
=
0
;
#endif
#endif
clImage
=
clCreateFromD3D11Texture2DKHR
(
context
,
CL_MEM_WRITE_ONLY
,
pD3D11Texture2D
,
0
,
&
status
);
clImage
=
clCreateFromD3D11Texture2DKHR
(
context
,
CL_MEM_WRITE_ONLY
,
pD3D11Texture2D
,
0
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DKHR failed"
);
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DKHR failed"
);
...
@@ -863,22 +1026,108 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D)
...
@@ -863,22 +1026,108 @@ void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
}
}
#endif
#endif
}
#endif
#else
#if defined(HAVE_OPENCL_D3D11_NV)
// TODO memcpy
static
void
__convertToD3D11Texture2DNV
(
InputArray
src
,
ID3D11Texture2D
*
pD3D11Texture2D
)
NO_OPENCL_SUPPORT_ERROR
;
{
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
pD3D11Texture2D
->
GetDesc
(
&
desc
);
int
srcType
=
src
.
type
();
int
textureType
=
getTypeFromDXGI_FORMAT
(
desc
.
Format
);
CV_Assert
(
textureType
==
srcType
);
Size
srcSize
=
src
.
size
();
CV_Assert
(
srcSize
.
width
==
(
int
)
desc
.
Width
&&
srcSize
.
height
==
(
int
)
desc
.
Height
);
UMat
u
=
src
.
getUMat
();
// TODO Add support for roi
CV_Assert
(
u
.
offset
==
0
);
CV_Assert
(
u
.
isContinuous
());
cl_mem
clBuffer
=
(
cl_mem
)
u
.
handle
(
ACCESS_READ
);
using
namespace
cv
::
ocl
;
Context
&
ctx
=
Context
::
getDefault
();
cl_context
context
=
(
cl_context
)
ctx
.
ptr
();
cl_int
status
=
0
;
cl_mem
clImage
=
0
;
#ifdef HAVE_DIRECTX_NV12
cl_mem
clImageUV
=
0
;
#endif
#endif
}
clImage
=
clCreateFromD3D11Texture2DNV
(
context
,
CL_MEM_WRITE_ONLY
,
pD3D11Texture2D
,
0
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DNV failed"
);
#ifdef HAVE_DIRECTX_NV12
if
(
DXGI_FORMAT_NV12
==
desc
.
Format
)
{
clImageUV
=
clCreateFromD3D11Texture2DNV
(
context
,
CL_MEM_WRITE_ONLY
,
pD3D11Texture2D
,
1
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DNV failed"
);
}
#endif
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
status
=
clEnqueueAcquireD3D11ObjectsNV
(
q
,
1
,
&
clImage
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"
);
void
convertFromD3D11Texture2D
(
ID3D11Texture2D
*
pD3D11Texture2D
,
OutputArray
dst
)
#ifdef HAVE_DIRECTX_NV12
{
if
(
DXGI_FORMAT_NV12
==
desc
.
Format
)
CV_UNUSED
(
pD3D11Texture2D
);
CV_UNUSED
(
dst
);
{
#if !defined(HAVE_DIRECTX)
status
=
clEnqueueAcquireD3D11ObjectsNV
(
q
,
1
,
&
clImageUV
,
0
,
NULL
,
NULL
);
NO_DIRECTX_SUPPORT_ERROR
;
if
(
status
!=
CL_SUCCESS
)
#elif defined(HAVE_OPENCL)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"
);
__OpenCLinitializeD3D11
();
if
(
!
ocl
::
ocl_convert_bgr_to_nv12
(
clBuffer
,
(
int
)
u
.
step
[
0
],
u
.
cols
,
u
.
rows
,
clImage
,
clImageUV
))
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: ocl_convert_bgr_to_nv12 failed"
);
status
=
clEnqueueReleaseD3D11ObjectsNV
(
q
,
1
,
&
clImageUV
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"
);
}
else
#endif
{
size_t
offset
=
0
;
// TODO
size_t
origin
[
3
]
=
{
0
,
0
,
0
};
size_t
region
[
3
]
=
{
(
size_t
)
u
.
cols
,
(
size_t
)
u
.
rows
,
1
};
status
=
clEnqueueCopyBufferToImage
(
q
,
clBuffer
,
clImage
,
offset
,
origin
,
region
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueCopyBufferToImage failed"
);
}
status
=
clEnqueueReleaseD3D11ObjectsNV
(
q
,
1
,
&
clImage
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"
);
status
=
clFinish
(
q
);
// TODO Use events
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clFinish failed"
);
status
=
clReleaseMemObject
(
clImage
);
// TODO RAII
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
#ifdef HAVE_DIRECTX_NV12
if
(
DXGI_FORMAT_NV12
==
desc
.
Format
)
{
status
=
clReleaseMemObject
(
clImageUV
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
}
#endif
}
#endif
#if defined(HAVE_DIRECTX) && defined(HAVE_OPENCL)
static
void
__convertFromD3D11Texture2DKHR
(
ID3D11Texture2D
*
pD3D11Texture2D
,
OutputArray
dst
)
{
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
pD3D11Texture2D
->
GetDesc
(
&
desc
);
pD3D11Texture2D
->
GetDesc
(
&
desc
);
...
@@ -968,10 +1217,144 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst
...
@@ -968,10 +1217,144 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
}
}
#endif
#endif
}
#endif
#if defined(HAVE_OPENCL_D3D11_NV)
static
void
__convertFromD3D11Texture2DNV
(
ID3D11Texture2D
*
pD3D11Texture2D
,
OutputArray
dst
)
{
D3D11_TEXTURE2D_DESC
desc
=
{
0
};
pD3D11Texture2D
->
GetDesc
(
&
desc
);
int
textureType
=
getTypeFromDXGI_FORMAT
(
desc
.
Format
);
CV_Assert
(
textureType
>=
0
);
// TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying!
dst
.
create
(
Size
(
desc
.
Width
,
desc
.
Height
),
textureType
);
UMat
u
=
dst
.
getUMat
();
// TODO Add support for roi
CV_Assert
(
u
.
offset
==
0
);
CV_Assert
(
u
.
isContinuous
());
cl_mem
clBuffer
=
(
cl_mem
)
u
.
handle
(
ACCESS_READ
);
using
namespace
cv
::
ocl
;
Context
&
ctx
=
Context
::
getDefault
();
cl_context
context
=
(
cl_context
)
ctx
.
ptr
();
cl_int
status
=
0
;
cl_mem
clImage
=
0
;
clImage
=
clCreateFromD3D11Texture2DNV
(
context
,
CL_MEM_READ_ONLY
,
pD3D11Texture2D
,
0
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DNV failed"
);
#ifdef HAVE_DIRECTX_NV12
cl_mem
clImageUV
=
0
;
if
(
DXGI_FORMAT_NV12
==
desc
.
Format
)
{
clImageUV
=
clCreateFromD3D11Texture2DNV
(
context
,
CL_MEM_READ_ONLY
,
pD3D11Texture2D
,
1
,
&
status
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clCreateFromD3D11Texture2DNV failed"
);
}
#endif
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
status
=
clEnqueueAcquireD3D11ObjectsNV
(
q
,
1
,
&
clImage
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"
);
#ifdef HAVE_DIRECTX_NV12
if
(
DXGI_FORMAT
::
DXGI_FORMAT_NV12
==
desc
.
Format
)
{
status
=
clEnqueueAcquireD3D11ObjectsNV
(
q
,
1
,
&
clImageUV
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueAcquireD3D11ObjectsNV failed"
);
if
(
!
ocl
::
ocl_convert_nv12_to_bgr
(
clImage
,
clImageUV
,
clBuffer
,
(
int
)
u
.
step
[
0
],
u
.
cols
,
u
.
rows
))
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: ocl_convert_nv12_to_bgr failed"
);
status
=
clEnqueueReleaseD3D11ObjectsNV
(
q
,
1
,
&
clImageUV
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"
);
}
else
#endif
{
size_t
offset
=
0
;
// TODO
size_t
origin
[
3
]
=
{
0
,
0
,
0
};
size_t
region
[
3
]
=
{
(
size_t
)
u
.
cols
,
(
size_t
)
u
.
rows
,
1
};
status
=
clEnqueueCopyImageToBuffer
(
q
,
clImage
,
clBuffer
,
origin
,
region
,
offset
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueCopyImageToBuffer failed"
);
}
status
=
clEnqueueReleaseD3D11ObjectsNV
(
q
,
1
,
&
clImage
,
0
,
NULL
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clEnqueueReleaseD3D11ObjectsNV failed"
);
status
=
clFinish
(
q
);
// TODO Use events
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clFinish failed"
);
status
=
clReleaseMemObject
(
clImage
);
// TODO RAII
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
#ifdef HAVE_DIRECTX_NV12
if
(
DXGI_FORMAT_NV12
==
desc
.
Format
)
{
status
=
clReleaseMemObject
(
clImageUV
);
if
(
status
!=
CL_SUCCESS
)
CV_Error
(
cv
::
Error
::
OpenCLApiCallError
,
"OpenCL: clReleaseMem failed"
);
}
#endif
}
#endif
void
convertToD3D11Texture2D
(
InputArray
src
,
ID3D11Texture2D
*
pD3D11Texture2D
)
{
CV_UNUSED
(
src
);
CV_UNUSED
(
pD3D11Texture2D
);
#if !defined(HAVE_DIRECTX)
NO_DIRECTX_SUPPORT_ERROR
;
#elif !defined(HAVE_OPENCL)
NO_OPENCL_SUPPORT_ERROR
;
#else
#else
// TODO memcpy
bool
useCLNVEXT
=
__OpenCLinitializeD3D11
();
if
(
!
useCLNVEXT
){
__convertToD3D11Texture2DKHR
(
src
,
pD3D11Texture2D
);
}
#ifdef HAVE_OPENCL_D3D11_NV
else
{
__convertToD3D11Texture2DNV
(
src
,
pD3D11Texture2D
);
}
#endif
#endif
}
void
convertFromD3D11Texture2D
(
ID3D11Texture2D
*
pD3D11Texture2D
,
OutputArray
dst
)
{
CV_UNUSED
(
pD3D11Texture2D
);
CV_UNUSED
(
dst
);
#if !defined(HAVE_DIRECTX)
NO_DIRECTX_SUPPORT_ERROR
;
#elif !defined(HAVE_OPENCL)
NO_OPENCL_SUPPORT_ERROR
;
NO_OPENCL_SUPPORT_ERROR
;
#else
bool
useCLNVEXT
=
__OpenCLinitializeD3D11
();
if
(
!
useCLNVEXT
){
__convertFromD3D11Texture2DKHR
(
pD3D11Texture2D
,
dst
);
}
#ifdef HAVE_OPENCL_D3D11_NV
else
{
__convertFromD3D11Texture2DNV
(
pD3D11Texture2D
,
dst
);
}
#endif
#endif
#endif
}
}
...
...
modules/core/src/directx.inc.hpp
View file @
90df5e00
...
@@ -48,6 +48,9 @@
...
@@ -48,6 +48,9 @@
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include <CL/cl_d3d11.h>
#include <CL/cl_d3d11.h>
#ifdef HAVE_OPENCL_D3D11_NV
#include <CL/cl_d3d11_ext.h>
#endif
#include <CL/cl_d3d10.h>
#include <CL/cl_d3d10.h>
#include <CL/cl_dx9_media_sharing.h>
#include <CL/cl_dx9_media_sharing.h>
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
...
...
modules/core/src/opencl/runtime/generator/template/opencl_core.hpp.in
View file @
90df5e00
...
@@ -5,6 +5,7 @@
...
@@ -5,6 +5,7 @@
@CL_REMAP_ORIGIN@
@CL_REMAP_ORIGIN@
#if defined __APPLE__
#if defined __APPLE__
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.h>
#include <OpenCL/cl.h>
#else
#else
#include <CL/cl.h>
#include <CL/cl.h>
...
...
modules/core/src/opencl/runtime/opencl_core.cpp
View file @
90df5e00
...
@@ -47,6 +47,7 @@
...
@@ -47,6 +47,7 @@
#if defined(HAVE_OPENCL_STATIC)
#if defined(HAVE_OPENCL_STATIC)
#if defined __APPLE__
#if defined __APPLE__
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.h>
#include <OpenCL/cl.h>
#else
#else
#include <CL/cl.h>
#include <CL/cl.h>
...
...
modules/dnn/src/layers/lrn_layer.cpp
View file @
90df5e00
...
@@ -92,7 +92,7 @@ public:
...
@@ -92,7 +92,7 @@ public:
virtual
bool
supportBackend
(
int
backendId
)
CV_OVERRIDE
virtual
bool
supportBackend
(
int
backendId
)
CV_OVERRIDE
{
{
if
(
backendId
==
DNN_BACKEND_INFERENCE_ENGINE
)
if
(
backendId
==
DNN_BACKEND_INFERENCE_ENGINE
)
return
(
bias
==
1
)
&&
(
preferableTarget
!=
DNN_TARGET_MYRIAD
||
type
==
SPATIAL_NRM
)
;
return
bias
==
1
;
return
backendId
==
DNN_BACKEND_OPENCV
||
return
backendId
==
DNN_BACKEND_OPENCV
||
backendId
==
DNN_BACKEND_HALIDE
||
backendId
==
DNN_BACKEND_HALIDE
||
(
backendId
==
DNN_BACKEND_VKCOM
&&
haveVulkan
()
&&
(
size
%
2
==
1
)
&&
(
type
==
CHANNEL_NRM
));
(
backendId
==
DNN_BACKEND_VKCOM
&&
haveVulkan
()
&&
(
size
%
2
==
1
)
&&
(
type
==
CHANNEL_NRM
));
...
...
modules/dnn/src/onnx/onnx_importer.cpp
View file @
90df5e00
...
@@ -591,6 +591,37 @@ void ONNXImporter::populateNet(Net dstNet)
...
@@ -591,6 +591,37 @@ void ONNXImporter::populateNet(Net dstNet)
}
}
layerParams
.
set
(
"num_output"
,
layerParams
.
blobs
[
0
].
size
[
1
]
*
layerParams
.
get
<
int
>
(
"group"
,
1
));
layerParams
.
set
(
"num_output"
,
layerParams
.
blobs
[
0
].
size
[
1
]
*
layerParams
.
get
<
int
>
(
"group"
,
1
));
layerParams
.
set
(
"bias_term"
,
node_proto
.
input_size
()
==
3
);
layerParams
.
set
(
"bias_term"
,
node_proto
.
input_size
()
==
3
);
if
(
layerParams
.
has
(
"output_shape"
))
{
const
DictValue
&
outShape
=
layerParams
.
get
(
"output_shape"
);
if
(
outShape
.
size
()
!=
4
)
CV_Error
(
Error
::
StsNotImplemented
,
"Output shape must have 4 elements."
);
const
int
strideY
=
layerParams
.
get
<
int
>
(
"stride_h"
,
1
);
const
int
strideX
=
layerParams
.
get
<
int
>
(
"stride_w"
,
1
);
const
int
outH
=
outShape
.
getIntValue
(
2
);
const
int
outW
=
outShape
.
getIntValue
(
3
);
if
(
layerParams
.
get
<
String
>
(
"pad_mode"
)
==
"SAME"
)
{
layerParams
.
set
(
"adj_w"
,
(
outW
-
1
)
%
strideX
);
layerParams
.
set
(
"adj_h"
,
(
outH
-
1
)
%
strideY
);
}
else
if
(
layerParams
.
get
<
String
>
(
"pad_mode"
)
==
"VALID"
)
{
if
(
!
layerParams
.
has
(
"kernel_h"
)
||
!
layerParams
.
has
(
"kernel_w"
))
CV_Error
(
Error
::
StsNotImplemented
,
"Required attributes 'kernel_h' and 'kernel_w' are not present."
);
int
kernelH
=
layerParams
.
get
<
int
>
(
"kernel_h"
);
int
kernelW
=
layerParams
.
get
<
int
>
(
"kernel_w"
);
layerParams
.
set
(
"adj_w"
,
(
outW
-
kernelW
)
%
strideX
);
layerParams
.
set
(
"adj_h"
,
(
outH
-
kernelH
)
%
strideY
);
}
}
}
}
else
if
(
layer_type
==
"Transpose"
)
else
if
(
layer_type
==
"Transpose"
)
{
{
...
...
modules/dnn/test/test_halide_layers.cpp
View file @
90df5e00
...
@@ -228,6 +228,10 @@ TEST_P(LRN, Accuracy)
...
@@ -228,6 +228,10 @@ TEST_P(LRN, Accuracy)
Backend
backendId
=
get
<
0
>
(
get
<
5
>
(
GetParam
()));
Backend
backendId
=
get
<
0
>
(
get
<
5
>
(
GetParam
()));
Target
targetId
=
get
<
1
>
(
get
<
5
>
(
GetParam
()));
Target
targetId
=
get
<
1
>
(
get
<
5
>
(
GetParam
()));
if
((
inSize
.
width
==
5
||
inSize
.
height
==
5
)
&&
targetId
==
DNN_TARGET_MYRIAD
&&
nrmType
==
"ACROSS_CHANNELS"
)
throw
SkipTestException
(
"This test case is disabled"
);
LayerParams
lp
;
LayerParams
lp
;
lp
.
set
(
"norm_region"
,
nrmType
);
lp
.
set
(
"norm_region"
,
nrmType
);
lp
.
set
(
"local_size"
,
localSize
);
lp
.
set
(
"local_size"
,
localSize
);
...
...
modules/dnn/test/test_onnx_importer.cpp
View file @
90df5e00
...
@@ -73,6 +73,7 @@ TEST_P(Test_ONNX_layers, Deconvolution)
...
@@ -73,6 +73,7 @@ TEST_P(Test_ONNX_layers, Deconvolution)
testONNXModels
(
"deconvolution"
);
testONNXModels
(
"deconvolution"
);
testONNXModels
(
"two_deconvolution"
);
testONNXModels
(
"two_deconvolution"
);
testONNXModels
(
"deconvolution_group"
);
testONNXModels
(
"deconvolution_group"
);
testONNXModels
(
"deconvolution_output_shape"
);
}
}
TEST_P
(
Test_ONNX_layers
,
Dropout
)
TEST_P
(
Test_ONNX_layers
,
Dropout
)
...
...
modules/imgproc/src/sumpixels.avx512_skx.cpp
View file @
90df5e00
...
@@ -13,7 +13,6 @@ namespace { // Anonymous namespace to avoid exposing the implementation classes
...
@@ -13,7 +13,6 @@ namespace { // Anonymous namespace to avoid exposing the implementation classes
// NOTE: Look at the bottom of the file for the entry-point function for external callers
// NOTE: Look at the bottom of the file for the entry-point function for external callers
//
//
// TODO: Add support for 1 channel input (WIP: currently hitting hardware glassjaw)
template
<
size_t
num_channels
>
class
IntegralCalculator
;
template
<
size_t
num_channels
>
class
IntegralCalculator
;
template
<
size_t
num_channels
>
template
<
size_t
num_channels
>
...
@@ -191,51 +190,55 @@ public:
...
@@ -191,51 +190,55 @@ public:
}
}
// The calculate_integral function referenced here must be implemented in the templated derivatives
// because the algorithm depends heavily on the number of channels in the image
// This is the incomplete definition (just the prototype) here.
//
static
CV_ALWAYS_INLINE
__m512d
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
);
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m512i
read_64_bytes
(
const
__m512i
*
srcs
,
__mmask64
data_mask
)
{
__m512i
read_64_bytes
(
const
__m512i
*
srcs
,
const
__mmask64
data_mask
)
{
return
_mm512_maskz_loadu_epi8
(
data_mask
,
srcs
);
return
_mm512_maskz_loadu_epi8
(
data_mask
,
srcs
);
}
}
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m128i
extract_lower_16bytes
(
__m512i
src_64byte_chunk
)
{
__m128i
extract_lower_16bytes
(
const
__m512i
src_64byte_chunk
)
{
return
_mm512_extracti64x2_epi64
(
src_64byte_chunk
,
0x0
);
return
_mm512_extracti64x2_epi64
(
src_64byte_chunk
,
0x0
);
}
}
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m512i
convert_lower_8bytes_to_longs
(
__m128i
src_16bytes
)
{
__m512i
convert_lower_8bytes_to_longs
(
const
__m128i
src_16bytes
)
{
return
_mm512_cvtepu8_epi64
(
src_16bytes
);
return
_mm512_cvtepu8_epi64
(
src_16bytes
);
}
}
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m512i
square_m512
(
__m512i
src_longs
)
{
__m512i
square_m512
(
const
__m512i
src_longs
)
{
return
_mm512_mullo_epi64
(
src_longs
,
src_longs
);
return
_mm512_mullo_epi64
(
src_longs
,
src_longs
);
}
}
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m128i
shift_right_8_bytes
(
__m128i
src_16bytes
)
{
__m128i
shift_right_8_bytes
(
const
__m128i
src_16bytes
)
{
return
_mm_maskz_compress_epi64
(
2
,
src_16bytes
);
return
_mm_maskz_compress_epi64
(
2
,
src_16bytes
);
}
}
static
CV_ALWAYS_INLINE
static
CV_ALWAYS_INLINE
__m512i
shift_right_16_bytes
(
__m512i
src_64byte_chunk
)
{
__m512i
shift_right_16_bytes
(
const
__m512i
src_64byte_chunk
)
{
return
_mm512_maskz_compress_epi64
(
0xFC
,
src_64byte_chunk
);
return
_mm512_maskz_compress_epi64
(
0xFC
,
src_64byte_chunk
);
}
}
static
CV_ALWAYS_INLINE
__m512i
m512_hadd
(
const
__m512i
a
){
return
_mm512_add_epi64
(
_mm512_maskz_compress_epi64
(
0xAA
,
a
),
_mm512_maskz_compress_epi64
(
0x55
,
a
));
}
// The calculate_integral function referenced here must be implemented in the templated derivatives
// because the algorithm depends heavily on the number of channels in the image
// This is the incomplete definition (just the prototype) here.
//
static
CV_ALWAYS_INLINE
__m512d
calculate_integral
(
const
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
);
};
};
...
@@ -246,7 +249,7 @@ public:
...
@@ -246,7 +249,7 @@ public:
//
//
// The function prototype that needs to be implemented is:
// The function prototype that needs to be implemented is:
//
//
// __m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator){ ... }
// __m512d calculate_integral(
const
__m512i src_longs, const __m512d above_values, __m512i &accumulator){ ... }
//
//
// Description of parameters:
// Description of parameters:
// INPUTS:
// INPUTS:
...
@@ -265,12 +268,72 @@ public:
...
@@ -265,12 +268,72 @@ public:
// Below here is the channel specific implementation
// Below here is the channel specific implementation
//
//
//========================================
// 1 Channel Integral Implementation
//========================================
template
<>
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
1
>
::
calculate_integral
(
const
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
// One channel support is implemented differently than 2, 3, or 4 channel
// One channel support has more horizontal operations that cannot be made vertical without losing performance
// The logical operations needed look like:
// Vertical LANES : |7|6|5|4|3|2|1|0|
// src_longs : |H|G|F|E|D|C|B|A|
// shift_by_1 : + |G|F|E|D|C|B|A| |
// shift_by_2 : + |F|E|D|C|B|A| | |
// shift_by_3 : + |E|D|C|B|A| | | |
// shift_by_4 : + |D|C|B|A| | | | |
// shift_by_5 : + |C|B|A| | | | | |
// shift_by_6 : + |B|A| | | | | | |
// shift_by_7 : + |A| | | | | | | |
// carry_over_idxs : + |7|7|7|7|7|7|7|7| (index position of result from previous iteration)
// = integral
//
// If we do this vertically we end up losing performance because of the number of operations. We will instead
// do a horizontal add tree to create the vertical sections we need as a tree
// Vertical Lanes: | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// src_longs: | H | G | F | E | D | C | B | A |
// horiz_sum_1: | | | | | G+H | E+F | C+D | A+B |
// horiz_sum_2: | | | | | | | EFGH | ABCD |
//
const
__m512i
horiz_sum_1
=
m512_hadd
(
src_longs
);
// indexes for the permutes below (3,2,1,0) = (GH, EF, CD, AB)
const
__m512i
horiz_sum_2
=
m512_hadd
(
horiz_sum_1
);
// indexes for the permutes below (9, 8) = (EFGH, ABCD)
// Then we can use the partial sums by looking at the vertical stacks above and realize that, for example
// ABCD appears vertically in lanes 7, 6, 5, 4, and 3 so we will permute the values so that all partial products
// appear in the right lanes. and sum them up along with the carry over value from the accumulator. So we setup
// the lanes like:
// Vertical Lanes: | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// s1 : | 0 | G | 0 | E | 0 | C | 0 | A |
// s2 : | ABCD | ABCD | ABCD | ABCD | ABCD | AB | AB | 0 |
// s3 : | EFGH | EF | EF | 0 | 0 | 0 | 0 | 0 |
// +------+------+------+------+------+------+------+------+
// sum : | A..H | A..G | A..F | A..E | A..D | A..C | A..B | A | Integral :-)
//
const
__m512i
s1
=
_mm512_maskz_mov_epi64
(
0x55
,
src_longs
);
// 0 G 0 E 0 D 0 C 0 A
const
__m512i
s2
=
_mm512_permutex2var_epi64
(
horiz_sum_1
,
_mm512_set_epi64
(
8
,
8
,
8
,
8
,
8
,
0
,
0
,
4
),
horiz_sum_2
);
const
__m512i
s3
=
_mm512_permutex2var_epi64
(
horiz_sum_1
,
_mm512_set_epi64
(
9
,
2
,
2
,
4
,
4
,
4
,
4
,
4
),
horiz_sum_2
);
// Now we use the rolling sum from the previous iteration from accumulator and replicate it into carry_over
// And sum everything up into the accumulator
//
const
__m512i
carry_over
=
_mm512_permutex2var_epi64
(
accumulator
,
_mm512_set_epi64
(
7
,
7
,
7
,
7
,
7
,
7
,
7
,
7
),
accumulator
);
accumulator
=
_mm512_add_epi64
(
_mm512_add_epi64
(
s2
,
s3
),
_mm512_add_epi64
(
carry_over
,
s1
));
// Convert to double precision and store
//
__m512d
integral_pd
=
_mm512_add_pd
(
_mm512_cvtepu64_pd
(
accumulator
),
above_values
);
return
integral_pd
;
}
//========================================
//========================================
// 2 Channel Integral Implementation
// 2 Channel Integral Implementation
//========================================
//========================================
template
<>
template
<>
CV_ALWAYS_INLINE
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
2
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
__m512d
IntegralCalculator
<
2
>
::
calculate_integral
(
const
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
7
,
6
,
7
,
6
,
7
,
6
);
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
7
,
6
,
7
,
6
,
7
,
6
);
...
@@ -300,12 +363,13 @@ __m512d IntegralCalculator < 2 > ::calculate_integral(__m512i src_longs, const _
...
@@ -300,12 +363,13 @@ __m512d IntegralCalculator < 2 > ::calculate_integral(__m512i src_longs, const _
return
integral_pd
;
return
integral_pd
;
}
}
//========================================
//========================================
// 3 Channel Integral Implementation
// 3 Channel Integral Implementation
//========================================
//========================================
template
<>
template
<>
CV_ALWAYS_INLINE
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
3
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
__m512d
IntegralCalculator
<
3
>
::
calculate_integral
(
const
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
6
,
5
,
7
,
6
,
5
,
7
,
6
,
5
);
__m512i
carryover_idxs
=
_mm512_set_epi64
(
6
,
5
,
7
,
6
,
5
,
7
,
6
,
5
);
...
@@ -338,7 +402,7 @@ __m512d IntegralCalculator < 3 > ::calculate_integral(__m512i src_longs, const _
...
@@ -338,7 +402,7 @@ __m512d IntegralCalculator < 3 > ::calculate_integral(__m512i src_longs, const _
//========================================
//========================================
template
<>
template
<>
CV_ALWAYS_INLINE
CV_ALWAYS_INLINE
__m512d
IntegralCalculator
<
4
>
::
calculate_integral
(
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
__m512d
IntegralCalculator
<
4
>
::
calculate_integral
(
const
__m512i
src_longs
,
const
__m512d
above_values
,
__m512i
&
accumulator
)
{
{
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
5
,
4
,
7
,
6
,
5
,
4
);
__m512i
carryover_idxs
=
_mm512_set_epi64
(
7
,
6
,
5
,
4
,
7
,
6
,
5
,
4
);
...
@@ -376,18 +440,23 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
...
@@ -376,18 +440,23 @@ void calculate_integral_avx512(const uchar *src, size_t _srcstep,
int
width
,
int
height
,
int
cn
)
int
width
,
int
height
,
int
cn
)
{
{
switch
(
cn
){
switch
(
cn
){
case
1
:
{
IntegralCalculator
<
1
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
break
;
}
case
2
:
{
case
2
:
{
IntegralCalculator
<
2
>
calculator
;
IntegralCalculator
<
2
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
break
;
break
;
}
}
case
3
:
{
case
3
:
{
IntegralCalculator
<
3
>
calculator
;
IntegralCalculator
<
3
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
break
;
break
;
}
}
case
4
:
{
case
4
:
{
IntegralCalculator
<
4
>
calculator
;
IntegralCalculator
<
4
>
calculator
;
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
calculator
.
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
);
}
}
}
}
...
...
modules/imgproc/src/sumpixels.cpp
View file @
90df5e00
...
@@ -77,7 +77,7 @@ struct Integral_SIMD<uchar, double, double> {
...
@@ -77,7 +77,7 @@ struct Integral_SIMD<uchar, double, double> {
#if CV_TRY_AVX512_SKX
#if CV_TRY_AVX512_SKX
CV_UNUSED
(
_tiltedstep
);
CV_UNUSED
(
_tiltedstep
);
// TODO: Add support for 1 channel input (WIP)
// TODO: Add support for 1 channel input (WIP)
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
(
cn
>=
2
)
&&
(
cn
<=
4
)
)){
if
(
CV_CPU_HAS_SUPPORT_AVX512_SKX
&&
!
tilted
&&
(
cn
<=
4
)){
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
opt_AVX512_SKX
::
calculate_integral_avx512
(
src
,
_srcstep
,
sum
,
_sumstep
,
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
sqsum
,
_sqsumstep
,
width
,
height
,
cn
);
return
true
;
return
true
;
...
...
samples/opencl/opencl-opencv-interop.cpp
View file @
90df5e00
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS // eliminate build warning
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS // eliminate build warning
#ifdef __APPLE__
#ifdef __APPLE__
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.h>
#include <OpenCL/cl.h>
#else
#else
#include <CL/cl.h>
#include <CL/cl.h>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment