Merge remote-tracking branch 'upstream/3.4' into merge-3.4

90df5e00 · Alexander Alekhin · 8c25a8eb · 9340fc0c · 90df5e00 · 90df5e00
Commit 90df5e00 authored Mar 26, 2019 by Alexander Alekhin
16 changed files
--- a/3rdparty/include/opencl/1.2/CL/cl_d3d11_ext.h
+++ b/3rdparty/include/opencl/1.2/CL/cl_d3d11_ext.h
+/**********************************************************************************
+ * Copyright (c) 2008-2009 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+#ifndef __OPENCL_CL_D3D11_EXT_H
+#define __OPENCL_CL_D3D11_EXT_H
+
+#include <d3d11.h>
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************
+ * cl_nv_d3d11_sharing                                                        */
+
+typedef cl_uint cl_d3d11_device_source_nv;
+typedef cl_uint cl_d3d11_device_set_nv;
+
+/******************************************************************************/
+
+// Error Codes
+#define CL_INVALID_D3D11_DEVICE_NV             -1006
+#define CL_INVALID_D3D11_RESOURCE_NV           -1007
+#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_NV  -1008
+#define CL_D3D11_RESOURCE_NOT_ACQUIRED_NV      -1009
+
+// cl_d3d11_device_source_nv
+#define CL_D3D11_DEVICE_NV                     0x4019
+#define CL_D3D11_DXGI_ADAPTER_NV               0x401A
+
+// cl_d3d11_device_set_nv
+#define CL_PREFERRED_DEVICES_FOR_D3D11_NV      0x401B
+#define CL_ALL_DEVICES_FOR_D3D11_NV            0x401C
+
+// cl_context_info
+#define CL_CONTEXT_D3D11_DEVICE_NV             0x401D
+
+// cl_mem_info
+#define CL_MEM_D3D11_RESOURCE_NV               0x401E
+
+// cl_image_info
+#define CL_IMAGE_D3D11_SUBRESOURCE_NV          0x401F
+
+// cl_command_type
+#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_NV    0x4020
+#define CL_COMMAND_RELEASE_D3D11_OBJECTS_NV    0x4021
+
+/******************************************************************************/
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11NV_fn)(
+    cl_platform_id            platform,
+    cl_d3d11_device_source_nv d3d_device_source,
+    void *                    d3d_object,
+    cl_d3d11_device_set_nv    d3d_device_set,
+    cl_uint                   num_entries, 
+    cl_device_id *            devices, 
+    cl_uint *                 num_devices) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferNV_fn)(
+    cl_context     context,
+    cl_mem_flags   flags,
+    ID3D11Buffer * resource,
+    cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DNV_fn)(
+    cl_context        context,
+    cl_mem_flags      flags,
+    ID3D11Texture2D * resource,
+    UINT              subresource,
+    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DNV_fn)(
+    cl_context        context,
+    cl_mem_flags      flags,
+    ID3D11Texture3D * resource,
+    UINT              subresource,
+    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsNV_fn)(
+    cl_command_queue command_queue,
+    cl_uint          num_objects,
+    const cl_mem *   mem_objects,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsNV_fn)(
+    cl_command_queue command_queue,
+    cl_uint          num_objects,
+    cl_mem *         mem_objects,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __OPENCL_CL_D3D11_H
+
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,6 +108,10 @@ if(POLICY CMP0067)
  cmake_policy(SET CMP0067 NEW)
 endif()

+if(POLICY CMP0068)
+  cmake_policy(SET CMP0068 NEW)  # CMake 3.9+: `RPATH` settings on macOS do not affect `install_name`.
+endif()
+
 include(cmake/OpenCVUtils.cmake)
 ocv_cmake_reset_hooks()
 ocv_check_environment_variables(OPENCV_CMAKE_HOOKS_DIR)
@@ -368,6 +372,9 @@ OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
 OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON
  VISIBLE_IF WIN32 AND NOT WINRT
  VERIFY HAVE_DIRECTX)
+OCV_OPTION(WITH_OPENCL_D3D11_NV "Include NVIDIA OpenCL D3D11 support" WITH_DIRECTX
+  VISIBLE_IF WIN32 AND NOT WINRT
+  VERIFY HAVE_OPENCL_D3D11_NV)
 OCV_OPTION(WITH_LIBREALSENSE "Include Intel librealsense support" OFF
  VISIBLE_IF NOT WITH_INTELPERC
  VERIFY HAVE_LIBREALSENSE)
@@ -1570,6 +1577,7 @@ if(WITH_OPENCL OR HAVE_OPENCL)
    IF HAVE_OPENCL_SVM THEN "SVM"
    IF HAVE_CLAMDFFT THEN "AMDFFT"
    IF HAVE_CLAMDBLAS THEN "AMDBLAS"
+    IF HAVE_OPENCL_D3D11_NV THEN "NVD3D11"
    ELSE "no extra features")
  status("")
  status("  OpenCL:"     HAVE_OPENCL   THEN   "YES (${opencl_features})" ELSE "NO")

--- a/cmake/OpenCVDetectOpenCL.cmake
+++ b/cmake/OpenCVDetectOpenCL.cmake
@@ -2,14 +2,19 @@ set(OPENCL_FOUND ON CACHE BOOL "OpenCL library is found")
 if(APPLE)
  set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
  set(OPENCL_INCLUDE_DIR "" CACHE PATH "OpenCL include directory")
-else(APPLE)
+else()
  set(OPENCL_LIBRARY "" CACHE STRING "OpenCL library")
  set(OPENCL_INCLUDE_DIR "${OpenCV_SOURCE_DIR}/3rdparty/include/opencl/1.2" CACHE PATH "OpenCL include directory")
  ocv_install_3rdparty_licenses(opencl-headers "${OpenCV_SOURCE_DIR}/3rdparty/include/opencl/LICENSE.txt")
-endif(APPLE)
+endif()
 mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)

 if(OPENCL_FOUND)
+
+  if(WITH_OPENCL_D3D11_NV AND EXISTS "${OPENCL_INCLUDE_DIR}/CL/cl_d3d11_ext.h")
+    set(HAVE_OPENCL_D3D11_NV ON)
+  endif()
+
  if(OPENCL_LIBRARY)
    set(HAVE_OPENCL_STATIC ON)
    set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")

--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -100,6 +100,9 @@
 #cmakedefine HAVE_OPENCL_STATIC
 #cmakedefine HAVE_OPENCL_SVM

+/* NVIDIA OpenCL D3D Extensions support */
+#cmakedefine HAVE_OPENCL_D3D11_NV
+
 /* OpenEXR codec */
 #cmakedefine HAVE_OPENEXR


--- a/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
+++ b/modules/core/include/opencv2/core/opencl/runtime/autogenerated/opencl_core.hpp
@@ -96,6 +96,7 @@
 #define clWaitForEvents clWaitForEvents_

 #if defined __APPLE__
+#define CL_SILENCE_DEPRECATION
 #include <OpenCL/cl.h>
 #else
 #include <CL/cl.h>

--- a/modules/core/src/directx.cpp
+++ b/modules/core/src/directx.cpp
--- a/modules/core/src/directx.inc.hpp
+++ b/modules/core/src/directx.inc.hpp
@@ -48,6 +48,9 @@
 #include "opencv2/core/opencl/runtime/opencl_core.hpp"

 #include <CL/cl_d3d11.h>
+#ifdef HAVE_OPENCL_D3D11_NV
+#include <CL/cl_d3d11_ext.h>
+#endif
 #include <CL/cl_d3d10.h>
 #include <CL/cl_dx9_media_sharing.h>
 #endif // HAVE_OPENCL

--- a/modules/core/src/opencl/runtime/generator/template/opencl_core.hpp.in
+++ b/modules/core/src/opencl/runtime/generator/template/opencl_core.hpp.in
@@ -5,6 +5,7 @@
 @CL_REMAP_ORIGIN@

 #if defined __APPLE__
+#define CL_SILENCE_DEPRECATION
 #include <OpenCL/cl.h>
 #else
 #include <CL/cl.h>

--- a/modules/core/src/opencl/runtime/opencl_core.cpp
+++ b/modules/core/src/opencl/runtime/opencl_core.cpp
@@ -47,6 +47,7 @@

 #if defined(HAVE_OPENCL_STATIC)
 #if defined __APPLE__
+#define CL_SILENCE_DEPRECATION
 #include <OpenCL/cl.h>
 #else
 #include <CL/cl.h>

--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@@ -92,7 +92,7 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
-            return (bias == 1) && (preferableTarget != DNN_TARGET_MYRIAD || type == SPATIAL_NRM);
+            return bias == 1;
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_HALIDE ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (size % 2 == 1) && (type == CHANNEL_NRM));

--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -591,6 +591,37 @@ void ONNXImporter::populateNet(Net dstNet)
            }
            layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
            layerParams.set("bias_term", node_proto.input_size() == 3);
+
+            if (layerParams.has("output_shape"))
+            {
+                const DictValue& outShape = layerParams.get("output_shape");
+
+                if (outShape.size() != 4)
+                    CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements.");
+
+                const int strideY = layerParams.get<int>("stride_h", 1);
+                const int strideX = layerParams.get<int>("stride_w", 1);
+                const int outH = outShape.getIntValue(2);
+                const int outW = outShape.getIntValue(3);
+
+                if (layerParams.get<String>("pad_mode") == "SAME")
+                {
+                    layerParams.set("adj_w", (outW - 1) % strideX);
+                    layerParams.set("adj_h", (outH - 1) % strideY);
+                }
+                else if (layerParams.get<String>("pad_mode") == "VALID")
+                {
+                    if (!layerParams.has("kernel_h") || !layerParams.has("kernel_w"))
+                        CV_Error(Error::StsNotImplemented,
+                                 "Required attributes 'kernel_h' and 'kernel_w' are not present.");
+
+                    int kernelH = layerParams.get<int>("kernel_h");
+                    int kernelW = layerParams.get<int>("kernel_w");
+
+                    layerParams.set("adj_w", (outW - kernelW) % strideX);
+                    layerParams.set("adj_h", (outH - kernelH) % strideY);
+                }
+            }
        }
        else if (layer_type == "Transpose")
        {

--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@@ -228,6 +228,10 @@ TEST_P(LRN, Accuracy)
    Backend backendId = get<0>(get<5>(GetParam()));
    Target targetId = get<1>(get<5>(GetParam()));

+    if ((inSize.width == 5 || inSize.height == 5) && targetId == DNN_TARGET_MYRIAD &&
+        nrmType == "ACROSS_CHANNELS")
+        throw SkipTestException("This test case is disabled");
+
    LayerParams lp;
    lp.set("norm_region", nrmType);
    lp.set("local_size", localSize);

--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -73,6 +73,7 @@ TEST_P(Test_ONNX_layers, Deconvolution)
    testONNXModels("deconvolution");
    testONNXModels("two_deconvolution");
    testONNXModels("deconvolution_group");
+    testONNXModels("deconvolution_output_shape");
 }

 TEST_P(Test_ONNX_layers, Dropout)

--- a/modules/imgproc/src/sumpixels.avx512_skx.cpp
+++ b/modules/imgproc/src/sumpixels.avx512_skx.cpp
@@ -13,7 +13,6 @@ namespace { // Anonymous namespace to avoid exposing the implementation classes
 // NOTE: Look at the bottom of the file for the entry-point function for external callers
 //

-// TODO: Add support for 1 channel input (WIP: currently hitting hardware glassjaw)
 template<size_t num_channels> class IntegralCalculator;

 template<size_t num_channels>
@@ -191,51 +190,55 @@ public:
    }


-    // The calculate_integral function referenced here must be implemented in the templated derivatives
-    // because the algorithm depends heavily on the number of channels in the image
-    // This is the incomplete definition (just the prototype) here.
-    //
-    static CV_ALWAYS_INLINE
-    __m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator);
-
-
    static CV_ALWAYS_INLINE
-    __m512i read_64_bytes(const __m512i *srcs, __mmask64 data_mask)  {
+    __m512i read_64_bytes(const __m512i *srcs, const __mmask64 data_mask)  {
        return _mm512_maskz_loadu_epi8(data_mask, srcs);
    }


    static CV_ALWAYS_INLINE
-    __m128i extract_lower_16bytes(__m512i src_64byte_chunk) {
+    __m128i extract_lower_16bytes(const __m512i src_64byte_chunk) {
        return _mm512_extracti64x2_epi64(src_64byte_chunk, 0x0);
    }


    static CV_ALWAYS_INLINE
-    __m512i convert_lower_8bytes_to_longs(__m128i src_16bytes)  {
+    __m512i convert_lower_8bytes_to_longs(const __m128i src_16bytes)  {
        return _mm512_cvtepu8_epi64(src_16bytes);
    }


    static CV_ALWAYS_INLINE
-    __m512i square_m512(__m512i src_longs) {
+    __m512i square_m512(const __m512i src_longs) {
        return _mm512_mullo_epi64(src_longs, src_longs);
    }


    static CV_ALWAYS_INLINE
-    __m128i shift_right_8_bytes(__m128i src_16bytes)  {
+    __m128i shift_right_8_bytes(const __m128i src_16bytes)  {
        return _mm_maskz_compress_epi64(2, src_16bytes);
    }


-
    static CV_ALWAYS_INLINE
-    __m512i shift_right_16_bytes(__m512i src_64byte_chunk)  {
+    __m512i shift_right_16_bytes(const __m512i src_64byte_chunk)  {
        return _mm512_maskz_compress_epi64(0xFC, src_64byte_chunk);
    }


+    static CV_ALWAYS_INLINE
+    __m512i  m512_hadd(const __m512i a){
+        return _mm512_add_epi64(_mm512_maskz_compress_epi64(0xAA, a), _mm512_maskz_compress_epi64(0x55, a));
+    }
+
+
+    // The calculate_integral function referenced here must be implemented in the templated derivatives
+    // because the algorithm depends heavily on the number of channels in the image
+    // This is the incomplete definition (just the prototype) here.
+    //
+    static CV_ALWAYS_INLINE
+    __m512d calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator);
+
 };


@@ -246,7 +249,7 @@ public:
 //
 // The function prototype that needs to be implemented is:
 //
-//     __m512d calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator){ ... }
+//     __m512d calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator){ ... }
 //
 // Description of parameters:
 //   INPUTS:
@@ -265,12 +268,72 @@ public:
 // Below here is the channel specific implementation
 //

+//========================================
+//   1 Channel Integral Implementation
+//========================================
+template<>
+CV_ALWAYS_INLINE
+__m512d IntegralCalculator < 1 > ::calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator)
+{
+    // One channel support is implemented differently than 2, 3, or 4 channel
+    // One channel support has more horizontal operations that cannot be made vertical without losing performance
+    // The logical operations needed look like:
+    //   Vertical LANES  :   |7|6|5|4|3|2|1|0|
+    //   src_longs       :   |H|G|F|E|D|C|B|A|
+    //   shift_by_1      : + |G|F|E|D|C|B|A| |
+    //   shift_by_2      : + |F|E|D|C|B|A| | |
+    //   shift_by_3      : + |E|D|C|B|A| | | |
+    //   shift_by_4      : + |D|C|B|A| | | | |
+    //   shift_by_5      : + |C|B|A| | | | | |
+    //   shift_by_6      : + |B|A| | | | | | |
+    //   shift_by_7      : + |A| | | | | | | |
+    //   carry_over_idxs : + |7|7|7|7|7|7|7|7|  (index position of result from previous iteration)
+    //                     = integral
+    //
+    // If we do this vertically we end up losing performance because of the number of operations.  We will instead
+    // do a horizontal add tree to create the vertical sections we need as a tree
+    // Vertical Lanes: |   7  |   6  |   5  |   4  |   3  |   2  |   1  |   0  |
+    //      src_longs: |   H  |   G  |   F  |   E  |   D  |   C  |   B  |   A  |
+    //    horiz_sum_1: |      |      |      |      |  G+H |  E+F |  C+D |  A+B |
+    //    horiz_sum_2: |      |      |      |      |      |      | EFGH | ABCD |
+    //
+    const __m512i horiz_sum_1 = m512_hadd(src_longs);   // indexes for the permutes below (3,2,1,0) = (GH, EF, CD, AB)
+    const __m512i horiz_sum_2 = m512_hadd(horiz_sum_1); // indexes for the permutes below (9, 8)    = (EFGH, ABCD)
+
+    // Then we can use the partial sums by looking at the vertical stacks above and realize that, for example
+    // ABCD appears vertically in lanes 7, 6, 5, 4, and 3 so we will permute the values so that all partial products
+    // appear in the right lanes. and sum them up along with the carry over value from the accumulator.  So we setup
+    // the lanes like:
+    // Vertical Lanes: |   7  |   6  |   5  |   4  |   3  |   2  |   1  |   0  |
+    //            s1 : |   0  |   G  |   0  |   E  |   0  |   C  |   0  |   A  |
+    //            s2 : | ABCD | ABCD | ABCD | ABCD | ABCD |  AB  |  AB  |   0  |
+    //            s3 : | EFGH |  EF  |  EF  |   0  |   0  |   0  |   0  |   0  |
+    //                 +------+------+------+------+------+------+------+------+
+    //           sum : | A..H | A..G | A..F | A..E | A..D | A..C | A..B |   A  | Integral :-)
+    //
+    const __m512i s1 = _mm512_maskz_mov_epi64(0x55, src_longs); // 0 G 0 E 0 D 0 C 0 A
+    const __m512i s2 = _mm512_permutex2var_epi64(horiz_sum_1, _mm512_set_epi64(8,8,8,8,8,0,0,4), horiz_sum_2);
+    const __m512i s3 = _mm512_permutex2var_epi64(horiz_sum_1, _mm512_set_epi64(9,2,2,4,4,4,4,4), horiz_sum_2);
+
+    // Now we use the rolling sum from the previous iteration from accumulator and replicate it into carry_over
+    // And sum everything up into the accumulator
+    //
+    const __m512i carry_over  = _mm512_permutex2var_epi64(accumulator, _mm512_set_epi64(7,7,7,7,7,7,7,7), accumulator);
+    accumulator = _mm512_add_epi64(_mm512_add_epi64(s2, s3), _mm512_add_epi64(carry_over, s1));
+
+    // Convert to double precision and store
+    //
+    __m512d integral_pd = _mm512_add_pd(_mm512_cvtepu64_pd(accumulator), above_values);
+    return integral_pd;
+}
+
+
 //========================================
 //   2 Channel Integral Implementation
 //========================================
 template<>
 CV_ALWAYS_INLINE
-__m512d IntegralCalculator < 2 > ::calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator)
+__m512d IntegralCalculator < 2 > ::calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator)
 {
    __m512i carryover_idxs = _mm512_set_epi64(7, 6, 7, 6, 7, 6, 7, 6);

@@ -300,12 +363,13 @@ __m512d IntegralCalculator < 2 > ::calculate_integral(__m512i src_longs, const _
    return integral_pd;
 }

+
 //========================================
 //   3 Channel Integral Implementation
 //========================================
 template<>
 CV_ALWAYS_INLINE
-__m512d IntegralCalculator < 3 > ::calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator)
+__m512d IntegralCalculator < 3 > ::calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator)
 {
    __m512i carryover_idxs = _mm512_set_epi64(6, 5, 7, 6, 5, 7, 6, 5);

@@ -338,7 +402,7 @@ __m512d IntegralCalculator < 3 > ::calculate_integral(__m512i src_longs, const _
 //========================================
 template<>
 CV_ALWAYS_INLINE
-__m512d IntegralCalculator < 4 > ::calculate_integral(__m512i src_longs, const __m512d above_values, __m512i &accumulator)
+__m512d IntegralCalculator < 4 > ::calculate_integral(const __m512i src_longs, const __m512d above_values, __m512i &accumulator)
 {
    __m512i carryover_idxs = _mm512_set_epi64(7, 6, 5, 4, 7, 6, 5, 4);

@@ -376,18 +440,23 @@ void calculate_integral_avx512(const uchar *src,   size_t _srcstep,
                               int width, int height, int cn)
 {
    switch(cn){
+        case 1: {
+            IntegralCalculator< 1 > calculator;
+            calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height);
+            break;
+        }
        case 2: {
-            IntegralCalculator<2> calculator;
+            IntegralCalculator< 2 > calculator;
            calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height);
            break;
        }
        case 3: {
-            IntegralCalculator<3> calculator;
+            IntegralCalculator< 3 > calculator;
            calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height);
            break;
        }
        case 4: {
-            IntegralCalculator<4> calculator;
+            IntegralCalculator< 4 > calculator;
            calculator.calculate_integral_avx512(src, _srcstep, sum, _sumstep, sqsum, _sqsumstep, width, height);
        }
    }

--- a/modules/imgproc/src/sumpixels.cpp
+++ b/modules/imgproc/src/sumpixels.cpp
@@ -77,7 +77,7 @@ struct Integral_SIMD<uchar, double, double> {
 #if CV_TRY_AVX512_SKX
        CV_UNUSED(_tiltedstep);
        // TODO:  Add support for 1 channel input (WIP)
-        if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && ((cn >= 2) && (cn <= 4))){
+        if (CV_CPU_HAS_SUPPORT_AVX512_SKX && !tilted && (cn <= 4)){
            opt_AVX512_SKX::calculate_integral_avx512(src, _srcstep, sum, _sumstep,
                                                      sqsum, _sqsumstep, width, height, cn);
            return true;

--- a/samples/opencl/opencl-opencv-interop.cpp
+++ b/samples/opencl/opencl-opencv-interop.cpp
@@ -19,6 +19,7 @@
 #define CL_USE_DEPRECATED_OPENCL_2_0_APIS // eliminate build warning

 #ifdef __APPLE__
+#define CL_SILENCE_DEPRECATION
 #include <OpenCL/cl.h>
 #else
 #include <CL/cl.h>