removed unnecessary opencl kernels

8762ee3f · Vadim Pisarevsky · d914f20a · d914f20a · d914f20a · d914f20a
Commit 8762ee3f authored Nov 18, 2013 by Vadim Pisarevsky
29 changed files
--- a/modules/core/src/opencl/mulspectrums.cl
+++ b/modules/core/src/opencl/mulspectrums.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@multicorewareinc.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-typedef float2 cfloat;  // complex value packed in a float2: .x is the real part, .y the imaginary part
-inline cfloat cmulf(cfloat a, cfloat b)  // returns the complex product a * b
-{
-    return (cfloat)( a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);  // (re, im) = (ar*br - ai*bi, ar*bi + ai*br)
-}
-inline cfloat conjf(cfloat a)  // returns a with its .y component negated (complex conjugate when .y holds the imaginary part)
-{
-    return (cfloat)( a.x, - a.y );  // .x is passed through unchanged
-}
-__kernel void  // element-wise dst = cmulf(a, b) * scale over a 2D range, one element per work-item
-mulAndScaleSpectrumsKernel(
-    __global const cfloat* a,  // first input buffer
-    __global const cfloat* b,  // second input buffer, read at the same index as a
-    float scale,  // real factor applied to both components of each product
-    __global cfloat* dst,  // output buffer, written at the same index as a and b
-    uint cols,  // exclusive upper bound on x
-    uint rows,  // exclusive upper bound on y
-    uint mstep  // row stride; divided by sizeof(cfloat) below, so presumably given in bytes -- confirm with the host code
-)
-{
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-    const uint idx = mad24(y, mstep / sizeof(cfloat), x);  // y * (mstep / sizeof(cfloat)) + x, shared by a, b and dst
-    if (x < cols && y < rows)  // work-items outside the cols x rows range touch no memory
-    {
-        cfloat v = cmulf(a[idx], b[idx]);
-        dst[idx] = (cfloat)( v.x * scale, v.y * scale );  // scale both components
-    }
-}
-__kernel void  // element-wise dst = cmulf(a, conjf(b)) * scale over a 2D range, one element per work-item
-mulAndScaleSpectrumsKernel_CONJ(
-    __global const cfloat* a,  // first input buffer
-    __global const cfloat* b,  // second input buffer; each element is conjugated before the multiply
-    float scale,  // real factor applied to both components of each product
-    __global cfloat* dst,  // output buffer, written at the same index as a and b
-    uint cols,  // exclusive upper bound on x
-    uint rows,  // exclusive upper bound on y
-    uint mstep  // row stride; divided by sizeof(cfloat) below, so presumably given in bytes -- confirm with the host code
-)
-{
-    const uint x = get_global_id(0);
-    const uint y = get_global_id(1);
-    const uint idx = mad24(y, mstep / sizeof(cfloat), x);  // y * (mstep / sizeof(cfloat)) + x, shared by a, b and dst
-    if (x < cols && y < rows)  // work-items outside the cols x rows range touch no memory
-    {
-        cfloat v = cmulf(a[idx], conjf(b[idx]));  // only b is conjugated
-        dst[idx] = (cfloat)( v.x * scale, v.y * scale );  // scale both components
-    }
-}
--- a/modules/core/src/opencl/polarcart.cl
+++ b/modules/core/src/opencl/polarcart.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the copyright holders or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-__kernel void polarToCart(__global const uchar* mask, int maskstep, int maskoffset,  // NOTE(review): despite the name, the visible body only stores `value` into dst wherever mask is non-zero
-                          __global uchar* dstptr, int dststep, int dstoffset,  // dststep/dstoffset are added to a uchar pointer, i.e. used as byte quantities
-                          int rows, int cols, dstT value )  // dstT is not defined in the visible source; presumably a build-time definition -- confirm
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    if (x < cols && y < rows)  // work-items outside the cols x rows range do nothing
-    {
-        int mask_index = mad24(y, maskstep, x + maskoffset);  // one mask byte per (x, y)
-        if( mask[mask_index] )  // only positions with a non-zero mask byte are written
-        {
-            int dst_index  = mad24(y, dststep, x*sizeof(dstT) + dstoffset);  // byte offset of element (x, y) within dstptr
-            *(dstT*)(dstptr + dst_index) = value;  // NOTE(review): the cast omits the __global qualifier that dstptr carries
-        }
-    }
-}
-__kernel void cartToPolar(__global uchar* dstptr, int dststep, int dstoffset,  // NOTE(review): despite the name, the visible body only stores `value` into every element of the rows x cols range
-                          int rows, int cols, dstT value )  // dstT is not defined in the visible source; presumably a build-time definition -- confirm
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    if (x < cols && y < rows)  // work-items outside the cols x rows range do nothing
-    {
-        int dst_index  = mad24(y, dststep, x*sizeof(dstT) + dstoffset);  // byte offset of element (x, y) within dstptr
-        *(dstT*)(dstptr + dst_index) = value;  // NOTE(review): the cast omits the __global qualifier that dstptr carries
-    }
-}
--- a/modules/core/src/opencl/reductions.cl
+++ b/modules/core/src/opencl/reductions.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (DOUBLE_SUPPORT)  // only when DOUBLE_SUPPORT is defined: enable an fp64 extension, preferring cl_khr_fp64 over cl_amd_fp64
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-#if FUNC_SUM  // FUNC(a, b) folds a into accumulator b; the first non-zero FUNC_* flag picks the variant
-#define FUNC(a, b) b += a;  // plain sum
-#elif FUNC_ABS_SUM
-#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;  // sum of absolute values
-#elif FUNC_SQR_SUM
-#define FUNC(a, b) b += a * a;  // sum of squares
-#else
-#error No sum function
-#endif
-/**************************************Array buffer SUM**************************************/
-__kernel void arithm_op_sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,  // per-work-group partial reduction of src with FUNC; group g stores its partial result in dst[g]
-                                __global srcT *src, __global dstT *dst)  // srcT, dstT and convertToDstT are not defined in the visible source; presumably build-time definitions -- confirm
-{
-   unsigned int lid = get_local_id(0);
-   unsigned int gid = get_group_id(0);
-   unsigned int id = get_global_id(0);
-   unsigned int idx = offset + id + (id / cols) * invalid_cols;  // this initial value is overwritten inside the loop before idx is first read
-   __local dstT localmem_sum[128];
-   dstT sum = (dstT)(0), temp;
-   for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)  // each work-item visits id, id + 256*groupnum, ... while below elemnum
-   {
-       idx = offset + id + (id / cols) * invalid_cols;  // skips invalid_cols extra elements for every full run of cols elements
-       temp = convertToDstT(src[idx]);
-       FUNC(temp, sum);  // accumulate into this work-item's private sum
-   }
-   if (lid > 127)  // work-items with lid >= 128 park their private sum in slot lid - 128
-       localmem_sum[lid - 128] = sum;  // NOTE(review): the 128-entry buffer and the << 8 stride suggest a work-group size of 256 -- confirm with the host launch
-   barrier(CLK_LOCAL_MEM_FENCE);
-   if (lid < 128)  // work-items 0..127 add their own sum to the parked value in their slot
-       localmem_sum[lid] = sum + localmem_sum[lid];
-   barrier(CLK_LOCAL_MEM_FENCE);
-   for (int lsize = 64; lsize > 0; lsize >>= 1)  // halving reduction over the 128 slots: 64, 32, ..., 1 active work-items
-   {
-       if (lid < lsize)
-       {
-           int lid2 = lsize + lid;  // partner slot in the upper half of the active range
-           localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
-       }
-       barrier(CLK_LOCAL_MEM_FENCE);  // executed by every work-item, outside the lid test
-   }
-   if (lid == 0)
-       dst[gid] = localmem_sum[0];  // one partial result per work-group
-}
--- a/modules/imgproc/src/opencl/bilateral.cl
+++ b/modules/imgproc/src/opencl/bilateral.cl
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Rock Li, Rock.li@amd.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-__kernel void bilateral_C1_D0(__global uchar *dst,  // weighted neighbourhood average over single-byte pixels; one output pixel per work-item
-        __global const uchar *src,  // read starting at (x + radius, y + radius), plus the per-tap offsets
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,  // number of taps; entries 0..maxk-1 of space_weight and space_ofs are used
-        const int radius,  // shift applied to both x and y when indexing src
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,  // not referenced in the body
-        const int src_cols,  // not referenced in the body
-        __constant float *color_weight,  // indexed by |neighbour - centre|
-        __constant float *space_weight,  // one weight per tap
-        __constant int *space_ofs)  // per-tap offset added to the centre's src index
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    if (y < dst_rows && x < dst_cols)  // work-items outside the destination do nothing
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);  // centre pixel position in src
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float sum = 0.f, wsum = 0.f;  // weighted value sum and total weight
-        int val0 = (int)src[src_index];  // centre pixel value
-        for(int k = 0; k < maxk; k++ )
-        {
-            int val = (int)src[src_index + space_ofs[k]];
-            float w = space_weight[k] * color_weight[abs(val - val0)];  // tap weight times value-difference weight
-            sum += (float)(val) * w;
-            wsum += w;
-        }
-        dst[dst_index] = convert_uchar_rtz(sum / wsum + 0.5f);  // normalise, add 0.5, then convert rounding toward zero
-    }
-}
-__kernel void bilateral2_C1_D0(__global uchar *dst,  // same computation as a per-pixel weighted average, but each work-item produces 4 consecutive single-byte pixels
-        __global const uchar *src,  // read 4 bytes at a time with vload4
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,  // number of taps; entries 0..maxk-1 of space_weight and space_ofs are used
-        const int radius,  // shift applied to both x and y when indexing src
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,  // not referenced in the body
-        const int src_cols,  // not referenced in the body
-        __constant float *color_weight,  // indexed per lane by |neighbour - centre|
-        __constant float *space_weight,  // one weight per tap, broadcast to all 4 lanes
-        __constant int *space_ofs)  // per-tap offset added to the centre's src index
-{
-    int x = get_global_id(0) << 2;  // first of the 4 pixels handled by this work-item
-    int y = get_global_id(1);
-    if (y < dst_rows && x < dst_cols)  // NOTE(review): only the first of the 4 pixels is bounds-checked
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float4 sum = (float4)(0.f), wsum = (float4)(0.f);  // per-lane weighted sums and total weights
-        int4 val0 = convert_int4(vload4(0,src + src_index));  // the 4 centre pixel values
-        for(int k = 0; k < maxk; k++ )
-        {
-            int4 val = convert_int4(vload4(0,src+src_index + space_ofs[k]));  // the 4 neighbours for tap k
-            float4 w = (float4)(space_weight[k]) * (float4)(color_weight[abs(val.x - val0.x)], color_weight[abs(val.y - val0.y)],  // each lane looks up its own value-difference weight
-                color_weight[abs(val.z - val0.z)], color_weight[abs(val.w - val0.w)]);
-            sum += convert_float4(val) * w;
-            wsum += w;
-        }
-        *(__global uchar4*)(dst+dst_index) = convert_uchar4_rtz(sum/wsum+0.5f);  // single 4-byte store at dst + dst_index
-    }
-}
-__kernel void bilateral_C4_D0(__global uchar4 *dst,  // weighted neighbourhood average over uchar4 pixels; one output pixel per work-item
-        __global const uchar4 *src,  // indexed in uchar4 elements
-        const int dst_rows,
-        const int dst_cols,
-        const int maxk,  // number of taps; entries 0..maxk-1 of space_weight and space_ofs are used
-        const int radius,  // shift applied to both x and y when indexing src
-        const int dst_step,
-        const int dst_offset,
-        const int src_step,
-        const int src_rows,  // not referenced in the body
-        const int src_cols,  // not referenced in the body
-        __constant float *color_weight,  // indexed by the summed absolute differences of the x, y and z components
-        __constant float *space_weight,  // one weight per tap
-        __constant int *space_ofs)  // per-tap offset added to the centre's src index
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-    if (y < dst_rows && x < dst_cols)  // work-items outside the destination do nothing
-    {
-        int src_index = mad24(y + radius, src_step, x + radius);
-        int dst_index = mad24(y, dst_step, x + dst_offset);
-        float4 sum = (float4)0.f;  // weighted sum of all four components
-        float wsum = 0.f;  // a single total weight shared by the four components
-        int4 val0 = convert_int4(src[src_index]);  // centre pixel
-        for(int k = 0; k < maxk; k++ )
-        {
-            int4 val = convert_int4(src[src_index + space_ofs[k]]);
-            float w = space_weight[k] * color_weight[abs(val.x - val0.x) + abs(val.y - val0.y) + abs(val.z - val0.z)];  // the w component does not contribute to the difference
-            sum += convert_float4(val) * (float4)w;  // but all four components, including w, are averaged
-            wsum += w;
-        }
-        wsum = 1.f / wsum;  // reuse wsum as the normalisation factor
-        dst[dst_index] = convert_uchar4_rtz(sum * (float4)wsum + (float4)0.5f);  // normalise, add 0.5, then convert rounding toward zero
-    }
-}
--- a/modules/imgproc/src/opencl/boxfilter.cl
+++ b/modules/imgproc/src/opencl/boxfilter.cl
--- a/modules/imgproc/src/opencl/canny.cl
+++ b/modules/imgproc/src/opencl/canny.cl
--- a/modules/imgproc/src/opencl/clahe.cl
+++ b/modules/imgproc/src/opencl/clahe.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Sen Liu, swjtuls1987@126.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#ifndef WAVE_SIZE  // fallback when the build does not define WAVE_SIZE; the value is tested by the #if WAVE_SIZE < ... checks in the non-CPU reduce()
-#define WAVE_SIZE 1
-#endif
-int calc_lut(__local int* smem, int val, int tid)  // returns the inclusive prefix sum at position tid of the values contributed by all work-items
-{
-    smem[tid] = val;  // every work-item publishes its value in its own slot
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid == 0)  // a single work-item performs the scan serially
-        for (int i = 1; i < 256; ++i)  // fixed length of 256 entries
-            smem[i] += smem[i - 1];
-    barrier(CLK_LOCAL_MEM_FENCE);  // wait for the scan before reading the result
-    return smem[tid];
-}
-#ifdef CPU  // CPU build: every reduction step is separated by a barrier and the total is left in smem[256]
-void reduce(volatile __local int* smem, int val, int tid)  // sums the val of 256 work-items through smem
-{
-    smem[tid] = val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 128)
-        smem[tid] = val += smem[tid + 128];  // fold the upper 128 slots into the lower 128
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 64)
-        smem[tid] = val += smem[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 32)
-        smem[tid] += smem[tid + 32];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-        smem[tid] += smem[tid + 16];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-        smem[tid] += smem[tid + 8];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 4)
-        smem[tid] += smem[tid + 4];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 2)
-        smem[tid] += smem[tid + 2];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 1)
-        smem[256] = smem[tid] + smem[tid + 1];  // final total goes to slot 256, not slot 0
-    barrier(CLK_LOCAL_MEM_FENCE);
-}
-#else  // non-CPU build: the total is left in smem[0]; the last steps run without barriers depending on WAVE_SIZE
-void reduce(__local volatile int* smem, int val, int tid)  // sums the val of 256 work-items through smem
-{
-    smem[tid] = val;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 128)
-        smem[tid] = val += smem[tid + 128];  // fold the upper 128 slots into the lower 128
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 64)
-        smem[tid] = val += smem[tid + 64];
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 32)
-    {
-        smem[tid] += smem[tid + 32];
-#if WAVE_SIZE < 32  // when true, close the tid < 32 block, synchronise, and continue with tid < 16 only
-    } barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 16)
-    {
-#endif
-        smem[tid] += smem[tid + 16];
-#if WAVE_SIZE < 16  // when true, close the current block, synchronise, and continue with tid < 8 only
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (tid < 8)
-    {
-#endif
-        smem[tid] += smem[tid + 8];  // from here on there are no barriers; presumably relies on lock-step execution within a wavefront -- confirm
-        smem[tid] += smem[tid + 4];
-        smem[tid] += smem[tid + 2];
-        smem[tid] += smem[tid + 1];  // for tid == 0 this leaves the total in smem[0]
-    }
-}
-#endif
-__kernel void calcLut(__global __const uchar * src, __global uchar * lut,  // one work-group per tile: histogram the tile, optionally clip and redistribute, then write a scaled cumulative LUT
-                      const int srcStep, const int dstStep,  // dstStep is the stride between consecutive tiles' LUT rows
-                      const int2 tileSize, const int tilesX,  // tile extent (x, y) and tiles per row
-                      const int clipLimit, const float lutScale,  // clipping is skipped when clipLimit <= 0
-                      const int src_offset, const int dst_offset)
-{
-    __local int smem[512];
-    const int tx = get_group_id(0);  // tile column
-    const int ty = get_group_id(1);  // tile row
-    const unsigned int tid = get_local_id(1) * get_local_size(0)  // linear index of this work-item within its group
-                             + get_local_id(0);
-    smem[tid] = 0;  // each work-item clears one bin; presumably 256 work-items per group so all 256 bins are cleared -- confirm
-    barrier(CLK_LOCAL_MEM_FENCE);
-    for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))  // rows of the tile, strided by the local size in y
-    {
-        __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);  // start of row i of this tile
-        for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))  // columns of the tile, strided by the local size in x
-        {
-            const int data = srcPtr[j];
-            atomic_inc(&smem[data]);  // histogram bin for this pixel value
-        }
-    }
-    barrier(CLK_LOCAL_MEM_FENCE);
-    int tHistVal = smem[tid];  // this work-item's bin count
-    barrier(CLK_LOCAL_MEM_FENCE);  // all bins are read before smem is reused below
-    if (clipLimit > 0)
-    {
-        // clip histogram bar
-        int clipped = 0;
-        if (tHistVal > clipLimit)
-        {
-            clipped = tHistVal - clipLimit;
-            tHistVal = clipLimit;
-        }
-        // find number of overall clipped samples
-        reduce(smem, clipped, tid);
-        barrier(CLK_LOCAL_MEM_FENCE);
-#ifdef CPU
-        clipped = smem[256];  // the CPU reduce() leaves its total in slot 256
-#else
-        clipped = smem[0];  // the non-CPU reduce() leaves its total in slot 0
-#endif
-        // broadcast evaluated value
-        __local int totalClipped;
-        if (tid == 0)
-            totalClipped = clipped;
-        barrier(CLK_LOCAL_MEM_FENCE);
-        // redistribute clipped samples evenly
-        int redistBatch = totalClipped / 256;  // equal share added to every bin
-        tHistVal += redistBatch;
-        int residual = totalClipped - redistBatch * 256;  // remainder after the equal shares
-        if (tid < residual)  // the first `residual` bins get one extra sample each
-            ++tHistVal;
-    }
-    const int lutVal = calc_lut(smem, tHistVal, tid);  // cumulative histogram value at this bin
-    uint ires = (uint)convert_int_rte(lutScale * lutVal);  // scale, then round to nearest even
-    lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =  // entry tid of the LUT row belonging to tile (tx, ty)
-        convert_uchar(clamp(ires, (uint)0, (uint)255));
-}
-__kernel void transform(__global __const uchar * src,  // maps each source pixel through the LUTs of up to four surrounding tiles and blends the results bilinearly
-                        __global uchar * dst,
-                        __global uchar * lut,  // one LUT row per tile, lutStep apart, indexed by pixel value
-                        const int srcStep, const int dstStep, const int lutStep,
-                        const int cols, const int rows,
-                        const int2 tileSize,
-                        const int tilesX, const int tilesY,
-                        const int src_offset, const int dst_offset, int lut_offset)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-    if (x >= cols || y >= rows)  // work-items outside the image do nothing
-        return;
-    const float tyf = (convert_float(y) / tileSize.y) - 0.5f;  // vertical position in tile units, shifted by half a tile
-    int ty1 = convert_int_rtn(tyf);  // round toward negative infinity
-    int ty2 = ty1 + 1;
-    const float ya = tyf - ty1;  // vertical blend factor, taken before the tile indices are clamped
-    ty1 = max(ty1, 0);
-    ty2 = min(ty2, tilesY - 1);
-    const float txf = (convert_float(x) / tileSize.x) - 0.5f;  // horizontal position in tile units, shifted by half a tile
-    int tx1 = convert_int_rtn(txf);  // round toward negative infinity
-    int tx2 = tx1 + 1;
-    const float xa = txf - tx1;  // horizontal blend factor, taken before the tile indices are clamped
-    tx1 = max(tx1, 0);
-    tx2 = min(tx2, tilesX - 1);
-    const int srcVal = src[mad24(y, srcStep, x + src_offset)];  // pixel value, used as the index into each LUT row
-    float res = 0;
-    res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));  // tile (tx1, ty1)
-    res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));  // tile (tx2, ty1)
-    res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));  // tile (tx1, ty2)
-    res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));  // tile (tx2, ty2)
-    uint ires = (uint)convert_int_rte(res);  // round to nearest even
-    dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
-}
--- a/modules/imgproc/src/opencl/convolve.cl
+++ b/modules/imgproc/src/opencl/convolve.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Jiang Liyuan, jlyuan001.good@163.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#if defined (__ATI__)  // pick the fp64 extension pragma by vendor macro: cl_amd_fp64 for __ATI__, cl_khr_fp64 for __NVIDIA__
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-/************************************** convolve **************************************/
-__kernel void convolve_D5(__global float *src, __global float *temp1, __global float *dst,  // 2D filtering of float data through a local-memory tile; temp1 holds the kHeight x kWidth coefficients
-                          int rows, int cols, int src_step, int dst_step,int k_step, int kWidth, int kHeight,  // the *_step values index float pointers directly, i.e. they are used as element strides
-                          int src_offset, int dst_offset, int koffset)
-{
-    __local float smem[16 + 2 * 8][16 + 2 * 8];  // 32 x 32 tile; the four loads below suggest a 16 x 16 work-group with an 8-element apron -- confirm with the host launch
-    int x = get_local_id(0);
-    int y = get_local_id(1);
-    int gx = get_global_id(0);
-    int gy = get_global_id(1);
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-    smem[y][x] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];  // source (gy - 8, gx - 8), coordinates clamped into the image
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-    smem[y][x + 16] = src[min(max(gy - 8, 0), rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];  // source (gy - 8, gx + 8), clamped
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // x | x 0 | 0
-            // -----------
-            // x | x 0 | 0
-    smem[y + 16][x] = src[min(gy + 8, rows - 1) * src_step + min(max(gx - 8, 0), cols - 1) + src_offset];  // source (gy + 8, gx - 8), clamped
-            // 0 | 0 0 | 0
-            // -----------
-            // 0 | 0 0 | 0
-            // 0 | 0 x | x
-            // -----------
-            // 0 | 0 x | x
-    smem[y + 16][x + 16] = src[min(gy + 8, rows - 1) * src_step + min(gx + 8, cols - 1) + src_offset];  // source (gy + 8, gx + 8), clamped
-    barrier(CLK_LOCAL_MEM_FENCE);  // the whole tile must be loaded before any work-item reads it
-    if (gx < cols && gy < rows)  // only in-image work-items produce output; all of them took part in the loads above
-    {
-        float res = 0;
-        for (int i = 0; i < kHeight; ++i)
-            for (int j = 0; j < kWidth; ++j)
-                res += smem[y + 8 - kHeight / 2 + i][x + 8 - kWidth / 2 + j] * temp1[i * k_step + j + koffset];  // window centred on tile position (y + 8, x + 8); coefficients are applied unflipped
-        dst[gy * dst_step + gx + dst_offset] = res;
-    }
-}
--- a/modules/imgproc/src/opencl/copymakeborder.cl
+++ b/modules/imgproc/src/opencl/copymakeborder.cl
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Zero Lin zero.lin@amd.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-#if defined (DOUBLE_SUPPORT)  // only when DOUBLE_SUPPORT is defined: enable an fp64 extension, preferring cl_amd_fp64 over cl_khr_fp64
-#ifdef cl_amd_fp64
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (cl_khr_fp64)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-#endif
-#ifdef BORDER_CONSTANT  // EXTRAPOLATE(x, y, v) sets v for an out-of-range (x, y); constant mode assigns `scalar`, a name taken from the expansion site
-#define EXTRAPOLATE(x, y, v) v = scalar;
-#elif defined BORDER_REPLICATE  // replicate mode: clamp x to [0, src_cols - 1] and y to [0, src_rows - 1], then read src
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        x = max(min(x, src_cols - 1), 0); \
-        y = max(min(y, src_rows - 1), 0); \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#elif defined BORDER_WRAP  // wrap mode: shift x and y by multiples of src_cols / src_rows back into range, then read src
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        if (x < 0) \
-            x -= ((x - src_cols + 1) / src_cols) * src_cols; \
-        if (x >= src_cols) \
-            x %= src_cols; \
-        \
-        if (y < 0) \
-            y -= ((y - src_rows + 1) / src_rows) * src_rows; \
-        if( y >= src_rows ) \
-            y %= src_rows; \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)  // reflect modes: mirror x and y repeatedly until they fall inside the image
-#ifdef BORDER_REFLECT  // delta = 0 mirrors with the edge sample repeated (x = -1 maps to 0)
-#define DELTA int delta = 0
-#else  // BORDER_REFLECT_101: delta = 1 mirrors around the edge sample (x = -1 maps to 1)
-#define DELTA int delta = 1
-#endif
-#define EXTRAPOLATE(x, y, v) \
-    { \
-        DELTA; \
-        if (src_cols == 1) \
-            x = 0; \
-        else \
-            do \
-            { \
-                if( x < 0 ) \
-                    x = -x - 1 + delta; \
-                else \
-                    x = src_cols - 1 - (x - src_cols) - delta; \
-            } \
-            while (x >= src_cols || x < 0); \
-        \
-        if (src_rows == 1) \
-            y = 0; \
-        else \
-            do \
-            { \
-                if( y < 0 ) \
-                    y = -y - 1 + delta; \
-                else \
-                    y = src_rows - 1 - (y - src_rows) - delta; \
-            } \
-            while (y >= src_rows || y < 0); \
-        v = src[mad24(y, src_step, x + src_offset)]; \
-    }
-#else  // no BORDER_* mode selected: fail the build
-#error No extrapolation method
-#endif
-#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
-// Copies src into dst shifted by (left, top) and fills every destination
-// element whose source coordinate falls outside the source image using the
-// EXTRAPOLATE variant selected at build time. One work-item handles one
-// destination element; work-items outside dst_cols x dst_rows do nothing.
-// src_step / dst_step and the offsets are used directly as element indices.
-__kernel void copymakeborder
-                        (__global const GENTYPE *src,
-                         __global GENTYPE *dst,
-                         int dst_cols, int dst_rows,
-                         int src_cols, int src_rows,
-                         int src_step, int src_offset,
-                         int dst_step, int dst_offset,
-                         int top, int left, GENTYPE scalar)
-{
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
-    if (x >= dst_cols || y >= dst_rows)
-        return;
-
-    // Coordinate of this destination element expressed in the source image.
-    int src_x = x - left;
-    int src_y = y - top;
-    const int dst_index = mad24(y, dst_step, x + dst_offset);
-
-    if (NEED_EXTRAPOLATION(src_x, src_y))
-    {
-        // May rewrite src_x / src_y while folding them back into the image.
-        EXTRAPOLATE(src_x, src_y, dst[dst_index])
-    }
-    else
-    {
-        dst[dst_index] = src[mad24(src_y, src_step, src_x + src_offset)];
-    }
-}
--- a/modules/imgproc/src/opencl/gftt.cl
+++ b/modules/imgproc/src/opencl/gftt.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Peng Xiao, pengxiao@outlook.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-// WITH_MASK defaults to 0 (mask test compiled out) unless the build defines it.
-#ifndef WITH_MASK
-#define WITH_MASK 0
-#endif
-// Sampler shared by the ELEM_* helpers: unnormalized coordinates, out-of-range
-// coordinates clamped to the edge, nearest-neighbour filtering.
-__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
-// Returns the first channel of `_eig` at the integer pixel position (_x, _y),
-// read through the file-level `sampler`.
-inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
-{
-    const int2 coord = (int2)(_x, _y);
-    const float4 texel = read_imagef(_eig, sampler, coord);
-    return texel.x;
-}
-// Returns the first channel of `_eig` at the floating-point position `pt`,
-// read through the file-level `sampler`.
-inline float ELEM_FLT2(image2d_t _eig, float2 pt)
-{
-    const float4 texel = read_imagef(_eig, sampler, pt);
-    return texel.x;
-}
-// Collects local maxima of `eig` that exceed `threshold`.
-// One work-item inspects one pixel (j = column, i = row). Pixels on the outer
-// one-pixel frame are skipped, and with WITH_MASK enabled so are pixels whose
-// mask entry is zero. A pixel is reported when its value is greater than
-// `threshold` and not smaller than any of its eight neighbours. Reported pixels
-// are appended to `corners` as (column, row); `g_counter` is incremented for
-// every hit, but only the first `max_count` slots are written.
-__kernel
-    void findCorners
-    (
-        image2d_t eig,
-        __global const char * mask,
-        __global float2 * corners,
-        const int mask_strip,// in pixels
-        const float threshold,
-        const int rows,
-        const int cols,
-        const int max_count,
-        __global int * g_counter
-    )
-{
-    const int j = get_global_id(0);
-    const int i = get_global_id(1);
-
-    // Only interior pixels have a full 3x3 neighbourhood.
-    if (i <= 0 || i >= rows - 1 || j <= 0 || j >= cols - 1)
-        return;
-#if WITH_MASK
-    if (mask[i * mask_strip + j] == 0)
-        return;
-#endif
-
-    const float val = ELEM_INT2(eig, j, i);
-    if (!(val > threshold))
-        return;
-
-    // Maximum over the centre and its eight neighbours, visited row by row.
-    float maxVal = val;
-    for (int dy = -1; dy <= 1; ++dy)
-    {
-        for (int dx = -1; dx <= 1; ++dx)
-        {
-            if (dx == 0 && dy == 0)
-                continue;
-            maxVal = fmax(ELEM_INT2(eig, j + dx, i + dy), maxVal);
-        }
-    }
-
-    if (val == maxVal)
-    {
-        // The counter keeps growing past max_count; only the store is guarded.
-        const int ind = atomic_inc(g_counter);
-        if (ind < max_count)
-            corners[ind] = (float2)(j, i);
-    }
-}
-// Bitonic sort: one compare-exchange pass.
-// Each work-item below count / 2 orders one pair of corners by the `eig` value
-// sampled at the corner position. `stage` and `passOfStage` select the pair
-// distance and the direction of the exchange.
-// NOTE(review): both indices are clamped to `count`, not `count - 1`, so the
-// right partner can address corners[count]; presumably the host pads the
-// buffer or only passes power-of-two counts - confirm against the caller.
-__kernel
-    void sortCorners_bitonicSort
-    (
-        image2d_t eig,
-        __global float2 * corners,
-        const int count,
-        const int stage,
-        const int passOfStage
-    )
-{
-    const int threadId = get_global_id(0);
-    if (threadId >= count / 2)
-        return;
-
-    // Odd blocks of (1 << stage) work-items sort ascending, even ones descending.
-    const bool ascending = ((threadId / (1 << stage)) % 2) == 1;
-
-    const int pairDistance = 1 << (stage - passOfStage);
-    const int blockWidth   = 2 * pairDistance;
-    const int blockIndex   = threadId / pairDistance;
-    const int inBlock      = threadId % pairDistance;
-
-    const int leftId  = min(inBlock + blockIndex * blockWidth, count);
-    const int rightId = min(leftId + pairDistance, count);
-
-    const float2 leftPt  = corners[leftId];
-    const float2 rightPt = corners[rightId];
-    const float leftVal  = ELEM_FLT2(eig, leftPt);
-    const float rightVal = ELEM_FLT2(eig, rightPt);
-
-    // Descending wants the greater value on the left, ascending the lesser one.
-    const bool exchange = (leftVal > rightVal) == ascending;
-    corners[leftId]  = exchange ? rightPt : leftPt;
-    corners[rightId] = exchange ? leftPt : rightPt;
-}
-//selection sort for gftt
-//kernel is ported from Bolt library:
-//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
-//  Local pass: sorts the corners owned by each work-group in descending order
-//  of the `eig` value sampled at the corner position. Every work-item ranks its
-//  own corner against all corners of the group (one linear scan per work-item)
-//  and stores it at that rank.
-//
-//  eig     - image sampled at each corner position to obtain the sort key
-//  corners - corner list, sorted in place group by group
-//  count   - total number of corners; the last group handles the remainder
-//  scratch - local buffer with one float2 per work-item of the group
-//
-//  Corners with equal keys are ordered by their original index, so every
-//  work-item writes exactly one distinct slot and no corner is duplicated or
-//  lost when keys tie.
-__kernel
-    void sortCorners_selectionSortLocal
-    (
-        image2d_t eig,
-        __global float2 * corners,
-        const int count,
-        __local float2 * scratch
-    )
-{
-    const int i           = get_local_id(0);   // index in workgroup
-    const int numOfGroups = get_num_groups(0); // number of workgroups
-    const int groupID     = get_group_id(0);
-    const int wg          = get_local_size(0); // workgroup size = block size
-
-    // number of elements to be processed for this work group
-    const int n = (groupID == (numOfGroups-1)) ? (count - wg*(numOfGroups-1)) : wg;
-
-    corners += groupID * wg;
-
-    // Work-items past the end of the group still take part in the barrier; they
-    // load an in-range element (clamped to n - 1) whose scratch copy is never read.
-    const float2 pt1 = corners[max(min(i, n - 1), 0)];
-    scratch[i] = pt1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (i >= n)
-        return;
-
-    const float val1 = ELEM_FLT2(eig, pt1);
-
-    // Rank = number of strictly greater keys + number of equal keys that come
-    // earlier in the group. This is unique per work-item.
-    int pos = 0;
-    for (int j = 0; j < n; ++j)
-    {
-        const float val2 = ELEM_FLT2(eig, scratch[j]);
-        if (val2 > val1)
-            pos++;
-        else if (!(val1 > val2) && j < i)
-            pos++;
-    }
-
-    corners[pos] = pt1;
-}
-// Final pass of the selection sort: places every corner at its global rank.
-// Each work-item scans all work-group sized blocks of `corners` (the last block
-// holds the remainder) and counts keys greater than its own (`pos`) and keys
-// equal to its own (`same`). A block scan stops at the first smaller key, which
-// relies on each block already being in descending order. The corner is then
-// written to slots pos .. pos + same - 1.
-// NOTE(review): the scan reads `corners` while other work-items write it in
-// place, and tied corners all write the same slots; presumably acceptable to
-// the caller - confirm.
-__kernel
-    void sortCorners_selectionSortFinal
-    (
-        image2d_t eig,
-        __global float2 * corners,
-        const int count
-    )
-{
-    const int lid    = get_local_id(0);   // index in workgroup
-    const int groups = get_num_groups(0); // number of workgroups
-    const int gid    = get_group_id(0);
-    const int wg     = get_local_size(0); // workgroup size = block size
-
-    const int self = gid * wg + lid;
-    if (self >= count)
-        return;
-
-    const int tailLen  = count - wg * (groups - 1);
-    const int tailBase = (groups - 1) * wg;
-
-    const float2 pt1 = corners[self];
-    const float val1 = ELEM_FLT2(eig, pt1);
-
-    int pos = 0, same = 0;
-
-    // Full blocks.
-    for (int g = 0; g < groups - 1; g++)
-    {
-        const int base = g * wg;
-        for (int k = 0; k < wg; k++)
-        {
-            const float val2 = ELEM_FLT2(eig, corners[base + k]);
-            if (val1 > val2)
-                break;
-            //Increment only if the value is not the same.
-            if (val2 > val1)
-                pos++;
-            else
-                same++;
-        }
-    }
-
-    // Remainder block.
-    for (int k = 0; k < tailLen; k++)
-    {
-        const float val2 = ELEM_FLT2(eig, corners[tailBase + k]);
-        if (val1 > val2)
-            break;
-        //Two keys count as the same when neither compares greater than the other.
-        if (val2 > val1)
-            pos++;
-        else
-            same++;
-    }
-
-    for (int s = 0; s < same; s++)
-        corners[pos + s] = pt1;
-}
--- a/modules/imgproc/src/opencl/harris.cl
+++ b/modules/imgproc/src/opencl/harris.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Shengen Yan,yanshengen@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-// Enable cl_khr_fp64 when DOUBLE_SUPPORT is defined.
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////Macro for border type////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// ADDR_L / ADDR_H remap an index that is below the left / top edge; ADDR_R /
-// ADDR_B take the already remapped index as `addr` and override it when the
-// original index is at or beyond the right / bottom edge. ADDR_L never uses
-// r_edge and ADDR_H only uses b_edge in the BORDER_WRAP variant. No variant is
-// defined here for BORDER_CONSTANT.
-#ifdef BORDER_REPLICATE
-//BORDER_REPLICATE:     aaaaaa|abcdefgh|hhhhhhh
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (l_edge)   : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (r_edge)-1 : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (t_edge)   :(i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (b_edge)-1 :(addr))
-#endif
-#ifdef BORDER_REFLECT
-//BORDER_REFLECT:       fedcba|abcdefgh|hgfedcb
-// NOTE(review): the mirrored low-side index (-(i)-1) does not involve the edge
-// argument, so this presumably assumes a left/top edge of 0 - confirm.
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)-1               : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)-1 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
-#endif
-#ifdef BORDER_REFLECT101
-//BORDER_REFLECT101:   gfedcb|abcdefgh|gfedcba
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? -(i)                 : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? -(i)                 : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
-#endif
-#ifdef BORDER_WRAP
-//BORDER_WRAP:          cdefgh|abcdefgh|abcdefg
-#define ADDR_L(i, l_edge, r_edge)  ((i) <  (l_edge) ? (i)+(r_edge) : (i))
-#define ADDR_R(i, r_edge, addr)    ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
-#define ADDR_H(i, t_edge, b_edge)  ((i) <  (t_edge) ? (i)+(b_edge) : (i))
-#define ADDR_B(i, b_edge, addr)    ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
-#endif
-// Size of the per-row dimension of the local scratch array in calcHarris.
-#define THREADS 256
-// Picks elem1 when l_edge <= i < r_edge, elem2 otherwise. The expansion is not
-// wrapped in parentheses, so parenthesize it at the point of use.
-#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) >= (l_edge) && (i) < (r_edge) ? (elem1) : (elem2)
-///////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////calcHarris////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////////////////////////
-// Harris corner response from the derivative images Dx and Dy.
-//
-// For every output pixel the products Dx*Dx, Dx*Dy and Dy*Dy are summed over a
-// ksX x ksY window (anchor anX, anY) and the response
-//     sum(Dx*Dx) * sum(Dy*Dy) - sum(Dx*Dy)^2 - k * (sum(Dx*Dx) + sum(Dy*Dy))^2
-// is written to dst. ksX, ksY, anX and anY are not defined in this file and are
-// presumably supplied as build options - confirm against the host code.
-//
-// Each work-item first sums one column of ksY + 1 rows vertically, producing
-// partial sums for two adjacent output rows, and publishes them in local memory;
-// after the barrier the first THREADS - (ksX - 1) work-items add ksX neighbouring
-// columns and store up to two output rows. `temp` is indexed by the local id,
-// so the local size in x must not exceed THREADS.
-//
-// Offsets and steps are converted to float element units with >> 2
-// (presumably they arrive in bytes - confirm).
-//
-// With BORDER_CONSTANT, samples outside the whole Dx / Dy image count as 0 and
-// are never fetched from memory; the other border modes remap the coordinates
-// with the ADDR_* macros.
-__kernel void calcHarris(__global const float *Dx,__global const float *Dy, __global float *dst,
-                              int dx_offset, int dx_whole_rows, int dx_whole_cols, int dx_step,
-                              int dy_offset, int dy_whole_rows, int dy_whole_cols, int dy_step,
-                              int dst_offset, int dst_rows, int dst_cols, int dst_step,
-                              float k)
-{
-    int col = get_local_id(0);
-    const int gX = get_group_id(0);
-    const int gY = get_group_id(1);
-    const int gly = get_global_id(1);
-    int dx_x_off = (dx_offset % dx_step) >> 2;
-    int dx_y_off = dx_offset / dx_step;
-    int dy_x_off = (dy_offset % dy_step) >> 2;
-    int dy_y_off = dy_offset / dy_step;
-    int dst_x_off = (dst_offset % dst_step) >> 2;
-    int dst_y_off = dst_offset / dst_step;
-    // Neighbouring groups overlap by ksX - 1 columns; each group covers two rows.
-    int dx_startX = gX * (THREADS-ksX+1) - anX + dx_x_off;
-    int dx_startY = (gY << 1) - anY + dx_y_off;
-    int dy_startX = gX * (THREADS-ksX+1) - anX + dy_x_off;
-    int dy_startY = (gY << 1) - anY + dy_y_off;
-    int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
-    int dst_startY = (gY << 1) + dst_y_off;
-    float dx_data[ksY+1],dy_data[ksY+1],data[3][ksY+1];
-    __local float temp[6][THREADS];
-#ifdef BORDER_CONSTANT
-    for(int i=0; i < ksY+1; i++)
-    {
-        // Test the coordinate first: the load is only performed when it is in range.
-        const bool dx_con = dx_startX+col >= 0 && dx_startX+col < dx_whole_cols && dx_startY+i >= 0 && dx_startY+i < dx_whole_rows;
-        dx_data[i] = dx_con ? Dx[(dx_startY+i)*(dx_step>>2)+(dx_startX+col)] : 0.0f;
-        const bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
-        dy_data[i] = dy_con ? Dy[(dy_startY+i)*(dy_step>>2)+(dy_startX+col)] : 0.0f;
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#else
-    int clamped_col = min(dst_cols, col);
-    for(int i=0; i < ksY+1; i++)
-    {
-        int dx_selected_row;
-        int dx_selected_col;
-        dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows);
-        dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row);
-        dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols);
-        dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col);
-        dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col];
-        int dy_selected_row;
-        int dy_selected_col;
-        dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows);
-        dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row);
-        dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols);
-        dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col);
-        dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col];
-        data[0][i] = dx_data[i] * dx_data[i];
-        data[1][i] = dx_data[i] * dy_data[i];
-        data[2][i] = dy_data[i] * dy_data[i];
-    }
-#endif
-    // Rows 1 .. ksY-1 are shared by both output rows; row 0 belongs only to the
-    // upper one and row ksY only to the lower one.
-    float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f;
-    for(int i=1; i < ksY; i++)
-    {
-        sum0 += (data[0][i]);
-        sum1 += (data[1][i]);
-        sum2 += (data[2][i]);
-    }
-    float sum01,sum02,sum11,sum12,sum21,sum22;
-    sum01 = sum0 + (data[0][0]);
-    sum02 = sum0 + (data[0][ksY]);
-    temp[0][col] = sum01;
-    temp[1][col] = sum02;
-    sum11 = sum1 + (data[1][0]);
-    sum12 = sum1 + (data[1][ksY]);
-    temp[2][col] = sum11;
-    temp[3][col] = sum12;
-    sum21 = sum2 + (data[2][0]);
-    sum22 = sum2 + (data[2][ksY]);
-    temp[4][col] = sum21;
-    temp[5][col] = sum22;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if(col < (THREADS-(ksX-1)))
-    {
-        col += anX;
-        int posX = dst_startX - dst_x_off + col - anX;
-        int posY = (gly << 1);
-        // For an even ksX the window extends one column less to the right.
-        int till = (ksX + 1)%2;
-        float tmp_sum[6]={ 0.0f, 0.0f , 0.0f, 0.0f, 0.0f, 0.0f };
-        for(int s=0; s<6; s++)
-            for(int i=-anX; i<=anX - till; i++)
-            {
-                tmp_sum[s] += temp[s][col+i];
-            }
-        if(posX < dst_cols && (posY) < dst_rows)
-        {
-            dst[(dst_startY+0) * (dst_step>>2)+ dst_startX + col - anX] =
-                    tmp_sum[0] * tmp_sum[4] - tmp_sum[2] * tmp_sum[2] - k * (tmp_sum[0] + tmp_sum[4]) * (tmp_sum[0] + tmp_sum[4]);
-        }
-        if(posX < dst_cols && (posY + 1) < dst_rows)
-        {
-            dst[(dst_startY+1) * (dst_step>>2)+ dst_startX + col - anX] =
-                    tmp_sum[1] * tmp_sum[5] - tmp_sum[3] * tmp_sum[3] - k * (tmp_sum[1] + tmp_sum[5]) * (tmp_sum[1] + tmp_sum[5]);
-        }
-    }
-}
--- a/modules/imgproc/src/opencl/histogram.cl
+++ b/modules/imgproc/src/opencl/histogram.cl
--- a/modules/imgproc/src/opencl/hough.cl
+++ b/modules/imgproc/src/opencl/hough.cl
--- a/modules/imgproc/src/opencl/integral.cl
+++ b/modules/imgproc/src/opencl/integral.cl
--- a/modules/imgproc/src/opencl/integral_sum.cl
+++ b/modules/imgproc/src/opencl/integral_sum.cl
--- a/modules/imgproc/src/opencl/laplacian.cl
+++ b/modules/imgproc/src/opencl/laplacian.cl
--- a/modules/imgproc/src/opencl/match_template.cl
+++ b/modules/imgproc/src/opencl/match_template.cl
--- a/modules/imgproc/src/opencl/median.cl
+++ b/modules/imgproc/src/opencl/median.cl
--- a/modules/imgproc/src/opencl/mineigenval.cl
+++ b/modules/imgproc/src/opencl/mineigenval.cl
--- a/modules/imgproc/src/opencl/moments.cl
+++ b/modules/imgproc/src/opencl/moments.cl
--- a/modules/imgproc/src/opencl/morph.cl
+++ b/modules/imgproc/src/opencl/morph.cl
--- a/modules/imgproc/src/opencl/pyramid.cl
+++ b/modules/imgproc/src/opencl/pyramid.cl
--- a/modules/imgproc/src/opencl/remap.cl
+++ b/modules/imgproc/src/opencl/remap.cl
--- a/modules/imgproc/src/opencl/threshold.cl
+++ b/modules/imgproc/src/opencl/threshold.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-// Enable cl_khr_fp64 when DOUBLE_SUPPORT is defined.
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-// threshold type:
-// enum { THRESH_BINARY=0, THRESH_BINARY_INV=1, THRESH_TRUNC=2, THRESH_TOZERO=3,
-//       THRESH_TOZERO_INV=4, THRESH_MASK=7, THRESH_OTSU=8 };
-//
-// Thresholds a single-channel uchar image, 16 pixels per work-item.
-// thresh_type 0..4 selects the operation applied where src > thresh; any other
-// value copies the source through. The 16-pixel store starts at a column shifted
-// back by (dst_offset & 15); lanes whose column falls outside [0, dst_cols) keep
-// the value already present in dst.
-__kernel void threshold_C1_D0(__global const uchar * restrict src, __global uchar *dst,
-                              int src_offset, int src_step,
-                              int dst_offset, int dst_rows, int dst_cols, int dst_step,
-                              uchar thresh, uchar max_val, int thresh_type
-                              )
-{
-    const int gx = get_global_id(0);
-    const int gy = get_global_id(1);
-    const int misalign = (dst_offset & 15);
-    const int dstart = (gx << 4) - misalign;
-    if (dstart >= dst_cols || gy >= dst_rows)
-        return;
-
-    // Shift the source by the same amount so both sides stay lane-aligned.
-    src_offset -= misalign;
-    const uchar16 sdata = vload16(gx, src+src_offset+gy*src_step);
-    const uchar16 zero = 0;
-    uchar16 ddata;
-    switch (thresh_type)
-    {
-        case 0:
-            ddata = (sdata > thresh) ? (uchar16)(max_val) : (uchar16)(0);
-            break;
-        case 1:
-            ddata = (sdata > thresh) ? zero : (uchar16)(max_val);
-            break;
-        case 2:
-            ddata = (sdata > thresh) ? (uchar16)(thresh) : sdata;
-            break;
-        case 3:
-            ddata = (sdata > thresh) ? sdata : zero;
-            break;
-        case 4:
-            ddata = (sdata > thresh) ? zero : sdata;
-            break;
-        default:
-            ddata = sdata;
-    }
-
-    // Destination columns covered by the 16 lanes of this work-item.
-    const int16 dpos = (int16)(dstart) +
-                       (int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-    __global uchar16 *out = (__global uchar16*)(dst+dst_offset+gy*dst_step+dstart);
-    const uchar16 dVal = *out;
-    const int16 con = dpos >= 0 && dpos < dst_cols;
-    // Lanes outside the row keep the previous destination contents.
-    ddata = convert_uchar16(con != 0) ? ddata : dVal;
-    *out = ddata;
-}
-// Thresholds a single-channel float image, 4 pixels per work-item.
-// thresh_type 0..4 selects the operation applied where src > thresh (same
-// numbering as the uchar kernel); any other value copies the source through.
-// The 4-pixel store starts at a column shifted back by (dst_offset & 3); lanes
-// whose column falls outside [0, dst_cols) keep the value already present in dst.
-__kernel void threshold_C1_D5(__global const float * restrict src, __global float *dst,
-                              int src_offset, int src_step,
-                              int dst_offset, int dst_rows, int dst_cols, int dst_step,
-                              float thresh, float max_val, int thresh_type
-                              )
-{
-    const int gx = get_global_id(0);
-    const int gy = get_global_id(1);
-    const int misalign = (dst_offset & 3);
-    const int dstart = (gx << 2) - misalign;
-    if (dstart >= dst_cols || gy >= dst_rows)
-        return;
-
-    // Shift the source by the same amount so both sides stay lane-aligned.
-    src_offset -= misalign;
-    const float4 sdata = vload4(gx, src+src_offset+gy*src_step);
-    const float4 zero = 0;
-    float4 ddata;
-    switch (thresh_type)
-    {
-        case 0:
-            ddata = sdata > thresh ? (float4)(max_val) : (float4)(0.f);
-            break;
-        case 1:
-            ddata = sdata > thresh ? zero : (float4)max_val;
-            break;
-        case 2:
-            ddata = sdata > thresh ? (float4)thresh : sdata;
-            break;
-        case 3:
-            ddata = sdata > thresh ? sdata : (float4)(0.f);
-            break;
-        case 4:
-            ddata = sdata > thresh ? (float4)(0.f) : sdata;
-            break;
-        default:
-            ddata = sdata;
-    }
-
-    // Destination columns covered by the 4 lanes of this work-item.
-    const int4 dpos = (int4)(dstart) + (int4)(0, 1, 2, 3);
-    __global float4 *out = (__global float4*)(dst+dst_offset+gy*dst_step+dstart);
-    const float4 dVal = *out;
-    const int4 con = dpos >= 0 && dpos < dst_cols;
-    // Lanes outside the row keep the previous destination contents.
-    ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
-    *out = ddata;
-}
--- a/modules/imgproc/src/opencl/warpaffine.cl
+++ b/modules/imgproc/src/opencl/warpaffine.cl
--- a/modules/imgproc/src/opencl/warpperspective.cl
+++ b/modules/imgproc/src/opencl/warpperspective.cl
--- a/modules/imgproc/test/test_imgproc_umat.cpp
+++ b/modules/imgproc/test/test_imgproc_umat.cpp
--- a/modules/objdetect/src/opencl/haarobjectdetect.cl
+++ b/modules/objdetect/src/opencl/haarobjectdetect.cl
--- a/modules/objdetect/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/objdetect/src/opencl/haarobjectdetect_scaled2.cl