Merge pull request #1760 from ilya-lavrenov:ocl_remap_nn

07db81a4 · Roman Donchenko · OpenCV Buildbot · 641bb7ac · fa15769f · 07db81a4
Commit 07db81a4, authored Nov 07, 2013 by Roman Donchenko; committed by OpenCV Buildbot on Nov 07, 2013.
4 changed files
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -195,9 +195,14 @@ namespace cv
                return;
            }
+            if (map1.empty())
+                map1.swap(map2);
            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
-                      || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
+                      /*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
-            CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) ||
+            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
+                                                                    (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
+                      (map1.type() == CV_32FC2 && !map2.data) ||
                      (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
            CV_Assert(!map2.data || map2.size() == map1.size());
            CV_Assert(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_WRAP
@@ -212,10 +217,14 @@ namespace cv
                                   "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
            string kernelName = "remap";
-            if ( map1.type() == CV_32FC2 && !map2.data )
+            if (map1.type() == CV_32FC2 && map2.empty())
                kernelName += "_32FC2";
-            else if (map1.type() == CV_16SC2 && !map2.data)
+            else if (map1.type() == CV_16SC2)
+            {
                kernelName += "_16SC2";
+                if (!map2.empty())
+                    kernelName += "_16UC1";
+            }
            else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
                kernelName += "_2_32FC1";
            else
@@ -232,9 +241,6 @@ namespace cv
            if (interpolation != INTER_NEAREST)
            {
                int wdepth = std::max(CV_32F, dst.depth());
-                if (!supportsDouble)
-                    wdepth = std::min(CV_32F, wdepth);
                buildOptions += format(" -D WT=%s%s -D convertToT=convert_%s%s%s -D convertToWT=convert_%s%s"
                                       " -D convertToWT2=convert_%s2 -D WT2=%s2",
                                       typeMap[wdepth], channelMap[ocn],

--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -51,6 +51,13 @@
 #endif
 #endif
+enum // constants shared by the remap kernels in this file
+{
+    INTER_BITS = 5, // bit width of one sub-field of a packed map2 value (used as the shift that isolates the upper field)
+    INTER_TAB_SIZE = 1 << INTER_BITS, // 32: values per sub-field; also the factor the linear kernels use to quantize the fractional offset
+    INTER_TAB_SIZE2 = INTER_TAB_SIZE * INTER_TAB_SIZE // 1024: INTER_TAB_SIZE2 - 1 masks the two sub-fields out of a map2 value
+};
 #ifdef INTER_NEAREST
 #define convertToWT
 #endif
@@ -204,6 +211,36 @@ __kernel void remap_16SC2(__global const T * restrict src, __global T * dst, __g
    }
 }
+__kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * dst, __global short2 * map1, __global ushort * map2,
+        int src_offset, int dst_offset, int map1_offset, int map2_offset, // per-buffer start offsets, added to the column index
+        int src_step, int dst_step, int map1_step, int map2_step, // per-buffer row strides, multiplied by the row index
+        int src_cols, int src_rows, int dst_cols, int dst_rows, T scalar) // src_cols/src_rows/scalar are not read directly below; NOTE(review): presumably consumed by the border macros - confirm
+{ // Nearest-neighbour remap for a short2 (x, y) map1 plus a packed ushort map2; each work-item writes one dst element.
+    int x = get_global_id(0); // destination column handled by this work-item
+    int y = get_global_id(1); // destination row handled by this work-item
+    if (x < dst_cols && y < dst_rows) // work-items outside the destination do nothing
+    {
+        int dstIdx = mad24(y, dst_step, x + dst_offset); // y * dst_step + x + dst_offset
+        int map1Idx = mad24(y, map1_step, x + map1_offset);
+        int map2Idx = mad24(y, map2_step, x + map2_offset);
+        int map2Value = convert_int(map2[map2Idx]) & (INTER_TAB_SIZE2 - 1); // keep only the low 2 * INTER_BITS bits
+        int dx = (map2Value & (INTER_TAB_SIZE - 1)) < (INTER_TAB_SIZE >> 1) ? 1 : 0; // low field: +1 in x when it is below half the table size
+        int dy = (map2Value >> INTER_BITS) < (INTER_TAB_SIZE >> 1) ? 1 : 0; // high field: +1 in y when it is below half the table size; NOTE(review): direction presumably mirrors the CPU lookup table - confirm
+        int2 gxy = convert_int2(map1[map1Idx]) + (int2)(dx, dy); // integer source coordinate from map1, bumped by (dx, dy)
+        int gx = gxy.x, gy = gxy.y;
+        if (NEED_EXTRAPOLATION(gx, gy)) // macro defined outside this hunk; NOTE(review): presumably true when (gx, gy) is outside src - confirm
+            EXTRAPOLATE(gxy, dst[dstIdx]) // no trailing ';': the macro is expected to expand to a complete statement so the 'else' below still binds to this 'if'
+        else
+        {
+            int srcIdx = mad24(gy, src_step, gx + src_offset); // gy * src_step + gx + src_offset
+            dst[dstIdx] = src[srcIdx]; // plain copy of the selected source element, no interpolation
+        }
+    }
+}
 #elif INTER_LINEAR
 __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
@@ -229,7 +266,7 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
        float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
        WT scalar = convertToWT(nVal);
        WT a = scalar, b = scalar, c = scalar, d = scalar;
@@ -282,7 +319,7 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
        float2 _u = map_data - convert_float2(map_dataA);
-        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)32)) / (WT2)32;
+        WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
        WT scalar = convertToWT(nVal);
        WT a = scalar, b = scalar, c = scalar, d = scalar;

--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -93,8 +93,8 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
 #endif
        else
        {
-            T array[VECSIZE];
+            __attribute__(( aligned(sizeof(VT)) )) T array[VECSIZE];
-            VSTOREN(vecValue, 0, array);
+            *((VT*)array) = vecValue;
            #pragma unroll
            for (int i = 0; i < VECSIZE; ++i)
                if (gx + i < max_index)

--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -355,6 +355,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_NEAREST, Combine(
                            Values(1, 2, 3, 4),
                            Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
                                   pair<MatType, MatType>((MatType)CV_32FC2, noType),
+                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
                                   pair<MatType, MatType>((MatType)CV_16SC2, noType)),
                            Values((Border)BORDER_CONSTANT,
                                   (Border)BORDER_REPLICATE,