Merge pull request #2106 from savuor:kinfu_fix_ocl

KinFu: try to fix OCL fails (#2106)

* some TODOs closed
* temporary turn on NaNs patching
* reverted
* temporary debug code added
* more debug code
* inf check; more debug code
* debug code in kernel
* debug code, try to disable some optimizations
* -cl-fast-relaxed-math option removed, debug code removed

Merge pull request #2106 from savuor:kinfu_fix_ocl
KinFu: try to fix OCL fails (#2106)

* some TODOs closed
* temporary turn on NaNs patching
* reverted
* temporary debug code added
* more debug code
* inf check; more debug code
* debug code in kernel
* debug code, try to disable some optimizations
* -cl-fast-relaxed-math option removed, debug code removed
5eaa25c9 · Rostislav Vasilikhin · Alexander Alekhin · 8049208c · 5eaa25c9 · 5eaa25c9
Commit 5eaa25c9 authored May 04, 2019 by Rostislav Vasilikhin Committed by Alexander Alekhin May 04, 2019
Showing with 17 additions and 21 deletions

fast_icp.cpp modules/rgbd/src/fast_icp.cpp +2 -3

kinfu_frame.cpp modules/rgbd/src/kinfu_frame.cpp +5 -8

icp.cl modules/rgbd/src/opencl/icp.cl +4 -2

tsdf.cpp modules/rgbd/src/tsdf.cpp +6 -8

No files found.
--- a/modules/rgbd/src/fast_icp.cpp
+++ b/modules/rgbd/src/fast_icp.cpp
@@ -545,7 +545,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
    cv::String errorStr;
    ocl::ProgramSource source = ocl::rgbd::icp_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create("getAb", source, options, &errorStr);
@@ -594,8 +594,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
           fxy.val, cxy.val,
           distanceThreshold*distanceThreshold,
           cos(angleThreshold),
-           //TODO: replace by KernelArg::Local(lsz)
+           ocl::KernelArg::Local(lsz),
-           ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
           ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu)
           );

--- a/modules/rgbd/src/kinfu_frame.cpp
+++ b/modules/rgbd/src/kinfu_frame.cpp
@@ -335,7 +335,7 @@ bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& dep
    cv::String errorStr;
    cv::String name = "computePointsNormals";
    ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -368,7 +368,7 @@ bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma)
    cv::String errorStr;
    cv::String name = "pyrDownBilateral";
    ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -402,7 +402,7 @@ bool customBilateralFilterGpu(const UMat src /* udepth */, UMat& dst /* smooth *
    cv::String errorStr;
    cv::String name = "customBilateral";
    ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -431,7 +431,7 @@ bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndow
    cv::String errorStr;
    cv::String name = "pyrDownPointsNormals";
    ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -462,7 +462,7 @@ static bool ocl_renderPointsNormals(const UMat points, const UMat normals,
    cv::String errorStr;
    cv::String name = "render";
    ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -616,9 +616,6 @@ void makeFrameFromDepth(InputArray _depth,
    // looks like OpenCV's bilateral filter works the same as KinFu's
    Depth smooth;
-    //TODO: remove it when OpenCV's bilateral works properly
-    patchNaNs(depth);
    bilateralFilter(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
    // depth truncation is not used by default

--- a/modules/rgbd/src/opencl/icp.cl
+++ b/modules/rgbd/src/opencl/icp.cl
@@ -56,7 +56,8 @@ inline void calcAb7(__global const char * oldPointsptr,
    float3 newP = newPtsRow[x].xyz;
    float3 newN = newNrmRow[x].xyz;
-    if(any(isnan(newP)) || any(isnan(newN)))
+    if( any(isnan(newP)) || any(isnan(newN)) ||
+        any(isinf(newP)) || any(isinf(newN)) )
        return;
    //transform to old coord system
@@ -114,7 +115,8 @@ inline void calcAb7(__global const char * oldPointsptr,
    float3 n1 = mix(n10, n11, t.x);
    oldN = mix(n0, n1, t.y);
-    if(any(isnan(oldP)) || any(isnan(oldN)))
+    if( any(isnan(oldP)) || any(isnan(oldN)) ||
+        any(isinf(oldP)) || any(isinf(oldN)) )
        return;
    //filter by distance

--- a/modules/rgbd/src/tsdf.cpp
+++ b/modules/rgbd/src/tsdf.cpp
@@ -1204,7 +1204,7 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor,
    cv::String errorStr;
    cv::String name = "integrate";
    ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -1250,7 +1250,7 @@ void TSDFVolumeGPU::raycast(cv::Affine3f cameraPose, Intr intrinsics, Size frame
    cv::String errorStr;
    cv::String name = "raycast";
    ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
-    cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+    cv::String options = "-cl-mad-enable";
    ocl::Kernel k;
    k.create(name.c_str(), source, options, &errorStr);
@@ -1318,7 +1318,7 @@ void TSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const
        cv::String errorStr;
        cv::String name = "getNormals";
        ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
-        cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+        cv::String options = "-cl-mad-enable";
        ocl::Kernel k;
        k.create(name.c_str(), source, options, &errorStr);
@@ -1365,7 +1365,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
        cv::String errorStr;
        ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
-        cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
+        cv::String options = "-cl-mad-enable";
        kscan.create("scanSize", source, options, &errorStr);
@@ -1409,8 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
                   ocl::KernelArg::PtrReadOnly(volPoseGpu),
                   voxelSize,
                   voxelSizeInv,
-                   //TODO: replace by KernelArg::Local(lsz)
+                   ocl::KernelArg::Local(lsz),
-                   ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
                   ocl::KernelArg::WriteOnlyNoSize(groupedSum));
        if(!kscan.run(3, globalSize, localSize, true))
@@ -1455,8 +1454,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
                   voxelSize,
                   voxelSizeInv,
                   ((int)needNormals),
-                   //TODO: replace by ::Local(lsz)
+                   ocl::KernelArg::Local(lsz),
-                   ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
                   ocl::KernelArg::PtrReadWrite(atomicCtr),
                   ocl::KernelArg::ReadOnlyNoSize(groupedSum),
                   ocl::KernelArg::WriteOnlyNoSize(pts),