Commit 5eaa25c9 authored by Rostislav Vasilikhin, committed by Alexander Alekhin

Merge pull request #2106 from savuor:kinfu_fix_ocl

KinFu: try to fix OCL fails (#2106)

* some TODOs closed

* temporary turn on NaNs patching

* reverted

* temporary debug code added

* more debug code

* inf check; more debug code

* debug code in kernel

* debug code, try to disable some optimizations

* -cl-fast-relaxed-math option removed, debug code removed
parent 8049208c
...@@ -545,7 +545,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne ...@@ -545,7 +545,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
cv::String errorStr; cv::String errorStr;
ocl::ProgramSource source = ocl::rgbd::icp_oclsrc; ocl::ProgramSource source = ocl::rgbd::icp_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create("getAb", source, options, &errorStr); k.create("getAb", source, options, &errorStr);
...@@ -594,8 +594,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne ...@@ -594,8 +594,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
fxy.val, cxy.val, fxy.val, cxy.val,
distanceThreshold*distanceThreshold, distanceThreshold*distanceThreshold,
cos(angleThreshold), cos(angleThreshold),
//TODO: replace by KernelArg::Local(lsz) ocl::KernelArg::Local(lsz),
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu) ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu)
); );
......
...@@ -335,7 +335,7 @@ bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& dep ...@@ -335,7 +335,7 @@ bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& dep
cv::String errorStr; cv::String errorStr;
cv::String name = "computePointsNormals"; cv::String name = "computePointsNormals";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc; ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -368,7 +368,7 @@ bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma) ...@@ -368,7 +368,7 @@ bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma)
cv::String errorStr; cv::String errorStr;
cv::String name = "pyrDownBilateral"; cv::String name = "pyrDownBilateral";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc; ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -402,7 +402,7 @@ bool customBilateralFilterGpu(const UMat src /* udepth */, UMat& dst /* smooth * ...@@ -402,7 +402,7 @@ bool customBilateralFilterGpu(const UMat src /* udepth */, UMat& dst /* smooth *
cv::String errorStr; cv::String errorStr;
cv::String name = "customBilateral"; cv::String name = "customBilateral";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc; ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -431,7 +431,7 @@ bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndow ...@@ -431,7 +431,7 @@ bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndow
cv::String errorStr; cv::String errorStr;
cv::String name = "pyrDownPointsNormals"; cv::String name = "pyrDownPointsNormals";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc; ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -462,7 +462,7 @@ static bool ocl_renderPointsNormals(const UMat points, const UMat normals, ...@@ -462,7 +462,7 @@ static bool ocl_renderPointsNormals(const UMat points, const UMat normals,
cv::String errorStr; cv::String errorStr;
cv::String name = "render"; cv::String name = "render";
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc; ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -616,9 +616,6 @@ void makeFrameFromDepth(InputArray _depth, ...@@ -616,9 +616,6 @@ void makeFrameFromDepth(InputArray _depth,
// looks like OpenCV's bilateral filter works the same as KinFu's // looks like OpenCV's bilateral filter works the same as KinFu's
Depth smooth; Depth smooth;
//TODO: remove it when OpenCV's bilateral works properly
patchNaNs(depth);
bilateralFilter(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial); bilateralFilter(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
// depth truncation is not used by default // depth truncation is not used by default
......
...@@ -56,7 +56,8 @@ inline void calcAb7(__global const char * oldPointsptr, ...@@ -56,7 +56,8 @@ inline void calcAb7(__global const char * oldPointsptr,
float3 newP = newPtsRow[x].xyz; float3 newP = newPtsRow[x].xyz;
float3 newN = newNrmRow[x].xyz; float3 newN = newNrmRow[x].xyz;
if(any(isnan(newP)) || any(isnan(newN))) if( any(isnan(newP)) || any(isnan(newN)) ||
any(isinf(newP)) || any(isinf(newN)) )
return; return;
//transform to old coord system //transform to old coord system
...@@ -114,7 +115,8 @@ inline void calcAb7(__global const char * oldPointsptr, ...@@ -114,7 +115,8 @@ inline void calcAb7(__global const char * oldPointsptr,
float3 n1 = mix(n10, n11, t.x); float3 n1 = mix(n10, n11, t.x);
oldN = mix(n0, n1, t.y); oldN = mix(n0, n1, t.y);
if(any(isnan(oldP)) || any(isnan(oldN))) if( any(isnan(oldP)) || any(isnan(oldN)) ||
any(isinf(oldP)) || any(isinf(oldN)) )
return; return;
//filter by distance //filter by distance
......
...@@ -1204,7 +1204,7 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, ...@@ -1204,7 +1204,7 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor,
cv::String errorStr; cv::String errorStr;
cv::String name = "integrate"; cv::String name = "integrate";
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc; ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -1250,7 +1250,7 @@ void TSDFVolumeGPU::raycast(cv::Affine3f cameraPose, Intr intrinsics, Size frame ...@@ -1250,7 +1250,7 @@ void TSDFVolumeGPU::raycast(cv::Affine3f cameraPose, Intr intrinsics, Size frame
cv::String errorStr; cv::String errorStr;
cv::String name = "raycast"; cv::String name = "raycast";
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc; ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -1318,7 +1318,7 @@ void TSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const ...@@ -1318,7 +1318,7 @@ void TSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const
cv::String errorStr; cv::String errorStr;
cv::String name = "getNormals"; cv::String name = "getNormals";
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc; ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
ocl::Kernel k; ocl::Kernel k;
k.create(name.c_str(), source, options, &errorStr); k.create(name.c_str(), source, options, &errorStr);
...@@ -1365,7 +1365,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) ...@@ -1365,7 +1365,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
cv::String errorStr; cv::String errorStr;
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc; ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable"; cv::String options = "-cl-mad-enable";
kscan.create("scanSize", source, options, &errorStr); kscan.create("scanSize", source, options, &errorStr);
...@@ -1409,8 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) ...@@ -1409,8 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
ocl::KernelArg::PtrReadOnly(volPoseGpu), ocl::KernelArg::PtrReadOnly(volPoseGpu),
voxelSize, voxelSize,
voxelSizeInv, voxelSizeInv,
//TODO: replace by KernelArg::Local(lsz) ocl::KernelArg::Local(lsz),
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
ocl::KernelArg::WriteOnlyNoSize(groupedSum)); ocl::KernelArg::WriteOnlyNoSize(groupedSum));
if(!kscan.run(3, globalSize, localSize, true)) if(!kscan.run(3, globalSize, localSize, true))
...@@ -1455,8 +1454,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) ...@@ -1455,8 +1454,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
voxelSize, voxelSize,
voxelSizeInv, voxelSizeInv,
((int)needNormals), ((int)needNormals),
//TODO: replace by ::Local(lsz) ocl::KernelArg::Local(lsz),
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
ocl::KernelArg::PtrReadWrite(atomicCtr), ocl::KernelArg::PtrReadWrite(atomicCtr),
ocl::KernelArg::ReadOnlyNoSize(groupedSum), ocl::KernelArg::ReadOnlyNoSize(groupedSum),
ocl::KernelArg::WriteOnlyNoSize(pts), ocl::KernelArg::WriteOnlyNoSize(pts),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment