Merge pull request #1345 from pengx17:2.4_tvl1ocl_opt

bd7930ad · Roman Donchenko · OpenCV Buildbot · 7e4c0ac4 · 040b3b82 · bd7930ad
Commit bd7930ad authored Sep 02, 2013 by Roman Donchenko Committed by OpenCV Buildbot Sep 02, 2013
Hide whitespace changes
Inline Side-by-side

Showing with 26 additions and 10 deletions

tvl1flow.cl modules/ocl/src/opencl/tvl1flow.cl +8 -4

tvl1flow.cpp modules/ocl/src/tvl1flow.cpp +18 -6

No files found.
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
    int u1_offset_x,
    int u1_offset_y,
    int u2_offset_x,
-    int u2_offset_y)
+    int u2_offset_y,
+    char calc_error)
 {
    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
        u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
        u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
-        const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+        if(calc_error)
-        const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+        {
-        error[y * I1wx_step + x] = n1 + n2;
+            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            error[y * I1wx_step + x] = n1 + n2;
+        }
    }
 }
--- a/modules/ocl/src/tvl1flow.cpp
+++ b/modules/ocl/src/tvl1flow.cpp
@@ -172,7 +172,7 @@ namespace ocl_tvl1flow
    void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
        oclMat &rho_c, oclMat &p11, oclMat &p12,
        oclMat &p21, oclMat &p22, oclMat &u1,
-        oclMat &u2, oclMat &error, float l_t, float theta);
+        oclMat &u2, oclMat &error, float l_t, float theta, char calc_error);
    void estimateDualVariables(oclMat &u1, oclMat &u2,
        oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut);
@@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl
        warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
        double error = numeric_limits<double>::max();
+        double prev_error = 0;
        for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
        {
+            // some tweaks to make sum operation less frequently
+            char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
            estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
-                u1, u2, diff, l_t, static_cast<float>(theta));
+                      u1, u2, diff, l_t, static_cast<float>(theta), calc_error);
+            if(calc_error)
-            error = ocl::sum(diff)[0];
+            {
+                error = ocl::sum(diff)[0];
+                prev_error = error;
+            }
+            else
+            {
+                error = numeric_limits<double>::max();
+                prev_error -= scaledEpsilon;
+            }
            estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
        }
    }
 }
 void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage()
@@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc
 void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
    oclMat &rho_c, oclMat &p11, oclMat &p12,
    oclMat &p21, oclMat &p22, oclMat &u1,
-    oclMat &u2, oclMat &error, float l_t, float theta)
+    oclMat &u2, oclMat &error, float l_t, float theta, char calc_error)
 {
    Context* clCxt = I1wx.clCxt;
@@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad,
    args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x));
    args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y));
+    args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error));
    openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1);
 }