Commit bff0fad6 authored by Vladislav Vinogradov

gpu TVL1 Optical Flow optimization:

Do not calculate the sum of the error on every iteration;
instead, sum the error at most every second iteration, and even less often
if the previous error sum is still far from the threshold.
parent 525b6eca
......@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
......@@ -211,7 +211,7 @@ namespace tvl1flow
const PtrStepf grad, const PtrStepf rho_c,
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
PtrStepf u1, PtrStepf u2, PtrStepf error,
const float l_t, const float theta)
const float l_t, const float theta, const bool calcError)
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -265,21 +265,24 @@ namespace tvl1flow
u1(y, x) = u1NewVal;
u2(y, x) = u2NewVal;
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
if (calcError)
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta)
float l_t, float theta, bool calcError)
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
......@@ -173,7 +173,7 @@ namespace tvl1flow
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta);
float l_t, float theta, bool calcError);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
......@@ -218,12 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
double error = numeric_limits<double>::max();
double prevError = 0.0;
for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta));
// some tweaks to make sum operation less frequently
bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);
if (epsilon > 0)
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calcError);
if (calcError)
error = gpu::sum(diff, norm_buf)[0];
prevError = error;
error = numeric_limits<double>::max();
prevError -= scaledEpsilon;
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment