Commit bff0fad6 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

gpu TVL1 Optical Flow optimization:

do not calculate sum of error in every round of iteration;
instead the error will be summed every 2nd times or more, 
if the previous sum of error is too far away from threshold.
parent 525b6eca
...@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1, ...@@ -427,8 +427,8 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1,
TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v); TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
GPU_SANITY_CHECK(u, 1e-2); GPU_SANITY_CHECK(u, 1e-1);
GPU_SANITY_CHECK(v, 1e-2); GPU_SANITY_CHECK(v, 1e-1);
} }
else else
{ {
......
...@@ -211,7 +211,7 @@ namespace tvl1flow ...@@ -211,7 +211,7 @@ namespace tvl1flow
const PtrStepf grad, const PtrStepf rho_c, const PtrStepf grad, const PtrStepf rho_c,
const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22,
PtrStepf u1, PtrStepf u2, PtrStepf error, PtrStepf u1, PtrStepf u2, PtrStepf error,
const float l_t, const float theta) const float l_t, const float theta, const bool calcError)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
...@@ -265,21 +265,24 @@ namespace tvl1flow ...@@ -265,21 +265,24 @@ namespace tvl1flow
u1(y, x) = u1NewVal; u1(y, x) = u1NewVal;
u2(y, x) = u2NewVal; u2(y, x) = u2NewVal;
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); if (calcError)
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); {
error(y, x) = n1 + n2; const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
error(y, x) = n1 + n2;
}
} }
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta) float l_t, float theta, bool calcError)
{ {
const dim3 block(32, 8); const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
......
...@@ -173,7 +173,7 @@ namespace tvl1flow ...@@ -173,7 +173,7 @@ namespace tvl1flow
PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error,
float l_t, float theta); float l_t, float theta, bool calcError);
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
} }
...@@ -218,12 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu ...@@ -218,12 +218,24 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c);
double error = numeric_limits<double>::max(); double error = numeric_limits<double>::max();
double prevError = 0.0;
for (int n = 0; error > scaledEpsilon && n < iterations; ++n) for (int n = 0; error > scaledEpsilon && n < iterations; ++n)
{ {
estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta)); // some tweaks to make sum operation less frequently
bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon);
if (epsilon > 0) estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calcError);
if (calcError)
{
error = gpu::sum(diff, norm_buf)[0]; error = gpu::sum(diff, norm_buf)[0];
prevError = error;
}
else
{
error = numeric_limits<double>::max();
prevError -= scaledEpsilon;
}
estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment