/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/*
//
// This implementation is based on Javier Sánchez Pérez <jsanchez@dis.ulpgc.es> implementation.
// Original BSD license:
//
// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
//   list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
*/

#include "precomp.hpp"
#include "opencl_kernels_video.hpp"

#include <limits>
#include <iomanip>
#include <iostream>
#include "opencv2/core/opencl/ocl_defs.hpp"



using namespace cv;

namespace {

class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow
{
public:

    OpticalFlowDual_TVL1(double tau_, double lambda_, double theta_, int nscales_, int warps_,
                         double epsilon_, int innerIterations_, int outerIterations_,
                         double scaleStep_, double gamma_, int medianFiltering_,
                         bool useInitialFlow_) :
        tau(tau_), lambda(lambda_), theta(theta_), gamma(gamma_), nscales(nscales_),
        warps(warps_), epsilon(epsilon_), innerIterations(innerIterations_),
        outerIterations(outerIterations_), useInitialFlow(useInitialFlow_),
        scaleStep(scaleStep_), medianFiltering(medianFiltering_)
    {
    }
    OpticalFlowDual_TVL1();

    void calc(InputArray I0, InputArray I1, InputOutputArray flow);
    void collectGarbage();

    CV_IMPL_PROPERTY(double, Tau, tau)
    CV_IMPL_PROPERTY(double, Lambda, lambda)
    CV_IMPL_PROPERTY(double, Theta, theta)
    CV_IMPL_PROPERTY(double, Gamma, gamma)
    CV_IMPL_PROPERTY(int, ScalesNumber, nscales)
    CV_IMPL_PROPERTY(int, WarpingsNumber, warps)
    CV_IMPL_PROPERTY(double, Epsilon, epsilon)
    CV_IMPL_PROPERTY(int, InnerIterations, innerIterations)
    CV_IMPL_PROPERTY(int, OuterIterations, outerIterations)
    CV_IMPL_PROPERTY(bool, UseInitialFlow, useInitialFlow)
    CV_IMPL_PROPERTY(double, ScaleStep, scaleStep)
    CV_IMPL_PROPERTY(int, MedianFiltering, medianFiltering)

protected:
    double tau;
    double lambda;
    double theta;
    double gamma;
    int nscales;
    int warps;
    double epsilon;
    int innerIterations;
    int outerIterations;
    bool useInitialFlow;
    double scaleStep;
    int medianFiltering;

private:
    void procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3);

#ifdef HAVE_OPENCL
    bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2);

    bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow);
#endif
    struct dataMat
    {
        std::vector<Mat_<float> > I0s;
        std::vector<Mat_<float> > I1s;
        std::vector<Mat_<float> > u1s;
        std::vector<Mat_<float> > u2s;
        std::vector<Mat_<float> > u3s;

        Mat_<float> I1x_buf;
        Mat_<float> I1y_buf;

        Mat_<float> flowMap1_buf;
        Mat_<float> flowMap2_buf;

        Mat_<float> I1w_buf;
        Mat_<float> I1wx_buf;
        Mat_<float> I1wy_buf;

        Mat_<float> grad_buf;
        Mat_<float> rho_c_buf;

        Mat_<float> v1_buf;
        Mat_<float> v2_buf;
        Mat_<float> v3_buf;

        Mat_<float> p11_buf;
        Mat_<float> p12_buf;
        Mat_<float> p21_buf;
        Mat_<float> p22_buf;
        Mat_<float> p31_buf;
        Mat_<float> p32_buf;

        Mat_<float> div_p1_buf;
        Mat_<float> div_p2_buf;
        Mat_<float> div_p3_buf;

        Mat_<float> u1x_buf;
        Mat_<float> u1y_buf;
        Mat_<float> u2x_buf;
        Mat_<float> u2y_buf;
        Mat_<float> u3x_buf;
        Mat_<float> u3y_buf;
    } dm;

#ifdef HAVE_OPENCL
    struct dataUMat
    {
        std::vector<UMat> I0s;
        std::vector<UMat> I1s;
        std::vector<UMat> u1s;
        std::vector<UMat> u2s;

        UMat I1x_buf;
        UMat I1y_buf;

        UMat I1w_buf;
        UMat I1wx_buf;
        UMat I1wy_buf;

        UMat grad_buf;
        UMat rho_c_buf;

        UMat p11_buf;
        UMat p12_buf;
        UMat p21_buf;
        UMat p22_buf;

        UMat diff_buf;
        UMat norm_buf;
    } dum;
#endif
};

#ifdef HAVE_OPENCL
namespace cv_ocl_tvl1flow
{
    bool centeredGradient(const UMat &src, UMat &dx, UMat &dy);

    bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
        UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
        UMat &grad, UMat &rho);

    bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
        UMat &rho_c, UMat &p11, UMat &p12,
        UMat &p21, UMat &p22, UMat &u1,
        UMat &u2, UMat &error, float l_t, float theta, char calc_error);

    bool estimateDualVariables(UMat &u1, UMat &u2,
        UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut);
}

bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy)
{
    size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows };

    ocl::Kernel kernel;
    if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat
    idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col
    idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows
    idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy
    idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step
    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y,
    UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy,
    UMat &grad, UMat &rho)
{
    size_t globalsize[2] = { (size_t)I0.cols, (size_t)I0.rows };

    ocl::Kernel kernel;
    if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat
    int I0_step = (int)(I0.step / I0.elemSize());
    idxArg = kernel.set(idxArg, I0_step);//I0_step
    idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col
    idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row
    ocl::Image2D imageI1(I1);
    ocl::Image2D imageI1x(I1x);
    ocl::Image2D imageI1y(I1y);
    idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1
    idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x
    idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho
    idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step
    int u1_offset_x = (int)((u1.offset) % (u1.step));
    u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y
    int u2_offset_x = (int)((u2.offset) % (u2.step));
    u2_offset_x = (int) (u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y
    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad,
    UMat &rho_c, UMat &p11, UMat &p12,
    UMat &p21, UMat &p22, UMat &u1,
    UMat &u2, UMat &error, float l_t, float theta, char calc_error)
{
    size_t globalsize[2] = { (size_t)I1wx.cols, (size_t)I1wx.rows };

    ocl::Kernel kernel;
    if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx
    idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col
    idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row
    idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1
    idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error
    idxArg = kernel.set(idxArg, (float)l_t); //float l_t
    idxArg = kernel.set(idxArg, (float)theta); //float theta
    idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step
    int u1_offset_x = (int)(u1.offset % u1.step);
    u1_offset_x = (int) (u1_offset_x  / u1.elemSize());
    idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y
    int u2_offset_x = (int)(u2.offset % u2.step);
    u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y
    idxArg = kernel.set(idxArg, (char)calc_error);    //char calc_error

    return kernel.run(2, globalsize, NULL, false);
}

bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2,
    UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut)
{
    size_t globalsize[2] = { (size_t)u1.cols, (size_t)u1.rows };

    ocl::Kernel kernel;
    if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, ""))
        return false;

    int idxArg = 0;
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1
    idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col
    idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row
    idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11
    idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21
    idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22
    idxArg = kernel.set(idxArg, (float)(taut));    //float taut
    idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step
    int u1_offset_x = (int)(u1.offset % u1.step);
    u1_offset_x = (int)(u1_offset_x / u1.elemSize());
    idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x
    idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y
    int u2_offset_x = (int)(u2.offset % u2.step);
    u2_offset_x = (int)(u2_offset_x / u2.elemSize());
    idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x
    idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y

    return kernel.run(2, globalsize, NULL, false);

}
#endif

OpticalFlowDual_TVL1::OpticalFlowDual_TVL1()
{
    tau            = 0.25;
    lambda         = 0.15;
    theta          = 0.3;
    nscales        = 5;
    warps          = 5;
    epsilon        = 0.01;
    gamma          = 0.;
    innerIterations = 30;
    outerIterations = 10;
    useInitialFlow = false;
    medianFiltering = 5;
    scaleStep      = 0.8;
}

void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    CV_INSTRUMENT_REGION()

    CV_OCL_RUN(_flow.isUMat() &&
               ocl::Image2D::isFormatSupported(CV_32F, 1, false),
               calc_ocl(_I0, _I1, _flow))

    Mat I0 = _I0.getMat();
    Mat I1 = _I1.getMat();

    CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
    CV_Assert( I0.size() == I1.size() );
    CV_Assert( I0.type() == I1.type() );
    CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) );
    CV_Assert( nscales > 0 );
    bool use_gamma = gamma != 0;
    // allocate memory for the pyramid structure
    dm.I0s.resize(nscales);
    dm.I1s.resize(nscales);
    dm.u1s.resize(nscales);
    dm.u2s.resize(nscales);
    dm.u3s.resize(nscales);

    I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0);
    I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0);

    dm.u1s[0].create(I0.size());
    dm.u2s[0].create(I0.size());
    if (use_gamma) dm.u3s[0].create(I0.size());

    if (useInitialFlow)
    {
        Mat_<float> mv[] = { dm.u1s[0], dm.u2s[0] };
        split(_flow.getMat(), mv);
    }

    dm.I1x_buf.create(I0.size());
    dm.I1y_buf.create(I0.size());

    dm.flowMap1_buf.create(I0.size());
    dm.flowMap2_buf.create(I0.size());

    dm.I1w_buf.create(I0.size());
    dm.I1wx_buf.create(I0.size());
    dm.I1wy_buf.create(I0.size());

    dm.grad_buf.create(I0.size());
    dm.rho_c_buf.create(I0.size());

    dm.v1_buf.create(I0.size());
    dm.v2_buf.create(I0.size());
    dm.v3_buf.create(I0.size());

    dm.p11_buf.create(I0.size());
    dm.p12_buf.create(I0.size());
    dm.p21_buf.create(I0.size());
    dm.p22_buf.create(I0.size());
    dm.p31_buf.create(I0.size());
    dm.p32_buf.create(I0.size());

    dm.div_p1_buf.create(I0.size());
    dm.div_p2_buf.create(I0.size());
    dm.div_p3_buf.create(I0.size());

    dm.u1x_buf.create(I0.size());
    dm.u1y_buf.create(I0.size());
    dm.u2x_buf.create(I0.size());
    dm.u2y_buf.create(I0.size());
    dm.u3x_buf.create(I0.size());
    dm.u3y_buf.create(I0.size());

    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep);
        resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep);

        if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }

        if (useInitialFlow)
        {
            resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep);
            resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep);

            multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]);
            multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]);
        }
        else
        {
            dm.u1s[s].create(dm.I0s[s].size());
            dm.u2s[s].create(dm.I0s[s].size());
        }
        if (use_gamma) dm.u3s[s].create(dm.I0s[s].size());
    }
    if (!useInitialFlow)
    {
        dm.u1s[nscales - 1].setTo(Scalar::all(0));
        dm.u2s[nscales - 1].setTo(Scalar::all(0));
    }
    if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0));
    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]);

        // if this was the last scale, finish now
        if (s == 0)
            break;

        // otherwise, upsample the optical flow

        // zoom the optical flow for the next finer scale
        resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size());
        resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size());
        if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size());

        // scale the optical flow with the appropriate zoom factor (don't scale u3!)
        multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]);
        multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]);
    }

    Mat uxy[] = { dm.u1s[0], dm.u2s[0] };
    merge(uxy, 2, _flow);
}

#ifdef HAVE_OPENCL
bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow)
{
    UMat I0 = _I0.getUMat();
    UMat I1 = _I1.getUMat();

    CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1);
    CV_Assert(I0.size() == I1.size());
    CV_Assert(I0.type() == I1.type());
    CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2));
    CV_Assert(nscales > 0);

    // allocate memory for the pyramid structure
    dum.I0s.resize(nscales);
    dum.I1s.resize(nscales);
    dum.u1s.resize(nscales);
    dum.u2s.resize(nscales);
    //I0s_step == I1s_step
    double alpha = I0.depth() == CV_8U ? 1.0 : 255.0;

    I0.convertTo(dum.I0s[0], CV_32F, alpha);
    I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0);

    dum.u1s[0].create(I0.size(), CV_32FC1);
    dum.u2s[0].create(I0.size(), CV_32FC1);

    if (useInitialFlow)
    {
        std::vector<UMat> umv;
        umv.push_back(dum.u1s[0]);
        umv.push_back(dum.u2s[0]);
        cv::split(_flow,umv);
    }

    dum.I1x_buf.create(I0.size(), CV_32FC1);
    dum.I1y_buf.create(I0.size(), CV_32FC1);

    dum.I1w_buf.create(I0.size(), CV_32FC1);
    dum.I1wx_buf.create(I0.size(), CV_32FC1);
    dum.I1wy_buf.create(I0.size(), CV_32FC1);

    dum.grad_buf.create(I0.size(), CV_32FC1);
    dum.rho_c_buf.create(I0.size(), CV_32FC1);

    dum.p11_buf.create(I0.size(), CV_32FC1);
    dum.p12_buf.create(I0.size(), CV_32FC1);
    dum.p21_buf.create(I0.size(), CV_32FC1);
    dum.p22_buf.create(I0.size(), CV_32FC1);

    dum.diff_buf.create(I0.size(), CV_32FC1);

    // create the scales
    for (int s = 1; s < nscales; ++s)
    {
        resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep);
        resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep);

        if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16)
        {
            nscales = s;
            break;
        }

        if (useInitialFlow)
        {
            resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep);
            resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep);

            //scale by scale factor
            multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]);
            multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]);
        }
    }

    // pyramidal structure for computing the optical flow
    for (int s = nscales - 1; s >= 0; --s)
    {
        // compute the optical flow at the current scale
        if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s]))
            return false;

        // if this was the last scale, finish now
        if (s == 0)
            break;

        // zoom the optical flow for the next finer scale
        resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size());
        resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size());

        // scale the optical flow with the appropriate zoom factor
        multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]);
        multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]);
    }

    std::vector<UMat> uxy;
    uxy.push_back(dum.u1s[0]);
    uxy.push_back(dum.u2s[0]);
    merge(uxy, _flow);
    return true;
}
#endif

////////////////////////////////////////////////////////////
// buildFlowMap
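// buildFlowMap converts the relative flow (u1, u2) into the absolute sampling
// maps (x + u1, y + u2) expected by cv::remap().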

struct BuildFlowMapBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> u1;
    Mat_<float> u2;
    mutable Mat_<float> map1;
    mutable Mat_<float> map2;
};

void BuildFlowMapBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];

        float* map1Row = map1[y];
        float* map2Row = map2[y];

        for (int x = 0; x < u1.cols; ++x)
        {
            map1Row[x] = x + u1Row[x];
            map2Row[x] = y + u2Row[x];
        }
    }
}

void buildFlowMap(const Mat_<float>& u1, const Mat_<float>& u2, Mat_<float>& map1, Mat_<float>& map2)
{
    CV_DbgAssert( u2.size() == u1.size() );
    CV_DbgAssert( map1.size() == u1.size() );
    CV_DbgAssert( map2.size() == u1.size() );

    BuildFlowMapBody body;

    body.u1 = u1;
    body.u2 = u2;
    body.map1 = map1;
    body.map2 = map2;

    parallel_for_(Range(0, u1.rows), body);
}

////////////////////////////////////////////////////////////
// centeredGradient

struct CenteredGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> src;
    mutable Mat_<float> dx;
    mutable Mat_<float> dy;
};

void CenteredGradientBody::operator() (const Range& range) const
{
    const int last_col = src.cols - 1;

    for (int y = range.start; y < range.end; ++y)
    {
        const float* srcPrevRow = src[y - 1];
        const float* srcCurRow = src[y];
        const float* srcNextRow = src[y + 1];

        float* dxRow = dx[y];
        float* dyRow = dy[y];

        for (int x = 1; x < last_col; ++x)
        {
            dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]);
            dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]);
        }
    }
}

void centeredGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
    CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    CV_DbgAssert( dx.size() == src.size() );
    CV_DbgAssert( dy.size() == src.size() );

    const int last_row = src.rows - 1;
    const int last_col = src.cols - 1;

    // compute the gradient on the center body of the image
    {
        CenteredGradientBody body;

        body.src = src;
        body.dx = dx;
        body.dy = dy;

        parallel_for_(Range(1, last_row), body);
    }

    // compute the gradient on the first and last rows
    for (int x = 1; x < last_col; ++x)
    {
        dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1));
        dy(0, x) = 0.5f * (src(1, x) - src(0, x));

        dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1));
        dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x));
    }

    // compute the gradient on the first and last columns
    for (int y = 1; y < last_row; ++y)
    {
        dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0));
        dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0));

        dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1));
        dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col));
    }

    // compute the gradient at the four corners
    dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0));
    dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0));

    dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1));
    dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col));

    dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0));
    dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0));

    dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1));
    dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col));
}

////////////////////////////////////////////////////////////
// forwardGradient

struct ForwardGradientBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> src;
    mutable Mat_<float> dx;
    mutable Mat_<float> dy;
};

void ForwardGradientBody::operator() (const Range& range) const
{
    const int last_col = src.cols - 1;

    for (int y = range.start; y < range.end; ++y)
    {
        const float* srcCurRow = src[y];
        const float* srcNextRow = src[y + 1];

        float* dxRow = dx[y];
        float* dyRow = dy[y];

        for (int x = 0; x < last_col; ++x)
        {
            dxRow[x] = srcCurRow[x + 1] - srcCurRow[x];
            dyRow[x] = srcNextRow[x] - srcCurRow[x];
        }
    }
}

void forwardGradient(const Mat_<float>& src, Mat_<float>& dx, Mat_<float>& dy)
{
    CV_DbgAssert( src.rows > 2 && src.cols > 2 );
    CV_DbgAssert( dx.size() == src.size() );
    CV_DbgAssert( dy.size() == src.size() );

    const int last_row = src.rows - 1;
    const int last_col = src.cols - 1;

    // compute the gradient on the central body of the image
    {
        ForwardGradientBody body;

        body.src = src;
        body.dx = dx;
        body.dy = dy;

        parallel_for_(Range(0, last_row), body);
    }

    // compute the gradient on the last row
    for (int x = 0; x < last_col; ++x)
    {
        dx(last_row, x) = src(last_row, x + 1) - src(last_row, x);
        dy(last_row, x) = 0.0f;
    }

    // compute the gradient on the last column
    for (int y = 0; y < last_row; ++y)
    {
        dx(y, last_col) = 0.0f;
        dy(y, last_col) = src(y + 1, last_col) - src(y, last_col);
    }

    dx(last_row, last_col) = 0.0f;
    dy(last_row, last_col) = 0.0f;
}

////////////////////////////////////////////////////////////
// divergence
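// divergence computes the backward-difference divergence
//   div(v)(x, y) = v1(x, y) - v1(x - 1, y) + v2(x, y) - v2(x, y - 1),
// using one-sided differences on the first row and first column.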

struct DivergenceBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> v1;
    Mat_<float> v2;
    mutable Mat_<float> div;
};

void DivergenceBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* v1Row = v1[y];
        const float* v2PrevRow = v2[y - 1];
        const float* v2CurRow = v2[y];

        float* divRow = div[y];

        for(int x = 1; x < v1.cols; ++x)
        {
            const float v1x = v1Row[x] - v1Row[x - 1];
            const float v2y = v2CurRow[x] - v2PrevRow[x];

            divRow[x] = v1x + v2y;
        }
    }
}

void divergence(const Mat_<float>& v1, const Mat_<float>& v2, Mat_<float>& div)
{
    CV_DbgAssert( v1.rows > 2 && v1.cols > 2 );
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div.size() == v1.size() );

    {
        DivergenceBody body;

        body.v1 = v1;
        body.v2 = v2;
        body.div = div;

        parallel_for_(Range(1, v1.rows), body);
    }

    // compute the divergence on the first row
    for(int x = 1; x < v1.cols; ++x)
        div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x);

    // compute the divergence on the first column
    for (int y = 1; y < v1.rows; ++y)
        div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0);

    div(0, 0) = v1(0, 0) + v2(0, 0);
}

////////////////////////////////////////////////////////////
// calcGradRho

struct CalcGradRhoBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> I0;
    Mat_<float> I1w;
    Mat_<float> I1wx;
    Mat_<float> I1wy;
    Mat_<float> u1;
    Mat_<float> u2;
    mutable Mat_<float> grad;
    mutable Mat_<float> rho_c;
};

void CalcGradRhoBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* I0Row = I0[y];
        const float* I1wRow = I1w[y];
        const float* I1wxRow = I1wx[y];
        const float* I1wyRow = I1wy[y];
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];

        float* gradRow = grad[y];
        float* rhoRow = rho_c[y];

        for (int x = 0; x < I0.cols; ++x)
        {
            const float Ix2 = I1wxRow[x] * I1wxRow[x];
            const float Iy2 = I1wyRow[x] * I1wyRow[x];

            // store the |Grad(I1)|^2
            gradRow[x] = Ix2 + Iy2;

            // compute the constant part of the rho function
            rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]);
        }
    }
}

void calcGradRho(const Mat_<float>& I0, const Mat_<float>& I1w, const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2,
    Mat_<float>& grad, Mat_<float>& rho_c)
{
    CV_DbgAssert( I1w.size() == I0.size() );
    CV_DbgAssert( I1wx.size() == I0.size() );
    CV_DbgAssert( I1wy.size() == I0.size() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == I0.size() );
    CV_DbgAssert( grad.size() == I0.size() );
    CV_DbgAssert( rho_c.size() == I0.size() );

    CalcGradRhoBody body;

    body.I0 = I0;
    body.I1w = I1w;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    body.grad = grad;
    body.rho_c = rho_c;

    parallel_for_(Range(0, I0.rows), body);
}

////////////////////////////////////////////////////////////
// estimateV
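// The EstimateVBody functor below applies the pixel-wise thresholding operator
// TH of the TV-L1 scheme. With rho(u) the linearized residual and grad holding
// |grad(I1w)|^2, the update of (v1, v2) is:
//   v = u + l_t * grad(I1w)            if rho(u) < -l_t * grad
//   v = u - l_t * grad(I1w)            if rho(u) >  l_t * grad
//   v = u - rho(u) * grad(I1w) / grad  otherwise,
// and the extra channel (u3, v3) is updated analogously when gamma != 0.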

struct EstimateVBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> I1wx;
    Mat_<float> I1wy;
    Mat_<float> u1;
    Mat_<float> u2;
    Mat_<float> u3;
    Mat_<float> grad;
    Mat_<float> rho_c;
    mutable Mat_<float> v1;
    mutable Mat_<float> v2;
    mutable Mat_<float> v3;
    float l_t;
    float gamma;
};

void EstimateVBody::operator() (const Range& range) const
{
    bool use_gamma = gamma != 0;
    for (int y = range.start; y < range.end; ++y)
    {
        const float* I1wxRow = I1wx[y];
        const float* I1wyRow = I1wy[y];
        const float* u1Row = u1[y];
        const float* u2Row = u2[y];
        const float* u3Row = use_gamma?u3[y]:NULL;
        const float* gradRow = grad[y];
        const float* rhoRow = rho_c[y];

        float* v1Row = v1[y];
        float* v2Row = v2[y];
        float* v3Row = use_gamma ? v3[y]:NULL;

        for (int x = 0; x < I1wx.cols; ++x)
        {
            const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] :
                                          rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]);
            float d1 = 0.0f;
            float d2 = 0.0f;
            float d3 = 0.0f;
            if (rho < -l_t * gradRow[x])
            {
                d1 = l_t * I1wxRow[x];
                d2 = l_t * I1wyRow[x];
                if (use_gamma) d3 = l_t * gamma;
            }
            else if (rho > l_t * gradRow[x])
            {
                d1 = -l_t * I1wxRow[x];
                d2 = -l_t * I1wyRow[x];
                if (use_gamma) d3 = -l_t * gamma;
            }
            else if (gradRow[x] > std::numeric_limits<float>::epsilon())
            {
                float fi = -rho / gradRow[x];
                d1 = fi * I1wxRow[x];
                d2 = fi * I1wyRow[x];
                if (use_gamma) d3 = fi * gamma;
            }

            v1Row[x] = u1Row[x] + d1;
            v2Row[x] = u2Row[x] + d2;
            if (use_gamma) v3Row[x] = u3Row[x] + d3;
        }
    }
}

void estimateV(const Mat_<float>& I1wx, const Mat_<float>& I1wy, const Mat_<float>& u1, const Mat_<float>& u2, const Mat_<float>& u3, const Mat_<float>& grad, const Mat_<float>& rho_c,
   Mat_<float>& v1, Mat_<float>& v2, Mat_<float>& v3, float l_t, float gamma)
{
    CV_DbgAssert( I1wy.size() == I1wx.size() );
    CV_DbgAssert( u1.size() == I1wx.size() );
    CV_DbgAssert( u2.size() == I1wx.size() );
    CV_DbgAssert( grad.size() == I1wx.size() );
    CV_DbgAssert( rho_c.size() == I1wx.size() );
    CV_DbgAssert( v1.size() == I1wx.size() );
    CV_DbgAssert( v2.size() == I1wx.size() );

    EstimateVBody body;
    bool use_gamma = gamma != 0;
    body.I1wx = I1wx;
    body.I1wy = I1wy;
    body.u1 = u1;
    body.u2 = u2;
    if (use_gamma) body.u3 = u3;
    body.grad = grad;
    body.rho_c = rho_c;
    body.v1 = v1;
    body.v2 = v2;
    if (use_gamma) body.v3 = v3;
    body.l_t = l_t;
    body.gamma = gamma;
    parallel_for_(Range(0, I1wx.rows), body);
}

////////////////////////////////////////////////////////////
// estimateU
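// estimateU performs the primal update u = v + theta * div(p) (plus u3 when
// gamma != 0) and returns the accumulated squared change of the flow, which
// procOneScale compares against the scaled epsilon stopping threshold.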

float estimateU(const Mat_<float>& v1, const Mat_<float>& v2, const Mat_<float>& v3,
            const Mat_<float>& div_p1, const Mat_<float>& div_p2, const Mat_<float>& div_p3,
            Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3,
            float theta, float gamma)
{
    CV_DbgAssert( v2.size() == v1.size() );
    CV_DbgAssert( div_p1.size() == v1.size() );
    CV_DbgAssert( div_p2.size() == v1.size() );
    CV_DbgAssert( u1.size() == v1.size() );
    CV_DbgAssert( u2.size() == v1.size() );

    float error = 0.0f;
    bool use_gamma = gamma != 0;
    for (int y = 0; y < v1.rows; ++y)
    {
        const float* v1Row = v1[y];
        const float* v2Row = v2[y];
        const float* v3Row = use_gamma?v3[y]:NULL;
        const float* divP1Row = div_p1[y];
        const float* divP2Row = div_p2[y];
        const float* divP3Row = use_gamma?div_p3[y]:NULL;

        float* u1Row = u1[y];
        float* u2Row = u2[y];
        float* u3Row = use_gamma?u3[y]:NULL;


        for (int x = 0; x < v1.cols; ++x)
        {
            const float u1k = u1Row[x];
            const float u2k = u2Row[x];
            const float u3k = use_gamma?u3Row[x]:0;

            u1Row[x] = v1Row[x] + theta * divP1Row[x];
            u2Row[x] = v2Row[x] + theta * divP2Row[x];
            if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x];
            error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k):
                               (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k);
        }
    }

    return error;
}

////////////////////////////////////////////////////////////
// estimateDualVariables
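// The functor below performs the dual update
//   p_k <- (p_k + taut * grad(u_k)) / (1 + taut * |grad(u_k)|),   k = 1, 2
// (and k = 3 when use_gamma is set), where taut = tau / theta.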

struct EstimateDualVariablesBody : ParallelLoopBody
{
    void operator() (const Range& range) const;

    Mat_<float> u1x;
    Mat_<float> u1y;
    Mat_<float> u2x;
    Mat_<float> u2y;
    Mat_<float> u3x;
    Mat_<float> u3y;
    mutable Mat_<float> p11;
    mutable Mat_<float> p12;
    mutable Mat_<float> p21;
    mutable Mat_<float> p22;
    mutable Mat_<float> p31;
    mutable Mat_<float> p32;
    float taut;
    bool use_gamma;
};

void EstimateDualVariablesBody::operator() (const Range& range) const
{
    for (int y = range.start; y < range.end; ++y)
    {
        const float* u1xRow = u1x[y];
        const float* u1yRow = u1y[y];
        const float* u2xRow = u2x[y];
        const float* u2yRow = u2y[y];
        const float* u3xRow = u3x[y];
        const float* u3yRow = u3y[y];

        float* p11Row = p11[y];
        float* p12Row = p12[y];
        float* p21Row = p21[y];
        float* p22Row = p22[y];
        float* p31Row = p31[y];
        float* p32Row = p32[y];

        for (int x = 0; x < u1x.cols; ++x)
        {
            const float g1 = static_cast<float>(hypot(u1xRow[x], u1yRow[x]));
            const float g2 = static_cast<float>(hypot(u2xRow[x], u2yRow[x]));

            const float ng1  = 1.0f + taut * g1;
            const float ng2 =  1.0f + taut * g2;

            p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1;
            p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1;
            p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2;
            p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2;

            if (use_gamma)
            {
                const float g3 = static_cast<float>(hypot(u3xRow[x], u3yRow[x]));
                const float ng3 = 1.0f + taut * g3;
                p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3;
                p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3;
            }
        }
    }
}

void estimateDualVariables(const Mat_<float>& u1x, const Mat_<float>& u1y,
                     const Mat_<float>& u2x, const Mat_<float>& u2y,
                     const Mat_<float>& u3x, const Mat_<float>& u3y,
                           Mat_<float>& p11, Mat_<float>& p12,
                     Mat_<float>& p21, Mat_<float>& p22,
                     Mat_<float>& p31, Mat_<float>& p32,
                     float taut, bool use_gamma)
{
    CV_DbgAssert( u1y.size() == u1x.size() );
    CV_DbgAssert( u2x.size() == u1x.size() );
    CV_DbgAssert( u3x.size() == u1x.size() );
    CV_DbgAssert( u2y.size() == u1x.size() );
    CV_DbgAssert( u3y.size() == u1x.size() );
    CV_DbgAssert( p11.size() == u1x.size() );
    CV_DbgAssert( p12.size() == u1x.size() );
    CV_DbgAssert( p21.size() == u1x.size() );
    CV_DbgAssert( p22.size() == u1x.size() );
    CV_DbgAssert( p31.size() == u1x.size() );
    CV_DbgAssert( p32.size() == u1x.size() );

    EstimateDualVariablesBody body;

    body.u1x = u1x;
    body.u1y = u1y;
    body.u2x = u2x;
    body.u2y = u2y;
    body.u3x = u3x;
    body.u3y = u3y;
    body.p11 = p11;
    body.p12 = p12;
    body.p21 = p21;
    body.p22 = p22;
    body.p31 = p31;
    body.p32 = p32;
    body.taut = taut;
    body.use_gamma = use_gamma;

    parallel_for_(Range(0, u1x.rows), body);
}

#ifdef HAVE_OPENCL
bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2)
{
    using namespace cv_ocl_tvl1flow;

    const double scaledEpsilon = epsilon * epsilon * I0.size().area();

    CV_DbgAssert(I1.size() == I0.size());
    CV_DbgAssert(I1.type() == I0.type());
    CV_DbgAssert(u1.empty() || u1.size() == I0.size());
    CV_DbgAssert(u2.size() == u1.size());

    if (u1.empty())
    {
        u1.create(I0.size(), CV_32FC1);
        u1.setTo(Scalar::all(0));

        u2.create(I0.size(), CV_32FC1);
        u2.setTo(Scalar::all(0));
    }

    UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows));

    if (!centeredGradient(I1, I1x, I1y))
        return false;

    UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));

    UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));

    UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));

    UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows));

    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);
    int n;

    for (int warpings = 0; warpings < warps; ++warpings)
    {
        if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c))
            return false;

        double error = std::numeric_limits<double>::max();
        double prev_error = 0;

        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // interleave the error computation so that the cv::sum reduction runs less frequently
                n = n_inner + n_outer*innerIterations;
                char calc_error = (n & 0x1) && (prev_error < scaledEpsilon);
                if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22,
                    u1, u2, diff, l_t, static_cast<float>(theta), calc_error))
                    return false;
                if (calc_error)
                {
                    error = cv::sum(diff)[0];
                    prev_error = error;
                }
                else
                {
                    error = std::numeric_limits<double>::max();
                    prev_error -= scaledEpsilon;
                }
                if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut))
                    return false;
            }
        }
    }
    return true;
}
#endif

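// Solves one pyramid level of the dual TV-L1 model: for each warp, I1 and its
// gradient are warped towards I0 along the current flow, then the primal
// variables (u, v) and the dual variables p are updated alternately until the
// change of the flow drops below epsilon or the iteration limits are reached.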
void OpticalFlowDual_TVL1::procOneScale(const Mat_<float>& I0, const Mat_<float>& I1, Mat_<float>& u1, Mat_<float>& u2, Mat_<float>& u3)
{
    const float scaledEpsilon = static_cast<float>(epsilon * epsilon * I0.size().area());

    CV_DbgAssert( I1.size() == I0.size() );
    CV_DbgAssert( I1.type() == I0.type() );
    CV_DbgAssert( u1.size() == I0.size() );
    CV_DbgAssert( u2.size() == u1.size() );

    Mat_<float> I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows));
    centeredGradient(I1, I1x, I1y);

    Mat_<float> flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows));
    p11.setTo(Scalar::all(0));
    p12.setTo(Scalar::all(0));
    p21.setTo(Scalar::all(0));
    p22.setTo(Scalar::all(0));
    bool use_gamma = gamma != 0.;
    if (use_gamma) p31.setTo(Scalar::all(0));
    if (use_gamma) p32.setTo(Scalar::all(0));

    Mat_<float> div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows));

    Mat_<float> u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows));
    Mat_<float> u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows));

    const float l_t = static_cast<float>(lambda * theta);
    const float taut = static_cast<float>(tau / theta);

    for (int warpings = 0; warpings < warps; ++warpings)
    {
        // compute the warping of the target image and its derivatives
        buildFlowMap(u1, u2, flowMap1, flowMap2);
        remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC);
        remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC);
        //calculate I1(x+u0) and its gradient
        calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c);

        float error = std::numeric_limits<float>::max();
        for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer)
        {
            if (medianFiltering > 1) {
                cv::medianBlur(u1, u1, medianFiltering);
                cv::medianBlur(u2, u2, medianFiltering);
            }
            for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner)
            {
                // estimate the values of the variable (v1, v2) (thresholding operator TH)
                estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast<float>(gamma));

                // compute the divergence of the dual variable (p1, p2, p3)
                divergence(p11, p12, div_p1);
                divergence(p21, p22, div_p2);
                if (use_gamma) divergence(p31, p32, div_p3);

                // estimate the values of the optical flow (u1, u2)
                error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast<float>(theta), static_cast<float>(gamma));

                // compute the gradient of the optical flow (Du1, Du2)
                forwardGradient(u1, u1x, u1y);
                forwardGradient(u2, u2x, u2y);
                if (use_gamma) forwardGradient(u3, u3x, u3y);

                // estimate the values of the dual variable (p1, p2, p3)
                estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma);
            }
        }
    }
}

void OpticalFlowDual_TVL1::collectGarbage()
{
    //dataMat structure dm
    dm.I0s.clear();
    dm.I1s.clear();
    dm.u1s.clear();
    dm.u2s.clear();

    dm.I1x_buf.release();
    dm.I1y_buf.release();

    dm.flowMap1_buf.release();
    dm.flowMap2_buf.release();

    dm.I1w_buf.release();
    dm.I1wx_buf.release();
    dm.I1wy_buf.release();

    dm.grad_buf.release();
    dm.rho_c_buf.release();

    dm.v1_buf.release();
    dm.v2_buf.release();

    dm.p11_buf.release();
    dm.p12_buf.release();
    dm.p21_buf.release();
    dm.p22_buf.release();

    dm.div_p1_buf.release();
    dm.div_p2_buf.release();

    dm.u1x_buf.release();
    dm.u1y_buf.release();
    dm.u2x_buf.release();
    dm.u2y_buf.release();

#ifdef HAVE_OPENCL
    //dataUMat structure dum
    dum.I0s.clear();
    dum.I1s.clear();
    dum.u1s.clear();
    dum.u2s.clear();

    dum.I1x_buf.release();
    dum.I1y_buf.release();

    dum.I1w_buf.release();
    dum.I1wx_buf.release();
    dum.I1wy_buf.release();

    dum.grad_buf.release();
    dum.rho_c_buf.release();

    dum.p11_buf.release();
    dum.p12_buf.release();
    dum.p21_buf.release();
    dum.p22_buf.release();

    dum.diff_buf.release();
    dum.norm_buf.release();
#endif
}

} // namespace

Ptr<DualTVL1OpticalFlow> cv::createOptFlow_DualTVL1()
{
    return makePtr<OpticalFlowDual_TVL1>();
}

Ptr<DualTVL1OpticalFlow> cv::DualTVL1OpticalFlow::create(
    double tau, double lambda, double theta, int nscales, int warps,
    double epsilon, int innerIterations, int outerIterations, double scaleStep,
    double gamma, int medianFilter, bool useInitialFlow)
{
    return makePtr<OpticalFlowDual_TVL1>(tau, lambda, theta, nscales, warps,
                                         epsilon, innerIterations, outerIterations,
                                         scaleStep, gamma, medianFilter, useInitialFlow);
}
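
/* Illustrative usage sketch (not part of this translation unit; a separate
   program would link against opencv_video and opencv_imgcodecs, and the input
   file names below are placeholders):

    #include <opencv2/video.hpp>
    #include <opencv2/imgcodecs.hpp>

    int main()
    {
        cv::Mat frame0 = cv::imread("frame0.png", cv::IMREAD_GRAYSCALE);
        cv::Mat frame1 = cv::imread("frame1.png", cv::IMREAD_GRAYSCALE);

        cv::Ptr<cv::DualTVL1OpticalFlow> tvl1 = cv::createOptFlow_DualTVL1();
        cv::Mat flow;                      // filled with CV_32FC2 (dx, dy) per pixel
        tvl1->calc(frame0, frame1, flow);

        return 0;
    }
*/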