Partial DIS implementation and OF benchmark

Basic interfaces and a partial implementation of the Dense Inverse Search (DIS) optical flow algorithm without variational refinement. Also added a python benchmarking script that can evaluate different optical flow algorithms on the MPI Sintel and Middlebury datasets and build overall comparative charts.

Partial DIS implementation and OF benchmark
Basic interfaces and a partial implementation of the Dense Inverse Search (DIS) optical flow algorithm without variational refinement. Also added a python benchmarking script that can evaluate different optical flow algorithms on the MPI Sintel and Middlebury datasets and build overall comparative charts.
d4c3765e · Alexander Bokov · 76678bca · d4c3765e · d4c3765e · d4c3765e
Commit d4c3765e authored May 27, 2016 by Alexander Bokov
5 changed files
--- a/modules/optflow/doc/optflow.bib
+++ b/modules/optflow/doc/optflow.bib
@@ -37,3 +37,10 @@
  year={2013},
  organization={IEEE}
 }
+
+@article{Kroeger2016,
+  title={Fast Optical Flow using Dense Inverse Search},
+  author={Kroeger, Till and Timofte, Radu and Dai, Dengxin and Van Gool, Luc},
+  journal={arXiv preprint arXiv:1603.03590},
+  year={2016}
+}
--- a/modules/optflow/include/opencv2/optflow.hpp
+++ b/modules/optflow/include/opencv2/optflow.hpp
@@ -191,6 +191,44 @@ CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_Farneback();
 //! Additional interface to the SparseToDenseFlow algorithm - calcOpticalFlowSparseToDense()
 CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_SparseToDense();

+/** @brief DIS optical flow algorithm.
+
+This class implements the Dense Inverse Search (DIS) optical flow algorithm. More
+details about the algorithm can be found at @cite Kroeger2016 .
+*/
+class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
+{
+public:
+    /** @brief Finest level of the gaussian pyramid on which the flow is computed (zero level
+        corresponds to the original image resolution).The final flow is obtained by bilinear upscaling.
+        @see setFinestScale */
+    virtual int getFinestScale() const = 0;
+    /** @copybrief getFinestScale @see getFinestScale */
+    virtual void setFinestScale(int val) = 0;
+
+    /** @brief Size of an image patch for matching (in pixels)
+        @see setPatchSize */
+    virtual int getPatchSize() const = 0;
+    /** @copybrief getPatchSize @see getPatchSize */
+    virtual void setPatchSize(int val) = 0;
+
+    /** @brief Stride between neighbor patches. Must be less than patch size.
+        @see setPatchStride */
+    virtual int getPatchStride() const = 0;
+    /** @copybrief getPatchStride @see getPatchStride */
+    virtual void setPatchStride(int val) = 0;
+
+    /** @brief number of gradient descent iterations in the patch inverse search stage
+        @see setGradientDescentIterations */
+    virtual int getGradientDescentIterations() const = 0;
+    /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */
+    virtual void setGradientDescentIterations(int val) = 0;
+};
+
+/** @brief Creates an instance of DISOpticalFlow
+*/
+CV_EXPORTS_W Ptr<DISOpticalFlow> createOptFlow_DIS();
+
 //! @}

 } //optflow

--- a/modules/optflow/samples/optical_flow_benchmark.py
+++ b/modules/optflow/samples/optical_flow_benchmark.py
+#!/usr/bin/env python
+from __future__ import print_function
+import os, sys, shutil
+import argparse
+import json, re
+from subprocess import check_output
+import datetime
+import matplotlib.pyplot as plt
+
+
+def load_json(path):
+    f = open(path, "r")
+    data = json.load(f)
+    return data
+
+
+def save_json(obj, path):
+    tmp_file = path + ".bak"
+    f = open(tmp_file, "w")
+    json.dump(obj, f, indent=2)
+    f.flush()
+    os.fsync(f.fileno())
+    f.close()
+    try:
+        os.rename(tmp_file, path)
+    except:
+        os.remove(path)
+        os.rename(tmp_file, path)
+
+
+def parse_evaluation_result(input_str, i):
+    res = {}
+    res['frame_number'] = i + 1
+    res['error'] = {}
+    regex = "([A-Za-z. \\[\\].0-9]+):[ ]*([0-9]*\.[0-9]+|[0-9]+)"
+    for elem in re.findall(regex,input_str):
+        if "Time" in elem[0]:
+            res['time'] = float(elem[1])
+        elif "Average" in elem[0]:
+            res['error']['average'] = float(elem[1])
+        elif "deviation" in elem[0]:
+            res['error']['std'] = float(elem[1])
+        else:
+            res['error'][elem[0]] = float(elem[1])
+    return res
+
+
+def evaluate_sequence(sequence, algorithm, dataset, executable, img_files, gt_files,
+                      state, state_path):
+    if "eval_results" not in state[dataset][algorithm][-1].keys():
+        state[dataset][algorithm][-1]["eval_results"] = {}
+    elif sequence in state[dataset][algorithm][-1]["eval_results"].keys():
+        return
+
+    res = []
+    for i in range(len(img_files) - 1):
+        sys.stdout.write("Algorithm: %-20s Sequence: %-10s Done: [%3d/%3d]\r" %
+                         (algorithm, sequence, i, len(img_files) - 1)),
+        sys.stdout.flush()
+
+        res_string = check_output([executable, img_files[i], img_files[i + 1],
+                                   algorithm, gt_files[i]])
+        res.append(parse_evaluation_result(res_string, i))
+    state[dataset][algorithm][-1]["eval_results"][sequence] = res
+    save_json(state, state_path)
+
+#############################DATSET DEFINITIONS################################
+
+def evaluate_mpi_sintel(source_dir, algorithm, evaluation_executable, state, state_path):
+    evaluation_result = {}
+    img_dir = os.path.join(source_dir, 'mpi_sintel', 'training', 'final')
+    gt_dir = os.path.join(source_dir, 'mpi_sintel', 'training', 'flow')
+    sequences = [f for f in os.listdir(img_dir)
+                 if os.path.isdir(os.path.join(img_dir, f))]
+    for seq in sequences:
+        img_files = sorted([os.path.join(img_dir, seq, f)
+                            for f in os.listdir(os.path.join(img_dir, seq))
+                            if f.endswith(".png")])
+        gt_files = sorted([os.path.join(gt_dir, seq, f)
+                           for f in os.listdir(os.path.join(gt_dir, seq))
+                           if f.endswith(".flo")])
+        evaluation_result[seq] = evaluate_sequence(seq, algorithm, 'mpi_sintel',
+            evaluation_executable, img_files, gt_files, state, state_path)
+    return evaluation_result
+
+
+def evaluate_middlebury(source_dir, algorithm, evaluation_executable, state, state_path):
+    evaluation_result = {}
+    img_dir = os.path.join(source_dir, 'middlebury', 'other-data')
+    gt_dir = os.path.join(source_dir, 'middlebury', 'other-gt-flow')
+    sequences = [f for f in os.listdir(gt_dir)
+                 if os.path.isdir(os.path.join(gt_dir, f))]
+    for seq in sequences:
+        img_files = sorted([os.path.join(img_dir, seq, f)
+                            for f in os.listdir(os.path.join(img_dir, seq))
+                            if f.endswith(".png")])
+        gt_files = sorted([os.path.join(gt_dir, seq, f)
+                           for f in os.listdir(os.path.join(gt_dir, seq))
+                           if f.endswith(".flo")])
+        evaluation_result[seq] = evaluate_sequence(seq, algorithm, 'middlebury',
+            evaluation_executable, img_files, gt_files, state, state_path)
+    return evaluation_result
+
+
+dataset_eval_functions = {
+    "mpi_sintel": evaluate_mpi_sintel,
+    "middlebury": evaluate_middlebury
+}
+
+###############################################################################
+
+def create_dir(dir):
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+
+
+def parse_sequence(input_str):
+    if len(input_str) == 0:
+        return []
+    else:
+        return [o.strip() for o in input_str.split(",") if o]
+
+
+def build_chart(dst_folder, state, dataset):
+    fig = plt.figure(figsize=(16, 10))
+    markers = ["o", "s", "h", "^", "D"]
+    marker_idx = 0
+    colors = ["b", "g", "r"]
+    color_idx = 0
+    for algo in state[dataset].keys():
+        for eval_instance in state[dataset][algo]:
+            name = algo + "--" + eval_instance["timestamp"]
+            average_time = 0.0
+            average_error = 0.0
+            num_elem = 0
+            for seq in eval_instance["eval_results"].keys():
+                for frame in eval_instance["eval_results"][seq]:
+                    average_time += frame["time"]
+                    average_error += frame["error"]["average"]
+                    num_elem += 1
+            average_time /= num_elem
+            average_error /= num_elem
+
+            marker_style = colors[color_idx] + markers[marker_idx]
+            color_idx += 1
+            if color_idx >= len(colors):
+                color_idx = 0
+            marker_idx += 1
+            if marker_idx >= len(markers):
+                marker_idx = 0
+            plt.gca().plot([average_time], [average_error],
+                           marker_style,
+                           markersize=14,
+                           label=name)
+
+    plt.gca().set_ylabel('Average Endpoint Error (EPE)', fontsize=20)
+    plt.gca().set_xlabel('Average Runtime (seconds per frame)', fontsize=20)
+    plt.gca().set_xscale("log")
+    plt.gca().set_title('Evaluation on ' + dataset, fontsize=20)
+
+    plt.gca().legend()
+    fig.savefig(os.path.join(dst_folder, "evaluation_results_" + dataset + ".png"),
+                bbox_inches='tight')
+    plt.close()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Optical flow benchmarking script',
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "bin_path",
+        default="./optflow-example-optical_flow_evaluation",
+        help="Path to the optical flow evaluation executable")
+    parser.add_argument(
+        "-a",
+        "--algorithms",
+        metavar="ALGORITHMS",
+        default="",
+        help=("Comma-separated list of optical-flow algorithms to evaluate "
+              "(example: -a farneback,tvl1,deepflow). Note that previously "
+              "evaluated algorithms are also included in the output charts"))
+    parser.add_argument(
+        "-d",
+        "--datasets",
+        metavar="DATASETS",
+        default="mpi_sintel",
+        help=("Comma-separated list of datasets for evaluation (currently only "
+              "'mpi_sintel' and 'middlebury' are supported)"))
+    parser.add_argument(
+        "-f",
+        "--dataset_folder",
+        metavar="DATASET_FOLDER",
+        default="./OF_datasets",
+        help=("Path to a folder containing datasets. To enable evaluation on "
+              "MPI Sintel dataset, please download it using the following links: "
+              "http://files.is.tue.mpg.de/sintel/MPI-Sintel-training_images.zip and "
+              "http://files.is.tue.mpg.de/sintel/MPI-Sintel-training_extras.zip and "
+              "unzip these archives into the 'mpi_sintel' folder. To enable evaluation "
+              "on the Middlebury dataset use the following links: "
+              "http://vision.middlebury.edu/flow/data/comp/zip/other-color-twoframes.zip, "
+              "http://vision.middlebury.edu/flow/data/comp/zip/other-gt-flow.zip. "
+              "These should be unzipped into 'middlebury' folder"))
+    parser.add_argument(
+        "-o",
+        "--out",
+        metavar="OUT_DIR",
+        default="./OF_evaluation_results",
+        help="Output directory where to store benchmark results")
+    parser.add_argument(
+        "-s",
+        "--state",
+        metavar="STATE_JSON",
+        default="./OF_evaluation_state.json",
+        help=("Path to a json file that stores the current evaluation state and "
+              "previous evaluation results"))
+    args, other_args = parser.parse_known_args()
+
+    if not os.path.isfile(args.bin_path):
+        print("Error: " + args.bin_path + " does not exist")
+        sys.exit(1)
+
+    if not os.path.exists(args.dataset_folder):
+        print("Error: " + args.dataset_folder + (" does not exist. Please, correctly "
+                                                 "specify the -f parameter"))
+        sys.exit(1)
+
+    state = {}
+    if os.path.isfile(args.state):
+        state = load_json(args.state)
+
+    algorithm_list = parse_sequence(args.algorithms)
+    dataset_list = parse_sequence(args.datasets)
+    for dataset in dataset_list:
+        if dataset not in dataset_eval_functions.keys():
+            print("Error: unsupported dataset " + dataset)
+            sys.exit(1)
+        if dataset not in os.listdir(args.dataset_folder):
+            print("Error: " + os.path.join(args.dataset_folder, dataset) + (" does not exist. "
+                              "Please, download the dataset and follow the naming conventions "
+                              "(use -h for more information)"))
+            sys.exit(1)
+
+    for dataset in dataset_list:
+        if dataset not in state.keys():
+            state[dataset] = {}
+        for algorithm in algorithm_list:
+            if algorithm in state[dataset].keys():
+                last_eval_instance = state[dataset][algorithm][-1]
+                if "finished" not in last_eval_instance.keys():
+                    print(("Continuing an unfinished evaluation of " +
+                          algorithm + " started at " + last_eval_instance["timestamp"]))
+                else:
+                    state[dataset][algorithm].append({"timestamp":
+                        datetime.datetime.now().strftime("%Y-%m-%d--%H-%M")})
+            else:
+                state[dataset][algorithm] = [{"timestamp":
+                    datetime.datetime.now().strftime("%Y-%m-%d--%H-%M")}]
+            save_json(state, args.state)
+            dataset_eval_functions[dataset](args.dataset_folder, algorithm, args.bin_path,
+                                            state, args.state)
+            state[dataset][algorithm][-1]["finished"] = True
+            save_json(state, args.state)
+    save_json(state, args.state)
+
+    create_dir(args.out)
+    for dataset in dataset_list:
+        build_chart(args.out, state, dataset)
--- a/modules/optflow/samples/optical_flow_evaluation.cpp
+++ b/modules/optflow/samples/optical_flow_evaluation.cpp
@@ -11,7 +11,7 @@ using namespace optflow;
 const String keys = "{help h usage ? |      | print this message   }"
        "{@image1        |      | image1               }"
        "{@image2        |      | image2               }"
-        "{@algorithm     |      | [farneback, simpleflow, tvl1, deepflow or sparsetodenseflow] }"
+        "{@algorithm     |      | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow or DISflow] }"
        "{@groundtruth   |      | path to the .flo file  (optional), Middlebury format }"
        "{m measure      |endpoint| error measure - [endpoint or angular] }"
        "{r region       |all   | region to compute stats about [all, discontinuities, untextured] }"
@@ -229,7 +229,7 @@ int main( int argc, char** argv )
    if ( i2.depth() != CV_8U )
        i2.convertTo(i2, CV_8U);

-    if ( (method == "farneback" || method == "tvl1" || method == "deepflow") && i1.channels() == 3 )
+    if ( (method == "farneback" || method == "tvl1" || method == "deepflow" || method == "DISflow") && i1.channels() == 3 )
    {   // 1-channel images are expected
        cvtColor(i1, i1, COLOR_BGR2GRAY);
        cvtColor(i2, i2, COLOR_BGR2GRAY);
@@ -252,6 +252,8 @@ int main( int argc, char** argv )
        algorithm = createOptFlow_DeepFlow();
    else if ( method == "sparsetodenseflow" )
        algorithm = createOptFlow_SparseToDense();
+    else if ( method == "DISflow" )
+        algorithm = createOptFlow_DIS();
    else
    {
        printf("Wrong method!\n");

--- a/modules/optflow/src/dis_flow.cpp
+++ b/modules/optflow/src/dis_flow.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "opencv2/imgproc.hpp"
+using namespace std;
+#define EPS 0.001F
+
+namespace cv
+{
+namespace optflow
+{
+
+class DISOpticalFlowImpl : public DISOpticalFlow
+{
+  public:
+    DISOpticalFlowImpl();
+
+    void calc(InputArray I0, InputArray I1, InputOutputArray flow);
+    void collectGarbage();
+
+  protected: // algorithm parameters
+    int finest_scale, coarsest_scale;
+    int patch_size;
+    int patch_stride;
+    int grad_descent_iter;
+
+  public: // getters and setters
+    int getFinestScale() const { return finest_scale; }
+    void setFinestScale(int val) { finest_scale = val; }
+    int getPatchSize() const { return patch_size; }
+    void setPatchSize(int val) { patch_size = val; }
+    int getPatchStride() const { return patch_stride; }
+    void setPatchStride(int val) { patch_stride = val; }
+    int getGradientDescentIterations() const { return grad_descent_iter; }
+    void setGradientDescentIterations(int val) { grad_descent_iter = val; }
+
+  private:                   // internal buffers
+    vector< Mat_<uchar> > I0s; // gaussian pyramid for the current frame
+    vector< Mat_<uchar> > I1s; // gaussian pyramid for the next frame
+
+    vector< Mat_<short> > I0xs; // gaussian pyramid for the x gradient of the current frame
+    vector< Mat_<short> > I0ys; // gaussian pyramid for the y gradient of the current frame
+
+    vector< Mat_<float> > Ux; // x component of the flow vectors
+    vector< Mat_<float> > Uy; // y component of the flow vectors
+
+    Mat_<Vec2f> U; // buffers for the merged flow
+
+    Mat_<float> Sx; // x component of the sparse flow vectors (before densification)
+    Mat_<float> Sy; // y component of the sparse flow vectors (before densification)
+
+    // structure tensor components and auxiliary buffers:
+    Mat_<float> I0xx_buf; // sum of squares of x gradient values
+    Mat_<float> I0yy_buf; // sum of squares of y gradient values
+    Mat_<float> I0xy_buf; // sum of x and y gradient products
+
+    Mat_<float> I0xx_buf_aux; // for computing sums using the summed area table
+    Mat_<float> I0yy_buf_aux;
+    Mat_<float> I0xy_buf_aux;
+    ////////////////////////////////////////////////////////////
+
+  private: // private methods
+    void prepareBuffers(Mat &I0, Mat &I1);
+    void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y);
+    void patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x, Mat &I0y, Mat &I1);
+    void densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1);
+};
+
+DISOpticalFlowImpl::DISOpticalFlowImpl()
+{
+    finest_scale = 3;
+    patch_size = 9;
+    patch_stride = 4;
+    grad_descent_iter = 12;
+}
+
+void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
+{
+    I0s.resize(coarsest_scale + 1);
+    I1s.resize(coarsest_scale + 1);
+    I0xs.resize(coarsest_scale + 1);
+    I0ys.resize(coarsest_scale + 1);
+    Ux.resize(coarsest_scale + 1);
+    Uy.resize(coarsest_scale + 1);
+    int fraction = 1;
+    int cur_rows = 0, cur_cols = 0;
+
+    for (int i = 0; i <= coarsest_scale; i++)
+    {
+        if (i == finest_scale)
+        {
+            cur_rows = I0.rows / fraction;
+            cur_cols = I0.cols / fraction;
+            I0s[i].create(cur_rows, cur_cols);
+            resize(I0, I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA);
+            I1s[i].create(cur_rows, cur_cols);
+            resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
+
+            Sx.create(cur_rows / patch_stride, cur_cols / patch_stride);
+            Sy.create(cur_rows / patch_stride, cur_cols / patch_stride);
+            I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
+            I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
+            I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride);
+            I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride);
+            I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride);
+            I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride);
+            U.create(cur_rows, cur_cols);
+        }
+        else if (i > finest_scale)
+        {
+            cur_rows = I0s[i - 1].rows / 2;
+            cur_cols = I0s[i - 1].cols / 2;
+            I0s[i].create(cur_rows, cur_cols);
+            resize(I0s[i - 1], I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA);
+            I1s[i].create(cur_rows, cur_cols);
+            resize(I1s[i - 1], I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
+        }
+
+        fraction *= 2;
+
+        if (i >= finest_scale)
+        {
+            I0xs[i].create(cur_rows, cur_cols);
+            I0ys[i].create(cur_rows, cur_cols);
+            spatialGradient(I0s[i], I0xs[i], I0ys[i]);
+            Ux[i].create(cur_rows, cur_cols);
+            Uy[i].create(cur_rows, cur_cols);
+        }
+    }
+}
+
+void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y)
+{
+    short *I0x_ptr = I0x.ptr<short>();
+    short *I0y_ptr = I0y.ptr<short>();
+
+    float *I0xx_ptr = dst_I0xx.ptr<float>();
+    float *I0yy_ptr = dst_I0yy.ptr<float>();
+    float *I0xy_ptr = dst_I0xy.ptr<float>();
+
+    float *I0xx_aux_ptr = I0xx_buf_aux.ptr<float>();
+    float *I0yy_aux_ptr = I0yy_buf_aux.ptr<float>();
+    float *I0xy_aux_ptr = I0xy_buf_aux.ptr<float>();
+
+    int w = I0x.cols;
+    int h = I0x.rows;
+    int psz2 = patch_size / 2;
+    int psz = 2 * psz2;
+    // width of the sparse OF fields:
+    int ws = 1 + (w - patch_size) / patch_stride;
+
+    // separable box filter for computing patch sums on a sparse
+    // grid (determined by patch_stride)
+    for (int i = 0; i < h; i++)
+    {
+        float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f;
+        short *x_row = I0x_ptr + i * w, *y_row = I0y_ptr + i * w;
+        for (int j = 0; j < patch_size; j++)
+        {
+            sum_xx += x_row[j] * x_row[j];
+            sum_yy += y_row[j] * y_row[j];
+            sum_xy += x_row[j] * y_row[j];
+        }
+        I0xx_aux_ptr[i * ws] = sum_xx;
+        I0yy_aux_ptr[i * ws] = sum_yy;
+        I0xy_aux_ptr[i * ws] = sum_xy;
+        int js = 1;
+        for (int j = patch_size; j < w; j++)
+        {
+            sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]);
+            sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]);
+            sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]);
+            if ((j - psz) % patch_stride == 0)
+            {
+                I0xx_aux_ptr[i * ws + js] = sum_xx;
+                I0yy_aux_ptr[i * ws + js] = sum_yy;
+                I0xy_aux_ptr[i * ws + js] = sum_xy;
+                js++;
+            }
+        }
+    }
+
+    AutoBuffer<float> sum_xx_buf(ws), sum_yy_buf(ws), sum_xy_buf(ws);
+    float *sum_xx = (float *)sum_xx_buf;
+    float *sum_yy = (float *)sum_yy_buf;
+    float *sum_xy = (float *)sum_xy_buf;
+    for (int j = 0; j < ws; j++)
+    {
+        sum_xx[j] = 0.0f;
+        sum_yy[j] = 0.0f;
+        sum_xy[j] = 0.0f;
+    }
+
+    for (int i = 0; i < patch_size; i++)
+        for (int j = 0; j < ws; j++)
+        {
+            sum_xx[j] += I0xx_aux_ptr[i * ws + j];
+            sum_yy[j] += I0yy_aux_ptr[i * ws + j];
+            sum_xy[j] += I0xy_aux_ptr[i * ws + j];
+        }
+    for (int j = 0; j < ws; j++)
+    {
+        I0xx_ptr[j] = sum_xx[j];
+        I0yy_ptr[j] = sum_yy[j];
+        I0xy_ptr[j] = sum_xy[j];
+    }
+    int is = 1;
+    for (int i = patch_size; i < h; i++)
+    {
+        for (int j = 0; j < ws; j++)
+        {
+            sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]);
+            sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]);
+            sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]);
+        }
+        if ((i - psz) % patch_stride == 0)
+        {
+            for (int j = 0; j < ws; j++)
+            {
+                I0xx_ptr[is * ws + j] = sum_xx[j];
+                I0yy_ptr[is * ws + j] = sum_yy[j];
+                I0xy_ptr[is * ws + j] = sum_xy[j];
+            }
+            is++;
+        }
+    }
+}
+
+void DISOpticalFlowImpl::patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x,
+                                            Mat &I0y, Mat &I1)
+{
+    float *Ux_ptr = src_Ux.ptr<float>();
+    float *Uy_ptr = src_Uy.ptr<float>();
+    float *Sx_ptr = dst_Sx.ptr<float>();
+    float *Sy_ptr = dst_Sy.ptr<float>();
+    uchar *I0_ptr = I0.ptr<uchar>();
+    uchar *I1_ptr = I1.ptr<uchar>();
+    short *I0x_ptr = I0x.ptr<short>();
+    short *I0y_ptr = I0y.ptr<short>();
+    int w = I0.cols;
+    int h = I1.rows;
+    int psz2 = patch_size / 2;
+    // width and height of the sparse OF fields:
+    int ws = 1 + (w - patch_size) / patch_stride;
+    int hs = 1 + (h - patch_size) / patch_stride;
+
+    precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x, I0y);
+    float *xx_ptr = I0xx_buf.ptr<float>();
+    float *yy_ptr = I0yy_buf.ptr<float>();
+    float *xy_ptr = I0xy_buf.ptr<float>();
+
+    // perform a fixed number of gradient descent iterations for each patch:
+    int i = psz2;
+    for (int is = 0; is < hs; is++)
+    {
+        int j = psz2;
+        for (int js = 0; js < ws; js++)
+        {
+            float cur_Ux = Ux_ptr[i * w + j];
+            float cur_Uy = Uy_ptr[i * w + j];
+            float detH = xx_ptr[is * ws + js] * yy_ptr[is * ws + js] - xy_ptr[is * ws + js] * xy_ptr[is * ws + js];
+            if (abs(detH) < EPS)
+                detH = EPS;
+            float invH11 = yy_ptr[is * ws + js] / detH;
+            float invH12 = -xy_ptr[is * ws + js] / detH;
+            float invH22 = xx_ptr[is * ws + js] / detH;
+            float prev_sum_diff = 100000000.0f;
+            for (int t = 0; t < grad_descent_iter; t++)
+            {
+                float dUx = 0, dUy = 0;
+                float diff = 0;
+                float sum_diff = 0.0f;
+                for (int pos_y = i - psz2; pos_y <= i + psz2; pos_y++)
+                    for (int pos_x = j - psz2; pos_x <= j + psz2; pos_x++)
+                    {
+                        float pos_x_shifted = min(max(pos_x + cur_Ux, 0.0f), w - 1.0f - EPS);
+                        float pos_y_shifted = min(max(pos_y + cur_Uy, 0.0f), h - 1.0f - EPS);
+                        int pos_x_lower = (int)pos_x_shifted;
+                        int pos_x_upper = pos_x_lower + 1;
+                        int pos_y_lower = (int)pos_y_shifted;
+                        int pos_y_upper = pos_y_lower + 1;
+                        diff = (pos_x_shifted - pos_x_lower) * (pos_y_shifted - pos_y_lower) *
+                                 I1_ptr[pos_y_upper * w + pos_x_upper] +
+                               (pos_x_upper - pos_x_shifted) * (pos_y_shifted - pos_y_lower) *
+                                 I1_ptr[pos_y_upper * w + pos_x_lower] +
+                               (pos_x_shifted - pos_x_lower) * (pos_y_upper - pos_y_shifted) *
+                                 I1_ptr[pos_y_lower * w + pos_x_upper] +
+                               (pos_x_upper - pos_x_shifted) * (pos_y_upper - pos_y_shifted) *
+                                 I1_ptr[pos_y_lower * w + pos_x_lower] -
+                               I0_ptr[pos_y * w + pos_x];
+                        sum_diff += diff * diff;
+                        dUx += I0x_ptr[pos_y * w + pos_x] * diff;
+                        dUy += I0y_ptr[pos_y * w + pos_x] * diff;
+                    }
+                cur_Ux -= invH11 * dUx + invH12 * dUy;
+                cur_Uy -= invH12 * dUx + invH22 * dUy;
+                if (sum_diff > prev_sum_diff)
+                    break;
+                prev_sum_diff = sum_diff;
+            }
+            if (norm(Vec2f(cur_Ux - Ux_ptr[i * w + j], cur_Uy - Uy_ptr[i * w + j])) <= patch_size)
+            {
+                Sx_ptr[is * ws + js] = cur_Ux;
+                Sy_ptr[is * ws + js] = cur_Uy;
+            }
+            else
+            {
+                Sx_ptr[is * ws + js] = Ux_ptr[i * w + j];
+                Sy_ptr[is * ws + js] = Uy_ptr[i * w + j];
+            }
+            j += patch_stride;
+        }
+        i += patch_stride;
+    }
+}
+
+void DISOpticalFlowImpl::densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1)
+{
+    float *Ux_ptr = dst_Ux.ptr<float>();
+    float *Uy_ptr = dst_Uy.ptr<float>();
+    float *Sx_ptr = src_Sx.ptr<float>();
+    float *Sy_ptr = src_Sy.ptr<float>();
+    uchar *I0_ptr = I0.ptr<uchar>();
+    uchar *I1_ptr = I1.ptr<uchar>();
+    int w = I0.cols;
+    int h = I0.rows;
+    int psz2 = patch_size / 2;
+    // width of the sparse OF fields:
+    int ws = 1 + (w - patch_size) / patch_stride;
+
+    int start_x;
+    int start_y;
+
+    start_y = psz2;
+    for (int i = 0; i < h; i++)
+    {
+        if (i - psz2 > start_y && start_y + patch_stride < h - psz2)
+            start_y += patch_stride;
+        start_x = psz2;
+        for (int j = 0; j < w; j++)
+        {
+            float coef, sum_coef = 0.0f;
+            float sum_Ux = 0.0f;
+            float sum_Uy = 0.0f;
+
+            if (j - psz2 > start_x && start_x + patch_stride < w - psz2)
+                start_x += patch_stride;
+
+            for (int pos_y = start_y; pos_y <= min(i + psz2, h - psz2 - 1); pos_y += patch_stride)
+                for (int pos_x = start_x; pos_x <= min(j + psz2, w - psz2 - 1); pos_x += patch_stride)
+                {
+                    float diff;
+                    int is = (pos_y - psz2) / patch_stride;
+                    int js = (pos_x - psz2) / patch_stride;
+                    float j_shifted = min(max(j + Sx_ptr[is * ws + js], 0.0f), w - 1.0f - EPS);
+                    float i_shifted = min(max(i + Sy_ptr[is * ws + js], 0.0f), h - 1.0f - EPS);
+                    int j_lower = (int)j_shifted;
+                    int j_upper = j_lower + 1;
+                    int i_lower = (int)i_shifted;
+                    int i_upper = i_lower + 1;
+                    diff = (j_shifted - j_lower) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_upper] +
+                           (j_upper - j_shifted) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_lower] +
+                           (j_shifted - j_lower) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_upper] +
+                           (j_upper - j_shifted) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_lower] -
+                           I0_ptr[i * w + j];
+                    coef = 1 / max(1.0f, diff * diff);
+                    sum_Ux += coef * Sx_ptr[is * ws + js];
+                    sum_Uy += coef * Sy_ptr[is * ws + js];
+                    sum_coef += coef;
+                }
+            Ux_ptr[i * w + j] = sum_Ux / sum_coef;
+            Uy_ptr[i * w + j] = sum_Uy / sum_coef;
+        }
+    }
+}
+
+void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow)
+{
+    CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1);
+    CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1);
+    CV_Assert(I0.sameSize(I1));
+
+    Mat I0Mat = I0.getMat();
+    Mat I1Mat = I1.getMat();
+    flow.create(I1Mat.size(), CV_32FC2);
+    Mat &flowMat = flow.getMatRef();
+    coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size))/log(2.0) + 0.5) - 1;
+
+    prepareBuffers(I0Mat, I1Mat);
+    Ux[coarsest_scale].setTo(0.0f);
+    Uy[coarsest_scale].setTo(0.0f);
+
+    for (int i = coarsest_scale; i >= finest_scale; i--)
+    {
+        patchInverseSearch(Sx, Sy, Ux[i], Uy[i], I0s[i], I0xs[i], I0ys[i], I1s[i]);
+        densification(Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i]);
+        // TODO: variational refinement step
+
+        if (i > finest_scale)
+        {
+            resize(Ux[i], Ux[i - 1], Ux[i - 1].size());
+            resize(Uy[i], Uy[i - 1], Uy[i - 1].size());
+            Ux[i - 1] *= 2;
+            Uy[i - 1] *= 2;
+        }
+    }
+    Mat uxy[] = {Ux[finest_scale], Uy[finest_scale]};
+    merge(uxy, 2, U);
+    resize(U, flowMat, flowMat.size());
+    flowMat *= pow(2, finest_scale);
+}
+
+void DISOpticalFlowImpl::collectGarbage()
+{
+    I0s.clear();
+    I1s.clear();
+    I0xs.clear();
+    I0ys.clear();
+    Ux.clear();
+    Uy.clear();
+    U.release();
+    Sx.release();
+    Sy.release();
+    I0xx_buf.release();
+    I0yy_buf.release();
+    I0xy_buf.release();
+    I0xx_buf_aux.release();
+    I0yy_buf_aux.release();
+    I0xy_buf_aux.release();
+}
+
+Ptr<DISOpticalFlow> createOptFlow_DIS() { return makePtr<DISOpticalFlowImpl>(); }
+}
+}