Merge remote-tracking branch 'origin/master' into dnn-python-bindings

# Conflicts: # modules/dnn/samples/fcn_semsegm.cpp # modules/dnn/src/caffe/caffe.proto # modules/dnn/src/caffe/compiled/caffe.tar.gz # modules/dnn/src/layers/crop_layer.cpp # modules/dnn/src/layers/crop_layer.hpp # modules/dnn/src/layers/eltwise_layer.cpp # modules/dnn/src/layers/eltwise_layer.hpp

Merge remote-tracking branch 'origin/master' into dnn-python-bindings
# Conflicts: # modules/dnn/samples/fcn_semsegm.cpp # modules/dnn/src/caffe/caffe.proto # modules/dnn/src/caffe/compiled/caffe.tar.gz # modules/dnn/src/layers/crop_layer.cpp # modules/dnn/src/layers/crop_layer.hpp # modules/dnn/src/layers/eltwise_layer.cpp # modules/dnn/src/layers/eltwise_layer.hpp
8dcc2610 · Vitaliy Lyudvichenko · 0674b6f3 · 73459049 · 8dcc2610 · 8dcc2610
Commit 8dcc2610 authored Aug 18, 2016 by Vitaliy Lyudvichenko
14 changed files
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -365,6 +365,27 @@ namespace dnn
        static CV_WRAP Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
    };

+    /* Layers used in semantic segmentation */
+
+    /** Public interface of the crop layer.
+     *
+     *  The implementation visible in crop_layer.cpp expects exactly two inputs: the
+     *  first one (4-dimensional, CV_32F) is the blob to crop, the second one only
+     *  provides the output sizes for the axes starting at @p start_axis.
+     */
+    class CV_EXPORTS_W CropLayer : public Layer
+    {
+    public:
+        /** Creates a crop layer.
+         *  @param start_axis first axis that is cropped; axes before it keep the size of the first input.
+         *  @param offset     per-axis start offsets; the implementation indexes elements 0..3 of this vector.
+         */
+        static Ptr<CropLayer> create(int start_axis, const std::vector<int> &offset);
+    };
+
+    /** Public interface of the element-wise layer, which combines two or more inputs
+     *  with the operation selected by EltwiseOp.
+     */
+    class CV_EXPORTS_W EltwiseLayer : public Layer
+    {
+    public:
+        /** Operation applied across the inputs. */
+        enum EltwiseOp
+        {
+            PROD = 0,
+            SUM = 1,
+            MAX = 2,
+        };
+
+        /** Creates an element-wise layer.
+         *  @param op     operation to apply.
+         *  @param coeffs either empty or one value per input (asserted when the layer is allocated).
+         *                NOTE(review): presumably per-input weights for SUM - confirm against forward().
+         */
+        static Ptr<EltwiseLayer> create(EltwiseOp op, const std::vector<int> &coeffs);
+    };
+
 //! @}
 //! @}


--- a/modules/dnn/samples/fcn_semsegm.cpp
+++ b/modules/dnn/samples/fcn_semsegm.cpp
@@ -86,7 +86,8 @@ static void colorizeSegmentation(dnn::Blob &score, const vector<cv::Vec3b> &colo

 int main(int argc, char **argv)
 {
-    cv::ocl::setUseOpenCL(false);
+    cv::dnn::initModule();          //Required if OpenCV is built as static libs
+    cv::ocl::setUseOpenCL(false);   //OpenCL switcher

    String modelTxt = fcnType + "-heavy-pascal.prototxt";
    String modelBin = fcnType + "-heavy-pascal.caffemodel";
@@ -131,7 +132,7 @@ int main(int argc, char **argv)
    }

    resize(img, img, Size(500, 500));       //FCN accepts 500x500 RGB-images
-    dnn::Blob inputBlob = dnn::Blob::fromImages(img);   //Convert Mat to dnn::Blob image batch
+    dnn::Blob inputBlob = dnn::Blob::fromImages(img);   //Convert Mat to dnn::Blob batch of images
    //! [Prepare blob]

    //! [Set input blob]

--- a/modules/dnn/samples/ssd_object_detection.cpp
+++ b/modules/dnn/samples/ssd_object_detection.cpp
@@ -61,6 +61,8 @@ int main(int argc, char** argv)
        return 0;
    }

+    cv::dnn::initModule();          //Required if OpenCV is built as static libs
+
    String modelConfiguration = parser.get<string>("proto");
    String modelBinary = parser.get<string>("model");


--- a/modules/dnn/src/caffe/caffe.proto
+++ b/modules/dnn/src/caffe/caffe.proto
@@ -599,7 +599,7 @@ message ConvolutionParameter {
  optional Engine engine = 15 [default = DEFAULT];
  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
  // holes. (Kernel dilation is sometimes referred to by its use in the
-  // algorithme à trous from Holschneider et al. 1987.)
+  // algorithme a trous from Holschneider et al. 1987.)
  optional uint32 dilation_h = 18; // The dilation height
  optional uint32 dilation_w = 19; // The dilation width
  optional uint32 dilation = 20; // The dilation; defaults to 1

--- a/modules/dnn/src/caffe/layer_loaders.cpp
+++ b/modules/dnn/src/caffe/layer_loaders.cpp
@@ -9,56 +9,12 @@ namespace cv
 namespace dnn
 {

-//Utils
-
-//Extracts params used into Conv, Deconv and Pooling layers
-static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
-{
-    if (params.has("kernel_h") && params.has("kernel_w"))
-    {
-        kernel.height = params.get<int>("kernel_h");
-        kernel.width = params.get<int>("kernel_w");
-    }
-    else if (params.has("kernel_size"))
-    {
-        kernel.height = kernel.width = params.get<int>("kernel_size");
-    }
-    else
-    {
-        CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
-    }
-    CV_Assert(kernel.height > 0 && kernel.width > 0);
-
-    if (params.has("pad_h") && params.has("pad_w"))
-    {
-        pad.height = params.get<int>("pad_h");
-        pad.width = params.get<int>("pad_w");
-    }
-    else
-    {
-        pad.height = pad.width = params.get<int>("pad", 0);
-    }
-    CV_Assert(pad.height >= 0 && pad.width >= 0);
-
-    if (params.has("stride_h") && params.has("stride_w"))
-    {
-        stride.height = params.get<int>("stride_h");
-        stride.width = params.get<int>("stride_w");
-    }
-    else
-    {
-        stride.height = stride.width = params.get<int>("stride", 1);
-    }
-    CV_Assert(stride.height > 0 && stride.width > 0);
-}
-
 //Layers

 //Convolution and Deconvolution
 static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
 {
    l->setParamsFrom(params);
-    //getCaffeConvParams(params, l->kernel, l->pad, l->stride);
    getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, l->pad.width, l->stride.height, l->stride.width, l->dilation.height, l->dilation.width);

    bool bias = params.get<bool>("bias_term", true);
@@ -273,6 +229,66 @@ Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
    return Ptr<Layer>(PowerLayer::create(power, scale, shift));
 }

+//CropLayer specialization.
+//Reads "axis" (first cropped axis, 2 when absent) and "offset" from params and expands
+//them into a 4-element offset vector; axes before "axis" keep a zero offset.
+//"offset" holds either a single value applied to every cropped axis, or exactly one
+//value per axis starting at "axis".
+//Raises Error::StsBadArg if the axis is outside [0, 4) or the number of offsets does not match.
+template<>
+Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams& params)
+{
+    const int num_dims = 4;
+
+    int start_axis = params.get<int>("axis", 2);
+    if (start_axis < 0)
+        CV_Error(Error::StsBadArg, "crop axis must be non-negative");
+    if (num_dims <= start_axis)
+        CV_Error(Error::StsBadArg, "crop axis bigger than input dim");
+
+    DictValue paramOffset = params.get("offset");
+
+    std::vector<int> offset(num_dims, 0);
+    if (1 < paramOffset.size())
+    {
+        if (num_dims - start_axis != paramOffset.size())
+            CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
+        for (int i = start_axis; i < num_dims; i++)
+        {
+            //the list has one entry per cropped axis, so it is indexed relative to start_axis
+            offset[i] = paramOffset.get<int>(i - start_axis);
+        }
+    }
+    else
+    {
+        //a single value is shared by every cropped axis
+        const int offset_val = paramOffset.get<int>(0);
+        for (int i = start_axis; i < num_dims; i++)
+        {
+            offset[i] = offset_val;
+        }
+    }
+    return Ptr<Layer>(CropLayer::create(start_axis, offset));
+}
+
+//EltwiseLayer specialization.
+//Maps the optional "operation" string (case-insensitive "prod", "sum" or "max"; SUM when
+//absent) to EltwiseLayer::EltwiseOp and copies the optional "coeff" list.
+//Raises cv::Error::StsBadArg for an unrecognised operation name.
+template<>
+Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams& params)
+{
+    EltwiseLayer::EltwiseOp op = EltwiseLayer::SUM;
+    if (params.has("operation"))
+    {
+        String operation = params.get<String>("operation").toLowerCase();
+        if (operation == "prod")
+            op = EltwiseLayer::PROD;
+        else if (operation == "sum")
+            op = EltwiseLayer::SUM;
+        else if (operation == "max")
+            op = EltwiseLayer::MAX;
+        else
+            CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
+    }
+
+    //NOTE(review): coefficients are read as int because EltwiseLayer::create takes
+    //std::vector<int>; fractional values in the model would be truncated - confirm this is intended.
+    std::vector<int> coeffs;
+    if (params.has("coeff"))
+    {
+        DictValue paramCoeff = params.get("coeff");
+        coeffs.resize(paramCoeff.size(), 1);
+        for (int i = 0; i < paramCoeff.size(); i++)
+        {
+            coeffs[i] = paramCoeff.get<int>(i);
+        }
+    }
+    return Ptr<Layer>(EltwiseLayer::create(op, coeffs));
+}
+
 //Explicit instantiation
 template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
 template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
@@ -292,5 +308,8 @@ template Ptr<Layer> createLayerFromCaffe<AbsLayer>(LayerParams&);
 template Ptr<Layer> createLayerFromCaffe<BNLLLayer>(LayerParams&);
 template Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams&);

+template Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams&);
+template Ptr<Layer> createLayerFromCaffe<EltwiseLayer>(LayerParams&);
+
 }
 }
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -93,11 +93,10 @@ void initModule()
    REG_RUNTIME_LAYER_FUNC(BNLL,            createLayerFromCaffe<BNLLLayer>);
    REG_RUNTIME_LAYER_FUNC(AbsVal,          createLayerFromCaffe<AbsLayer>);
    REG_RUNTIME_LAYER_FUNC(Power,           createLayerFromCaffe<PowerLayer>);
-    REG_RUNTIME_LAYER_CLASS(Dropout,        BlankLayer)
-
-    REG_RUNTIME_LAYER_CLASS(Crop, CropLayer)
-    REG_RUNTIME_LAYER_CLASS(Eltwise, EltwiseLayer)
+    REG_RUNTIME_LAYER_CLASS(Dropout,        BlankLayer);

+    REG_RUNTIME_LAYER_FUNC(Crop,            createLayerFromCaffe<CropLayer>);
+    REG_RUNTIME_LAYER_FUNC(Eltwise,         createLayerFromCaffe<EltwiseLayer>);
    REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer)
    REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer)
    REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer)

--- a/modules/dnn/src/layers/crop_layer.cpp
+++ b/modules/dnn/src/layers/crop_layer.cpp
@@ -47,76 +47,57 @@ namespace cv
 {
 namespace dnn
 {
-
-CropLayer::CropLayer(LayerParams &params) : Layer(params)
-{
-    start_axis = params.get<int>("axis", 2);
-    if (4 <= start_axis)
-        CV_Error(Error::StsBadArg, "crop axis bigger than input dim");
-
-    DictValue paramOffset = params.get("offset");
-
-    offset.resize(4, 0);
-    if (1 < paramOffset.size())
+    CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector<int> &offset_)
    {
-        if (4 - start_axis != paramOffset.size())
-            CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
-        for (size_t i = start_axis; i < offset.size(); i++)
-        {
-            offset[i] = paramOffset.get<int>(i);
-        }
+        start_axis = start_axis_;
+        offset = offset_;
    }
-    else
+
+    void CropLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
    {
-        const int offset_val = paramOffset.get<int>(0);
-        for (size_t i = start_axis; i < offset.size(); i++)
-        {
-            offset[i] = offset_val;
-        }
-    }
-}
+        CV_Assert(2 == inputs.size());

-void CropLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
-{
-    CV_Assert(2 == inputs.size());
+        const Blob &inpBlob = *inputs[0];
+        CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);

-    const Blob &inpBlob = *inputs[0];
-    CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);
+        const Blob &inpSzBlob = *inputs[1];

-    const Blob &inpSzBlob = *inputs[1];
+        outSizes.resize(4, 0);
+        for (int i = 0; i < 4; i++)
+        {
+            if (i < start_axis)
+                outSizes[i] = inpBlob.size(i);
+            else
+                outSizes[i] = inpSzBlob.size(i);
+            if (offset[i] + outSizes[i] > inpBlob.size(i))
+                CV_Error(Error::StsBadArg, "invalid crop parameters");
+        }

-    outSizes.resize(4, 0);
-    for (int i = 0; i < 4; i++)
-    {
-        if (i < start_axis)
-            outSizes[i] = inpBlob.size(i);
-        else
-            outSizes[i] = inpSzBlob.size(i);
-        if (offset[i] + outSizes[i] > inpBlob.size(i))
-            CV_Error(Error::StsBadArg, "invalid crop parameters");
+        outputs.resize(1);
+        outputs[0].create(BlobShape(outSizes));
    }

-    outputs.resize(1);
-    outputs[0].create(BlobShape(outSizes));
-}
-
-void CropLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
-{
-    Blob input = *inputs[0];
-    Blob output = outputs[0];
-    for (int num = 0; num < outSizes[0]; ++num)
+    void CropLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
    {
-        for (int ch = 0; ch < outSizes[1]; ++ch)
+        Blob input = *inputs[0];
+        Blob output = outputs[0];
+        for (int num = 0; num < outSizes[0]; ++num)
        {
-            for (int row = 0; row < outSizes[2]; ++row)
+            for (int ch = 0; ch < outSizes[1]; ++ch)
            {
-                float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]);
-                float *dstData = output.ptrf(num, ch, row);
-                memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
+                for (int row = 0; row < outSizes[2]; ++row)
+                {
+                    float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]);
+                    float *dstData = output.ptrf(num, ch, row);
+                    memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
+                }
            }
        }
    }
-}

+    //Factory: wraps a new CropLayerImpl built from the given axis and offsets.
+    Ptr<CropLayer> CropLayer::create(int start_axis, const std::vector<int> &offset)
+    {
+        Ptr<CropLayer> layer(new CropLayerImpl(start_axis, offset));
+        return layer;
+    }
 }
 }
--- a/modules/dnn/src/layers/crop_layer.hpp
+++ b/modules/dnn/src/layers/crop_layer.hpp
@@ -42,19 +42,20 @@
 #ifndef __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>

 namespace cv
 {
 namespace dnn
 {
-    class CropLayer : public Layer
+    class CropLayerImpl : public CropLayer
    {
        int start_axis;
        std::vector<int> offset;
        std::vector<int> outSizes;

    public:
-        CropLayer(LayerParams& params);
+        CropLayerImpl(int start_axis, const std::vector<int> &offset);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };

--- a/modules/dnn/src/layers/eltwise_layer.cpp
+++ b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -47,37 +47,13 @@ namespace cv
 {
 namespace dnn
 {
-    EltwiseLayer::EltwiseLayer(LayerParams &params) : Layer(params)
+    EltwiseLayerImpl::EltwiseLayerImpl(EltwiseOp op_, const std::vector<int> &coeffs_)
    {
-        if (params.has("operation"))
-        {
-            String operation = params.get<String>("operation").toLowerCase();
-            if (operation == "prod")
-                op = PROD;
-            else if (operation == "sum")
-                op = SUM;
-            else if (operation == "max")
-                op = MAX;
-            else
-                CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");
-        }
-        else
-        {
-            op = SUM;
-        }
-
-        if (params.has("coeff"))
-        {
-            DictValue paramCoeff = params.get("coeff");
-            coeffs.resize(paramCoeff.size(), 1);
-            for (int i = 0; i < paramCoeff.size(); i++)
-            {
-                coeffs[i] = paramCoeff.get<int>(i);
-            }
-        }
+        op = op_;
+        coeffs = coeffs_;
    }

-    void EltwiseLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
+    void EltwiseLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(2 <= inputs.size());
        CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
@@ -92,7 +68,7 @@ namespace dnn
        outputs[0].create(shape0);
    }

-    void EltwiseLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
+    void EltwiseLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
    {
        switch (op)
        {
@@ -142,5 +118,10 @@ namespace dnn
            break;
        };
    }
+
+    //Factory: wraps a new EltwiseLayerImpl built from the given operation and coefficients.
+    Ptr<EltwiseLayer> EltwiseLayer::create(EltwiseOp op, const std::vector<int> &coeffs)
+    {
+        Ptr<EltwiseLayer> layer(new EltwiseLayerImpl(op, coeffs));
+        return layer;
+    }
 }
 }
--- a/modules/dnn/src/layers/eltwise_layer.hpp
+++ b/modules/dnn/src/layers/eltwise_layer.hpp
@@ -42,24 +42,18 @@
 #ifndef __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>

 namespace cv
 {
 namespace dnn
 {
-    class EltwiseLayer : public Layer
+    class EltwiseLayerImpl : public EltwiseLayer
    {
-        enum EltwiseOp
-        {
-            PROD = 0,
-            SUM = 1,
-            MAX = 2,
-        };
-
        EltwiseOp op;
        std::vector<int> coeffs;
    public:
-        EltwiseLayer(LayerParams& params);
+        EltwiseLayerImpl(EltwiseOp op, const std::vector<int> &coeffs);
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };

--- a/modules/dnn/src/layers/op_im2col.hpp
+++ b/modules/dnn/src/layers/op_im2col.hpp
@@ -191,9 +191,9 @@ void col2im_cpu(const Dtype* data_col,
                int dilation_h, int dilation_w,
                Dtype* data_im)
 {
-    int height_col = (height + 2 * pad_h - (dilation_h * (patch_h - 1) + 1)) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - (dilation_w * (patch_w - 1) + 1)) / stride_w + 1;
-    int channels_col = channels * patch_h * patch_w;
+    int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+    int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+    int channels_col = channels * kernel_h * kernel_w;

    std::memset(data_im, 0, height * width * channels * sizeof(Dtype));


--- a/modules/optflow/include/opencv2/optflow.hpp
+++ b/modules/optflow/include/opencv2/optflow.hpp
@@ -170,7 +170,7 @@ procedure can be found in @cite Brox2004
 class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow
 {
 public:
-    /** @brief calc function overload to handle separate horizontal (u) and vertical (v) flow components
+    /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components
    (to avoid extra splits/merges) */
    CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0;

@@ -258,6 +258,11 @@ This class implements the Dense Inverse Search (DIS) optical flow algorithm. Mor
 details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected
 parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is
 still relatively fast, use DeepFlow if you need better quality and don't care about speed.
+
+This implementation includes several additional features compared to the algorithm described in the paper,
+including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to
+utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation,
+if the previous frame's flow field is passed).
 */
 class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow
 {
@@ -326,7 +331,7 @@ public:

    /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on
        by default as it typically provides a noticeable quality boost because of increased robustness to
-        illumanition variations. Turn it off if you are certain that your sequence does't contain any changes
+        illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes
        in illumination.
    @see setUseMeanNormalization */
    CV_WRAP virtual bool getUseMeanNormalization() const = 0;

--- a/modules/optflow/src/dis_flow.cpp
+++ b/modules/optflow/src/dis_flow.cpp
@@ -110,6 +110,9 @@ class DISOpticalFlowImpl : public DISOpticalFlow
    vector<Mat_<float> > Ux; //!< x component of the flow vectors
    vector<Mat_<float> > Uy; //!< y component of the flow vectors

+    vector<Mat_<float> > initial_Ux; //!< x component of the initial flow field, if one was passed as an input
+    vector<Mat_<float> > initial_Uy; //!< y component of the initial flow field, if one was passed as an input
+
    Mat_<Vec2f> U; //!< a buffer for the merged flow

    Mat_<float> Sx; //!< intermediate sparse flow representation (x component)
@@ -121,8 +124,8 @@ class DISOpticalFlowImpl : public DISOpticalFlow
    Mat_<float> I0xy_buf; //!< sum of x and y gradient products

    /* Extra buffers that are useful if patch mean-normalization is used: */
-    Mat_<float> I0x_buf; //!< sum of of x gradient values
-    Mat_<float> I0y_buf; //!< sum of of y gradient values
+    Mat_<float> I0x_buf; //!< sum of x gradient values
+    Mat_<float> I0y_buf; //!< sum of y gradient values

    /* Auxiliary buffers used in structure tensor computation: */
    Mat_<float> I0xx_buf_aux;
@@ -134,7 +137,7 @@ class DISOpticalFlowImpl : public DISOpticalFlow
    vector<Ptr<VariationalRefinement> > variational_refinement_processors;

  private: //!< private methods and parallel sections
-    void prepareBuffers(Mat &I0, Mat &I1);
+    void prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow);
    void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x,
                                   Mat &I0y);

@@ -144,10 +147,11 @@ class DISOpticalFlowImpl : public DISOpticalFlow
        int nstripes, stripe_sz;
        int hs;
        Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y;
-        int num_iter;
+        int num_iter, pyr_level;

        PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy,
-                                   Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter);
+                                   Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter,
+                                   int _pyr_level);
        void operator()(const Range &range) const;
    };

@@ -185,7 +189,7 @@ DISOpticalFlowImpl::DISOpticalFlowImpl()
        variational_refinement_processors.push_back(createVariationalFlowRefinement());
 }

-void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
+void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow)
 {
    I0s.resize(coarsest_scale + 1);
    I1s.resize(coarsest_scale + 1);
@@ -195,6 +199,14 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
    Ux.resize(coarsest_scale + 1);
    Uy.resize(coarsest_scale + 1);

+    Mat flow_uv[2];
+    if (use_flow)
+    {
+        split(flow, flow_uv);
+        initial_Ux.resize(coarsest_scale + 1);
+        initial_Uy.resize(coarsest_scale + 1);
+    }
+
    int fraction = 1;
    int cur_rows = 0, cur_cols = 0;

@@ -237,8 +249,6 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
            resize(I1s[i - 1], I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA);
        }

-        fraction *= 2;
-
        if (i >= finest_scale)
        {
            I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size);
@@ -253,7 +263,17 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1)
            variational_refinement_processors[i]->setGamma(variational_refinement_gamma);
            variational_refinement_processors[i]->setSorIterations(5);
            variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter);
+
+            if (use_flow)
+            {
+                resize(flow_uv[0], initial_Ux[i], Size(cur_cols, cur_rows));
+                initial_Ux[i] /= fraction;
+                resize(flow_uv[1], initial_Uy[i], Size(cur_cols, cur_rows));
+                initial_Uy[i] /= fraction;
+            }
        }
+
+        fraction *= 2;
    }
 }

@@ -377,9 +397,10 @@ void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy,
 DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes,
                                                                           int _hs, Mat &dst_Sx, Mat &dst_Sy,
                                                                           Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1,
-                                                                           Mat &_I0x, Mat &_I0y, int _num_iter)
+                                                                           Mat &_I0x, Mat &_I0y, int _num_iter,
+                                                                           int _pyr_level)
    : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1),
-      I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter)
+      I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter), pyr_level(_pyr_level)
 {
    stripe_sz = (int)ceil(hs / (double)nstripes);
 }
@@ -676,10 +697,10 @@ inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int
 void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const
 {
    // force separate processing of stripes if we are using spatial propagation:
-    if(dis->use_spatial_propagation && range.end>range.start+1)
+    if (dis->use_spatial_propagation && range.end > range.start + 1)
    {
-        for(int n=range.start;n<range.end;n++)
-            (*this)(Range(n,n+1));
+        for (int n = range.start; n < range.end; n++)
+            (*this)(Range(n, n + 1));
        return;
    }
    int psz = dis->patch_size;
@@ -708,6 +729,15 @@ void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &ran
    float *x_ptr = dis->I0x_buf.ptr<float>();
    float *y_ptr = dis->I0y_buf.ptr<float>();

+    bool use_temporal_candidates = false;
+    float *initial_Ux_ptr = NULL, *initial_Uy_ptr = NULL;
+    if (!dis->initial_Ux.empty())
+    {
+        initial_Ux_ptr = dis->initial_Ux[pyr_level].ptr<float>();
+        initial_Uy_ptr = dis->initial_Uy[pyr_level].ptr<float>();
+        use_temporal_candidates = true;
+    }
+
    int i, j, dir;
    int start_is, end_is, start_js, end_js;
    int start_i, start_j;
@@ -772,11 +802,28 @@ void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &ran
                    Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2];
                }

-                if (dis->use_spatial_propagation)
+                float min_SSD = INF, cur_SSD;
+                if (use_temporal_candidates || dis->use_spatial_propagation)
                {
-                    /* Updating the current Sx_ptr, Sy_ptr to the best candidate: */
-                    float min_SSD, cur_SSD;
                    COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]);
+                }
+
+                if (use_temporal_candidates)
+                {
+                    /* Try temporal candidates (vectors from the initial flow field that was passed to the function) */
+                    COMPUTE_SSD(cur_SSD, initial_Ux_ptr[(i + psz2) * dis->w + j + psz2],
+                                initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]);
+                    if (cur_SSD < min_SSD)
+                    {
+                        min_SSD = cur_SSD;
+                        Sx_ptr[is * dis->ws + js] = initial_Ux_ptr[(i + psz2) * dis->w + j + psz2];
+                        Sy_ptr[is * dis->ws + js] = initial_Uy_ptr[(i + psz2) * dis->w + j + psz2];
+                    }
+                }
+
+                if (dis->use_spatial_propagation)
+                {
+                    /* Try spatial candidates: */
                    if (dir * js > dir * start_js)
                    {
                        COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]);
@@ -967,12 +1014,16 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo

    Mat I0Mat = I0.getMat();
    Mat I1Mat = I1.getMat();
-    flow.create(I1Mat.size(), CV_32FC2);
+    bool use_input_flow = false;
+    if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2)
+        use_input_flow = true;
+    else
+        flow.create(I1Mat.size(), CV_32FC2);
    Mat &flowMat = flow.getMatRef();
    coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1;
    int num_stripes = getNumThreads();

-    prepareBuffers(I0Mat, I1Mat);
+    prepareBuffers(I0Mat, I1Mat, flowMat, use_input_flow);
    Ux[coarsest_scale].setTo(0.0f);
    Uy[coarsest_scale].setTo(0.0f);

@@ -990,13 +1041,13 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo
             * with spatial propagation reproducible
             */
            parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i],
-                                                                  I1s_ext[i], I0xs[i], I0ys[i], 2));
+                                                                  I1s_ext[i], I0xs[i], I0ys[i], 2, i));
        }
        else
        {
            parallel_for_(Range(0, num_stripes),
                          PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i],
-                                                     I0xs[i], I0ys[i], 1));
+                                                     I0xs[i], I0ys[i], 1, i));
        }

        parallel_for_(Range(0, num_stripes),

--- a/samples/python2/dis_opt_flow.py
+++ b/samples/python2/dis_opt_flow.py
+#!/usr/bin/env python
+
+'''
+example to show optical flow estimation using DISOpticalFlow
+
+USAGE: dis_opt_flow.py [<video_source>]
+
+Keys:
+ 1  - toggle HSV flow visualization
+ 2  - toggle glitch
+ 3  - toggle spatial propagation of flow vectors
+ 4  - toggle temporal propagation of flow vectors
+ESC - exit
+'''
+
+# Python 2/3 compatibility
+from __future__ import print_function
+
+import numpy as np
+import cv2
+import video
+
+
+def draw_flow(img, flow, step=16):
+    '''Return a BGR copy of the grayscale img with flow vectors drawn on a regular grid.
+
+    Grid points are spaced `step` pixels apart, starting at step/2; each vector is
+    drawn as a green segment with a dot at its start point.
+    '''
+    rows, cols = img.shape[:2]
+    grid = np.mgrid[step/2:rows:step, step/2:cols:step].reshape(2,-1).astype(int)
+    y, x = grid
+    fx, fy = flow[y,x].T
+    # one (start, end) pair per grid point, rounded to the nearest pixel
+    segments = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
+    segments = np.int32(segments + 0.5)
+    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    cv2.polylines(vis, segments, 0, (0, 255, 0))
+    for start, _end in segments:
+        cv2.circle(vis, (start[0], start[1]), 1, (0, 255, 0), -1)
+    return vis
+
+
+def draw_hsv(flow):
+    '''Return a BGR image encoding flow direction as hue and flow magnitude as value.'''
+    height, width = flow.shape[:2]
+    fx = flow[:,:,0]
+    fy = flow[:,:,1]
+    # arctan2 is shifted by pi to be non-negative, then scaled by 180/(2*pi)
+    hue = (np.arctan2(fy, fx) + np.pi)*(180/np.pi/2)
+    magnitude = np.sqrt(fx*fx+fy*fy)
+    hsv = np.zeros((height, width, 3), np.uint8)
+    hsv[...,0] = hue
+    hsv[...,1] = 255
+    hsv[...,2] = np.minimum(magnitude*4, 255)
+    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
+
+
+def warp_flow(img, flow):
+    '''Remap img through the negated flow field (the input flow array is not modified).'''
+    rows, cols = flow.shape[:2]
+    # absolute sampling coordinates: pixel grid minus the flow vector
+    sample_map = -flow
+    sample_map[:,:,0] += np.arange(cols)
+    sample_map[:,:,1] += np.arange(rows)[:,np.newaxis]
+    return cv2.remap(img, sample_map, None, cv2.INTER_LINEAR)
+
+
+if __name__ == '__main__':
+    # Interactive demo: reads frames from the given source (or device 0), computes DIS
+    # optical flow between consecutive frames and shows it; see the module docstring for keys.
+    import sys
+    print(__doc__)
+    try:
+        fn = sys.argv[1]
+    except IndexError:
+        fn = 0
+
+    cam = video.create_capture(fn)
+    ret, prev = cam.read()
+    if not ret:
+        # without a first frame there is nothing to compute flow against
+        sys.exit('could not read a frame from the video source')
+    prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
+    show_hsv = False
+    show_glitch = False
+    use_spatial_propagation = False
+    use_temporal_propagation = True
+    cur_glitch = prev.copy()
+    inst = cv2.optflow.createOptFlow_DIS(cv2.optflow.DISOPTICAL_FLOW_PRESET_MEDIUM)
+    inst.setUseSpatialPropagation(use_spatial_propagation)
+
+    flow = None
+    while True:
+        ret, img = cam.read()
+        if not ret:
+            # end of stream or capture failure: stop instead of handing None to cvtColor
+            break
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        if flow is not None and use_temporal_propagation:
+            #warp previous flow to get an initial approximation for the current flow:
+            flow = inst.calc(prevgray, gray, warp_flow(flow,flow))
+        else:
+            flow = inst.calc(prevgray, gray, None)
+        prevgray = gray
+
+        cv2.imshow('flow', draw_flow(gray, flow))
+        if show_hsv:
+            cv2.imshow('flow HSV', draw_hsv(flow))
+        if show_glitch:
+            cur_glitch = warp_flow(cur_glitch, flow)
+            cv2.imshow('glitch', cur_glitch)
+
+        ch = 0xFF & cv2.waitKey(5)
+        if ch == 27:
+            break
+        if ch == ord('1'):
+            show_hsv = not show_hsv
+            print('HSV flow visualization is', ['off', 'on'][show_hsv])
+        if ch == ord('2'):
+            show_glitch = not show_glitch
+            if show_glitch:
+                cur_glitch = img.copy()
+            print('glitch is', ['off', 'on'][show_glitch])
+        if ch == ord('3'):
+            use_spatial_propagation = not use_spatial_propagation
+            inst.setUseSpatialPropagation(use_spatial_propagation)
+            print('spatial propagation is', ['off', 'on'][use_spatial_propagation])
+        if ch == ord('4'):
+            use_temporal_propagation = not use_temporal_propagation
+            print('temporal propagation is', ['off', 'on'][use_temporal_propagation])
+    cv2.destroyAllWindows()