Merge pull request #1552 from amuhic:master

Implementation of CSR-DCF tracker (#1552) * Initial commit for CSR-DCF tracker implementation * Fixes for automatic build * General code fixes * Removed unused parameters. Added CSRT to automatic tests. * Fixed VS build warnings. Fixed a bug with gray sequences. * Fixed VS build errors for samples file.

Merge pull request #1552 from amuhic:master
Implementation of CSR-DCF tracker (#1552) * Initial commit for CSR-DCF tracker implementation * Fixes for automatic build * General code fixes * Removed unused parameters. Added CSRT to automatic tests. * Fixed VS build warnings. Fixed a bug with gray sequences. * Fixed VS build errors for samples file.
ced5aa76 · Andrej Muhič · Alexander Alekhin · 295dce52 · ced5aa76 · ced5aa76
Commit ced5aa76 authored Feb 22, 2018 by Andrej Muhič Committed by Alexander Alekhin Feb 22, 2018
11 changed files
--- a/modules/tracking/include/opencv2/tracking/tracker.hpp
+++ b/modules/tracking/include/opencv2/tracking/tracker.hpp
@@ -1462,6 +1462,71 @@ public:
 //! @}
+/*********************************** CSRT ************************************/
+/** @brief Discriminative Correlation Filter Tracker with Channel and Spatial Reliability
+*/
+class CV_EXPORTS_W TrackerCSRT : public Tracker
+{
+public:
+  struct CV_EXPORTS Params
+  {
+    /**
+    * \brief Constructor
+    */
+    Params();
+    /**
+    * \brief Read parameters from file
+    */
+    void read(const FileNode& /*fn*/);
+    /**
+    * \brief Write parameters from file
+    */
+    void write(cv::FileStorage& fs) const;
+    bool use_hog;
+    bool use_color_names;
+    bool use_gray;
+    bool use_rgb;
+    bool use_channel_weights;
+    bool use_segmentation;
+    std::string window_function; //!<  Window function: "hann", "cheb", "kaiser"
+    float kaiser_alpha;
+    float cheb_attenuation;
+    float template_size;
+    float gsl_sigma;
+    float hog_orientations;
+    float hog_clip;
+    float padding;
+    float filter_lr;
+    float weights_lr;
+    int num_hog_channels_used;
+    int admm_iterations;
+    int histogram_bins;
+    float histogram_lr;
+    int background_ratio;
+    int number_of_scales;
+    float scale_sigma_factor;
+    float scale_model_max_area;
+    float scale_lr;
+    float scale_step;
+  };
+  /** @brief Constructor
+  @param parameters CSRT parameters TrackerCSRT::Params
+  */
+  static Ptr<TrackerCSRT> create(const TrackerCSRT::Params &parameters);
+  CV_WRAP static Ptr<TrackerCSRT> create();
+  virtual void setInitialMask(const Mat mask) = 0;
+  virtual ~TrackerCSRT() {}
+};
 } /* namespace cv */
 #endif
--- a/modules/tracking/samples/csrt.cpp
+++ b/modules/tracking/samples/csrt.cpp
+#include <opencv2/core/utility.hpp>
+#include <opencv2/tracking.hpp>
+#include <opencv2/videoio.hpp>
+#include <opencv2/highgui.hpp>
+#include <iostream>
+#include <cstring>
+#include <fstream>
+#include "samples_utility.hpp"
+using namespace std;
+using namespace cv;
+int main(int argc, char** argv)
+{
+    // show help
+    if (argc<2) {
+        cout <<
+            " Usage: example_tracking_csrt <video_name>\n"
+            " examples:\n"
+            " example_tracking_csrt Bolt/img/%04.jpg\n"
+            " example_tracking_csrt Bolt/img/%04.jpg Bolt/grouondtruth.txt\n"
+            " example_tracking_csrt faceocc2.webm\n"
+            << endl;
+        return 0;
+    }
+    // create the tracker
+    Ptr<TrackerCSRT> tracker = TrackerCSRT::create();
+    // const char* param_file_path = "/home/amuhic/Workspace/3_dip/params.yml";
+    // FileStorage fs(params_file_path, FileStorage::WRITE);
+    // tracker->write(fs);
+    // FileStorage fs(param_file_path, FileStorage::READ);
+    // tracker->read( fs.root());
+    // set input video
+    std::string video = argv[1];
+    VideoCapture cap(video);
+    // and read first frame
+    Mat frame;
+    cap >> frame;
+    // target bounding box
+    Rect2d roi;
+    if (argc > 2) {
+        // read first line of ground-truth file
+        std::string groundtruthPath = argv[2];
+        std::ifstream gtIfstream(groundtruthPath.c_str());
+        std::string gtLine;
+        getline(gtIfstream, gtLine);
+        gtIfstream.close();
+        // parse the line by elements
+        std::stringstream gtStream(gtLine);
+        std::string element;
+        std::vector<int> elements;
+        while (std::getline(gtStream, element, ','))
+        {
+            elements.push_back(cvRound(std::atof(element.c_str())));
+        }
+        if (elements.size() == 4) {
+            // ground-truth is rectangle
+            roi = cv::Rect(elements[0], elements[1], elements[2], elements[3]);
+        }
+        else if (elements.size() == 8) {
+            // ground-truth is polygon
+            int xMin = cvRound(min(elements[0], min(elements[2], min(elements[4], elements[6]))));
+            int yMin = cvRound(min(elements[1], min(elements[3], min(elements[5], elements[7]))));
+            int xMax = cvRound(max(elements[0], max(elements[2], max(elements[4], elements[6]))));
+            int yMax = cvRound(max(elements[1], max(elements[3], max(elements[5], elements[7]))));
+            roi = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
+            // create mask from polygon and set it to the tracker
+            cv::Rect aaRect = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
+            cout << aaRect.size() << endl;
+            Mat mask = Mat::zeros(aaRect.size(), CV_8UC1);
+            const int n = 4;
+            std::vector<cv::Point> poly_points(n);
+            //Translate x and y to rects start position
+            int sx = aaRect.x;
+            int sy = aaRect.y;
+            for (int i = 0; i < n; ++i) {
+                poly_points[i] = Point(elements[2 * i] - sx, elements[2 * i + 1] - sy);
+            }
+            cv::fillConvexPoly(mask, poly_points, Scalar(1.0), 8);
+            mask.convertTo(mask, CV_32FC1);
+            tracker->setInitialMask(mask);
+        }
+        else {
+            std::cout << "Number of ground-truth elements is not 4 or 8." << std::endl;
+        }
+    }
+    else {
+        // second argument is not given - user selects target
+        roi = selectROI("tracker", frame, true, false);
+    }
+    //quit if ROI was not selected
+    if (roi.width == 0 || roi.height == 0)
+        return 0;
+    // initialize the tracker
+    int64 t1 = cv::getTickCount();
+    tracker->init(frame, roi);
+    int64 t2 = cv::getTickCount();
+    int64 tick_counter = t2 - t1;
+    // do the tracking
+    printf("Start the tracking process, press ESC to quit.\n");
+    int frame_idx = 1;
+    for (;;) {
+        // get frame from the video
+        cap >> frame;
+        // stop the program if no more images
+        if (frame.rows == 0 || frame.cols == 0)
+            break;
+        // update the tracking result
+        t1 = cv::getTickCount();
+        bool isfound = tracker->update(frame, roi);
+        t2 = cv::getTickCount();
+        tick_counter += t2 - t1;
+        frame_idx++;
+        if (!isfound) {
+            cout << "The target has been lost...\n";
+            waitKey(0);
+            return 0;
+        }
+        // draw the tracked object and show the image
+        rectangle(frame, roi, Scalar(255, 0, 0), 2, 1);
+        imshow("tracker", frame);
+        //quit on ESC button
+        if (waitKey(1) == 27)break;
+    }
+    cout << "Elapsed sec: " << static_cast<double>(tick_counter) / cv::getTickFrequency() << endl;
+    cout << "FPS: " << ((double)(frame_idx)) / (static_cast<double>(tick_counter) / cv::getTickFrequency()) << endl;
+}
--- a/modules/tracking/samples/samples_utility.hpp
+++ b/modules/tracking/samples/samples_utility.hpp
@@ -21,6 +21,8 @@ inline cv::Ptr<cv::Tracker> createTrackerByName(cv::String name)
        tracker = cv::TrackerGOTURN::create();
    else if (name == "MOSSE")
        tracker = cv::TrackerMOSSE::create();
+    else if (name == "CSRT")
+        tracker = cv::TrackerCSRT::create();
    else
        CV_Error(cv::Error::StsBadArg, "Invalid tracking algorithm name\n");

--- a/modules/tracking/src/trackerCSRT.cpp
+++ b/modules/tracking/src/trackerCSRT.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "precomp.hpp"
+#include "trackerCSRTSegmentation.hpp"
+#include "trackerCSRTUtils.hpp"
+#include "trackerCSRTScaleEstimation.hpp"
+namespace cv
+{
+/**
+* \brief Implementation of TrackerModel for CSRT algorithm
+*/
+class TrackerCSRTModel : public TrackerModel
+{
+public:
+    TrackerCSRTModel(TrackerCSRT::Params /*params*/){}
+    ~TrackerCSRTModel(){}
+protected:
+    void modelEstimationImpl(const std::vector<Mat>& /*responses*/){}
+    void modelUpdateImpl(){}
+};
+class TrackerCSRTImpl : public TrackerCSRT
+{
+public:
+    TrackerCSRTImpl(const TrackerCSRT::Params &parameters = TrackerCSRT::Params());
+    void read(const FileNode& fn);
+    void write(FileStorage& fs) const;
+protected:
+    TrackerCSRT::Params params;
+    bool initImpl(const Mat& image, const Rect2d& boundingBox);
+    virtual void setInitialMask(const Mat mask);
+    bool updateImpl(const Mat& image, Rect2d& boundingBox);
+    void update_csr_filter(const Mat &image, const Mat &my_mask);
+    void update_histograms(const Mat &image, const Rect &region);
+    void extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb);
+    std::vector<Mat> create_csr_filter(const std::vector<cv::Mat>
+            img_features, const cv::Mat Y, const cv::Mat P);
+    Mat calculate_response(const Mat &image, const std::vector<Mat> filter);
+    Mat get_location_prior(const Rect roi, const Size2f target_size, const Size img_sz);
+    Mat segment_region(const Mat &image, const Point2f &object_center,
+            const Size2f &template_size, const Size &target_size, float scale_factor);
+    Point2f estimate_new_position(const Mat &image);
+    std::vector<Mat> get_features(const Mat &patch, const Size2i &feature_size);
+private:
+    bool check_mask_area(const Mat &mat, const double obj_area);
+    float current_scale_factor;
+    Mat window;
+    Mat yf;
+    Rect2f bounding_box;
+    std::vector<Mat> csr_filter;
+    std::vector<float> filter_weights;
+    Size2f original_target_size;
+    Size2i image_size;
+    Size2f template_size;
+    Size2i rescaled_template_size;
+    float rescale_ratio;
+    Point2f object_center;
+    DSST dsst;
+    Histogram hist_foreground;
+    Histogram hist_background;
+    double p_b;
+    Mat erode_element;
+    Mat filter_mask;
+    Mat preset_mask;
+    Mat default_mask;
+    float default_mask_area;
+    int cell_size;
+};
+Ptr<TrackerCSRT> TrackerCSRT::create(const TrackerCSRT::Params &parameters)
+{
+    return Ptr<TrackerCSRTImpl>(new TrackerCSRTImpl(parameters));
+}
+Ptr<TrackerCSRT> TrackerCSRT::create()
+{
+    return Ptr<TrackerCSRTImpl>(new TrackerCSRTImpl());
+}
+TrackerCSRTImpl::TrackerCSRTImpl(const TrackerCSRT::Params &parameters) :
+    params(parameters)
+{
+    isInit = false;
+}
+void TrackerCSRTImpl::read(const cv::FileNode& fn)
+{
+    params.read(fn);
+}
+void TrackerCSRTImpl::write(cv::FileStorage& fs) const
+{
+    params.write(fs);
+}
+void TrackerCSRTImpl::setInitialMask(const Mat mask)
+{
+    preset_mask = mask;
+}
+bool TrackerCSRTImpl::check_mask_area(const Mat &mat, const double obj_area)
+{
+    double threshold = 0.05;
+    double mask_area= sum(mat)[0];
+    if(mask_area < threshold*obj_area) {
+        return false;
+    }
+    return true;
+}
+Mat TrackerCSRTImpl::calculate_response(const Mat &image, const std::vector<Mat> filter)
+{
+    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
+        cvFloor(current_scale_factor * template_size.height));
+    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
+    std::vector<Mat> ftrs = get_features(patch, yf.size());
+    std::vector<Mat> Ffeatures = fourier_transform_features(ftrs);
+    Mat resp, res;
+    if(params.use_channel_weights){
+        res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
+        Mat resp_ch;
+        Mat mul_mat;
+        for(size_t i = 0; i < Ffeatures.size(); ++i) {
+            mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0, true);
+            res += (resp_ch * filter_weights[i]);
+        }
+        idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
+    } else {
+        res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
+        Mat resp_ch;
+        for(size_t i = 0; i < Ffeatures.size(); ++i) {
+            mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0 , true);
+            res = res + resp_ch;
+        }
+        idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
+    }
+    return res;
+}
+void TrackerCSRTImpl::update_csr_filter(const Mat &image, const Mat &mask)
+{
+    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
+        cvFloor(current_scale_factor * template_size.height));
+    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
+    std::vector<Mat> ftrs = get_features(patch, yf.size());
+    std::vector<Mat> Fftrs = fourier_transform_features(ftrs);
+    std::vector<Mat> new_csr_filter = create_csr_filter(Fftrs, yf, mask);
+    //calculate per channel weights
+    if(params.use_channel_weights) {
+        Mat current_resp;
+        double max_val;
+        float sum_weights = 0;
+        std::vector<float> new_filter_weights = std::vector<float>(new_csr_filter.size());
+        for(size_t i = 0; i < new_csr_filter.size(); ++i) {
+            mulSpectrums(Fftrs[i], new_csr_filter[i], current_resp, 0, true);
+            idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
+            minMaxLoc(current_resp, NULL, &max_val, NULL, NULL);
+            sum_weights += static_cast<float>(max_val);
+            new_filter_weights[i] = static_cast<float>(max_val);
+        }
+        //update filter weights with new values
+        float updated_sum = 0;
+        for(size_t i = 0; i < filter_weights.size(); ++i) {
+            filter_weights[i] = filter_weights[i]*(1.0f - params.weights_lr) +
+                params.weights_lr * (new_filter_weights[i] / sum_weights);
+            updated_sum += filter_weights[i];
+        }
+        //normalize weights
+        for(size_t i = 0; i < filter_weights.size(); ++i) {
+            filter_weights[i] /= updated_sum;
+        }
+    }
+    for(size_t i = 0; i < csr_filter.size(); ++i) {
+        csr_filter[i] = (1.0f - params.filter_lr)*csr_filter[i] + params.filter_lr * new_csr_filter[i];
+    }
+    std::vector<Mat>().swap(ftrs);
+    std::vector<Mat>().swap(Fftrs);
+}
+std::vector<Mat> TrackerCSRTImpl::get_features(const Mat &patch, const Size2i &feature_size)
+{
+    std::vector<Mat> features;
+    if (params.use_hog) {
+        std::vector<Mat> hog = get_features_hog(patch, cell_size);
+        features.insert(features.end(), hog.begin(),
+                hog.begin()+params.num_hog_channels_used);
+    }
+    if (params.use_color_names) {
+        std::vector<Mat> cn;
+        cn = get_features_cn(patch, feature_size);
+        features.insert(features.end(), cn.begin(), cn.end());
+    }
+    if(params.use_gray) {
+        Mat gray_m;
+        cvtColor(patch, gray_m, CV_BGR2GRAY);
+        resize(gray_m, gray_m, feature_size, 0, 0, INTER_CUBIC);
+        gray_m.convertTo(gray_m, CV_32FC1, 1.0/255.0, -0.5);
+        features.push_back(gray_m);
+    }
+    if(params.use_rgb) {
+        std::vector<Mat> rgb_features = get_features_rgb(patch, feature_size);
+        features.insert(features.end(), rgb_features.begin(), rgb_features.end());
+    }
+    for (size_t i = 0; i < features.size(); ++i) {
+        features.at(i) = features.at(i).mul(window);
+    }
+    return features;
+}
+class ParallelCreateCSRFilter : public ParallelLoopBody {
+public:
+    ParallelCreateCSRFilter(
+        const std::vector<cv::Mat> img_features,
+        const cv::Mat Y,
+        const cv::Mat P,
+        int admm_iterations,
+        std::vector<Mat> &result_filter_):
+        result_filter(result_filter_)
+    {
+        this->img_features = img_features;
+        this->Y = Y;
+        this->P = P;
+        this->admm_iterations = admm_iterations;
+    }
+    virtual void operator ()(const Range& range) const
+    {
+        for (int i = range.start; i < range.end; i++) {
+            float mu = 5.0f;
+            float beta = 3.0f;
+            float mu_max = 20.0f;
+            float lambda = mu / 100.0f;
+            Mat F = img_features[i];
+            Mat Sxy, Sxx;
+            mulSpectrums(F, Y, Sxy, 0, true);
+            mulSpectrums(F, F, Sxx, 0, true);
+            Mat H;
+            H = divide_complex_matrices(Sxy, (Sxx + lambda));
+            idft(H, H, DFT_SCALE|DFT_REAL_OUTPUT);
+            H = H.mul(P);
+            dft(H, H, DFT_COMPLEX_OUTPUT);
+            Mat L = Mat::zeros(H.size(), H.type()); //Lagrangian multiplier
+            Mat G;
+            for(int iteration = 0; iteration < admm_iterations; ++iteration) {
+                G = divide_complex_matrices((Sxy + (mu * H) - L) , (Sxx + mu));
+                idft((mu * G) + L, H, DFT_SCALE | DFT_REAL_OUTPUT);
+                float lm = 1.0f / (lambda+mu);
+                H = H.mul(P*lm);
+                dft(H, H, DFT_COMPLEX_OUTPUT);
+                //Update variables for next iteration
+                L = L + mu * (G - H);
+                mu = min(mu_max, beta*mu);
+            }
+            result_filter[i] = H;
+        }
+    }
+    ParallelCreateCSRFilter& operator=(const ParallelCreateCSRFilter &) {
+        return *this;
+    }
+private:
+    int admm_iterations;
+    Mat Y;
+    Mat P;
+    std::vector<Mat> img_features;
+    std::vector<Mat> &result_filter;
+};
+std::vector<Mat> TrackerCSRTImpl::create_csr_filter(
+        const std::vector<cv::Mat> img_features,
+        const cv::Mat Y,
+        const cv::Mat P)
+{
+    std::vector<Mat> result_filter;
+    result_filter.resize(img_features.size());
+    ParallelCreateCSRFilter parallelCreateCSRFilter(img_features, Y, P,
+            params.admm_iterations, result_filter);
+    parallel_for_(Range(0, static_cast<int>(result_filter.size())), parallelCreateCSRFilter);
+    return result_filter;
+}
+Mat TrackerCSRTImpl::get_location_prior(
+        const Rect roi,
+        const Size2f target_size,
+        const Size img_sz)
+{
+    int x1 = cvRound(max(min(roi.x-1, img_sz.width-1) , 0));
+    int y1 = cvRound(max(min(roi.y-1, img_sz.height-1) , 0));
+    int x2 = cvRound(min(max(roi.width-1, 0) , img_sz.width-1));
+    int y2 = cvRound(min(max(roi.height-1, 0) , img_sz.height-1));
+    Size target_sz;
+    target_sz.width = target_sz.height = cvFloor(min(target_size.width, target_size.height));
+    double cx = x1 + (x2-x1)/2.;
+    double cy = y1 + (y2-y1)/2.;
+    double kernel_size_width = 1.0/(0.5*static_cast<double>(target_sz.width)*1.4142+1);
+    double kernel_size_height = 1.0/(0.5*static_cast<double>(target_sz.height)*1.4142+1);
+    cv::Mat kernel_weight = Mat::zeros(1 + cvFloor(y2 - y1) , 1+cvFloor(-(x1-cx) + (x2-cx)), CV_64FC1);
+    for (int y = y1; y < y2+1; ++y){
+        double * weightPtr = kernel_weight.ptr<double>(y);
+        double tmp_y = std::pow((cy-y)*kernel_size_height, 2);
+        for (int x = x1; x < x2+1; ++x){
+            weightPtr[x] = kernel_epan(std::pow((cx-x)*kernel_size_width,2) + tmp_y);
+        }
+    }
+    double max_val;
+    cv::minMaxLoc(kernel_weight, NULL, &max_val, NULL, NULL);
+    Mat fg_prior = kernel_weight / max_val;
+    fg_prior.setTo(0.5, fg_prior < 0.5);
+    fg_prior.setTo(0.9, fg_prior > 0.9);
+    return fg_prior;
+}
+Mat TrackerCSRTImpl::segment_region(
+        const Mat &image,
+        const Point2f &object_center,
+        const Size2f &template_size,
+        const Size &target_size,
+        float scale_factor)
+{
+    Rect valid_pixels;
+    Mat patch = get_subwindow(image, object_center, cvFloor(scale_factor * template_size.width),
+        cvFloor(scale_factor * template_size.height), &valid_pixels);
+    Size2f scaled_target = Size2f(target_size.width * scale_factor,
+            target_size.height * scale_factor);
+    Mat fg_prior = get_location_prior(
+            Rect(0,0, patch.size().width, patch.size().height),
+            scaled_target , patch.size());
+    std::vector<Mat> img_channels;
+    split(patch, img_channels);
+    std::pair<Mat, Mat> probs = Segment::computePosteriors2(img_channels, 0, 0, patch.cols, patch.rows,
+                    p_b, fg_prior, 1.0-fg_prior, hist_foreground, hist_background);
+    Mat mask = Mat::zeros(probs.first.size(), probs.first.type());
+    probs.first(valid_pixels).copyTo(mask(valid_pixels));
+    double max_resp = get_max(mask);
+    threshold(mask, mask, max_resp / 2.0, 1, THRESH_BINARY);
+    mask.convertTo(mask, CV_32FC1, 1.0);
+    return mask;
+}
+void TrackerCSRTImpl::extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb)
+{
+    // get coordinates of the region
+    int x1 = std::min(std::max(0, region.x), image.cols-1);
+    int y1 = std::min(std::max(0, region.y), image.rows-1);
+    int x2 = std::min(std::max(0, region.x + region.width), image.cols-1);
+    int y2 = std::min(std::max(0, region.y + region.height), image.rows-1);
+    // calculate coordinates of the background region
+    int offsetX = (x2-x1+1) / params.background_ratio;
+    int offsetY = (y2-y1+1) / params.background_ratio;
+    int outer_y1 = std::max(0, (int)(y1-offsetY));
+    int outer_y2 = std::min(image.rows, (int)(y2+offsetY+1));
+    int outer_x1 = std::max(0, (int)(x1-offsetX));
+    int outer_x2 = std::min(image.cols, (int)(x2+offsetX+1));
+    // calculate probability for the background
+    p_b = 1.0 - ((x2-x1+1) * (y2-y1+1)) /
+        ((double) (outer_x2-outer_x1+1) * (outer_y2-outer_y1+1));
+    // split multi-channel image into the std::vector of matrices
+    std::vector<Mat> img_channels(image.channels());
+    split(image, img_channels);
+    for(size_t k=0; k<img_channels.size(); k++) {
+        img_channels.at(k).convertTo(img_channels.at(k), CV_8UC1);
+    }
+    hf.extractForegroundHistogram(img_channels, Mat(), false, x1, y1, x2, y2);
+    hb.extractBackGroundHistogram(img_channels, x1, y1, x2, y2,
+        outer_x1, outer_y1, outer_x2, outer_y2);
+    std::vector<Mat>().swap(img_channels);
+}
+void TrackerCSRTImpl::update_histograms(const Mat &image, const Rect &region)
+{
+    // create temporary histograms
+    Histogram hf(image.channels(), params.histogram_bins);
+    Histogram hb(image.channels(), params.histogram_bins);
+    extract_histograms(image, region, hf, hb);
+    // get histogram vectors from temporary histograms
+    std::vector<double> hf_vect_new = hf.getHistogramVector();
+    std::vector<double> hb_vect_new = hb.getHistogramVector();
+    // get histogram vectors from learned histograms
+    std::vector<double> hf_vect = hist_foreground.getHistogramVector();
+    std::vector<double> hb_vect = hist_background.getHistogramVector();
+    // update histograms - use learning rate
+    for(size_t i=0; i<hf_vect.size(); i++) {
+        hf_vect_new[i] = (1-params.histogram_lr)*hf_vect[i] +
+            params.histogram_lr*hf_vect_new[i];
+        hb_vect_new[i] = (1-params.histogram_lr)*hb_vect[i] +
+            params.histogram_lr*hb_vect_new[i];
+    }
+    // set learned histograms
+    hist_foreground.setHistogramVector(&hf_vect_new[0]);
+    hist_background.setHistogramVector(&hb_vect_new[0]);
+    std::vector<double>().swap(hf_vect);
+    std::vector<double>().swap(hb_vect);
+}
+Point2f TrackerCSRTImpl::estimate_new_position(const Mat &image)
+{
+    Mat resp = calculate_response(image, csr_filter);
+    Point max_loc;
+    minMaxLoc(resp, NULL, NULL, NULL, &max_loc);
+    // take into account also subpixel accuracy
+    float col = ((float) max_loc.x) + subpixel_peak(resp, "horizontal", max_loc);
+    float row = ((float) max_loc.y) + subpixel_peak(resp, "vertical", max_loc);
+    if(row + 1 > (float)resp.rows / 2.0f) {
+        row = row - resp.rows;
+    }
+    if(col + 1 > (float)resp.cols / 2.0f) {
+        col = col - resp.cols;
+    }
+    // calculate x and y displacements
+    Point2f new_center = object_center + Point2f(current_scale_factor * (1.0f / rescale_ratio) *cell_size*(col),
+            current_scale_factor * (1.0f / rescale_ratio) *cell_size*(row));
+    //sanity checks
+    if(new_center.x < 0)
+        new_center.x = 0;
+    if(new_center.x >= image_size.width)
+        new_center.x = static_cast<float>(image_size.width - 1);
+    if(new_center.y < 0)
+        new_center.y = 0;
+    if(new_center.y >= image_size.height)
+        new_center.y = static_cast<float>(image_size.height - 1);
+    return new_center;
+}
+// *********************************************************************
+// *                        Update API function                        *
+// *********************************************************************
+bool TrackerCSRTImpl::updateImpl(const Mat& image_, Rect2d& boundingBox)
+{
+    //treat gray image as color image
+    Mat image;
+    if(image_.channels() == 1) {
+        std::vector<Mat> channels(3);
+        channels[0] = channels[1] = channels[2] = image_;
+        merge(channels, image);
+    } else {
+        image = image_;
+    }
+    object_center = estimate_new_position(image);
+    current_scale_factor = dsst.getScale(image, object_center);
+    //update bouding_box according to new scale and location
+    bounding_box.x = object_center.x - current_scale_factor * original_target_size.width / 2.0f;
+    bounding_box.y = object_center.y - current_scale_factor * original_target_size.height / 2.0f;
+    bounding_box.width = current_scale_factor * original_target_size.width;
+    bounding_box.height = current_scale_factor * original_target_size.height;
+    //update tracker
+    if(params.use_segmentation) {
+        Mat hsv_img = bgr2hsv(image);
+        update_histograms(hsv_img, bounding_box);
+        filter_mask = segment_region(hsv_img, object_center,
+                template_size,original_target_size, current_scale_factor);
+        resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
+        if(check_mask_area(filter_mask, default_mask_area)) {
+            dilate(filter_mask , filter_mask, erode_element);
+        } else {
+            filter_mask = default_mask;
+        }
+    } else {
+        filter_mask = default_mask;
+    }
+    update_csr_filter(image, filter_mask);
+    dsst.update(image, object_center);
+    boundingBox = bounding_box;
+    return true;
+}
+// *********************************************************************
+// *                        Init API function                          *
+// *********************************************************************
+bool TrackerCSRTImpl::initImpl(const Mat& image_, const Rect2d& boundingBox)
+{
+    cv::setNumThreads(getNumThreads());
+    //treat gray image as color image
+    Mat image;
+    if(image_.channels() == 1) {
+        std::vector<Mat> channels(3);
+        channels[0] = channels[1] = channels[2] = image_;
+        merge(channels, image);
+    } else {
+        image = image_;
+    }
+    current_scale_factor = 1.0;
+    image_size = image.size();
+    bounding_box = boundingBox;
+    cell_size = cvFloor(std::min(4.0, std::max(1.0, static_cast<double>(
+        cvCeil((bounding_box.width * bounding_box.height)/400.0)))));
+    original_target_size = Size(bounding_box.size());
+    template_size.width = static_cast<float>(cvFloor(original_target_size.width + params.padding *
+            sqrt(original_target_size.width * original_target_size.height)));
+    template_size.height = static_cast<float>(cvFloor(original_target_size.height + params.padding *
+            sqrt(original_target_size.width * original_target_size.height)));
+    template_size.width = template_size.height =
+        (template_size.width + template_size.height) / 2.0f;
+    rescale_ratio = sqrt(pow(params.template_size,2) / (template_size.width * template_size.height));
+    if(rescale_ratio > 1)  {
+        rescale_ratio = 1;
+    }
+    rescaled_template_size = Size2i(cvFloor(template_size.width * rescale_ratio),
+            cvFloor(template_size.height * rescale_ratio));
+    object_center = Point2f(static_cast<float>(boundingBox.x) + original_target_size.width / 2.0f,
+            static_cast<float>(boundingBox.y) + original_target_size.height / 2.0f);
+    yf = gaussian_shaped_labels(params.gsl_sigma,
+            rescaled_template_size.width / cell_size, rescaled_template_size.height / cell_size);
+    if(params.window_function.compare("hann") == 0) {
+        window = get_hann_win(Size(yf.cols,yf.rows));
+    } else if(params.window_function.compare("cheb") == 0) {
+        window = get_chebyshev_win(Size(yf.cols,yf.rows), params.cheb_attenuation);
+    } else if(params.window_function.compare("kaiser") == 0) {
+        window = get_kaiser_win(Size(yf.cols,yf.rows), params.kaiser_alpha);
+    } else {
+        std::cout << "Not a valid window function" << std::endl;
+        return false;
+    }
+    Size2i scaled_obj_size = Size2i(cvFloor(original_target_size.width * rescale_ratio / cell_size),
+            cvFloor(original_target_size.height * rescale_ratio / cell_size));
+    //set dummy mask and area;
+    int x0 = std::max((yf.size().width - scaled_obj_size.width)/2 - 1, 0);
+    int y0 = std::max((yf.size().height - scaled_obj_size.height)/2 - 1, 0);
+    default_mask = Mat::zeros(yf.size(), CV_32FC1);
+    default_mask(Rect(x0,y0,scaled_obj_size.width, scaled_obj_size.height)) = 1.0f;
+    default_mask_area = static_cast<float>(sum(default_mask)[0]);
+    //initalize segmentation
+    if(params.use_segmentation) {
+        Mat hsv_img = bgr2hsv(image);
+        hist_foreground = Histogram(hsv_img.channels(), params.histogram_bins);
+        hist_background = Histogram(hsv_img.channels(), params.histogram_bins);
+        extract_histograms(hsv_img, bounding_box, hist_foreground, hist_background);
+        filter_mask = segment_region(hsv_img, object_center, template_size,
+                original_target_size, current_scale_factor);
+        //update calculated mask with preset mask
+        if(preset_mask.data){
+            Mat preset_mask_padded = Mat::zeros(filter_mask.size(), filter_mask.type());
+            int sx = std::max((int)cvFloor(preset_mask_padded.cols / 2.0f - preset_mask.cols / 2.0f) - 1, 0);
+            int sy = std::max((int)cvFloor(preset_mask_padded.rows / 2.0f - preset_mask.rows / 2.0f) - 1, 0);
+            preset_mask.copyTo(preset_mask_padded(
+                        Rect(sx, sy, preset_mask.cols, preset_mask.rows)));
+            filter_mask = filter_mask.mul(preset_mask_padded);
+        }
+        erode_element = getStructuringElement(MORPH_ELLIPSE, Size(3,3), Point(1,1));
+        resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
+        if(check_mask_area(filter_mask, default_mask_area)) {
+            dilate(filter_mask , filter_mask, erode_element);
+        } else {
+            filter_mask = default_mask;
+        }
+    } else {
+        filter_mask = default_mask;
+    }
+    //initialize filter
+    Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
+        cvFloor(current_scale_factor * template_size.height));
+    resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
+    std::vector<Mat> patch_ftrs = get_features(patch, yf.size());
+    std::vector<Mat> Fftrs = fourier_transform_features(patch_ftrs);
+    csr_filter = create_csr_filter(Fftrs, yf, filter_mask);
+    if(params.use_channel_weights) {
+        Mat current_resp;
+        filter_weights = std::vector<float>(csr_filter.size());
+        float chw_sum = 0;
+        for (size_t i = 0; i < csr_filter.size(); ++i) {
+            mulSpectrums(Fftrs[i], csr_filter[i], current_resp, 0, true);
+            idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
+            double max_val;
+            minMaxLoc(current_resp, NULL, &max_val, NULL , NULL);
+            chw_sum += static_cast<float>(max_val);
+            filter_weights[i] = static_cast<float>(max_val);
+        }
+        for (size_t i = 0; i < filter_weights.size(); ++i) {
+            filter_weights[i] /= chw_sum;
+        }
+    }
+    //initialize scale search
+    dsst = DSST(image, bounding_box, template_size, params.number_of_scales, params.scale_step,
+            params.scale_model_max_area, params.scale_sigma_factor, params.scale_lr);
+    model=Ptr<TrackerCSRTModel>(new TrackerCSRTModel(params));
+    isInit = true;
+    return true;
+}
+TrackerCSRT::Params::Params()
+{
+    use_channel_weights = true;
+    use_segmentation = true;
+    use_hog = true;
+    use_color_names = true;
+    use_gray = true;
+    use_rgb = false;
+    window_function = "hann";
+    kaiser_alpha = 3.75f;
+    cheb_attenuation = 45;
+    padding = 3.0f;
+    template_size = 200;
+    gsl_sigma = 1.0f;
+    hog_orientations = 9;
+    hog_clip = 0.2f;
+    num_hog_channels_used = 18;
+    filter_lr = 0.02f;
+    weights_lr = 0.02f;
+    admm_iterations = 4;
+    number_of_scales = 33;
+    scale_sigma_factor = 0.250f;
+    scale_model_max_area = 512.0f;
+    scale_lr = 0.025f;
+    scale_step = 1.020f;
+    histogram_bins = 16;
+    background_ratio = 2;
+    histogram_lr = 0.04f;
+}
+void TrackerCSRT::Params::read(const FileNode& fn)
+{
+    *this = TrackerCSRT::Params();
+    if(!fn["padding"].empty())
+        fn["padding"] >> padding;
+    if(!fn["template_size"].empty())
+        fn["template_size"] >> template_size;
+    if(!fn["gsl_sigma"].empty())
+        fn["gsl_sigma"] >> gsl_sigma;
+    if(!fn["hog_orientations"].empty())
+        fn["hog_orientations"] >> hog_orientations;
+    if(!fn["num_hog_channels_used"].empty())
+        fn["num_hog_channels_used"] >> num_hog_channels_used;
+    if(!fn["hog_clip"].empty())
+        fn["hog_clip"] >> hog_clip;
+    if(!fn["use_hog"].empty())
+        fn["use_hog"] >> use_hog;
+    if(!fn["use_color_names"].empty())
+        fn["use_color_names"] >> use_color_names;
+    if(!fn["use_gray"].empty())
+        fn["use_gray"] >> use_gray;
+    if(!fn["use_rgb"].empty())
+        fn["use_rgb"] >> use_rgb;
+    if(!fn["window_function"].empty())
+        fn["window_function"] >> window_function;
+    if(!fn["kaiser_alpha"].empty())
+        fn["kaiser_alpha"] >> kaiser_alpha;
+    if(!fn["cheb_attenuation"].empty())
+        fn["cheb_attenuation"] >> cheb_attenuation;
+    if(!fn["filter_lr"].empty())
+        fn["filter_lr"] >> filter_lr;
+    if(!fn["admm_iterations"].empty())
+        fn["admm_iterations"] >> admm_iterations;
+    if(!fn["number_of_scales"].empty())
+        fn["number_of_scales"] >> number_of_scales;
+    if(!fn["scale_sigma_factor"].empty())
+        fn["scale_sigma_factor"] >> scale_sigma_factor;
+    if(!fn["scale_model_max_area"].empty())
+        fn["scale_model_max_area"] >> scale_model_max_area;
+    if(!fn["scale_lr"].empty())
+        fn["scale_lr"] >> scale_lr;
+    if(!fn["scale_step"].empty())
+        fn["scale_step"] >> scale_step;
+    if(!fn["use_channel_weights"].empty())
+        fn["use_channel_weights"] >> use_channel_weights;
+    if(!fn["weights_lr"].empty())
+        fn["weights_lr"] >> weights_lr;
+    if(!fn["use_segmentation"].empty())
+        fn["use_segmentation"] >> use_segmentation;
+    if(!fn["histogram_bins"].empty())
+        fn["histogram_bins"] >> histogram_bins;
+    if(!fn["background_ratio"].empty())
+        fn["background_ratio"] >> background_ratio;
+    if(!fn["histogram_lr"].empty())
+        fn["histogram_lr"] >> histogram_lr;
+    CV_Assert(number_of_scales % 2 == 1);
+    CV_Assert(use_gray || use_color_names || use_hog || use_rgb);
+}
+void TrackerCSRT::Params::write(FileStorage& fs) const
+{
+    fs << "padding" << padding;
+    fs << "template_size" << template_size;
+    fs << "gsl_sigma" << gsl_sigma;
+    fs << "hog_orientations" << hog_orientations;
+    fs << "num_hog_channels_used" << num_hog_channels_used;
+    fs << "hog_clip" << hog_clip;
+    fs << "use_hog" << use_hog;
+    fs << "use_color_names" << use_color_names;
+    fs << "use_gray" << use_gray;
+    fs << "use_rgb" << use_rgb;
+    fs << "window_function" << window_function;
+    fs << "kaiser_alpha" << kaiser_alpha;
+    fs << "cheb_attenuation" << cheb_attenuation;
+    fs << "filter_lr" << filter_lr;
+    fs << "admm_iterations" << admm_iterations;
+    fs << "number_of_scales" << number_of_scales;
+    fs << "scale_sigma_factor" << scale_sigma_factor;
+    fs << "scale_model_max_area" << scale_model_max_area;
+    fs << "scale_lr" << scale_lr;
+    fs << "scale_step" << scale_step;
+    fs << "use_channel_weights" << use_channel_weights;
+    fs << "weights_lr" << weights_lr;
+    fs << "use_segmentation" << use_segmentation;
+    fs << "histogram_bins" << histogram_bins;
+    fs << "background_ratio" << background_ratio;
+    fs << "histogram_lr" << histogram_lr;
+}
+} /* namespace cv */
--- a/modules/tracking/src/trackerCSRTScaleEstimation.cpp
+++ b/modules/tracking/src/trackerCSRTScaleEstimation.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "precomp.hpp"
+#include "trackerCSRTScaleEstimation.hpp"
+#include "trackerCSRTUtils.hpp"
+//Discriminative Scale Space Tracking
+namespace cv
+{
+class ParallelGetScaleFeatures : public ParallelLoopBody
+{
+public:
+    ParallelGetScaleFeatures(
+        Mat img,
+        Point2f pos,
+        Size2f base_target_sz,
+        float current_scale,
+        std::vector<float> &scale_factors,
+        Mat scale_window,
+        Size scale_model_sz,
+        int col_len,
+        Mat &result)
+    {
+        this->img = img;
+        this->pos = pos;
+        this->base_target_sz = base_target_sz;
+        this->current_scale = current_scale;
+        this->scale_factors = scale_factors;
+        this->scale_window = scale_window;
+        this->scale_model_sz = scale_model_sz;
+        this->col_len = col_len;
+        this->result = result;
+    }
+    virtual void operator ()(const Range& range) const
+    {
+        for (int s = range.start; s < range.end; s++) {
+            Size patch_sz = Size(static_cast<int>(current_scale * scale_factors[s] * base_target_sz.width),
+                    static_cast<int>(current_scale * scale_factors[s] * base_target_sz.height));
+            Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
+            img_patch.convertTo(img_patch, CV_32FC3);
+            resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR);
+            std::vector<Mat> hog;
+            hog = get_features_hog(img_patch, 4);
+            for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
+                hog[i] = hog[i].t();
+                hog[i] = scale_window.at<float>(0,s) * hog[i].reshape(0, col_len);
+                hog[i].copyTo(result(Rect(Point(s, i*col_len), hog[i].size())));
+            }
+        }
+    }
+    ParallelGetScaleFeatures& operator=(const ParallelGetScaleFeatures &) {
+        return *this;
+    }
+private:
+    Mat img;
+    Point2f pos;
+    Size2f base_target_sz;
+    float current_scale;
+    std::vector<float> scale_factors;
+    Mat scale_window;
+    Size scale_model_sz;
+    int col_len;
+    Mat result;
+};
+DSST::DSST(const Mat &image,
+        Rect2f bounding_box,
+        Size2f template_size,
+        int numberOfScales,
+        float scaleStep,
+        float maxModelArea,
+        float sigmaFactor,
+        float scaleLearnRate):
+    scales_count(numberOfScales), scale_step(scaleStep), max_model_area(maxModelArea),
+    sigma_factor(sigmaFactor), learn_rate(scaleLearnRate)
+{
+    original_targ_sz = bounding_box.size();
+    Point2f object_center = Point2f(bounding_box.x + original_targ_sz.width / 2,
+            bounding_box.y + original_targ_sz.height / 2);
+    current_scale_factor = 1.0;
+    if(scales_count % 2 == 0)
+        scales_count++;
+    scale_sigma = static_cast<float>(sqrt(scales_count) * sigma_factor);
+    min_scale_factor = pow(scale_step,
+            cvCeil(log(max(5.0 / template_size.width, 5.0 / template_size.height)) / log(scale_step)));
+    max_scale_factor = powf(scale_step,
+            static_cast<float>(cvFloor(log(min((float)image.rows / (float)bounding_box.width,
+            (float)image.cols / (float)bounding_box.height)) / log(scale_step))));
+    ys = Mat(1, scales_count, CV_32FC1);
+    float ss, sf;
+    for(int i = 0; i < ys.cols; ++i) {
+        ss = (float)(i+1) - cvCeil((float)scales_count / 2.0f);
+        ys.at<float>(0,i) = static_cast<float>(exp(-0.5 * pow(ss,2) / pow(scale_sigma,2)));
+        sf = static_cast<float>(i + 1);
+        scale_factors.push_back(pow(scale_step, cvCeil((float)scales_count / 2.0f) - sf));
+    }
+    scale_window = get_hann_win(Size(scales_count, 1));
+    float scale_model_factor = 1.0;
+    if(template_size.width * template_size.height * pow(scale_model_factor, 2) > max_model_area)
+    {
+        scale_model_factor = sqrt(max_model_area /
+                (template_size.width * template_size.height));
+    }
+    scale_model_sz = Size(cvFloor(template_size.width * scale_model_factor),
+            cvFloor(template_size.height * scale_model_factor));
+    Mat scale_resp = get_scale_features(image, object_center, original_targ_sz,
+            current_scale_factor, scale_factors, scale_window, scale_model_sz);
+    Mat ysf_row = Mat(ys.size(), CV_32FC2);
+    dft(ys, ysf_row, DFT_ROWS | DFT_COMPLEX_OUTPUT, 0);
+    ysf = repeat(ysf_row, scale_resp.rows, 1);
+    Mat Fscale_resp;
+    dft(scale_resp, Fscale_resp, DFT_ROWS | DFT_COMPLEX_OUTPUT);
+    mulSpectrums(ysf, Fscale_resp, sf_num, 0 , true);
+    Mat sf_den_all;
+    mulSpectrums(Fscale_resp, Fscale_resp, sf_den_all, 0, true);
+    reduce(sf_den_all, sf_den, 0, CV_REDUCE_SUM, -1);
+}
+DSST::~DSST()
+{
+}
+Mat DSST::get_scale_features(
+        Mat img,
+        Point2f pos,
+        Size2f base_target_sz,
+        float current_scale,
+        std::vector<float> &scale_factors,
+        Mat scale_window,
+        Size scale_model_sz)
+{
+    Mat result;
+    int col_len = 0;
+    Size patch_sz = Size(cvFloor(current_scale * scale_factors[0] * base_target_sz.width),
+            cvFloor(current_scale * scale_factors[0] * base_target_sz.height));
+    Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
+    img_patch.convertTo(img_patch, CV_32FC3);
+    resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR);
+    std::vector<Mat> hog;
+    hog = get_features_hog(img_patch, 4);
+    result = Mat(Size((int)scale_factors.size(), hog[0].cols * hog[0].rows * (int)hog.size()), CV_32F);
+    col_len = hog[0].cols * hog[0].rows;
+    for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
+        hog[i] = hog[i].t();
+        hog[i] = scale_window.at<float>(0,0) * hog[i].reshape(0, col_len);
+        hog[i].copyTo(result(Rect(Point(0, i*col_len), hog[i].size())));
+    }
+    ParallelGetScaleFeatures parallelGetScaleFeatures(img, pos, base_target_sz,
+            current_scale, scale_factors, scale_window, scale_model_sz, col_len, result);
+    parallel_for_(Range(1, static_cast<int>(scale_factors.size())), parallelGetScaleFeatures);
+    return result;
+}
+void DSST::update(const Mat &image, const Point2f object_center)
+{
+    Mat scale_features = get_scale_features(image, object_center, original_targ_sz,
+            current_scale_factor, scale_factors, scale_window, scale_model_sz);
+    Mat Fscale_features;
+    dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);
+    Mat new_sf_num;
+    Mat new_sf_den;
+    Mat new_sf_den_all;
+    mulSpectrums(ysf, Fscale_features, new_sf_num, DFT_ROWS, true);
+    Mat sf_den_all;
+    mulSpectrums(Fscale_features, Fscale_features, new_sf_den_all, DFT_ROWS, true);
+    reduce(new_sf_den_all, new_sf_den, 0, CV_REDUCE_SUM, -1);
+    sf_num = (1 - learn_rate) * sf_num + learn_rate * new_sf_num;
+    sf_den = (1 - learn_rate) * sf_den + learn_rate * new_sf_den;
+}
+float DSST::getScale(const Mat &image, const Point2f object_center)
+{
+    Mat scale_features = get_scale_features(image, object_center, original_targ_sz,
+            current_scale_factor, scale_factors, scale_window, scale_model_sz);
+    Mat Fscale_features;
+    dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);
+    mulSpectrums(Fscale_features, sf_num, Fscale_features, 0, false);
+    Mat scale_resp;
+    reduce(Fscale_features, scale_resp, 0, CV_REDUCE_SUM, -1);
+    scale_resp = divide_complex_matrices(scale_resp, sf_den + 0.01f);
+    idft(scale_resp, scale_resp, DFT_REAL_OUTPUT|DFT_SCALE);
+    Point max_loc;
+    minMaxLoc(scale_resp, NULL, NULL, NULL, &max_loc);
+    current_scale_factor *= scale_factors[max_loc.x];
+    if(current_scale_factor < min_scale_factor)
+        current_scale_factor = min_scale_factor;
+    else if(current_scale_factor > max_scale_factor)
+        current_scale_factor = max_scale_factor;
+    return current_scale_factor;
+}
+} /* namespace cv */
--- a/modules/tracking/src/trackerCSRTScaleEstimation.hpp
+++ b/modules/tracking/src/trackerCSRTScaleEstimation.hpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_TRACKER_CSRT_SCALE_ESTIMATION
+#define OPENCV_TRACKER_CSRT_SCALE_ESTIMATION
+namespace cv
+{
+class DSST {
+public:
+    DSST() {};
+    DSST(const Mat &image, Rect2f bounding_box, Size2f template_size, int numberOfScales,
+            float scaleStep, float maxModelArea, float sigmaFactor, float scaleLearnRate);
+    ~DSST();
+    void update(const Mat &image, const Point2f objectCenter);
+    float getScale(const Mat &image, const Point2f objecCenter);
+private:
+    Mat get_scale_features(Mat img, Point2f pos, Size2f base_target_sz, float current_scale,
+            std::vector<float> &scale_factors, Mat scale_window, Size scale_model_sz);
+    Size scale_model_sz;
+    Mat ys;
+    Mat ysf;
+    Mat scale_window;
+    std::vector<float> scale_factors;
+    Mat sf_num;
+    Mat sf_den;
+    float scale_sigma;
+    float min_scale_factor;
+    float max_scale_factor;
+    float current_scale_factor;
+    int scales_count;
+    float scale_step;
+    float max_model_area;
+    float sigma_factor;
+    float learn_rate;
+    Size original_targ_sz;
+};
+} /* namespace cv */
+#endif
--- a/modules/tracking/src/trackerCSRTSegmentation.cpp
+++ b/modules/tracking/src/trackerCSRTSegmentation.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "precomp.hpp"
+#include "trackerCSRTSegmentation.hpp"
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <iostream>
+//-------------------- HISTOGRAM CLASS --------------------
+namespace cv
+{
+Histogram::Histogram(int numDimensions, int numBinsPerDimension)
+{
+    m_numBinsPerDim = numBinsPerDimension;
+    m_numDim = numDimensions;
+    p_size = cvFloor(std::pow(m_numBinsPerDim, m_numDim));
+    p_bins.resize(p_size, 0);
+    p_dimIdCoef.resize(m_numDim, 1);
+    for (int i = 0; i < m_numDim-1; ++i)
+        p_dimIdCoef[i] = static_cast<int>(std::pow(numBinsPerDimension, m_numDim - 1 - i));
+}
+void Histogram::extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
+        cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2)
+{
+    //just for code clarity
+    cv::Mat & img = imgChannels[0];
+    if (!useMatWeights){
+        //weights are epanechnikov distr. with peek at the center of the image;
+        double cx = x1 + (x2-x1)/2.;
+        double cy = y1 + (y2-y1)/2.;
+        double kernelSize_width = 1.0/(0.5*static_cast<double>(x2-x1)*1.4142+1);  //sqrt(2)
+        double kernelSize_height = 1.0/(0.5*static_cast<double>(y2-y1)*1.4142+1);
+        cv::Mat kernelWeight(img.rows, img.cols, CV_64FC1);
+        for (int y = y1; y < y2+1; ++y){
+            double * weightPtr = kernelWeight.ptr<double>(y);
+            double tmp_y = std::pow((cy-y)*kernelSize_height, 2);
+            for (int x = x1; x < x2+1; ++x){
+                weightPtr[x] = kernelProfile_Epanechnikov(std::pow((cx-x)*kernelSize_width,2) + tmp_y);
+            }
+        }
+        weights = kernelWeight;
+    }
+    //extract pixel values and compute histogram
+    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)
+    double sum = 0;
+    for (int y = y1; y < y2+1; ++y){
+        std::vector<const uchar *> dataPtr(m_numDim);
+        for (int dim = 0; dim < m_numDim; ++dim)
+            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
+        const double * weightPtr = weights.ptr<double>(y);
+        for (int x = x1; x < x2+1; ++x){
+            int id = 0;
+            for (int dim = 0; dim < m_numDim; ++dim){
+                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
+            }
+            p_bins[id] += weightPtr[x];
+            sum += weightPtr[x];
+        }
+    }
+    //normalize
+    sum = 1./sum;
+    for(int i = 0; i < p_size; ++i)
+        p_bins[i] *= sum;
+}
+void Histogram::extractBackGroundHistogram(
+        std::vector<cv::Mat> & imgChannels,
+        int x1, int y1, int x2, int y2,
+        int outer_x1, int outer_y1, int outer_x2, int outer_y2)
+{
+    //extract pixel values and compute histogram
+    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)
+    double sum = 0;
+    for (int y = outer_y1; y < outer_y2; ++y){
+        std::vector<const uchar *> dataPtr(m_numDim);
+        for (int dim = 0; dim < m_numDim; ++dim)
+            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
+        for (int x = outer_x1; x < outer_x2; ++x){
+            if (x >= x1 && x <= x2 && y >= y1 && y <= y2)
+                continue;
+            int id = 0;
+            for (int dim = 0; dim < m_numDim; ++dim){
+                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
+            }
+            p_bins[id] += 1.0;
+            sum += 1.0;
+        }
+    }
+    //normalize
+    sum = 1./sum;
+    for(int i = 0; i < p_size; ++i)
+        p_bins[i] *= sum;
+}
+cv::Mat Histogram::backProject(std::vector<cv::Mat> & imgChannels)
+{
+    //just for code clarity
+    cv::Mat & img = imgChannels[0];
+    cv::Mat backProject(img.rows, img.cols, CV_64FC1);
+    double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0;  // 1 / (imgRange/numBinsPerDim)
+    for (int y = 0; y < img.rows; ++y){
+        double * backProjectPtr = backProject.ptr<double>(y);
+        std::vector<const uchar *> dataPtr(m_numDim);
+        for (int dim = 0; dim < m_numDim; ++dim)
+            dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
+        for (int x = 0; x < img.cols; ++x){
+            int id = 0;
+            for (int dim = 0; dim < m_numDim; ++dim){
+                id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
+            }
+            backProjectPtr[x] = p_bins[id];
+        }
+    }
+    return backProject;
+}
+// add new methods
+std::vector<double> Histogram::getHistogramVector() {
+    return p_bins;
+}
+void Histogram::setHistogramVector(double *vector) {
+    for (size_t i=0; i<p_bins.size(); i++) {
+        p_bins[i] = vector[i];
+    }
+}
+//-------------------- SEGMENT CLASS --------------------
+std::pair<cv::Mat, cv::Mat> Segment::computePosteriors(
+        std::vector<cv::Mat> &imgChannels,
+        int x1, int y1, int x2, int y2,
+        cv::Mat weights, cv::Mat fgPrior, cv::Mat bgPrior,
+        const Histogram &fgHistPrior, int numBinsPerChannel)
+{
+    //preprocess and normalize all data
+    CV_Assert(imgChannels.size() > 0);
+    //fit target to the image
+    x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
+    y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
+    x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
+    y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
+    //enlarge bbox by 1/3 of its size for background area
+    int offsetX = (x2-x1)/3;
+    int offsetY = (y2-y1)/3;
+    int outer_y1 = std::max(0, (int)(y1-offsetY));
+    int outer_y2 = std::min(imgChannels[0].rows, (int)(y2+offsetY+1));
+    int outer_x1 = std::max(0, (int)(x1-offsetX));
+    int outer_x2 = std::min(imgChannels[0].cols, (int)(x2+offsetX+1));
+    //extract histogram from original data -> more pixels better representation of distr. by histograms
+    Histogram hist_target =
+        (fgHistPrior.m_numBinsPerDim == numBinsPerChannel && (size_t)fgHistPrior.m_numDim == imgChannels.size())
+        ? fgHistPrior : Histogram(static_cast<int>(imgChannels.size()), numBinsPerChannel);
+    Histogram hist_background(static_cast<int>(imgChannels.size()), numBinsPerChannel);
+    if (weights.cols == 0)
+        hist_target.extractForegroundHistogram(imgChannels, cv::Mat(), false, x1, y1, x2, y2);
+    else
+        hist_target.extractForegroundHistogram(imgChannels, weights, true, x1, y1, x2, y2);
+    hist_background.extractBackGroundHistogram(imgChannels, x1, y1, x2, y2,
+            outer_x1, outer_y1, outer_x2, outer_y2);
+    //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
+    double factor = sqrt(1000.0/((x2-x1)*(y2-y1)));
+    if (factor > 1)
+        factor = 1.0;
+    cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
+    //rescale input data
+    cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1, y2-y1);
+    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
+    for (size_t i = 0; i < imgChannels.size(); ++i)
+        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
+    //initialize priors if there is no external source and rescale
+    cv::Mat fgPriorScaled;
+    if (fgPrior.cols == 0)
+        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
+    cv::Mat bgPriorScaled;
+    if (bgPrior.cols == 0)
+        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
+    //backproject pixels likelihood
+    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
+    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
+    double p_b = std::sqrt((std::pow(outer_x2-outer_x1, 2) + std::pow(outer_y2-outer_y1, 2)) /
+            (std::pow(x2-x1, 2) + std::pow(y2-y1, 2))) ;
+    double p_o = 1./(p_b + 1);
+    //convert likelihoods to posterior prob. (Bayes rule)
+    cv::Mat prob_o(newSize, foregroundLikelihood.type());
+    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
+    cv::Mat prob_b = 1.0 - prob_o;
+    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);
+    //resize probs to original size
+    std::pair<cv::Mat, cv::Mat> probs;
+    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    return probs;
+}
+std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(
+    std::vector<cv::Mat> &imgChannels, int x1, int y1, int x2, int y2, double p_b,
+    cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
+{
+    //preprocess and normalize all data
+    CV_Assert(imgChannels.size() > 0);
+    //fit target to the image
+    x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
+    y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
+    x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
+    y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
+    // calculate width and height of the region
+    int w = x2 - x1 + 1;
+    int h = y2 - y1 + 1;
+    w = std::min(std::max(w, 1), imgChannels[0].cols);
+    h = std::min(std::max(h, 1), imgChannels[0].rows);
+    //double p_o = 1./(p_b + 1);
+    double p_o = 1. - p_b;
+    //compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
+    double factor = sqrt(1000.0/(w*h));
+    if (factor > 1)
+        factor = 1.0;
+    cv::Size newSize(cvFloor(w*factor), cvFloor(h*factor));
+    //rescale input data
+    cv::Rect roiRect_inner = cv::Rect(x1, y1, w, h);
+    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
+    for (size_t i = 0; i < imgChannels.size(); ++i)
+        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
+    //initialize priors if there is no external source and rescale
+    cv::Mat fgPriorScaled;
+    if (fgPrior.cols == 0)
+        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
+    cv::Mat bgPriorScaled;
+    if (bgPrior.cols == 0)
+        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
+    //backproject pixels likelihood
+    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
+    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
+    //convert likelihoods to posterior prob. (Bayes rule)
+    cv::Mat prob_o(newSize, foregroundLikelihood.type());
+    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
+    cv::Mat prob_b = 1.0 - prob_o;
+    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b,
+            fgPriorScaled, bgPriorScaled);
+    //std::pair<cv::Mat, cv::Mat> sizedProbs = std::pair<cv::Mat, cv::Mat>(prob_o, prob_b);
+    //resize probs to original size
+    std::pair<cv::Mat, cv::Mat> probs;
+    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    return probs;
+}
+std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(std::vector<cv::Mat> &imgChannels,
+        cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
+{
+    //preprocess and normalize all data
+    CV_Assert(imgChannels.size() > 0);
+    //fit target to the image
+    int x1 = 0;
+    int y1 = 0;
+    int x2 = imgChannels[0].cols-1;
+    int y2 = imgChannels[0].rows-1;
+    //compute resize factor so that we control the max area ~32^2
+    double factor = sqrt(1000./((x2-x1)*(y2-y1)));
+    //double factor = 1;
+    if (factor > 1)
+        factor = 1.0;
+    cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
+    //rescale input data
+    cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1+1, y2-y1+1);
+    std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
+    for (size_t i = 0; i < imgChannels.size(); ++i)
+        cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
+    //initialize priors if there is no external source and rescale
+    cv::Mat fgPriorScaled;
+    if (fgPrior.cols == 0)
+        fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
+    cv::Mat bgPriorScaled;
+    if (bgPrior.cols == 0)
+        bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
+    else
+        cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
+    //backproject pixels likelihood
+    cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
+    cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
+    //prior for posterior, relative to the number of pixels in bg and fg
+    double p_b = 5./3.;
+    double p_o = 1./(p_b + 1);
+    //convert likelihoods to posterior prob. (Bayes rule)
+    cv::Mat prob_o(newSize, foregroundLikelihood.type());
+    prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
+    cv::Mat prob_b = 1.0 - prob_o;
+    std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);
+    //resize probs to original size
+    std::pair<cv::Mat, cv::Mat> probs;
+    cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
+    return probs;
+}
+std::pair<cv::Mat, cv::Mat> Segment::getRegularizedSegmentation(
+        cv::Mat &prob_o, cv::Mat &prob_b, cv::Mat & prior_o, cv::Mat & prior_b)
+{
+    int hsize = cvFloor(std::max(1.0, (double)cvFloor(static_cast<double>(prob_b.cols)*3./50. + 0.5)));
+    int lambdaSize = hsize*2+1;
+    //compute gaussian kernel
+    cv::Mat lambda(lambdaSize, lambdaSize, CV_64FC1);
+    double std2 = std::pow(hsize/3.0, 2);
+    double sumLambda = 0.0;
+    for (int y = -hsize; y < hsize + 1; ++y){
+        double * lambdaPtr = lambda.ptr<double>(y+hsize);
+        double tmp_y = y*y;
+        for (int x = -hsize; x < hsize +1; ++x){
+            double tmp_gauss = gaussian(x*x, tmp_y, std2);
+            lambdaPtr[x+hsize] = tmp_gauss;
+            sumLambda += tmp_gauss;
+        }
+    }
+    sumLambda -= lambda.at<double>(hsize, hsize);
+    //set center of kernel to 0
+    lambda.at<double>(hsize, hsize) = 0.0;
+    sumLambda = 1.0/sumLambda;
+    //normalize kernel to sum to 1
+    lambda = lambda*sumLambda;
+    //create lambda2 kernel
+    cv::Mat lambda2 = lambda.clone();
+    lambda2.at<double>(hsize, hsize) = 1.0;
+    double terminateThr = 1e-1;
+    double logLike = std::numeric_limits<double>::max();
+    int maxIter = 50;
+    //return values
+    cv::Mat Qsum_o(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Qsum_b(prior_o.rows, prior_o.cols, prior_o.type());
+    //algorithm temporal
+    cv::Mat Si_o(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Si_b(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Ssum_o(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Ssum_b(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Qi_o(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat Qi_b(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat logQo(prior_o.rows, prior_o.cols, prior_o.type());
+    cv::Mat logQb(prior_o.rows, prior_o.cols, prior_o.type());
+    int i;
+    for (i = 0; i < maxIter; ++i){
+        //follows the equations from Kristan et al. ACCV2014 paper
+        //"A graphical model for rapid obstacle image-map estimation from unmanned surface vehicles"
+        cv::Mat P_Io = prior_o.mul(prob_o) + std::numeric_limits<double>::epsilon();
+        cv::Mat P_Ib = prior_b.mul(prob_b) + std::numeric_limits<double>::epsilon();
+        cv::filter2D(prior_o, Si_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        cv::filter2D(prior_b, Si_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        Si_o = Si_o.mul(prior_o);
+        Si_b = Si_b.mul(prior_b);
+        cv::Mat normSi = 1.0/(Si_o + Si_b);
+        Si_o = Si_o.mul(normSi);
+        Si_b = Si_b.mul(normSi);
+        cv::filter2D(Si_o, Ssum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        cv::filter2D(Si_b, Ssum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        cv::filter2D(P_Io, Qi_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        cv::filter2D(P_Ib, Qi_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        Qi_o = Qi_o.mul(P_Io);
+        Qi_b = Qi_b.mul(P_Ib);
+        cv::Mat normQi = 1.0/(Qi_o + Qi_b);
+        Qi_o = Qi_o.mul(normQi);
+        Qi_b = Qi_b.mul(normQi);
+        cv::filter2D(Qi_o, Qsum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        cv::filter2D(Qi_b, Qsum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
+        prior_o = (Qsum_o + Ssum_o)*0.25;
+        prior_b = (Qsum_b + Ssum_b)*0.25;
+        cv::Mat normPI = 1.0/(prior_o + prior_b);
+        prior_o = prior_o.mul(normPI);
+        prior_b = prior_b.mul(normPI);
+        //converge ?
+        cv::log(Qsum_o, logQo);
+        cv::log(Qsum_b, logQb);
+        cv::Scalar mean = cv::sum(logQo+logQb);
+        double logLikeNew = -mean.val[0]/(2*Qsum_o.rows*Qsum_o.cols);
+        if (std::abs(logLike - logLikeNew) < terminateThr)
+            break;
+        logLike = logLikeNew;
+    }
+    return std::pair<cv::Mat, cv::Mat>(Qsum_o, Qsum_b);
+}
+} //cv namespace
+//---------------------------------------------------------------------------------------------------------------------
--- a/modules/tracking/src/trackerCSRTSegmentation.hpp
+++ b/modules/tracking/src/trackerCSRTSegmentation.hpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_TRACKER_CSRT_SEGMENTATION
+#define OPENCV_TRACKER_CSRT_SEGMENTATION
+namespace cv
+{
+class Histogram
+{
+public:
+    int m_numBinsPerDim;
+    int m_numDim;
+    Histogram() : m_numBinsPerDim(0), m_numDim(0) {}
+    Histogram(int numDimensions, int numBinsPerDimension = 8);
+    void extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
+            cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2);
+    void extractBackGroundHistogram(std::vector<cv::Mat> & imgChannels,
+            int x1, int y1, int x2, int y2, int outer_x1, int outer_y1,
+            int outer_x2, int outer_y2);
+    cv::Mat backProject(std::vector<cv::Mat> & imgChannels);
+    std::vector<double> getHistogramVector();
+    void setHistogramVector(double *vector);
+private:
+    int p_size;
+    std::vector<double> p_bins;
+    std::vector<int> p_dimIdCoef;
+    inline double kernelProfile_Epanechnikov(double x)
+        { return (x <= 1) ? (2.0/CV_PI)*(1-x) : 0; }
+};
+class Segment
+{
+public:
+    static std::pair<cv::Mat, cv::Mat> computePosteriors(std::vector<cv::Mat> & imgChannels,
+            int x1, int y1, int x2, int y2, cv::Mat weights, cv::Mat fgPrior,
+            cv::Mat bgPrior, const Histogram &fgHistPrior, int numBinsPerChannel = 16);
+    static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> & imgChannels,
+            int x1, int y1, int x2, int y2, double p_b, cv::Mat fgPrior,
+            cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);
+    static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> &imgChannels,
+            cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);
+private:
+    static std::pair<cv::Mat, cv::Mat> getRegularizedSegmentation(cv::Mat & prob_o,
+            cv::Mat & prob_b, cv::Mat &prior_o, cv::Mat &prior_b);
+    inline static double gaussian(double x2, double y2, double std2){
+        return exp(-(x2 + y2)/(2*std2))/(2*CV_PI*std2);
+    }
+};
+}//cv namespace
+#endif
--- a/modules/tracking/src/trackerCSRTUtils.cpp
+++ b/modules/tracking/src/trackerCSRTUtils.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "precomp.hpp"
+#include "trackerCSRTUtils.hpp"
+namespace cv {
+Mat circshift(Mat matrix, int dx, int dy)
+{
+    Mat matrix_out = matrix.clone();
+    int idx_y = 0;
+    int idx_x = 0;
+    for(int i=0; i<matrix.rows; i++) {
+        for(int j=0; j<matrix.cols; j++) {
+            idx_y = modul(i+dy+1, matrix.rows);
+            idx_x = modul(j+dx+1, matrix.cols);
+            matrix_out.at<float>(idx_y, idx_x) = matrix.at<float>(i,j);
+        }
+    }
+    return matrix_out;
+}
+Mat gaussian_shaped_labels(const float sigma, const int w, const int h)
+{
+    // create 2D Gaussian peak, convert to Fourier space and stores it into the yf
+    Mat y = Mat::zeros(h, w, CV_32F);
+    float w2 = static_cast<float>(cvFloor(w / 2));
+    float h2 = static_cast<float>(cvFloor(h / 2));
+    // calculate for each pixel separatelly
+    for(int i=0; i<y.rows; i++) {
+        for(int j=0; j<y.cols; j++) {
+            y.at<float>(i,j) = (float)exp((-0.5 / pow(sigma, 2)) * (pow((i+1-h2), 2) + pow((j+1-w2), 2)));
+        }
+    }
+    // wrap-arround with the circulat shifting
+    y = circshift(y, -cvFloor(y.cols / 2), -cvFloor(y.rows / 2));
+    Mat yf;
+    dft(y, yf, DFT_COMPLEX_OUTPUT);
+    return yf;
+}
+std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M)
+{
+    std::vector<Mat> out(M.size());
+    Mat channel;
+    // iterate over channels and convert them to Fourier domain
+    for(size_t k = 0; k < M.size(); k++) {
+        M[k].convertTo(channel, CV_32F);
+        dft(channel, channel, DFT_COMPLEX_OUTPUT);
+        out[k] = (channel);
+    }
+    return out;
+}
+Mat divide_complex_matrices(const Mat &A, const Mat &B)
+{
+    std::vector<Mat> va,vb;
+    split(A, va);
+    split(B, vb);
+    Mat a = va.at(0);
+    Mat b = va.at(1);
+    Mat c = vb.at(0);
+    Mat d = vb.at(1);
+    Mat div = c.mul(c) + d.mul(d);
+    Mat real_part = (a.mul(c) + b.mul(d));
+    Mat im_part = (b.mul(c) - a.mul(d));
+    divide(real_part, div, real_part);
+    divide(im_part, div, im_part);
+    std::vector<Mat> tmp(2);
+    tmp[0] = real_part;
+    tmp[1] = im_part;
+    Mat res;
+    merge(tmp, res);
+    return res;
+}
+Mat get_subwindow(
+        const Mat &image,
+        const Point2f center,
+        const int w,
+        const int h,
+        Rect *valid_pixels)
+{
+    int startx = cvFloor(center.x) + 1 - (cvFloor(w/2));
+    int starty = cvFloor(center.y) + 1 - (cvFloor(h/2));
+    Rect roi(startx, starty, w, h);
+    int padding_left = 0, padding_right = 0, padding_top = 0, padding_bottom = 0;
+    if(roi.x < 0) {
+        padding_left = -roi.x;
+        roi.x = 0;
+    }
+    if(roi.y < 0) {
+        padding_top = -roi.y;
+        roi.y = 0;
+    }
+    roi.width -= padding_left;
+    roi.height-= padding_top;
+    if(roi.x + roi.width >= image.cols) {
+        padding_right = roi.x + roi.width - image.cols;
+        roi.width = image.cols - roi.x;
+    }
+    if(roi.y + roi.height >= image.rows) {
+        padding_bottom = roi.y + roi.height - image.rows;
+        roi.height = image.rows - roi.y;
+    }
+    Mat subwin = image(roi).clone();
+    copyMakeBorder(subwin, subwin, padding_top, padding_bottom, padding_left, padding_right, BORDER_REPLICATE);
+    if(valid_pixels != NULL) {
+        *valid_pixels = Rect(padding_left, padding_top, roi.width, roi.height);
+    }
+    return subwin;
+}
+float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p)
+{
+    int i_p0, i_p_l, i_p_r;     // indexes in response
+    float p0, p_l, p_r;         // values in response
+    if(s.compare("vertical") == 0) {
+        // neighbouring rows
+        i_p0 = cvRound(p.y);
+        i_p_l = modul(cvRound(p.y) - 1, response.rows);
+        i_p_r = modul(cvRound(p.y) + 1, response.rows);
+        int px = static_cast<int>(p.x);
+        p0 = response.at<float>(i_p0, px);
+        p_l = response.at<float>(i_p_l, px);
+        p_r = response.at<float>(i_p_r, px);
+    } else if(s.compare("horizontal") == 0) {
+        // neighbouring cols
+        i_p0 = cvRound(p.x);
+        i_p_l = modul(cvRound(p.x) - 1, response.cols);
+        i_p_r = modul(cvRound(p.x) + 1, response.cols);
+        int py = static_cast<int>(p.y);
+        p0 = response.at<float>(py, i_p0);
+        p_l = response.at<float>(py, i_p_l);
+        p_r = response.at<float>(py, i_p_r);
+    } else {
+        std::cout << "Warning: unknown subpixel peak direction!" << std::endl;
+        return 0;
+    }
+    float delta = 0.5f * (p_r - p_l) / (2*p0 - p_r - p_l);
+    if(!std::isfinite(delta)) {
+        delta = 0;
+    }
+    return delta;
+}
+inline float chebpoly(const int n, const float x)
+{
+    float res;
+    if (fabs(x) <= 1)
+        res = cos(n*acos(x));
+    else
+        res = cosh(n*acosh(x));
+    return res;
+}
+static Mat chebwin(int N, const float atten)
+{
+    Mat out(N , 1, CV_32FC1);
+    int nn, i;
+    float M, n, sum = 0, max=0;
+    float tg = static_cast<float>(pow(10,atten/20.0f));  /* 1/r term [2], 10^gamma [2] */
+    float x0 = cosh((1.0f/(N-1))*acosh(tg));
+    M = (N-1)/2.0f;
+    if(N%2==0)
+        M = M + 0.5f; /* handle even length windows */
+    for(nn=0; nn<(N/2+1); nn++) {
+        n = nn-M;
+        sum = 0;
+        for(i=1; i<=M; i++){
+            sum += chebpoly(N-1,x0*static_cast<float>(cos(CV_PI*i/N))) *
+                static_cast<float>(cos(2.0f*n*CV_PI*i/N));
+        }
+        out.at<float>(nn,0) = tg + 2*sum;
+        out.at<float>(N-nn-1,0) = out.at<float>(nn,0) ;
+        if(out.at<float>(nn,0) > max)
+            max = out.at<float>(nn,0);
+    }
+    for(nn=0; nn<N; nn++)
+        out.at<float>(nn,0) /= max; /* normalize everything */
+    return out;
+}
+static double modified_bessel(int order, double x)
+{
+    //  sum m=0:inf 1/(m! * Gamma(m + order + 1)) * (x/2)^(2m + order)
+    const double eps = 1e-13;
+    double result = 0;
+    double m = 0;
+    double gamma = 1.0;
+    for(int i = 2; i <= order; ++i)
+        gamma *= i;
+    double term = pow(x,order) / (pow(2,order) * gamma);
+    while(term  > eps * result) {
+        result += term;
+        //calculate new term in series
+        ++m;
+        term *= (x*x) / (4*m*(m+order));
+    }
+    return result;
+}
+Mat get_hann_win(Size sz)
+{
+    Mat hann_rows = Mat::ones(sz.height, 1, CV_32F);
+    Mat hann_cols = Mat::ones(1, sz.width, CV_32F);
+    int NN = sz.height - 1;
+    if(NN != 0) {
+        for (int i = 0; i < hann_rows.rows; ++i) {
+            hann_rows.at<float>(i,0) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
+        }
+    }
+    NN = sz.width - 1;
+    if(NN != 0) {
+        for (int i = 0; i < hann_cols.cols; ++i) {
+            hann_cols.at<float>(0,i) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
+        }
+    }
+    return hann_rows * hann_cols;
+}
+Mat get_kaiser_win(Size sz, float alpha)
+{
+    Mat kaiser_rows = Mat::ones(sz.height, 1, CV_32F);
+    Mat kaiser_cols = Mat::ones(1, sz.width, CV_32F);
+    int N = sz.height - 1;
+    double shape = alpha;
+    double den = 1.0 / modified_bessel(0, shape);
+    for(int n = 0; n <= N; ++n) {
+        double K = (2.0 * n * 1.0/N) - 1.0;
+        double x = sqrt(1.0 - (K * K));
+        kaiser_rows.at<float>(n,0) = static_cast<float>(modified_bessel(0, shape * x) * den);
+    }
+    N = sz.width - 1;
+    for(int n = 0; n <= N; ++n) {
+        double K = (2.0 * n * 1.0/N) - 1.0;
+        double x = sqrt(1.0 - (K * K));
+        kaiser_cols.at<float>(0,n) = static_cast<float>(modified_bessel(0, shape * x) * den);
+    }
+    return kaiser_rows * kaiser_cols;
+}
+Mat get_chebyshev_win(Size sz, float attenuation)
+{
+    Mat cheb_rows = chebwin(sz.height, attenuation);
+    Mat cheb_cols = chebwin(sz.width, attenuation).t();
+    return cheb_rows * cheb_cols;
+}
+static void computeHOG32D(const Mat &imageM, Mat &featM, const int sbin, const int pad_x, const int pad_y)
+{
+    const int dimHOG = 32;
+    CV_Assert(pad_x >= 0);
+    CV_Assert(pad_y >= 0);
+    CV_Assert(imageM.channels() == 3);
+    CV_Assert(imageM.depth() == CV_64F);
+    // epsilon to avoid division by zero
+    const double eps = 0.0001;
+    // number of orientations
+    const int numOrient = 18;
+    // unit vectors to compute gradient orientation
+    const double uu[9] = {1.000, 0.9397, 0.7660, 0.5000, 0.1736, -0.1736, -0.5000, -0.7660, -0.9397};
+    const double vv[9] = {0.000, 0.3420, 0.6428, 0.8660, 0.9848,  0.9848,  0.8660,  0.6428,  0.3420};
+    // image size
+    const Size imageSize = imageM.size();
+    // block size
+    // int bW = cvRound((double)imageSize.width/(double)sbin);
+    // int bH = cvRound((double)imageSize.height/(double)sbin);
+    int bW = cvFloor((double)imageSize.width/(double)sbin);
+    int bH = cvFloor((double)imageSize.height/(double)sbin);
+    const Size blockSize(bW, bH);
+    // size of HOG features
+    int oW = max(blockSize.width-2, 0) + 2*pad_x;
+    int oH = max(blockSize.height-2, 0) + 2*pad_y;
+    Size outSize = Size(oW, oH);
+    // size of visible
+    const Size visible = blockSize*sbin;
+    // initialize historgram, norm, output feature matrices
+    Mat histM = Mat::zeros(Size(blockSize.width*numOrient, blockSize.height), CV_64F);
+    Mat normM = Mat::zeros(Size(blockSize.width, blockSize.height), CV_64F);
+    featM = Mat::zeros(Size(outSize.width*dimHOG, outSize.height), CV_64F);
+    // get the stride of each matrix
+    const size_t imStride = imageM.step1();
+    const size_t histStride = histM.step1();
+    const size_t normStride = normM.step1();
+    const size_t featStride = featM.step1();
+    // calculate the zero offset
+    const double* im = imageM.ptr<double>(0);
+    double* const hist = histM.ptr<double>(0);
+    double* const norm = normM.ptr<double>(0);
+    double* const feat = featM.ptr<double>(0);
+    for (int y = 1; y < visible.height - 1; y++)
+    {
+        for (int x = 1; x < visible.width - 1; x++)
+        {
+            // OpenCV uses an interleaved format: BGR-BGR-BGR
+            const double* s = im + 3*min(x, imageM.cols-2) + min(y, imageM.rows-2)*imStride;
+            // blue image channel
+            double dyb = *(s+imStride) - *(s-imStride);
+            double dxb = *(s+3) - *(s-3);
+            double vb = dxb*dxb + dyb*dyb;
+            // green image channel
+            s += 1;
+            double dyg = *(s+imStride) - *(s-imStride);
+            double dxg = *(s+3) - *(s-3);
+            double vg = dxg*dxg + dyg*dyg;
+            // red image channel
+            s += 1;
+            double dy = *(s+imStride) - *(s-imStride);
+            double dx = *(s+3) - *(s-3);
+            double v = dx*dx + dy*dy;
+            // pick the channel with the strongest gradient
+            if (vg > v) { v = vg; dx = dxg; dy = dyg; }
+            if (vb > v) { v = vb; dx = dxb; dy = dyb; }
+            // snap to one of the 18 orientations
+            double best_dot = 0;
+            int best_o = 0;
+            for (int o = 0; o < (int)numOrient/2; o++)
+            {
+                double dot =  uu[o]*dx + vv[o]*dy;
+                if (dot > best_dot)
+                {
+                    best_dot = dot;
+                    best_o = o;
+                }
+                else if (-dot > best_dot)
+                {
+                    best_dot = -dot;
+                    best_o = o + (int)(numOrient/2);
+                }
+            }
+            // add to 4 historgrams around pixel using bilinear interpolation
+            double yp =  ((double)y+0.5)/(double)sbin - 0.5;
+            double xp =  ((double)x+0.5)/(double)sbin - 0.5;
+            int iyp = (int)cvFloor(yp);
+            int ixp = (int)cvFloor(xp);
+            double vy0 = yp - iyp;
+            double vx0 = xp - ixp;
+            double vy1 = 1.0 - vy0;
+            double vx1 = 1.0 - vx0;
+            v = sqrt(v);
+            // fill the value into the 4 neighborhood cells
+            if (iyp >= 0 && ixp >= 0)
+                *(hist + iyp*histStride + ixp*numOrient + best_o) += vy1*vx1*v;
+            if (iyp >= 0 && ixp+1 < blockSize.width)
+                *(hist + iyp*histStride + (ixp+1)*numOrient + best_o) += vx0*vy1*v;
+            if (iyp+1 < blockSize.height && ixp >= 0)
+                *(hist + (iyp+1)*histStride + ixp*numOrient + best_o) += vy0*vx1*v;
+            if (iyp+1 < blockSize.height && ixp+1 < blockSize.width)
+                *(hist + (iyp+1)*histStride + (ixp+1)*numOrient + best_o) += vy0*vx0*v;
+        } // for y
+    } // for x
+    // compute the energy in each block by summing over orientation
+    for (int y = 0; y < blockSize.height; y++)
+    {
+        const double* src = hist + y*histStride;
+        double* dst = norm + y*normStride;
+        double const* const dst_end = dst + blockSize.width;
+        // for each cell
+        while (dst < dst_end)
+        {
+            *dst = 0;
+            for (int o = 0; o < (int)(numOrient/2); o++)
+            {
+                *dst += (*src + *(src + numOrient/2))*
+                    (*src + *(src + numOrient/2));
+                src++;
+            }
+            dst++;
+            src += numOrient/2;
+        }
+    }
+    // compute the features
+    for (int y = pad_y; y < outSize.height - pad_y; y++)
+    {
+        for (int x = pad_x; x < outSize.width - pad_x; x++)
+        {
+            double* dst = feat + y*featStride + x*dimHOG;
+            double* p, n1, n2, n3, n4;
+            const double* src;
+            p = norm + (y - pad_y + 1)*normStride + (x - pad_x + 1);
+            n1 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
+            p = norm + (y - pad_y)*normStride + (x - pad_x + 1);
+            n2 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
+            p = norm + (y- pad_y + 1)*normStride + x - pad_x;
+            n3 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
+            p = norm + (y - pad_y)*normStride + x - pad_x;
+            n4 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
+            double t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0;
+            // contrast-sesitive features
+            src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
+            for (int o = 0; o < numOrient; o++)
+            {
+                double val = *src;
+                double h1 = min(val*n1, 0.2);
+                double h2 = min(val*n2, 0.2);
+                double h3 = min(val*n3, 0.2);
+                double h4 = min(val*n4, 0.2);
+                *(dst++) = 0.5 * (h1 + h2 + h3 + h4);
+                src++;
+                t1 += h1;
+                t2 += h2;
+                t3 += h3;
+                t4 += h4;
+            }
+            // contrast-insensitive features
+            src =  hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
+            for (int o = 0; o < numOrient/2; o++)
+            {
+                double sum = *src + *(src + numOrient/2);
+                double h1 = min(sum * n1, 0.2);
+                double h2 = min(sum * n2, 0.2);
+                double h3 = min(sum * n3, 0.2);
+                double h4 = min(sum * n4, 0.2);
+                *(dst++) = 0.5 * (h1 + h2 + h3 + h4);
+                src++;
+            }
+            // texture features
+            *(dst++) = 0.2357 * t1;
+            *(dst++) = 0.2357 * t2;
+            *(dst++) = 0.2357 * t3;
+            *(dst++) = 0.2357 * t4;
+            // truncation feature
+            *dst = 0;
+        }// for x
+    }// for y
+    // Truncation features
+    for (int m = 0; m < featM.rows; m++)
+    {
+        for (int n = 0; n < featM.cols; n += dimHOG)
+        {
+            if (m > pad_y - 1 && m < featM.rows - pad_y && n > pad_x*dimHOG - 1 && n < featM.cols - pad_x*dimHOG)
+                continue;
+            featM.at<double>(m, n + dimHOG - 1) = 1;
+        } // for x
+    }// for y
+}
+std::vector<Mat> get_features_hog(const Mat &im, const int bin_size)
+{
+    Mat hogmatrix;
+    Mat im_;
+    im.convertTo(im_, CV_64FC3, 1.0/255.0);
+    computeHOG32D(im_,hogmatrix,bin_size,1,1);
+    hogmatrix.convertTo(hogmatrix, CV_32F);
+    Size hog_size = im.size();
+    hog_size.width /= bin_size;
+    hog_size.height /= bin_size;
+    Mat hogc(hog_size, CV_32FC(32), hogmatrix.data);
+    std::vector<Mat> features;
+    split(hogc, features);
+    return features;
+}
+std::vector<Mat> get_features_cn(const Mat &ppatch_data, const Size &output_size) {
+    Mat patch_data = ppatch_data.clone();
+    Vec3b & pixel = patch_data.at<Vec3b>(0,0);
+    unsigned index;
+    Mat cnFeatures = Mat::zeros(patch_data.rows,patch_data.cols,CV_32FC(10));
+    for(int i=0;i<patch_data.rows;i++){
+        for(int j=0;j<patch_data.cols;j++){
+            pixel=patch_data.at<Vec3b>(i,j);
+            index=(unsigned)(cvFloor((float)pixel[2]/8)+32*cvFloor((float)pixel[1]/8)+32*32*cvFloor((float)pixel[0]/8));
+            //copy the values
+            for(int k=0;k<10;k++){
+                cnFeatures.at<Vec<float,10> >(i,j)[k]=(float)ColorNames[index][k];
+            }
+        }
+    }
+    std::vector<Mat> result;
+    split(cnFeatures, result);
+    for (size_t i = 0; i < result.size(); i++) {
+        if (output_size.width > 0 && output_size.height > 0) {
+            resize(result.at(i), result.at(i), output_size, INTER_CUBIC);
+        }
+    }
+    return result;
+}
+std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size)
+{
+    std::vector<Mat> channels;
+    split(patch, channels);
+    for(size_t k=0; k<channels.size(); k++) {
+        channels[k].convertTo(channels[k], CV_32F, 1.0/255.0, -0.5);
+        channels[k] = channels[k] - mean(channels[k])[0];
+        resize(channels[k], channels[k], output_size, INTER_CUBIC);
+    }
+    return channels;
+}
+double get_max(const Mat &m)
+{
+    double val;
+    minMaxLoc(m, NULL, &val, NULL, NULL);
+    return val;
+}
+double get_min(const Mat &m)
+{
+    double val;
+    minMaxLoc(m, &val, NULL, NULL, NULL);
+    return val;
+}
+Mat bgr2hsv(const Mat &img)
+{
+    Mat hsv_img;
+    cvtColor(img, hsv_img, CV_BGR2HSV);
+    std::vector<Mat> hsv_img_channels;
+    split(hsv_img, hsv_img_channels);
+    hsv_img_channels.at(0).convertTo(hsv_img_channels.at(0), CV_8UC1, 255.0 / 180.0);
+    merge(hsv_img_channels, hsv_img);
+    return hsv_img;
+}
+} //cv namespace
--- a/modules/tracking/src/trackerCSRTUtils.hpp
+++ b/modules/tracking/src/trackerCSRTUtils.hpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#ifndef OPENCV_TRACKER_CSRT_UTILS
+#define OPENCV_TRACKER_CSRT_UTILS
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <iostream>
+#include <algorithm>
+#include <iterator>
+namespace cv
+{
+inline int modul(int a, int b)
+{
+    // function calculates the module of two numbers and it takes into account also negative numbers
+    return ((a % b) + b) % b;
+}
+inline double kernel_epan(double x)
+{
+    return (x <= 1) ? (2.0/3.14)*(1-x) : 0;
+}
+Mat circshift(Mat matrix, int dx, int dy);
+Mat gaussian_shaped_labels(const float sigma, const int w, const int h);
+std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M);
+Mat divide_complex_matrices(const Mat &A, const Mat &B);
+Mat get_subwindow(const Mat &image, const Point2f center,
+        const int w, const int h,Rect *valid_pixels = NULL);
+float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p);
+double get_max(const Mat &m);
+double get_min(const Mat &m);
+Mat get_hann_win(Size sz);
+Mat get_kaiser_win(Size sz, float alpha);
+Mat get_chebyshev_win(Size sz, float attenuation);
+std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size);
+std::vector<Mat> get_features_hog(const Mat &im, const int bin_size);
+std::vector<Mat> get_features_cn(const Mat &im, const Size &output_size);
+Mat bgr2hsv(const Mat &img);
+} //cv namespace
+#endif
--- a/modules/tracking/test/test_trackers.cpp
+++ b/modules/tracking/test/test_trackers.cpp
@@ -467,6 +467,12 @@ TEST_P(DistanceAndOverlap, MOSSE)
  test.run();
 }
+TEST_P(DistanceAndOverlap, CSRT)
+{
+  TrackerTest test( TrackerCSRT::create(), dataset, 22, .7f, NoTransform);
+  test.run();
+}
 /***************************************************************************************/
 //Tests with shifted initial window
 TEST_P(DistanceAndOverlap, Shifted_Data_MedianFlow)
@@ -504,6 +510,12 @@ TEST_P(DistanceAndOverlap, Shifted_Data_MOSSE)
  TrackerTest test( TrackerMOSSE::create(), dataset, 13, .69f, CenterShiftLeft);
  test.run();
 }
+TEST_P(DistanceAndOverlap, Shifted_Data_CSRT)
+{
+  TrackerTest test( TrackerCSRT::create(), dataset, 13, .69f, CenterShiftLeft);
+  test.run();
+}
 /***************************************************************************************/
 //Tests with scaled initial window
 TEST_P(DistanceAndOverlap, Scaled_Data_MedianFlow)
@@ -549,6 +561,12 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MOSSE)
  test.run();
 }
+TEST_P(DistanceAndOverlap, Scaled_Data_CSRT)
+{
+  TrackerTest test( TrackerCSRT::create(), dataset, 22, 0.69f, Scale_1_1, 1);
+  test.run();
+}
 INSTANTIATE_TEST_CASE_P( Tracking, DistanceAndOverlap, TESTSET_NAMES);