Commit ced5aa76 authored by Andrej Muhič's avatar Andrej Muhič Committed by Alexander Alekhin

Merge pull request #1552 from amuhic:master

Implementation of CSR-DCF tracker (#1552)

* Initial commit for CSR-DCF tracker implementation

* Fixes for automatic build

* General code fixes

* Removed unused parameters. Added CSRT to automatic tests.

* Fixed VS build warnings. Fixed a bug with gray sequences.

* Fixed VS build errors for samples file.
parent 295dce52
...@@ -1462,6 +1462,71 @@ public: ...@@ -1462,6 +1462,71 @@ public:
//! @} //! @}
/*********************************** CSRT ************************************/
/** @brief Discriminative Correlation Filter Tracker with Channel and Spatial Reliability
*/
class CV_EXPORTS_W TrackerCSRT : public Tracker
{
public:
struct CV_EXPORTS Params
{
/**
* \brief Constructor
*/
Params();
/**
* \brief Read parameters from file
*/
void read(const FileNode& /*fn*/);
/**
* \brief Write parameters from file
*/
void write(cv::FileStorage& fs) const;
bool use_hog;
bool use_color_names;
bool use_gray;
bool use_rgb;
bool use_channel_weights;
bool use_segmentation;
std::string window_function; //!< Window function: "hann", "cheb", "kaiser"
float kaiser_alpha;
float cheb_attenuation;
float template_size;
float gsl_sigma;
float hog_orientations;
float hog_clip;
float padding;
float filter_lr;
float weights_lr;
int num_hog_channels_used;
int admm_iterations;
int histogram_bins;
float histogram_lr;
int background_ratio;
int number_of_scales;
float scale_sigma_factor;
float scale_model_max_area;
float scale_lr;
float scale_step;
};
/** @brief Constructor
@param parameters CSRT parameters TrackerCSRT::Params
*/
static Ptr<TrackerCSRT> create(const TrackerCSRT::Params &parameters);
CV_WRAP static Ptr<TrackerCSRT> create();
virtual void setInitialMask(const Mat mask) = 0;
virtual ~TrackerCSRT() {}
};
} /* namespace cv */ } /* namespace cv */
#endif #endif
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <cstring>
#include <fstream>
#include "samples_utility.hpp"
using namespace std;
using namespace cv;
int main(int argc, char** argv)
{
// show help
if (argc<2) {
cout <<
" Usage: example_tracking_csrt <video_name>\n"
" examples:\n"
" example_tracking_csrt Bolt/img/%04.jpg\n"
" example_tracking_csrt Bolt/img/%04.jpg Bolt/grouondtruth.txt\n"
" example_tracking_csrt faceocc2.webm\n"
<< endl;
return 0;
}
// create the tracker
Ptr<TrackerCSRT> tracker = TrackerCSRT::create();
// const char* param_file_path = "/home/amuhic/Workspace/3_dip/params.yml";
// FileStorage fs(params_file_path, FileStorage::WRITE);
// tracker->write(fs);
// FileStorage fs(param_file_path, FileStorage::READ);
// tracker->read( fs.root());
// set input video
std::string video = argv[1];
VideoCapture cap(video);
// and read first frame
Mat frame;
cap >> frame;
// target bounding box
Rect2d roi;
if (argc > 2) {
// read first line of ground-truth file
std::string groundtruthPath = argv[2];
std::ifstream gtIfstream(groundtruthPath.c_str());
std::string gtLine;
getline(gtIfstream, gtLine);
gtIfstream.close();
// parse the line by elements
std::stringstream gtStream(gtLine);
std::string element;
std::vector<int> elements;
while (std::getline(gtStream, element, ','))
{
elements.push_back(cvRound(std::atof(element.c_str())));
}
if (elements.size() == 4) {
// ground-truth is rectangle
roi = cv::Rect(elements[0], elements[1], elements[2], elements[3]);
}
else if (elements.size() == 8) {
// ground-truth is polygon
int xMin = cvRound(min(elements[0], min(elements[2], min(elements[4], elements[6]))));
int yMin = cvRound(min(elements[1], min(elements[3], min(elements[5], elements[7]))));
int xMax = cvRound(max(elements[0], max(elements[2], max(elements[4], elements[6]))));
int yMax = cvRound(max(elements[1], max(elements[3], max(elements[5], elements[7]))));
roi = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
// create mask from polygon and set it to the tracker
cv::Rect aaRect = cv::Rect(xMin, yMin, xMax - xMin, yMax - yMin);
cout << aaRect.size() << endl;
Mat mask = Mat::zeros(aaRect.size(), CV_8UC1);
const int n = 4;
std::vector<cv::Point> poly_points(n);
//Translate x and y to rects start position
int sx = aaRect.x;
int sy = aaRect.y;
for (int i = 0; i < n; ++i) {
poly_points[i] = Point(elements[2 * i] - sx, elements[2 * i + 1] - sy);
}
cv::fillConvexPoly(mask, poly_points, Scalar(1.0), 8);
mask.convertTo(mask, CV_32FC1);
tracker->setInitialMask(mask);
}
else {
std::cout << "Number of ground-truth elements is not 4 or 8." << std::endl;
}
}
else {
// second argument is not given - user selects target
roi = selectROI("tracker", frame, true, false);
}
//quit if ROI was not selected
if (roi.width == 0 || roi.height == 0)
return 0;
// initialize the tracker
int64 t1 = cv::getTickCount();
tracker->init(frame, roi);
int64 t2 = cv::getTickCount();
int64 tick_counter = t2 - t1;
// do the tracking
printf("Start the tracking process, press ESC to quit.\n");
int frame_idx = 1;
for (;;) {
// get frame from the video
cap >> frame;
// stop the program if no more images
if (frame.rows == 0 || frame.cols == 0)
break;
// update the tracking result
t1 = cv::getTickCount();
bool isfound = tracker->update(frame, roi);
t2 = cv::getTickCount();
tick_counter += t2 - t1;
frame_idx++;
if (!isfound) {
cout << "The target has been lost...\n";
waitKey(0);
return 0;
}
// draw the tracked object and show the image
rectangle(frame, roi, Scalar(255, 0, 0), 2, 1);
imshow("tracker", frame);
//quit on ESC button
if (waitKey(1) == 27)break;
}
cout << "Elapsed sec: " << static_cast<double>(tick_counter) / cv::getTickFrequency() << endl;
cout << "FPS: " << ((double)(frame_idx)) / (static_cast<double>(tick_counter) / cv::getTickFrequency()) << endl;
}
...@@ -21,6 +21,8 @@ inline cv::Ptr<cv::Tracker> createTrackerByName(cv::String name) ...@@ -21,6 +21,8 @@ inline cv::Ptr<cv::Tracker> createTrackerByName(cv::String name)
tracker = cv::TrackerGOTURN::create(); tracker = cv::TrackerGOTURN::create();
else if (name == "MOSSE") else if (name == "MOSSE")
tracker = cv::TrackerMOSSE::create(); tracker = cv::TrackerMOSSE::create();
else if (name == "CSRT")
tracker = cv::TrackerCSRT::create();
else else
CV_Error(cv::Error::StsBadArg, "Invalid tracking algorithm name\n"); CV_Error(cv::Error::StsBadArg, "Invalid tracking algorithm name\n");
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "trackerCSRTSegmentation.hpp"
#include "trackerCSRTUtils.hpp"
#include "trackerCSRTScaleEstimation.hpp"
namespace cv
{
/**
* \brief Implementation of TrackerModel for CSRT algorithm
*/
class TrackerCSRTModel : public TrackerModel
{
public:
TrackerCSRTModel(TrackerCSRT::Params /*params*/){}
~TrackerCSRTModel(){}
protected:
void modelEstimationImpl(const std::vector<Mat>& /*responses*/){}
void modelUpdateImpl(){}
};
class TrackerCSRTImpl : public TrackerCSRT
{
public:
TrackerCSRTImpl(const TrackerCSRT::Params &parameters = TrackerCSRT::Params());
void read(const FileNode& fn);
void write(FileStorage& fs) const;
protected:
TrackerCSRT::Params params;
bool initImpl(const Mat& image, const Rect2d& boundingBox);
virtual void setInitialMask(const Mat mask);
bool updateImpl(const Mat& image, Rect2d& boundingBox);
void update_csr_filter(const Mat &image, const Mat &my_mask);
void update_histograms(const Mat &image, const Rect &region);
void extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb);
std::vector<Mat> create_csr_filter(const std::vector<cv::Mat>
img_features, const cv::Mat Y, const cv::Mat P);
Mat calculate_response(const Mat &image, const std::vector<Mat> filter);
Mat get_location_prior(const Rect roi, const Size2f target_size, const Size img_sz);
Mat segment_region(const Mat &image, const Point2f &object_center,
const Size2f &template_size, const Size &target_size, float scale_factor);
Point2f estimate_new_position(const Mat &image);
std::vector<Mat> get_features(const Mat &patch, const Size2i &feature_size);
private:
bool check_mask_area(const Mat &mat, const double obj_area);
float current_scale_factor;
Mat window;
Mat yf;
Rect2f bounding_box;
std::vector<Mat> csr_filter;
std::vector<float> filter_weights;
Size2f original_target_size;
Size2i image_size;
Size2f template_size;
Size2i rescaled_template_size;
float rescale_ratio;
Point2f object_center;
DSST dsst;
Histogram hist_foreground;
Histogram hist_background;
double p_b;
Mat erode_element;
Mat filter_mask;
Mat preset_mask;
Mat default_mask;
float default_mask_area;
int cell_size;
};
Ptr<TrackerCSRT> TrackerCSRT::create(const TrackerCSRT::Params &parameters)
{
return Ptr<TrackerCSRTImpl>(new TrackerCSRTImpl(parameters));
}
Ptr<TrackerCSRT> TrackerCSRT::create()
{
return Ptr<TrackerCSRTImpl>(new TrackerCSRTImpl());
}
TrackerCSRTImpl::TrackerCSRTImpl(const TrackerCSRT::Params &parameters) :
params(parameters)
{
isInit = false;
}
void TrackerCSRTImpl::read(const cv::FileNode& fn)
{
params.read(fn);
}
void TrackerCSRTImpl::write(cv::FileStorage& fs) const
{
params.write(fs);
}
void TrackerCSRTImpl::setInitialMask(const Mat mask)
{
preset_mask = mask;
}
bool TrackerCSRTImpl::check_mask_area(const Mat &mat, const double obj_area)
{
double threshold = 0.05;
double mask_area= sum(mat)[0];
if(mask_area < threshold*obj_area) {
return false;
}
return true;
}
Mat TrackerCSRTImpl::calculate_response(const Mat &image, const std::vector<Mat> filter)
{
Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
cvFloor(current_scale_factor * template_size.height));
resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
std::vector<Mat> ftrs = get_features(patch, yf.size());
std::vector<Mat> Ffeatures = fourier_transform_features(ftrs);
Mat resp, res;
if(params.use_channel_weights){
res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
Mat resp_ch;
Mat mul_mat;
for(size_t i = 0; i < Ffeatures.size(); ++i) {
mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0, true);
res += (resp_ch * filter_weights[i]);
}
idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
} else {
res = Mat::zeros(Ffeatures[0].size(), CV_32FC2);
Mat resp_ch;
for(size_t i = 0; i < Ffeatures.size(); ++i) {
mulSpectrums(Ffeatures[i], filter[i], resp_ch, 0 , true);
res = res + resp_ch;
}
idft(res, res, DFT_SCALE | DFT_REAL_OUTPUT);
}
return res;
}
void TrackerCSRTImpl::update_csr_filter(const Mat &image, const Mat &mask)
{
Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
cvFloor(current_scale_factor * template_size.height));
resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
std::vector<Mat> ftrs = get_features(patch, yf.size());
std::vector<Mat> Fftrs = fourier_transform_features(ftrs);
std::vector<Mat> new_csr_filter = create_csr_filter(Fftrs, yf, mask);
//calculate per channel weights
if(params.use_channel_weights) {
Mat current_resp;
double max_val;
float sum_weights = 0;
std::vector<float> new_filter_weights = std::vector<float>(new_csr_filter.size());
for(size_t i = 0; i < new_csr_filter.size(); ++i) {
mulSpectrums(Fftrs[i], new_csr_filter[i], current_resp, 0, true);
idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
minMaxLoc(current_resp, NULL, &max_val, NULL, NULL);
sum_weights += static_cast<float>(max_val);
new_filter_weights[i] = static_cast<float>(max_val);
}
//update filter weights with new values
float updated_sum = 0;
for(size_t i = 0; i < filter_weights.size(); ++i) {
filter_weights[i] = filter_weights[i]*(1.0f - params.weights_lr) +
params.weights_lr * (new_filter_weights[i] / sum_weights);
updated_sum += filter_weights[i];
}
//normalize weights
for(size_t i = 0; i < filter_weights.size(); ++i) {
filter_weights[i] /= updated_sum;
}
}
for(size_t i = 0; i < csr_filter.size(); ++i) {
csr_filter[i] = (1.0f - params.filter_lr)*csr_filter[i] + params.filter_lr * new_csr_filter[i];
}
std::vector<Mat>().swap(ftrs);
std::vector<Mat>().swap(Fftrs);
}
std::vector<Mat> TrackerCSRTImpl::get_features(const Mat &patch, const Size2i &feature_size)
{
std::vector<Mat> features;
if (params.use_hog) {
std::vector<Mat> hog = get_features_hog(patch, cell_size);
features.insert(features.end(), hog.begin(),
hog.begin()+params.num_hog_channels_used);
}
if (params.use_color_names) {
std::vector<Mat> cn;
cn = get_features_cn(patch, feature_size);
features.insert(features.end(), cn.begin(), cn.end());
}
if(params.use_gray) {
Mat gray_m;
cvtColor(patch, gray_m, CV_BGR2GRAY);
resize(gray_m, gray_m, feature_size, 0, 0, INTER_CUBIC);
gray_m.convertTo(gray_m, CV_32FC1, 1.0/255.0, -0.5);
features.push_back(gray_m);
}
if(params.use_rgb) {
std::vector<Mat> rgb_features = get_features_rgb(patch, feature_size);
features.insert(features.end(), rgb_features.begin(), rgb_features.end());
}
for (size_t i = 0; i < features.size(); ++i) {
features.at(i) = features.at(i).mul(window);
}
return features;
}
class ParallelCreateCSRFilter : public ParallelLoopBody {
public:
ParallelCreateCSRFilter(
const std::vector<cv::Mat> img_features,
const cv::Mat Y,
const cv::Mat P,
int admm_iterations,
std::vector<Mat> &result_filter_):
result_filter(result_filter_)
{
this->img_features = img_features;
this->Y = Y;
this->P = P;
this->admm_iterations = admm_iterations;
}
virtual void operator ()(const Range& range) const
{
for (int i = range.start; i < range.end; i++) {
float mu = 5.0f;
float beta = 3.0f;
float mu_max = 20.0f;
float lambda = mu / 100.0f;
Mat F = img_features[i];
Mat Sxy, Sxx;
mulSpectrums(F, Y, Sxy, 0, true);
mulSpectrums(F, F, Sxx, 0, true);
Mat H;
H = divide_complex_matrices(Sxy, (Sxx + lambda));
idft(H, H, DFT_SCALE|DFT_REAL_OUTPUT);
H = H.mul(P);
dft(H, H, DFT_COMPLEX_OUTPUT);
Mat L = Mat::zeros(H.size(), H.type()); //Lagrangian multiplier
Mat G;
for(int iteration = 0; iteration < admm_iterations; ++iteration) {
G = divide_complex_matrices((Sxy + (mu * H) - L) , (Sxx + mu));
idft((mu * G) + L, H, DFT_SCALE | DFT_REAL_OUTPUT);
float lm = 1.0f / (lambda+mu);
H = H.mul(P*lm);
dft(H, H, DFT_COMPLEX_OUTPUT);
//Update variables for next iteration
L = L + mu * (G - H);
mu = min(mu_max, beta*mu);
}
result_filter[i] = H;
}
}
ParallelCreateCSRFilter& operator=(const ParallelCreateCSRFilter &) {
return *this;
}
private:
int admm_iterations;
Mat Y;
Mat P;
std::vector<Mat> img_features;
std::vector<Mat> &result_filter;
};
std::vector<Mat> TrackerCSRTImpl::create_csr_filter(
const std::vector<cv::Mat> img_features,
const cv::Mat Y,
const cv::Mat P)
{
std::vector<Mat> result_filter;
result_filter.resize(img_features.size());
ParallelCreateCSRFilter parallelCreateCSRFilter(img_features, Y, P,
params.admm_iterations, result_filter);
parallel_for_(Range(0, static_cast<int>(result_filter.size())), parallelCreateCSRFilter);
return result_filter;
}
Mat TrackerCSRTImpl::get_location_prior(
const Rect roi,
const Size2f target_size,
const Size img_sz)
{
int x1 = cvRound(max(min(roi.x-1, img_sz.width-1) , 0));
int y1 = cvRound(max(min(roi.y-1, img_sz.height-1) , 0));
int x2 = cvRound(min(max(roi.width-1, 0) , img_sz.width-1));
int y2 = cvRound(min(max(roi.height-1, 0) , img_sz.height-1));
Size target_sz;
target_sz.width = target_sz.height = cvFloor(min(target_size.width, target_size.height));
double cx = x1 + (x2-x1)/2.;
double cy = y1 + (y2-y1)/2.;
double kernel_size_width = 1.0/(0.5*static_cast<double>(target_sz.width)*1.4142+1);
double kernel_size_height = 1.0/(0.5*static_cast<double>(target_sz.height)*1.4142+1);
cv::Mat kernel_weight = Mat::zeros(1 + cvFloor(y2 - y1) , 1+cvFloor(-(x1-cx) + (x2-cx)), CV_64FC1);
for (int y = y1; y < y2+1; ++y){
double * weightPtr = kernel_weight.ptr<double>(y);
double tmp_y = std::pow((cy-y)*kernel_size_height, 2);
for (int x = x1; x < x2+1; ++x){
weightPtr[x] = kernel_epan(std::pow((cx-x)*kernel_size_width,2) + tmp_y);
}
}
double max_val;
cv::minMaxLoc(kernel_weight, NULL, &max_val, NULL, NULL);
Mat fg_prior = kernel_weight / max_val;
fg_prior.setTo(0.5, fg_prior < 0.5);
fg_prior.setTo(0.9, fg_prior > 0.9);
return fg_prior;
}
Mat TrackerCSRTImpl::segment_region(
const Mat &image,
const Point2f &object_center,
const Size2f &template_size,
const Size &target_size,
float scale_factor)
{
Rect valid_pixels;
Mat patch = get_subwindow(image, object_center, cvFloor(scale_factor * template_size.width),
cvFloor(scale_factor * template_size.height), &valid_pixels);
Size2f scaled_target = Size2f(target_size.width * scale_factor,
target_size.height * scale_factor);
Mat fg_prior = get_location_prior(
Rect(0,0, patch.size().width, patch.size().height),
scaled_target , patch.size());
std::vector<Mat> img_channels;
split(patch, img_channels);
std::pair<Mat, Mat> probs = Segment::computePosteriors2(img_channels, 0, 0, patch.cols, patch.rows,
p_b, fg_prior, 1.0-fg_prior, hist_foreground, hist_background);
Mat mask = Mat::zeros(probs.first.size(), probs.first.type());
probs.first(valid_pixels).copyTo(mask(valid_pixels));
double max_resp = get_max(mask);
threshold(mask, mask, max_resp / 2.0, 1, THRESH_BINARY);
mask.convertTo(mask, CV_32FC1, 1.0);
return mask;
}
void TrackerCSRTImpl::extract_histograms(const Mat &image, cv::Rect region, Histogram &hf, Histogram &hb)
{
// get coordinates of the region
int x1 = std::min(std::max(0, region.x), image.cols-1);
int y1 = std::min(std::max(0, region.y), image.rows-1);
int x2 = std::min(std::max(0, region.x + region.width), image.cols-1);
int y2 = std::min(std::max(0, region.y + region.height), image.rows-1);
// calculate coordinates of the background region
int offsetX = (x2-x1+1) / params.background_ratio;
int offsetY = (y2-y1+1) / params.background_ratio;
int outer_y1 = std::max(0, (int)(y1-offsetY));
int outer_y2 = std::min(image.rows, (int)(y2+offsetY+1));
int outer_x1 = std::max(0, (int)(x1-offsetX));
int outer_x2 = std::min(image.cols, (int)(x2+offsetX+1));
// calculate probability for the background
p_b = 1.0 - ((x2-x1+1) * (y2-y1+1)) /
((double) (outer_x2-outer_x1+1) * (outer_y2-outer_y1+1));
// split multi-channel image into the std::vector of matrices
std::vector<Mat> img_channels(image.channels());
split(image, img_channels);
for(size_t k=0; k<img_channels.size(); k++) {
img_channels.at(k).convertTo(img_channels.at(k), CV_8UC1);
}
hf.extractForegroundHistogram(img_channels, Mat(), false, x1, y1, x2, y2);
hb.extractBackGroundHistogram(img_channels, x1, y1, x2, y2,
outer_x1, outer_y1, outer_x2, outer_y2);
std::vector<Mat>().swap(img_channels);
}
void TrackerCSRTImpl::update_histograms(const Mat &image, const Rect &region)
{
// create temporary histograms
Histogram hf(image.channels(), params.histogram_bins);
Histogram hb(image.channels(), params.histogram_bins);
extract_histograms(image, region, hf, hb);
// get histogram vectors from temporary histograms
std::vector<double> hf_vect_new = hf.getHistogramVector();
std::vector<double> hb_vect_new = hb.getHistogramVector();
// get histogram vectors from learned histograms
std::vector<double> hf_vect = hist_foreground.getHistogramVector();
std::vector<double> hb_vect = hist_background.getHistogramVector();
// update histograms - use learning rate
for(size_t i=0; i<hf_vect.size(); i++) {
hf_vect_new[i] = (1-params.histogram_lr)*hf_vect[i] +
params.histogram_lr*hf_vect_new[i];
hb_vect_new[i] = (1-params.histogram_lr)*hb_vect[i] +
params.histogram_lr*hb_vect_new[i];
}
// set learned histograms
hist_foreground.setHistogramVector(&hf_vect_new[0]);
hist_background.setHistogramVector(&hb_vect_new[0]);
std::vector<double>().swap(hf_vect);
std::vector<double>().swap(hb_vect);
}
Point2f TrackerCSRTImpl::estimate_new_position(const Mat &image)
{
Mat resp = calculate_response(image, csr_filter);
Point max_loc;
minMaxLoc(resp, NULL, NULL, NULL, &max_loc);
// take into account also subpixel accuracy
float col = ((float) max_loc.x) + subpixel_peak(resp, "horizontal", max_loc);
float row = ((float) max_loc.y) + subpixel_peak(resp, "vertical", max_loc);
if(row + 1 > (float)resp.rows / 2.0f) {
row = row - resp.rows;
}
if(col + 1 > (float)resp.cols / 2.0f) {
col = col - resp.cols;
}
// calculate x and y displacements
Point2f new_center = object_center + Point2f(current_scale_factor * (1.0f / rescale_ratio) *cell_size*(col),
current_scale_factor * (1.0f / rescale_ratio) *cell_size*(row));
//sanity checks
if(new_center.x < 0)
new_center.x = 0;
if(new_center.x >= image_size.width)
new_center.x = static_cast<float>(image_size.width - 1);
if(new_center.y < 0)
new_center.y = 0;
if(new_center.y >= image_size.height)
new_center.y = static_cast<float>(image_size.height - 1);
return new_center;
}
// *********************************************************************
// * Update API function *
// *********************************************************************
bool TrackerCSRTImpl::updateImpl(const Mat& image_, Rect2d& boundingBox)
{
//treat gray image as color image
Mat image;
if(image_.channels() == 1) {
std::vector<Mat> channels(3);
channels[0] = channels[1] = channels[2] = image_;
merge(channels, image);
} else {
image = image_;
}
object_center = estimate_new_position(image);
current_scale_factor = dsst.getScale(image, object_center);
//update bouding_box according to new scale and location
bounding_box.x = object_center.x - current_scale_factor * original_target_size.width / 2.0f;
bounding_box.y = object_center.y - current_scale_factor * original_target_size.height / 2.0f;
bounding_box.width = current_scale_factor * original_target_size.width;
bounding_box.height = current_scale_factor * original_target_size.height;
//update tracker
if(params.use_segmentation) {
Mat hsv_img = bgr2hsv(image);
update_histograms(hsv_img, bounding_box);
filter_mask = segment_region(hsv_img, object_center,
template_size,original_target_size, current_scale_factor);
resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
if(check_mask_area(filter_mask, default_mask_area)) {
dilate(filter_mask , filter_mask, erode_element);
} else {
filter_mask = default_mask;
}
} else {
filter_mask = default_mask;
}
update_csr_filter(image, filter_mask);
dsst.update(image, object_center);
boundingBox = bounding_box;
return true;
}
// *********************************************************************
// * Init API function *
// *********************************************************************
bool TrackerCSRTImpl::initImpl(const Mat& image_, const Rect2d& boundingBox)
{
cv::setNumThreads(getNumThreads());
//treat gray image as color image
Mat image;
if(image_.channels() == 1) {
std::vector<Mat> channels(3);
channels[0] = channels[1] = channels[2] = image_;
merge(channels, image);
} else {
image = image_;
}
current_scale_factor = 1.0;
image_size = image.size();
bounding_box = boundingBox;
cell_size = cvFloor(std::min(4.0, std::max(1.0, static_cast<double>(
cvCeil((bounding_box.width * bounding_box.height)/400.0)))));
original_target_size = Size(bounding_box.size());
template_size.width = static_cast<float>(cvFloor(original_target_size.width + params.padding *
sqrt(original_target_size.width * original_target_size.height)));
template_size.height = static_cast<float>(cvFloor(original_target_size.height + params.padding *
sqrt(original_target_size.width * original_target_size.height)));
template_size.width = template_size.height =
(template_size.width + template_size.height) / 2.0f;
rescale_ratio = sqrt(pow(params.template_size,2) / (template_size.width * template_size.height));
if(rescale_ratio > 1) {
rescale_ratio = 1;
}
rescaled_template_size = Size2i(cvFloor(template_size.width * rescale_ratio),
cvFloor(template_size.height * rescale_ratio));
object_center = Point2f(static_cast<float>(boundingBox.x) + original_target_size.width / 2.0f,
static_cast<float>(boundingBox.y) + original_target_size.height / 2.0f);
yf = gaussian_shaped_labels(params.gsl_sigma,
rescaled_template_size.width / cell_size, rescaled_template_size.height / cell_size);
if(params.window_function.compare("hann") == 0) {
window = get_hann_win(Size(yf.cols,yf.rows));
} else if(params.window_function.compare("cheb") == 0) {
window = get_chebyshev_win(Size(yf.cols,yf.rows), params.cheb_attenuation);
} else if(params.window_function.compare("kaiser") == 0) {
window = get_kaiser_win(Size(yf.cols,yf.rows), params.kaiser_alpha);
} else {
std::cout << "Not a valid window function" << std::endl;
return false;
}
Size2i scaled_obj_size = Size2i(cvFloor(original_target_size.width * rescale_ratio / cell_size),
cvFloor(original_target_size.height * rescale_ratio / cell_size));
//set dummy mask and area;
int x0 = std::max((yf.size().width - scaled_obj_size.width)/2 - 1, 0);
int y0 = std::max((yf.size().height - scaled_obj_size.height)/2 - 1, 0);
default_mask = Mat::zeros(yf.size(), CV_32FC1);
default_mask(Rect(x0,y0,scaled_obj_size.width, scaled_obj_size.height)) = 1.0f;
default_mask_area = static_cast<float>(sum(default_mask)[0]);
//initalize segmentation
if(params.use_segmentation) {
Mat hsv_img = bgr2hsv(image);
hist_foreground = Histogram(hsv_img.channels(), params.histogram_bins);
hist_background = Histogram(hsv_img.channels(), params.histogram_bins);
extract_histograms(hsv_img, bounding_box, hist_foreground, hist_background);
filter_mask = segment_region(hsv_img, object_center, template_size,
original_target_size, current_scale_factor);
//update calculated mask with preset mask
if(preset_mask.data){
Mat preset_mask_padded = Mat::zeros(filter_mask.size(), filter_mask.type());
int sx = std::max((int)cvFloor(preset_mask_padded.cols / 2.0f - preset_mask.cols / 2.0f) - 1, 0);
int sy = std::max((int)cvFloor(preset_mask_padded.rows / 2.0f - preset_mask.rows / 2.0f) - 1, 0);
preset_mask.copyTo(preset_mask_padded(
Rect(sx, sy, preset_mask.cols, preset_mask.rows)));
filter_mask = filter_mask.mul(preset_mask_padded);
}
erode_element = getStructuringElement(MORPH_ELLIPSE, Size(3,3), Point(1,1));
resize(filter_mask, filter_mask, yf.size(), 0, 0, INTER_NEAREST);
if(check_mask_area(filter_mask, default_mask_area)) {
dilate(filter_mask , filter_mask, erode_element);
} else {
filter_mask = default_mask;
}
} else {
filter_mask = default_mask;
}
//initialize filter
Mat patch = get_subwindow(image, object_center, cvFloor(current_scale_factor * template_size.width),
cvFloor(current_scale_factor * template_size.height));
resize(patch, patch, rescaled_template_size, 0, 0, INTER_CUBIC);
std::vector<Mat> patch_ftrs = get_features(patch, yf.size());
std::vector<Mat> Fftrs = fourier_transform_features(patch_ftrs);
csr_filter = create_csr_filter(Fftrs, yf, filter_mask);
if(params.use_channel_weights) {
Mat current_resp;
filter_weights = std::vector<float>(csr_filter.size());
float chw_sum = 0;
for (size_t i = 0; i < csr_filter.size(); ++i) {
mulSpectrums(Fftrs[i], csr_filter[i], current_resp, 0, true);
idft(current_resp, current_resp, DFT_SCALE | DFT_REAL_OUTPUT);
double max_val;
minMaxLoc(current_resp, NULL, &max_val, NULL , NULL);
chw_sum += static_cast<float>(max_val);
filter_weights[i] = static_cast<float>(max_val);
}
for (size_t i = 0; i < filter_weights.size(); ++i) {
filter_weights[i] /= chw_sum;
}
}
//initialize scale search
dsst = DSST(image, bounding_box, template_size, params.number_of_scales, params.scale_step,
params.scale_model_max_area, params.scale_sigma_factor, params.scale_lr);
model=Ptr<TrackerCSRTModel>(new TrackerCSRTModel(params));
isInit = true;
return true;
}
TrackerCSRT::Params::Params()
{
use_channel_weights = true;
use_segmentation = true;
use_hog = true;
use_color_names = true;
use_gray = true;
use_rgb = false;
window_function = "hann";
kaiser_alpha = 3.75f;
cheb_attenuation = 45;
padding = 3.0f;
template_size = 200;
gsl_sigma = 1.0f;
hog_orientations = 9;
hog_clip = 0.2f;
num_hog_channels_used = 18;
filter_lr = 0.02f;
weights_lr = 0.02f;
admm_iterations = 4;
number_of_scales = 33;
scale_sigma_factor = 0.250f;
scale_model_max_area = 512.0f;
scale_lr = 0.025f;
scale_step = 1.020f;
histogram_bins = 16;
background_ratio = 2;
histogram_lr = 0.04f;
}
void TrackerCSRT::Params::read(const FileNode& fn)
{
*this = TrackerCSRT::Params();
if(!fn["padding"].empty())
fn["padding"] >> padding;
if(!fn["template_size"].empty())
fn["template_size"] >> template_size;
if(!fn["gsl_sigma"].empty())
fn["gsl_sigma"] >> gsl_sigma;
if(!fn["hog_orientations"].empty())
fn["hog_orientations"] >> hog_orientations;
if(!fn["num_hog_channels_used"].empty())
fn["num_hog_channels_used"] >> num_hog_channels_used;
if(!fn["hog_clip"].empty())
fn["hog_clip"] >> hog_clip;
if(!fn["use_hog"].empty())
fn["use_hog"] >> use_hog;
if(!fn["use_color_names"].empty())
fn["use_color_names"] >> use_color_names;
if(!fn["use_gray"].empty())
fn["use_gray"] >> use_gray;
if(!fn["use_rgb"].empty())
fn["use_rgb"] >> use_rgb;
if(!fn["window_function"].empty())
fn["window_function"] >> window_function;
if(!fn["kaiser_alpha"].empty())
fn["kaiser_alpha"] >> kaiser_alpha;
if(!fn["cheb_attenuation"].empty())
fn["cheb_attenuation"] >> cheb_attenuation;
if(!fn["filter_lr"].empty())
fn["filter_lr"] >> filter_lr;
if(!fn["admm_iterations"].empty())
fn["admm_iterations"] >> admm_iterations;
if(!fn["number_of_scales"].empty())
fn["number_of_scales"] >> number_of_scales;
if(!fn["scale_sigma_factor"].empty())
fn["scale_sigma_factor"] >> scale_sigma_factor;
if(!fn["scale_model_max_area"].empty())
fn["scale_model_max_area"] >> scale_model_max_area;
if(!fn["scale_lr"].empty())
fn["scale_lr"] >> scale_lr;
if(!fn["scale_step"].empty())
fn["scale_step"] >> scale_step;
if(!fn["use_channel_weights"].empty())
fn["use_channel_weights"] >> use_channel_weights;
if(!fn["weights_lr"].empty())
fn["weights_lr"] >> weights_lr;
if(!fn["use_segmentation"].empty())
fn["use_segmentation"] >> use_segmentation;
if(!fn["histogram_bins"].empty())
fn["histogram_bins"] >> histogram_bins;
if(!fn["background_ratio"].empty())
fn["background_ratio"] >> background_ratio;
if(!fn["histogram_lr"].empty())
fn["histogram_lr"] >> histogram_lr;
CV_Assert(number_of_scales % 2 == 1);
CV_Assert(use_gray || use_color_names || use_hog || use_rgb);
}
void TrackerCSRT::Params::write(FileStorage& fs) const
{
fs << "padding" << padding;
fs << "template_size" << template_size;
fs << "gsl_sigma" << gsl_sigma;
fs << "hog_orientations" << hog_orientations;
fs << "num_hog_channels_used" << num_hog_channels_used;
fs << "hog_clip" << hog_clip;
fs << "use_hog" << use_hog;
fs << "use_color_names" << use_color_names;
fs << "use_gray" << use_gray;
fs << "use_rgb" << use_rgb;
fs << "window_function" << window_function;
fs << "kaiser_alpha" << kaiser_alpha;
fs << "cheb_attenuation" << cheb_attenuation;
fs << "filter_lr" << filter_lr;
fs << "admm_iterations" << admm_iterations;
fs << "number_of_scales" << number_of_scales;
fs << "scale_sigma_factor" << scale_sigma_factor;
fs << "scale_model_max_area" << scale_model_max_area;
fs << "scale_lr" << scale_lr;
fs << "scale_step" << scale_step;
fs << "use_channel_weights" << use_channel_weights;
fs << "weights_lr" << weights_lr;
fs << "use_segmentation" << use_segmentation;
fs << "histogram_bins" << histogram_bins;
fs << "background_ratio" << background_ratio;
fs << "histogram_lr" << histogram_lr;
}
} /* namespace cv */
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "trackerCSRTScaleEstimation.hpp"
#include "trackerCSRTUtils.hpp"
//Discriminative Scale Space Tracking
namespace cv
{
class ParallelGetScaleFeatures : public ParallelLoopBody
{
public:
ParallelGetScaleFeatures(
Mat img,
Point2f pos,
Size2f base_target_sz,
float current_scale,
std::vector<float> &scale_factors,
Mat scale_window,
Size scale_model_sz,
int col_len,
Mat &result)
{
this->img = img;
this->pos = pos;
this->base_target_sz = base_target_sz;
this->current_scale = current_scale;
this->scale_factors = scale_factors;
this->scale_window = scale_window;
this->scale_model_sz = scale_model_sz;
this->col_len = col_len;
this->result = result;
}
virtual void operator ()(const Range& range) const
{
for (int s = range.start; s < range.end; s++) {
Size patch_sz = Size(static_cast<int>(current_scale * scale_factors[s] * base_target_sz.width),
static_cast<int>(current_scale * scale_factors[s] * base_target_sz.height));
Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
img_patch.convertTo(img_patch, CV_32FC3);
resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR);
std::vector<Mat> hog;
hog = get_features_hog(img_patch, 4);
for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
hog[i] = hog[i].t();
hog[i] = scale_window.at<float>(0,s) * hog[i].reshape(0, col_len);
hog[i].copyTo(result(Rect(Point(s, i*col_len), hog[i].size())));
}
}
}
ParallelGetScaleFeatures& operator=(const ParallelGetScaleFeatures &) {
return *this;
}
private:
Mat img;
Point2f pos;
Size2f base_target_sz;
float current_scale;
std::vector<float> scale_factors;
Mat scale_window;
Size scale_model_sz;
int col_len;
Mat result;
};
DSST::DSST(const Mat &image,
Rect2f bounding_box,
Size2f template_size,
int numberOfScales,
float scaleStep,
float maxModelArea,
float sigmaFactor,
float scaleLearnRate):
scales_count(numberOfScales), scale_step(scaleStep), max_model_area(maxModelArea),
sigma_factor(sigmaFactor), learn_rate(scaleLearnRate)
{
original_targ_sz = bounding_box.size();
Point2f object_center = Point2f(bounding_box.x + original_targ_sz.width / 2,
bounding_box.y + original_targ_sz.height / 2);
current_scale_factor = 1.0;
if(scales_count % 2 == 0)
scales_count++;
scale_sigma = static_cast<float>(sqrt(scales_count) * sigma_factor);
min_scale_factor = pow(scale_step,
cvCeil(log(max(5.0 / template_size.width, 5.0 / template_size.height)) / log(scale_step)));
max_scale_factor = powf(scale_step,
static_cast<float>(cvFloor(log(min((float)image.rows / (float)bounding_box.width,
(float)image.cols / (float)bounding_box.height)) / log(scale_step))));
ys = Mat(1, scales_count, CV_32FC1);
float ss, sf;
for(int i = 0; i < ys.cols; ++i) {
ss = (float)(i+1) - cvCeil((float)scales_count / 2.0f);
ys.at<float>(0,i) = static_cast<float>(exp(-0.5 * pow(ss,2) / pow(scale_sigma,2)));
sf = static_cast<float>(i + 1);
scale_factors.push_back(pow(scale_step, cvCeil((float)scales_count / 2.0f) - sf));
}
scale_window = get_hann_win(Size(scales_count, 1));
float scale_model_factor = 1.0;
if(template_size.width * template_size.height * pow(scale_model_factor, 2) > max_model_area)
{
scale_model_factor = sqrt(max_model_area /
(template_size.width * template_size.height));
}
scale_model_sz = Size(cvFloor(template_size.width * scale_model_factor),
cvFloor(template_size.height * scale_model_factor));
Mat scale_resp = get_scale_features(image, object_center, original_targ_sz,
current_scale_factor, scale_factors, scale_window, scale_model_sz);
Mat ysf_row = Mat(ys.size(), CV_32FC2);
dft(ys, ysf_row, DFT_ROWS | DFT_COMPLEX_OUTPUT, 0);
ysf = repeat(ysf_row, scale_resp.rows, 1);
Mat Fscale_resp;
dft(scale_resp, Fscale_resp, DFT_ROWS | DFT_COMPLEX_OUTPUT);
mulSpectrums(ysf, Fscale_resp, sf_num, 0 , true);
Mat sf_den_all;
mulSpectrums(Fscale_resp, Fscale_resp, sf_den_all, 0, true);
reduce(sf_den_all, sf_den, 0, CV_REDUCE_SUM, -1);
}
DSST::~DSST()
{
}
Mat DSST::get_scale_features(
Mat img,
Point2f pos,
Size2f base_target_sz,
float current_scale,
std::vector<float> &scale_factors,
Mat scale_window,
Size scale_model_sz)
{
Mat result;
int col_len = 0;
Size patch_sz = Size(cvFloor(current_scale * scale_factors[0] * base_target_sz.width),
cvFloor(current_scale * scale_factors[0] * base_target_sz.height));
Mat img_patch = get_subwindow(img, pos, patch_sz.width, patch_sz.height);
img_patch.convertTo(img_patch, CV_32FC3);
resize(img_patch, img_patch, Size(scale_model_sz.width, scale_model_sz.height),0,0,INTER_LINEAR);
std::vector<Mat> hog;
hog = get_features_hog(img_patch, 4);
result = Mat(Size((int)scale_factors.size(), hog[0].cols * hog[0].rows * (int)hog.size()), CV_32F);
col_len = hog[0].cols * hog[0].rows;
for (int i = 0; i < static_cast<int>(hog.size()); ++i) {
hog[i] = hog[i].t();
hog[i] = scale_window.at<float>(0,0) * hog[i].reshape(0, col_len);
hog[i].copyTo(result(Rect(Point(0, i*col_len), hog[i].size())));
}
ParallelGetScaleFeatures parallelGetScaleFeatures(img, pos, base_target_sz,
current_scale, scale_factors, scale_window, scale_model_sz, col_len, result);
parallel_for_(Range(1, static_cast<int>(scale_factors.size())), parallelGetScaleFeatures);
return result;
}
void DSST::update(const Mat &image, const Point2f object_center)
{
Mat scale_features = get_scale_features(image, object_center, original_targ_sz,
current_scale_factor, scale_factors, scale_window, scale_model_sz);
Mat Fscale_features;
dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);
Mat new_sf_num;
Mat new_sf_den;
Mat new_sf_den_all;
mulSpectrums(ysf, Fscale_features, new_sf_num, DFT_ROWS, true);
Mat sf_den_all;
mulSpectrums(Fscale_features, Fscale_features, new_sf_den_all, DFT_ROWS, true);
reduce(new_sf_den_all, new_sf_den, 0, CV_REDUCE_SUM, -1);
sf_num = (1 - learn_rate) * sf_num + learn_rate * new_sf_num;
sf_den = (1 - learn_rate) * sf_den + learn_rate * new_sf_den;
}
float DSST::getScale(const Mat &image, const Point2f object_center)
{
Mat scale_features = get_scale_features(image, object_center, original_targ_sz,
current_scale_factor, scale_factors, scale_window, scale_model_sz);
Mat Fscale_features;
dft(scale_features, Fscale_features, DFT_ROWS | DFT_COMPLEX_OUTPUT);
mulSpectrums(Fscale_features, sf_num, Fscale_features, 0, false);
Mat scale_resp;
reduce(Fscale_features, scale_resp, 0, CV_REDUCE_SUM, -1);
scale_resp = divide_complex_matrices(scale_resp, sf_den + 0.01f);
idft(scale_resp, scale_resp, DFT_REAL_OUTPUT|DFT_SCALE);
Point max_loc;
minMaxLoc(scale_resp, NULL, NULL, NULL, &max_loc);
current_scale_factor *= scale_factors[max_loc.x];
if(current_scale_factor < min_scale_factor)
current_scale_factor = min_scale_factor;
else if(current_scale_factor > max_scale_factor)
current_scale_factor = max_scale_factor;
return current_scale_factor;
}
} /* namespace cv */
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_TRACKER_CSRT_SCALE_ESTIMATION
#define OPENCV_TRACKER_CSRT_SCALE_ESTIMATION
namespace cv
{
class DSST {
public:
DSST() {};
DSST(const Mat &image, Rect2f bounding_box, Size2f template_size, int numberOfScales,
float scaleStep, float maxModelArea, float sigmaFactor, float scaleLearnRate);
~DSST();
void update(const Mat &image, const Point2f objectCenter);
float getScale(const Mat &image, const Point2f objecCenter);
private:
Mat get_scale_features(Mat img, Point2f pos, Size2f base_target_sz, float current_scale,
std::vector<float> &scale_factors, Mat scale_window, Size scale_model_sz);
Size scale_model_sz;
Mat ys;
Mat ysf;
Mat scale_window;
std::vector<float> scale_factors;
Mat sf_num;
Mat sf_den;
float scale_sigma;
float min_scale_factor;
float max_scale_factor;
float current_scale_factor;
int scales_count;
float scale_step;
float max_model_area;
float sigma_factor;
float learn_rate;
Size original_targ_sz;
};
} /* namespace cv */
#endif
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "trackerCSRTSegmentation.hpp"
#include <fstream>
#include <iostream>
#include <vector>
#include <iostream>
//-------------------- HISTOGRAM CLASS --------------------
namespace cv
{
Histogram::Histogram(int numDimensions, int numBinsPerDimension)
{
m_numBinsPerDim = numBinsPerDimension;
m_numDim = numDimensions;
p_size = cvFloor(std::pow(m_numBinsPerDim, m_numDim));
p_bins.resize(p_size, 0);
p_dimIdCoef.resize(m_numDim, 1);
for (int i = 0; i < m_numDim-1; ++i)
p_dimIdCoef[i] = static_cast<int>(std::pow(numBinsPerDimension, m_numDim - 1 - i));
}
void Histogram::extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2)
{
//just for code clarity
cv::Mat & img = imgChannels[0];
if (!useMatWeights){
//weights are epanechnikov distr. with peek at the center of the image;
double cx = x1 + (x2-x1)/2.;
double cy = y1 + (y2-y1)/2.;
double kernelSize_width = 1.0/(0.5*static_cast<double>(x2-x1)*1.4142+1); //sqrt(2)
double kernelSize_height = 1.0/(0.5*static_cast<double>(y2-y1)*1.4142+1);
cv::Mat kernelWeight(img.rows, img.cols, CV_64FC1);
for (int y = y1; y < y2+1; ++y){
double * weightPtr = kernelWeight.ptr<double>(y);
double tmp_y = std::pow((cy-y)*kernelSize_height, 2);
for (int x = x1; x < x2+1; ++x){
weightPtr[x] = kernelProfile_Epanechnikov(std::pow((cx-x)*kernelSize_width,2) + tmp_y);
}
}
weights = kernelWeight;
}
//extract pixel values and compute histogram
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
double sum = 0;
for (int y = y1; y < y2+1; ++y){
std::vector<const uchar *> dataPtr(m_numDim);
for (int dim = 0; dim < m_numDim; ++dim)
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
const double * weightPtr = weights.ptr<double>(y);
for (int x = x1; x < x2+1; ++x){
int id = 0;
for (int dim = 0; dim < m_numDim; ++dim){
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
}
p_bins[id] += weightPtr[x];
sum += weightPtr[x];
}
}
//normalize
sum = 1./sum;
for(int i = 0; i < p_size; ++i)
p_bins[i] *= sum;
}
void Histogram::extractBackGroundHistogram(
std::vector<cv::Mat> & imgChannels,
int x1, int y1, int x2, int y2,
int outer_x1, int outer_y1, int outer_x2, int outer_y2)
{
//extract pixel values and compute histogram
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
double sum = 0;
for (int y = outer_y1; y < outer_y2; ++y){
std::vector<const uchar *> dataPtr(m_numDim);
for (int dim = 0; dim < m_numDim; ++dim)
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
for (int x = outer_x1; x < outer_x2; ++x){
if (x >= x1 && x <= x2 && y >= y1 && y <= y2)
continue;
int id = 0;
for (int dim = 0; dim < m_numDim; ++dim){
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
}
p_bins[id] += 1.0;
sum += 1.0;
}
}
//normalize
sum = 1./sum;
for(int i = 0; i < p_size; ++i)
p_bins[i] *= sum;
}
cv::Mat Histogram::backProject(std::vector<cv::Mat> & imgChannels)
{
//just for code clarity
cv::Mat & img = imgChannels[0];
cv::Mat backProject(img.rows, img.cols, CV_64FC1);
double rangePerBinInverse = static_cast<double>(m_numBinsPerDim)/256.0; // 1 / (imgRange/numBinsPerDim)
for (int y = 0; y < img.rows; ++y){
double * backProjectPtr = backProject.ptr<double>(y);
std::vector<const uchar *> dataPtr(m_numDim);
for (int dim = 0; dim < m_numDim; ++dim)
dataPtr[dim] = imgChannels[dim].ptr<uchar>(y);
for (int x = 0; x < img.cols; ++x){
int id = 0;
for (int dim = 0; dim < m_numDim; ++dim){
id += p_dimIdCoef[dim]*cvFloor(rangePerBinInverse*dataPtr[dim][x]);
}
backProjectPtr[x] = p_bins[id];
}
}
return backProject;
}
// add new methods
std::vector<double> Histogram::getHistogramVector() {
return p_bins;
}
void Histogram::setHistogramVector(double *vector) {
for (size_t i=0; i<p_bins.size(); i++) {
p_bins[i] = vector[i];
}
}
//-------------------- SEGMENT CLASS --------------------
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors(
std::vector<cv::Mat> &imgChannels,
int x1, int y1, int x2, int y2,
cv::Mat weights, cv::Mat fgPrior, cv::Mat bgPrior,
const Histogram &fgHistPrior, int numBinsPerChannel)
{
//preprocess and normalize all data
CV_Assert(imgChannels.size() > 0);
//fit target to the image
x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
//enlarge bbox by 1/3 of its size for background area
int offsetX = (x2-x1)/3;
int offsetY = (y2-y1)/3;
int outer_y1 = std::max(0, (int)(y1-offsetY));
int outer_y2 = std::min(imgChannels[0].rows, (int)(y2+offsetY+1));
int outer_x1 = std::max(0, (int)(x1-offsetX));
int outer_x2 = std::min(imgChannels[0].cols, (int)(x2+offsetX+1));
//extract histogram from original data -> more pixels better representation of distr. by histograms
Histogram hist_target =
(fgHistPrior.m_numBinsPerDim == numBinsPerChannel && (size_t)fgHistPrior.m_numDim == imgChannels.size())
? fgHistPrior : Histogram(static_cast<int>(imgChannels.size()), numBinsPerChannel);
Histogram hist_background(static_cast<int>(imgChannels.size()), numBinsPerChannel);
if (weights.cols == 0)
hist_target.extractForegroundHistogram(imgChannels, cv::Mat(), false, x1, y1, x2, y2);
else
hist_target.extractForegroundHistogram(imgChannels, weights, true, x1, y1, x2, y2);
hist_background.extractBackGroundHistogram(imgChannels, x1, y1, x2, y2,
outer_x1, outer_y1, outer_x2, outer_y2);
//compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
double factor = sqrt(1000.0/((x2-x1)*(y2-y1)));
if (factor > 1)
factor = 1.0;
cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
//rescale input data
cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1, y2-y1);
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
for (size_t i = 0; i < imgChannels.size(); ++i)
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
//initialize priors if there is no external source and rescale
cv::Mat fgPriorScaled;
if (fgPrior.cols == 0)
fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
cv::Mat bgPriorScaled;
if (bgPrior.cols == 0)
bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
//backproject pixels likelihood
cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
double p_b = std::sqrt((std::pow(outer_x2-outer_x1, 2) + std::pow(outer_y2-outer_y1, 2)) /
(std::pow(x2-x1, 2) + std::pow(y2-y1, 2))) ;
double p_o = 1./(p_b + 1);
//convert likelihoods to posterior prob. (Bayes rule)
cv::Mat prob_o(newSize, foregroundLikelihood.type());
prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
cv::Mat prob_b = 1.0 - prob_o;
std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);
//resize probs to original size
std::pair<cv::Mat, cv::Mat> probs;
cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
return probs;
}
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(
std::vector<cv::Mat> &imgChannels, int x1, int y1, int x2, int y2, double p_b,
cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
{
//preprocess and normalize all data
CV_Assert(imgChannels.size() > 0);
//fit target to the image
x1 = std::min(std::max(x1, 0), imgChannels[0].cols-1);
y1 = std::min(std::max(y1, 0), imgChannels[0].rows-1);
x2 = std::max(std::min(x2, imgChannels[0].cols-1), 0);
y2 = std::max(std::min(y2, imgChannels[0].rows-1), 0);
// calculate width and height of the region
int w = x2 - x1 + 1;
int h = y2 - y1 + 1;
w = std::min(std::max(w, 1), imgChannels[0].cols);
h = std::min(std::max(h, 1), imgChannels[0].rows);
//double p_o = 1./(p_b + 1);
double p_o = 1. - p_b;
//compute resize factor so that the max area is 1000 (=avg. size ~ 32x32)
double factor = sqrt(1000.0/(w*h));
if (factor > 1)
factor = 1.0;
cv::Size newSize(cvFloor(w*factor), cvFloor(h*factor));
//rescale input data
cv::Rect roiRect_inner = cv::Rect(x1, y1, w, h);
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
for (size_t i = 0; i < imgChannels.size(); ++i)
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
//initialize priors if there is no external source and rescale
cv::Mat fgPriorScaled;
if (fgPrior.cols == 0)
fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
cv::Mat bgPriorScaled;
if (bgPrior.cols == 0)
bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
//backproject pixels likelihood
cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
//convert likelihoods to posterior prob. (Bayes rule)
cv::Mat prob_o(newSize, foregroundLikelihood.type());
prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
cv::Mat prob_b = 1.0 - prob_o;
std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b,
fgPriorScaled, bgPriorScaled);
//std::pair<cv::Mat, cv::Mat> sizedProbs = std::pair<cv::Mat, cv::Mat>(prob_o, prob_b);
//resize probs to original size
std::pair<cv::Mat, cv::Mat> probs;
cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
return probs;
}
std::pair<cv::Mat, cv::Mat> Segment::computePosteriors2(std::vector<cv::Mat> &imgChannels,
cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background)
{
//preprocess and normalize all data
CV_Assert(imgChannels.size() > 0);
//fit target to the image
int x1 = 0;
int y1 = 0;
int x2 = imgChannels[0].cols-1;
int y2 = imgChannels[0].rows-1;
//compute resize factor so that we control the max area ~32^2
double factor = sqrt(1000./((x2-x1)*(y2-y1)));
//double factor = 1;
if (factor > 1)
factor = 1.0;
cv::Size newSize(cvFloor((x2-x1)*factor), cvFloor((y2-y1)*factor));
//rescale input data
cv::Rect roiRect_inner = cv::Rect(x1, y1, x2-x1+1, y2-y1+1);
std::vector<cv::Mat> imgChannelsROI_inner(imgChannels.size());
for (size_t i = 0; i < imgChannels.size(); ++i)
cv::resize(imgChannels[i](roiRect_inner), imgChannelsROI_inner[i], newSize);
//initialize priors if there is no external source and rescale
cv::Mat fgPriorScaled;
if (fgPrior.cols == 0)
fgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(fgPrior(roiRect_inner), fgPriorScaled, newSize);
cv::Mat bgPriorScaled;
if (bgPrior.cols == 0)
bgPriorScaled = 0.5*cv::Mat::ones(newSize, CV_64FC1);
else
cv::resize(bgPrior(roiRect_inner), bgPriorScaled, newSize);
//backproject pixels likelihood
cv::Mat foregroundLikelihood = hist_target.backProject(imgChannelsROI_inner).mul(fgPriorScaled);
cv::Mat backgroundLikelihood = hist_background.backProject(imgChannelsROI_inner).mul(bgPriorScaled);
//prior for posterior, relative to the number of pixels in bg and fg
double p_b = 5./3.;
double p_o = 1./(p_b + 1);
//convert likelihoods to posterior prob. (Bayes rule)
cv::Mat prob_o(newSize, foregroundLikelihood.type());
prob_o = p_o*foregroundLikelihood / (p_o*foregroundLikelihood + p_b*backgroundLikelihood);
cv::Mat prob_b = 1.0 - prob_o;
std::pair<cv::Mat, cv::Mat> sizedProbs = getRegularizedSegmentation(prob_o, prob_b, fgPriorScaled, bgPriorScaled);
//resize probs to original size
std::pair<cv::Mat, cv::Mat> probs;
cv::resize(sizedProbs.first, probs.first, cv::Size(roiRect_inner.width, roiRect_inner.height));
cv::resize(sizedProbs.second, probs.second, cv::Size(roiRect_inner.width, roiRect_inner.height));
return probs;
}
std::pair<cv::Mat, cv::Mat> Segment::getRegularizedSegmentation(
cv::Mat &prob_o, cv::Mat &prob_b, cv::Mat & prior_o, cv::Mat & prior_b)
{
int hsize = cvFloor(std::max(1.0, (double)cvFloor(static_cast<double>(prob_b.cols)*3./50. + 0.5)));
int lambdaSize = hsize*2+1;
//compute gaussian kernel
cv::Mat lambda(lambdaSize, lambdaSize, CV_64FC1);
double std2 = std::pow(hsize/3.0, 2);
double sumLambda = 0.0;
for (int y = -hsize; y < hsize + 1; ++y){
double * lambdaPtr = lambda.ptr<double>(y+hsize);
double tmp_y = y*y;
for (int x = -hsize; x < hsize +1; ++x){
double tmp_gauss = gaussian(x*x, tmp_y, std2);
lambdaPtr[x+hsize] = tmp_gauss;
sumLambda += tmp_gauss;
}
}
sumLambda -= lambda.at<double>(hsize, hsize);
//set center of kernel to 0
lambda.at<double>(hsize, hsize) = 0.0;
sumLambda = 1.0/sumLambda;
//normalize kernel to sum to 1
lambda = lambda*sumLambda;
//create lambda2 kernel
cv::Mat lambda2 = lambda.clone();
lambda2.at<double>(hsize, hsize) = 1.0;
double terminateThr = 1e-1;
double logLike = std::numeric_limits<double>::max();
int maxIter = 50;
//return values
cv::Mat Qsum_o(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Qsum_b(prior_o.rows, prior_o.cols, prior_o.type());
//algorithm temporal
cv::Mat Si_o(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Si_b(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Ssum_o(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Ssum_b(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Qi_o(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat Qi_b(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat logQo(prior_o.rows, prior_o.cols, prior_o.type());
cv::Mat logQb(prior_o.rows, prior_o.cols, prior_o.type());
int i;
for (i = 0; i < maxIter; ++i){
//follows the equations from Kristan et al. ACCV2014 paper
//"A graphical model for rapid obstacle image-map estimation from unmanned surface vehicles"
cv::Mat P_Io = prior_o.mul(prob_o) + std::numeric_limits<double>::epsilon();
cv::Mat P_Ib = prior_b.mul(prob_b) + std::numeric_limits<double>::epsilon();
cv::filter2D(prior_o, Si_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
cv::filter2D(prior_b, Si_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
Si_o = Si_o.mul(prior_o);
Si_b = Si_b.mul(prior_b);
cv::Mat normSi = 1.0/(Si_o + Si_b);
Si_o = Si_o.mul(normSi);
Si_b = Si_b.mul(normSi);
cv::filter2D(Si_o, Ssum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
cv::filter2D(Si_b, Ssum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
cv::filter2D(P_Io, Qi_o, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
cv::filter2D(P_Ib, Qi_b, -1, lambda, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
Qi_o = Qi_o.mul(P_Io);
Qi_b = Qi_b.mul(P_Ib);
cv::Mat normQi = 1.0/(Qi_o + Qi_b);
Qi_o = Qi_o.mul(normQi);
Qi_b = Qi_b.mul(normQi);
cv::filter2D(Qi_o, Qsum_o, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
cv::filter2D(Qi_b, Qsum_b, -1, lambda2, cv::Point(-1, -1), 0, cv::BORDER_REFLECT);
prior_o = (Qsum_o + Ssum_o)*0.25;
prior_b = (Qsum_b + Ssum_b)*0.25;
cv::Mat normPI = 1.0/(prior_o + prior_b);
prior_o = prior_o.mul(normPI);
prior_b = prior_b.mul(normPI);
//converge ?
cv::log(Qsum_o, logQo);
cv::log(Qsum_b, logQb);
cv::Scalar mean = cv::sum(logQo+logQb);
double logLikeNew = -mean.val[0]/(2*Qsum_o.rows*Qsum_o.cols);
if (std::abs(logLike - logLikeNew) < terminateThr)
break;
logLike = logLikeNew;
}
return std::pair<cv::Mat, cv::Mat>(Qsum_o, Qsum_b);
}
} //cv namespace
//---------------------------------------------------------------------------------------------------------------------
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_TRACKER_CSRT_SEGMENTATION
#define OPENCV_TRACKER_CSRT_SEGMENTATION
namespace cv
{
class Histogram
{
public:
int m_numBinsPerDim;
int m_numDim;
Histogram() : m_numBinsPerDim(0), m_numDim(0) {}
Histogram(int numDimensions, int numBinsPerDimension = 8);
void extractForegroundHistogram(std::vector<cv::Mat> & imgChannels,
cv::Mat weights, bool useMatWeights, int x1, int y1, int x2, int y2);
void extractBackGroundHistogram(std::vector<cv::Mat> & imgChannels,
int x1, int y1, int x2, int y2, int outer_x1, int outer_y1,
int outer_x2, int outer_y2);
cv::Mat backProject(std::vector<cv::Mat> & imgChannels);
std::vector<double> getHistogramVector();
void setHistogramVector(double *vector);
private:
int p_size;
std::vector<double> p_bins;
std::vector<int> p_dimIdCoef;
inline double kernelProfile_Epanechnikov(double x)
{ return (x <= 1) ? (2.0/CV_PI)*(1-x) : 0; }
};
class Segment
{
public:
static std::pair<cv::Mat, cv::Mat> computePosteriors(std::vector<cv::Mat> & imgChannels,
int x1, int y1, int x2, int y2, cv::Mat weights, cv::Mat fgPrior,
cv::Mat bgPrior, const Histogram &fgHistPrior, int numBinsPerChannel = 16);
static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> & imgChannels,
int x1, int y1, int x2, int y2, double p_b, cv::Mat fgPrior,
cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);
static std::pair<cv::Mat, cv::Mat> computePosteriors2(std::vector<cv::Mat> &imgChannels,
cv::Mat fgPrior, cv::Mat bgPrior, Histogram hist_target, Histogram hist_background);
private:
static std::pair<cv::Mat, cv::Mat> getRegularizedSegmentation(cv::Mat & prob_o,
cv::Mat & prob_b, cv::Mat &prior_o, cv::Mat &prior_b);
inline static double gaussian(double x2, double y2, double std2){
return exp(-(x2 + y2)/(2*std2))/(2*CV_PI*std2);
}
};
}//cv namespace
#endif
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "trackerCSRTUtils.hpp"
namespace cv {
Mat circshift(Mat matrix, int dx, int dy)
{
Mat matrix_out = matrix.clone();
int idx_y = 0;
int idx_x = 0;
for(int i=0; i<matrix.rows; i++) {
for(int j=0; j<matrix.cols; j++) {
idx_y = modul(i+dy+1, matrix.rows);
idx_x = modul(j+dx+1, matrix.cols);
matrix_out.at<float>(idx_y, idx_x) = matrix.at<float>(i,j);
}
}
return matrix_out;
}
Mat gaussian_shaped_labels(const float sigma, const int w, const int h)
{
// create 2D Gaussian peak, convert to Fourier space and stores it into the yf
Mat y = Mat::zeros(h, w, CV_32F);
float w2 = static_cast<float>(cvFloor(w / 2));
float h2 = static_cast<float>(cvFloor(h / 2));
// calculate for each pixel separatelly
for(int i=0; i<y.rows; i++) {
for(int j=0; j<y.cols; j++) {
y.at<float>(i,j) = (float)exp((-0.5 / pow(sigma, 2)) * (pow((i+1-h2), 2) + pow((j+1-w2), 2)));
}
}
// wrap-arround with the circulat shifting
y = circshift(y, -cvFloor(y.cols / 2), -cvFloor(y.rows / 2));
Mat yf;
dft(y, yf, DFT_COMPLEX_OUTPUT);
return yf;
}
std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M)
{
std::vector<Mat> out(M.size());
Mat channel;
// iterate over channels and convert them to Fourier domain
for(size_t k = 0; k < M.size(); k++) {
M[k].convertTo(channel, CV_32F);
dft(channel, channel, DFT_COMPLEX_OUTPUT);
out[k] = (channel);
}
return out;
}
Mat divide_complex_matrices(const Mat &A, const Mat &B)
{
std::vector<Mat> va,vb;
split(A, va);
split(B, vb);
Mat a = va.at(0);
Mat b = va.at(1);
Mat c = vb.at(0);
Mat d = vb.at(1);
Mat div = c.mul(c) + d.mul(d);
Mat real_part = (a.mul(c) + b.mul(d));
Mat im_part = (b.mul(c) - a.mul(d));
divide(real_part, div, real_part);
divide(im_part, div, im_part);
std::vector<Mat> tmp(2);
tmp[0] = real_part;
tmp[1] = im_part;
Mat res;
merge(tmp, res);
return res;
}
Mat get_subwindow(
const Mat &image,
const Point2f center,
const int w,
const int h,
Rect *valid_pixels)
{
int startx = cvFloor(center.x) + 1 - (cvFloor(w/2));
int starty = cvFloor(center.y) + 1 - (cvFloor(h/2));
Rect roi(startx, starty, w, h);
int padding_left = 0, padding_right = 0, padding_top = 0, padding_bottom = 0;
if(roi.x < 0) {
padding_left = -roi.x;
roi.x = 0;
}
if(roi.y < 0) {
padding_top = -roi.y;
roi.y = 0;
}
roi.width -= padding_left;
roi.height-= padding_top;
if(roi.x + roi.width >= image.cols) {
padding_right = roi.x + roi.width - image.cols;
roi.width = image.cols - roi.x;
}
if(roi.y + roi.height >= image.rows) {
padding_bottom = roi.y + roi.height - image.rows;
roi.height = image.rows - roi.y;
}
Mat subwin = image(roi).clone();
copyMakeBorder(subwin, subwin, padding_top, padding_bottom, padding_left, padding_right, BORDER_REPLICATE);
if(valid_pixels != NULL) {
*valid_pixels = Rect(padding_left, padding_top, roi.width, roi.height);
}
return subwin;
}
float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p)
{
int i_p0, i_p_l, i_p_r; // indexes in response
float p0, p_l, p_r; // values in response
if(s.compare("vertical") == 0) {
// neighbouring rows
i_p0 = cvRound(p.y);
i_p_l = modul(cvRound(p.y) - 1, response.rows);
i_p_r = modul(cvRound(p.y) + 1, response.rows);
int px = static_cast<int>(p.x);
p0 = response.at<float>(i_p0, px);
p_l = response.at<float>(i_p_l, px);
p_r = response.at<float>(i_p_r, px);
} else if(s.compare("horizontal") == 0) {
// neighbouring cols
i_p0 = cvRound(p.x);
i_p_l = modul(cvRound(p.x) - 1, response.cols);
i_p_r = modul(cvRound(p.x) + 1, response.cols);
int py = static_cast<int>(p.y);
p0 = response.at<float>(py, i_p0);
p_l = response.at<float>(py, i_p_l);
p_r = response.at<float>(py, i_p_r);
} else {
std::cout << "Warning: unknown subpixel peak direction!" << std::endl;
return 0;
}
float delta = 0.5f * (p_r - p_l) / (2*p0 - p_r - p_l);
if(!std::isfinite(delta)) {
delta = 0;
}
return delta;
}
inline float chebpoly(const int n, const float x)
{
float res;
if (fabs(x) <= 1)
res = cos(n*acos(x));
else
res = cosh(n*acosh(x));
return res;
}
static Mat chebwin(int N, const float atten)
{
Mat out(N , 1, CV_32FC1);
int nn, i;
float M, n, sum = 0, max=0;
float tg = static_cast<float>(pow(10,atten/20.0f)); /* 1/r term [2], 10^gamma [2] */
float x0 = cosh((1.0f/(N-1))*acosh(tg));
M = (N-1)/2.0f;
if(N%2==0)
M = M + 0.5f; /* handle even length windows */
for(nn=0; nn<(N/2+1); nn++) {
n = nn-M;
sum = 0;
for(i=1; i<=M; i++){
sum += chebpoly(N-1,x0*static_cast<float>(cos(CV_PI*i/N))) *
static_cast<float>(cos(2.0f*n*CV_PI*i/N));
}
out.at<float>(nn,0) = tg + 2*sum;
out.at<float>(N-nn-1,0) = out.at<float>(nn,0) ;
if(out.at<float>(nn,0) > max)
max = out.at<float>(nn,0);
}
for(nn=0; nn<N; nn++)
out.at<float>(nn,0) /= max; /* normalize everything */
return out;
}
static double modified_bessel(int order, double x)
{
// sum m=0:inf 1/(m! * Gamma(m + order + 1)) * (x/2)^(2m + order)
const double eps = 1e-13;
double result = 0;
double m = 0;
double gamma = 1.0;
for(int i = 2; i <= order; ++i)
gamma *= i;
double term = pow(x,order) / (pow(2,order) * gamma);
while(term > eps * result) {
result += term;
//calculate new term in series
++m;
term *= (x*x) / (4*m*(m+order));
}
return result;
}
Mat get_hann_win(Size sz)
{
Mat hann_rows = Mat::ones(sz.height, 1, CV_32F);
Mat hann_cols = Mat::ones(1, sz.width, CV_32F);
int NN = sz.height - 1;
if(NN != 0) {
for (int i = 0; i < hann_rows.rows; ++i) {
hann_rows.at<float>(i,0) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
}
}
NN = sz.width - 1;
if(NN != 0) {
for (int i = 0; i < hann_cols.cols; ++i) {
hann_cols.at<float>(0,i) = (float)(1.0/2.0 * (1.0 - cos(2*CV_PI*i/NN)));
}
}
return hann_rows * hann_cols;
}
Mat get_kaiser_win(Size sz, float alpha)
{
Mat kaiser_rows = Mat::ones(sz.height, 1, CV_32F);
Mat kaiser_cols = Mat::ones(1, sz.width, CV_32F);
int N = sz.height - 1;
double shape = alpha;
double den = 1.0 / modified_bessel(0, shape);
for(int n = 0; n <= N; ++n) {
double K = (2.0 * n * 1.0/N) - 1.0;
double x = sqrt(1.0 - (K * K));
kaiser_rows.at<float>(n,0) = static_cast<float>(modified_bessel(0, shape * x) * den);
}
N = sz.width - 1;
for(int n = 0; n <= N; ++n) {
double K = (2.0 * n * 1.0/N) - 1.0;
double x = sqrt(1.0 - (K * K));
kaiser_cols.at<float>(0,n) = static_cast<float>(modified_bessel(0, shape * x) * den);
}
return kaiser_rows * kaiser_cols;
}
Mat get_chebyshev_win(Size sz, float attenuation)
{
Mat cheb_rows = chebwin(sz.height, attenuation);
Mat cheb_cols = chebwin(sz.width, attenuation).t();
return cheb_rows * cheb_cols;
}
static void computeHOG32D(const Mat &imageM, Mat &featM, const int sbin, const int pad_x, const int pad_y)
{
const int dimHOG = 32;
CV_Assert(pad_x >= 0);
CV_Assert(pad_y >= 0);
CV_Assert(imageM.channels() == 3);
CV_Assert(imageM.depth() == CV_64F);
// epsilon to avoid division by zero
const double eps = 0.0001;
// number of orientations
const int numOrient = 18;
// unit vectors to compute gradient orientation
const double uu[9] = {1.000, 0.9397, 0.7660, 0.5000, 0.1736, -0.1736, -0.5000, -0.7660, -0.9397};
const double vv[9] = {0.000, 0.3420, 0.6428, 0.8660, 0.9848, 0.9848, 0.8660, 0.6428, 0.3420};
// image size
const Size imageSize = imageM.size();
// block size
// int bW = cvRound((double)imageSize.width/(double)sbin);
// int bH = cvRound((double)imageSize.height/(double)sbin);
int bW = cvFloor((double)imageSize.width/(double)sbin);
int bH = cvFloor((double)imageSize.height/(double)sbin);
const Size blockSize(bW, bH);
// size of HOG features
int oW = max(blockSize.width-2, 0) + 2*pad_x;
int oH = max(blockSize.height-2, 0) + 2*pad_y;
Size outSize = Size(oW, oH);
// size of visible
const Size visible = blockSize*sbin;
// initialize historgram, norm, output feature matrices
Mat histM = Mat::zeros(Size(blockSize.width*numOrient, blockSize.height), CV_64F);
Mat normM = Mat::zeros(Size(blockSize.width, blockSize.height), CV_64F);
featM = Mat::zeros(Size(outSize.width*dimHOG, outSize.height), CV_64F);
// get the stride of each matrix
const size_t imStride = imageM.step1();
const size_t histStride = histM.step1();
const size_t normStride = normM.step1();
const size_t featStride = featM.step1();
// calculate the zero offset
const double* im = imageM.ptr<double>(0);
double* const hist = histM.ptr<double>(0);
double* const norm = normM.ptr<double>(0);
double* const feat = featM.ptr<double>(0);
for (int y = 1; y < visible.height - 1; y++)
{
for (int x = 1; x < visible.width - 1; x++)
{
// OpenCV uses an interleaved format: BGR-BGR-BGR
const double* s = im + 3*min(x, imageM.cols-2) + min(y, imageM.rows-2)*imStride;
// blue image channel
double dyb = *(s+imStride) - *(s-imStride);
double dxb = *(s+3) - *(s-3);
double vb = dxb*dxb + dyb*dyb;
// green image channel
s += 1;
double dyg = *(s+imStride) - *(s-imStride);
double dxg = *(s+3) - *(s-3);
double vg = dxg*dxg + dyg*dyg;
// red image channel
s += 1;
double dy = *(s+imStride) - *(s-imStride);
double dx = *(s+3) - *(s-3);
double v = dx*dx + dy*dy;
// pick the channel with the strongest gradient
if (vg > v) { v = vg; dx = dxg; dy = dyg; }
if (vb > v) { v = vb; dx = dxb; dy = dyb; }
// snap to one of the 18 orientations
double best_dot = 0;
int best_o = 0;
for (int o = 0; o < (int)numOrient/2; o++)
{
double dot = uu[o]*dx + vv[o]*dy;
if (dot > best_dot)
{
best_dot = dot;
best_o = o;
}
else if (-dot > best_dot)
{
best_dot = -dot;
best_o = o + (int)(numOrient/2);
}
}
// add to 4 historgrams around pixel using bilinear interpolation
double yp = ((double)y+0.5)/(double)sbin - 0.5;
double xp = ((double)x+0.5)/(double)sbin - 0.5;
int iyp = (int)cvFloor(yp);
int ixp = (int)cvFloor(xp);
double vy0 = yp - iyp;
double vx0 = xp - ixp;
double vy1 = 1.0 - vy0;
double vx1 = 1.0 - vx0;
v = sqrt(v);
// fill the value into the 4 neighborhood cells
if (iyp >= 0 && ixp >= 0)
*(hist + iyp*histStride + ixp*numOrient + best_o) += vy1*vx1*v;
if (iyp >= 0 && ixp+1 < blockSize.width)
*(hist + iyp*histStride + (ixp+1)*numOrient + best_o) += vx0*vy1*v;
if (iyp+1 < blockSize.height && ixp >= 0)
*(hist + (iyp+1)*histStride + ixp*numOrient + best_o) += vy0*vx1*v;
if (iyp+1 < blockSize.height && ixp+1 < blockSize.width)
*(hist + (iyp+1)*histStride + (ixp+1)*numOrient + best_o) += vy0*vx0*v;
} // for y
} // for x
// compute the energy in each block by summing over orientation
for (int y = 0; y < blockSize.height; y++)
{
const double* src = hist + y*histStride;
double* dst = norm + y*normStride;
double const* const dst_end = dst + blockSize.width;
// for each cell
while (dst < dst_end)
{
*dst = 0;
for (int o = 0; o < (int)(numOrient/2); o++)
{
*dst += (*src + *(src + numOrient/2))*
(*src + *(src + numOrient/2));
src++;
}
dst++;
src += numOrient/2;
}
}
// compute the features
for (int y = pad_y; y < outSize.height - pad_y; y++)
{
for (int x = pad_x; x < outSize.width - pad_x; x++)
{
double* dst = feat + y*featStride + x*dimHOG;
double* p, n1, n2, n3, n4;
const double* src;
p = norm + (y - pad_y + 1)*normStride + (x - pad_x + 1);
n1 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
p = norm + (y - pad_y)*normStride + (x - pad_x + 1);
n2 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
p = norm + (y- pad_y + 1)*normStride + x - pad_x;
n3 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
p = norm + (y - pad_y)*normStride + x - pad_x;
n4 = 1.0f / sqrt(*p + *(p + 1) + *(p + normStride) + *(p + normStride + 1) + eps);
double t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0;
// contrast-sesitive features
src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
for (int o = 0; o < numOrient; o++)
{
double val = *src;
double h1 = min(val*n1, 0.2);
double h2 = min(val*n2, 0.2);
double h3 = min(val*n3, 0.2);
double h4 = min(val*n4, 0.2);
*(dst++) = 0.5 * (h1 + h2 + h3 + h4);
src++;
t1 += h1;
t2 += h2;
t3 += h3;
t4 += h4;
}
// contrast-insensitive features
src = hist + (y - pad_y + 1)*histStride + (x - pad_x + 1)*numOrient;
for (int o = 0; o < numOrient/2; o++)
{
double sum = *src + *(src + numOrient/2);
double h1 = min(sum * n1, 0.2);
double h2 = min(sum * n2, 0.2);
double h3 = min(sum * n3, 0.2);
double h4 = min(sum * n4, 0.2);
*(dst++) = 0.5 * (h1 + h2 + h3 + h4);
src++;
}
// texture features
*(dst++) = 0.2357 * t1;
*(dst++) = 0.2357 * t2;
*(dst++) = 0.2357 * t3;
*(dst++) = 0.2357 * t4;
// truncation feature
*dst = 0;
}// for x
}// for y
// Truncation features
for (int m = 0; m < featM.rows; m++)
{
for (int n = 0; n < featM.cols; n += dimHOG)
{
if (m > pad_y - 1 && m < featM.rows - pad_y && n > pad_x*dimHOG - 1 && n < featM.cols - pad_x*dimHOG)
continue;
featM.at<double>(m, n + dimHOG - 1) = 1;
} // for x
}// for y
}
std::vector<Mat> get_features_hog(const Mat &im, const int bin_size)
{
Mat hogmatrix;
Mat im_;
im.convertTo(im_, CV_64FC3, 1.0/255.0);
computeHOG32D(im_,hogmatrix,bin_size,1,1);
hogmatrix.convertTo(hogmatrix, CV_32F);
Size hog_size = im.size();
hog_size.width /= bin_size;
hog_size.height /= bin_size;
Mat hogc(hog_size, CV_32FC(32), hogmatrix.data);
std::vector<Mat> features;
split(hogc, features);
return features;
}
std::vector<Mat> get_features_cn(const Mat &ppatch_data, const Size &output_size) {
Mat patch_data = ppatch_data.clone();
Vec3b & pixel = patch_data.at<Vec3b>(0,0);
unsigned index;
Mat cnFeatures = Mat::zeros(patch_data.rows,patch_data.cols,CV_32FC(10));
for(int i=0;i<patch_data.rows;i++){
for(int j=0;j<patch_data.cols;j++){
pixel=patch_data.at<Vec3b>(i,j);
index=(unsigned)(cvFloor((float)pixel[2]/8)+32*cvFloor((float)pixel[1]/8)+32*32*cvFloor((float)pixel[0]/8));
//copy the values
for(int k=0;k<10;k++){
cnFeatures.at<Vec<float,10> >(i,j)[k]=(float)ColorNames[index][k];
}
}
}
std::vector<Mat> result;
split(cnFeatures, result);
for (size_t i = 0; i < result.size(); i++) {
if (output_size.width > 0 && output_size.height > 0) {
resize(result.at(i), result.at(i), output_size, INTER_CUBIC);
}
}
return result;
}
std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size)
{
std::vector<Mat> channels;
split(patch, channels);
for(size_t k=0; k<channels.size(); k++) {
channels[k].convertTo(channels[k], CV_32F, 1.0/255.0, -0.5);
channels[k] = channels[k] - mean(channels[k])[0];
resize(channels[k], channels[k], output_size, INTER_CUBIC);
}
return channels;
}
double get_max(const Mat &m)
{
double val;
minMaxLoc(m, NULL, &val, NULL, NULL);
return val;
}
double get_min(const Mat &m)
{
double val;
minMaxLoc(m, &val, NULL, NULL, NULL);
return val;
}
Mat bgr2hsv(const Mat &img)
{
Mat hsv_img;
cvtColor(img, hsv_img, CV_BGR2HSV);
std::vector<Mat> hsv_img_channels;
split(hsv_img, hsv_img_channels);
hsv_img_channels.at(0).convertTo(hsv_img_channels.at(0), CV_8UC1, 255.0 / 180.0);
merge(hsv_img_channels, hsv_img);
return hsv_img;
}
} //cv namespace
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_TRACKER_CSRT_UTILS
#define OPENCV_TRACKER_CSRT_UTILS
#include <fstream>
#include <iostream>
#include <vector>
#include <iostream>
#include <algorithm>
#include <iterator>
namespace cv
{
inline int modul(int a, int b)
{
// function calculates the module of two numbers and it takes into account also negative numbers
return ((a % b) + b) % b;
}
inline double kernel_epan(double x)
{
return (x <= 1) ? (2.0/3.14)*(1-x) : 0;
}
Mat circshift(Mat matrix, int dx, int dy);
Mat gaussian_shaped_labels(const float sigma, const int w, const int h);
std::vector<Mat> fourier_transform_features(const std::vector<Mat> &M);
Mat divide_complex_matrices(const Mat &A, const Mat &B);
Mat get_subwindow(const Mat &image, const Point2f center,
const int w, const int h,Rect *valid_pixels = NULL);
float subpixel_peak(const Mat &response, const std::string &s, const Point2f &p);
double get_max(const Mat &m);
double get_min(const Mat &m);
Mat get_hann_win(Size sz);
Mat get_kaiser_win(Size sz, float alpha);
Mat get_chebyshev_win(Size sz, float attenuation);
std::vector<Mat> get_features_rgb(const Mat &patch, const Size &output_size);
std::vector<Mat> get_features_hog(const Mat &im, const int bin_size);
std::vector<Mat> get_features_cn(const Mat &im, const Size &output_size);
Mat bgr2hsv(const Mat &img);
} //cv namespace
#endif
...@@ -467,6 +467,12 @@ TEST_P(DistanceAndOverlap, MOSSE) ...@@ -467,6 +467,12 @@ TEST_P(DistanceAndOverlap, MOSSE)
test.run(); test.run();
} }
TEST_P(DistanceAndOverlap, CSRT)
{
TrackerTest test( TrackerCSRT::create(), dataset, 22, .7f, NoTransform);
test.run();
}
/***************************************************************************************/ /***************************************************************************************/
//Tests with shifted initial window //Tests with shifted initial window
TEST_P(DistanceAndOverlap, Shifted_Data_MedianFlow) TEST_P(DistanceAndOverlap, Shifted_Data_MedianFlow)
...@@ -504,6 +510,12 @@ TEST_P(DistanceAndOverlap, Shifted_Data_MOSSE) ...@@ -504,6 +510,12 @@ TEST_P(DistanceAndOverlap, Shifted_Data_MOSSE)
TrackerTest test( TrackerMOSSE::create(), dataset, 13, .69f, CenterShiftLeft); TrackerTest test( TrackerMOSSE::create(), dataset, 13, .69f, CenterShiftLeft);
test.run(); test.run();
} }
TEST_P(DistanceAndOverlap, Shifted_Data_CSRT)
{
TrackerTest test( TrackerCSRT::create(), dataset, 13, .69f, CenterShiftLeft);
test.run();
}
/***************************************************************************************/ /***************************************************************************************/
//Tests with scaled initial window //Tests with scaled initial window
TEST_P(DistanceAndOverlap, Scaled_Data_MedianFlow) TEST_P(DistanceAndOverlap, Scaled_Data_MedianFlow)
...@@ -549,6 +561,12 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MOSSE) ...@@ -549,6 +561,12 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MOSSE)
test.run(); test.run();
} }
TEST_P(DistanceAndOverlap, Scaled_Data_CSRT)
{
TrackerTest test( TrackerCSRT::create(), dataset, 22, 0.69f, Scale_1_1, 1);
test.run();
}
INSTANTIATE_TEST_CASE_P( Tracking, DistanceAndOverlap, TESTSET_NAMES); INSTANTIATE_TEST_CASE_P( Tracking, DistanceAndOverlap, TESTSET_NAMES);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment