Commit 0a2179b3 authored by Leonid Beynenson, committed by Alexander Alekhin

Merge pull request #2182 from LeonidBeynenson:lb/tracking_by_matching

* Add tracking-by_matching code and sample

* Make interface for PedestrianTracker

* Replace PedestrianTracker -> TrackerByMatching

* Make proper filtering by class id in tracking_by_matching

Also make the sample build in the case when opencv_dnn module is not built.
Also help is added.

* Remove TODO-s from tracking_by_matching code

* Add parameter frame_step, add copyrights, fix warnings

* Remove copyright from tracking_by_matching

* Rename check macros and remove obsolete mentions of pedestrians

* Tune default thresholds in tracking_by_matching sample

* Add description of classes and factories

* Remove unrequired EOL-s at the end of files

* Replace pointers by references for output parameters

* Fix some warnings found by buildbot

* Fix warning from buildbot, tune some thresholds in tracking_by_matching

* Replace pragma once by ifndef-define clause

* Fix more Windows warnings

* Change case of methods of TrackerByMatching class

* Change name of methods to CamelCase in TrackerByMatching

* Make more convenient check macros in tracking_by_matching.cpp

* Simplify tracking_by_matching sample

* Fix Mac error in tracking_by_matching
parent 1b636e72
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/tracking/tracking_by_matching.hpp>
#include <iostream>
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace cv::tbm;
// Command-line option specification consumed by cv::CommandLineParser in main().
// Each entry has the form "{name | default | description}".
static const char* keys =
{ "{video_name | | video name }"
  "{start_frame |0| Start frame }"
  "{frame_step |1| Frame step }"
  "{detector_model | | Path to detector's Caffe model }"
  "{detector_weights | | Path to detector's Caffe weights }"
  "{desired_class_id |-1| The desired class that should be tracked }"
};
// Prints a description of the sample, its command-line parameters and the
// hot keys to standard output.
static void help()
{
    std::cout << "\nThis example shows the functionality of \"Tracking-by-Matching\" approach:"
        " detector is used to detect objects on frames, \n"
        "matching is used to find correspondences between new detections and tracked objects.\n"
        "Detection is made by DNN detection network every `--frame_step` frame.\n"
        "Point a .prototxt file of the network as the parameter `--detector_model`, and a .caffemodel file"
        " as the parameter `--detector_weights`.\n"
        "(As an example of such detection network is a popular MobileNet_SSD network trained on VOC dataset.)\n"
        "If `--desired_class_id` parameter is set, the detection result is filtered by class id,"
        " returned by the detection network.\n"
        "(That is, if a detection net was trained on VOC dataset, then to track pedestrians point --desired_class_id=15)\n"
        "Example of <video_name> is in opencv_extra/testdata/cv/tracking/\n"
        "./example_tracking_tracking_by_matching --video_name=<video_name> --detector_model=<detector_model_path> --detector_weights=<detector_weights_path> \\\n"
        " [--start_frame=<start_frame>] \\\n"
        " [--frame_step=<frame_step>] \\\n"
        " [--desired_class_id=<desired_class_id>]\n"
        << std::endl;

    std::cout << "\n\nHot keys: \n"
        "\tq - quit the program\n"
        "\tp - pause/resume video\n";
}
// Forward declaration of the tracker factory; it is defined later in this file
// and called from main().
cv::Ptr<ITrackerByMatching> createTrackerByMatchingWithFastDescriptor();
class DnnObjectDetector
DnnObjectDetector(const String& net_caffe_model_path, const String& net_caffe_weights_path,
int desired_class_id=-1,
float confidence_threshold = 0.2,
//the following parameters are default for popular MobileNet_SSD caffe model
const String& net_input_name="data",
const String& net_output_name="detection_out",
double net_scalefactor=0.007843,
const Size& net_size = Size(300,300),
const Scalar& net_mean = Scalar(127.5, 127.5, 127.5),
bool net_swapRB=false)
net = dnn::readNetFromCaffe(net_caffe_model_path, net_caffe_weights_path);
if (net.empty())
CV_Error(Error::StsError, "Cannot read Caffe net");
TrackedObjects detect(const cv::Mat& frame, int frame_idx)
Mat resized_frame;
resize(frame, resized_frame, net_size);
Mat inputBlob = cv::dnn::blobFromImage(resized_frame, net_scalefactor, net_size, net_mean, net_swapRB);
net.setInput(inputBlob, net_input_name);
Mat detection = net.forward(net_output_name);
Mat detection_as_mat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
TrackedObjects res;
for (int i = 0; i < detection_as_mat.rows; i++)
float cur_confidence =<float>(i, 2);
int cur_class_id = static_cast<int>(<float>(i, 1));
int x_left = static_cast<int>(<float>(i, 3) * frame.cols);
int y_bottom = static_cast<int>(<float>(i, 4) * frame.rows);
int x_right = static_cast<int>(<float>(i, 5) * frame.cols);
int y_top = static_cast<int>(<float>(i, 6) * frame.rows);
Rect cur_rect(x_left, y_bottom, (x_right - x_left), (y_top - y_bottom));
if (cur_confidence < confidence_threshold)
if ((desired_class_id >= 0) && (cur_class_id != desired_class_id))
//clipping by frame size
cur_rect = cur_rect & Rect(Point(), frame.size());
if (cur_rect.empty())
TrackedObject cur_obj(cur_rect, cur_confidence, frame_idx, -1);
return res;
cv::dnn::Net net;
int desired_class_id;
float confidence_threshold;
String net_input_name;
String net_output_name;
double net_scalefactor;
Size net_size;
Scalar net_mean;
bool net_swapRB;
createTrackerByMatchingWithFastDescriptor() {
cv::tbm::TrackerParams params;
cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatching(params);
std::shared_ptr<IImageDescriptor> descriptor_fast =
cv::Size(16, 32), cv::InterpolationFlags::INTER_LINEAR);
std::shared_ptr<IDescriptorDistance> distance_fast =
return tracker;
int main( int argc, char** argv ){
CommandLineParser parser( argc, argv, keys );
cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatchingWithFastDescriptor();
String video_name = parser.get<String>("video_name");
int start_frame = parser.get<int>("start_frame");
int frame_step = parser.get<int>("frame_step");
String detector_model = parser.get<String>("detector_model");
String detector_weights = parser.get<String>("detector_weights");
int desired_class_id = parser.get<int>("desired_class_id");
if( video_name.empty() || detector_model.empty() || detector_weights.empty() )
return -1;
//open the capture
VideoCapture cap; video_name );
cap.set( CAP_PROP_POS_FRAMES, start_frame );
if( !cap.isOpened() )
cout << "***Could not initialize capturing...***\n";
cout << "Current parameter's value: \n";
return -1;
// If you use the popular MobileNet_SSD detector, the default parameters may be used.
// Otherwise, set your own parameters (net_mean, net_scalefactor, etc).
DnnObjectDetector detector(detector_model, detector_weights, desired_class_id);
Mat frame;
namedWindow( "Tracking by Matching", 1 );
int frame_counter = -1;
int64 time_total = 0;
bool paused = false;
for ( ;; )
if( paused )
char c = (char) waitKey(30);
if (c == 'p')
paused = !paused;
if (c == 'q')
cap >> frame;
if (frame_counter < start_frame)
if (frame_counter % frame_step != 0)
int64 frame_time = getTickCount();
TrackedObjects detections = detector.detect(frame, frame_counter);
// timestamp in milliseconds
uint64_t cur_timestamp = static_cast<uint64_t>(1000.0 / 30 * frame_counter);
tracker->process(frame, detections, cur_timestamp);
frame_time = getTickCount() - frame_time;
time_total += frame_time;
// Drawing colored "worms" (tracks).
frame = tracker->drawActiveTracks(frame);
// Drawing all detected objects on a frame by BLUE COLOR
for (const auto &detection : detections) {
cv::rectangle(frame, detection.rect, cv::Scalar(255, 0, 0), 3);
// Drawing tracked detections only by RED color and print ID and detection
// confidence level.
for (const auto &detection : tracker->trackedDetections()) {
cv::rectangle(frame, detection.rect, cv::Scalar(0, 0, 255), 3);
std::string text = std::to_string(detection.object_id) +
" conf: " + std::to_string(detection.confidence);
cv::putText(frame, text,, cv::FONT_HERSHEY_COMPLEX,
1.0, cv::Scalar(0, 0, 255), 3);
imshow( "Tracking by Matching", frame );
char c = (char) waitKey( 2 );
if (c == 'q')
if (c == 'p')
paused = !paused;
double s = frame_counter / (time_total / getTickFrequency());
printf("FPS: %f\n", s);
return 0;
#else // #ifdef HAVE_OPENCV_DNN
// Fallback entry point used when the opencv_dnn module is not available:
// the sample cannot run without a detector, so it only reports an error.
int main(int, char**){
    CV_Error(cv::Error::StsNotImplemented, "At the moment the sample 'tracking_by_matching' can work only when opencv_dnn module is built.");
}
#endif // #ifdef HAVE_OPENCV_DNN
#include "kuhn_munkres.hpp"
#include <algorithm>
#include <limits>
#include <vector>
// Creates a solver with no problem loaded; the problem size starts at zero.
KuhnMunkres::KuhnMunkres()
    : n_(0) {
}
// Solves the assignment problem for a CV_32F matrix of non-negative costs.
// The matrix is embedded into a zero-padded square matrix of size
// max(rows, cols). The result has one entry per row of that square matrix:
// the index of the starred (assigned) column, or static_cast<size_t>(-1) if
// the row has no star.
std::vector<size_t> KuhnMunkres::Solve(const cv::Mat& dissimilarity_matrix) {
    CV_Assert(dissimilarity_matrix.type() == CV_32F);
    double min_val;
    cv::minMaxLoc(dissimilarity_matrix, &min_val);
    CV_Assert(min_val >= 0);

    n_ = std::max(dissimilarity_matrix.rows, dissimilarity_matrix.cols);
    dm_ = cv::Mat(n_, n_, CV_32F, cv::Scalar(0));
    marked_ = cv::Mat(n_, n_, CV_8S, cv::Scalar(0));
    points_ = std::vector<cv::Point>(n_ * 2);

    // Copy the input into the top-left corner of the square working matrix;
    // the padding cells keep their zero cost.
    dissimilarity_matrix.copyTo(dm_(
        cv::Rect(0, 0, dissimilarity_matrix.cols, dissimilarity_matrix.rows)));

    is_row_visited_ = std::vector<int>(n_, 0);
    is_col_visited_ = std::vector<int>(n_, 0);

    Run();

    std::vector<size_t> results(static_cast<size_t>(marked_.rows), static_cast<size_t>(-1));
    for (int i = 0; i < marked_.rows; i++) {
        const auto ptr = marked_.ptr<char>(i);
        for (int j = 0; j < marked_.cols; j++) {
            if (ptr[j] == kStar) {
                results[i] = j;
            }
        }
    }
    return results;
}
// Row reduction plus greedy starring: subtracts each row's minimum from the
// row, then stars a resulting zero if neither its row nor its column already
// holds a star. Uses local visit flags, so the member cover vectors are not
// modified here.
void KuhnMunkres::TrySimpleCase() {
    auto is_row_visited = std::vector<int>(n_, 0);
    auto is_col_visited = std::vector<int>(n_, 0);

    for (int row = 0; row < n_; row++) {
        auto ptr = dm_.ptr<float>(row);
        auto marked_ptr = marked_.ptr<char>(row);
        auto min_val = *std::min_element(ptr, ptr + n_);
        for (int col = 0; col < n_; col++) {
            ptr[col] -= min_val;
            if (ptr[col] == 0 && !is_col_visited[col] && !is_row_visited[row]) {
                marked_ptr[col] = kStar;
                is_col_visited[col] = 1;
                is_row_visited[row] = 1;
            }
        }
    }
}
// Marks every column that contains a starred cell as visited (covered) and
// counts the stars. Returns true once there are at least n_ stars, i.e. the
// assignment is complete.
bool KuhnMunkres::CheckIfOptimumIsFound() {
    int count = 0;
    for (int i = 0; i < n_; i++) {
        const auto marked_ptr = marked_.ptr<char>(i);
        for (int j = 0; j < n_; j++) {
            if (marked_ptr[j] == kStar) {
                is_col_visited_[j] = 1;
                // Without this increment the function could only ever
                // return true for n_ == 0.
                count++;
            }
        }
    }

    return count >= n_;
}
// Returns the position (x = column, y = row) of the smallest value among the
// cells whose row and column are both unvisited, or (-1, -1) if no such cell
// is smaller than the float maximum.
cv::Point KuhnMunkres::FindUncoveredMinValPos() {
    auto min_val = std::numeric_limits<float>::max();
    cv::Point min_val_pos(-1, -1);
    for (int i = 0; i < n_; i++) {
        if (!is_row_visited_[i]) {
            auto dm_ptr = dm_.ptr<float>(i);
            for (int j = 0; j < n_; j++) {
                if (!is_col_visited_[j] && dm_ptr[j] < min_val) {
                    min_val = dm_ptr[j];
                    min_val_pos = cv::Point(j, i);
                }
            }
        }
    }
    return min_val_pos;
}
// Adjusts the cost matrix by `val`: adds it to every cell of a visited row
// and subtracts it from every cell of an unvisited column. A cell in a
// visited row and an unvisited column gets both adjustments.
void KuhnMunkres::UpdateDissimilarityMatrix(float val) {
    for (int i = 0; i < n_; i++) {
        auto dm_ptr = dm_.ptr<float>(i);
        for (int j = 0; j < n_; j++) {
            if (is_row_visited_[i]) dm_ptr[j] += val;
            if (!is_col_visited_[j]) dm_ptr[j] -= val;
        }
    }
}
int KuhnMunkres::FindInRow(int row, int what) {
for (int j = 0; j < n_; j++) {
if (<char>(row, j) == what) {
return j;
return -1;
int KuhnMunkres::FindInCol(int col, int what) {
for (int i = 0; i < n_; i++) {
if (<char>(i, col) == what) {
return i;
return -1;
void KuhnMunkres::Run() {
while (!CheckIfOptimumIsFound()) {
while (true) {
auto point = FindUncoveredMinValPos();
auto min_val =<float>(point.y, point.x);
if (min_val > 0) {
} else {<char>(point.y, point.x) = kPrime;
int col = FindInRow(point.y, kStar);
if (col >= 0) {
is_row_visited_[point.y] = 1;
is_col_visited_[col] = 0;
} else {
int count = 0;
points_[count] = point;
while (true) {
int row = FindInCol(points_[count].x, kStar);
if (row >= 0) {
points_[count] = cv::Point(points_[count - 1].x, row);
int col1 = FindInRow(points_[count].y, kPrime);
points_[count] = cv::Point(col1, points_[count - 1].y);
} else {
for (int i = 0; i < count + 1; i++) {
auto& mark =<char>(points_[i].y, points_[i].x);
mark = mark == kStar ? 0 : kStar;
is_row_visited_ = std::vector<int>(n_, 0);
is_col_visited_ = std::vector<int>(n_, 0);
marked_.setTo(0, marked_ == kPrime);
#include "opencv2/core.hpp"
#include <memory>
#include <vector>
///
/// \brief The KuhnMunkres class
///
/// Solves the assignment problem.
///
class KuhnMunkres {
public:
    /// \brief Creates a solver with no problem loaded.
    KuhnMunkres();

    ///
    /// \brief Solves the assignment problem for given dissimilarity matrix.
    /// It returns a vector where each element is a column index for the
    /// corresponding row (e.g. result[0] stores optimal column index for very
    /// first row in the dissimilarity matrix).
    /// \param dissimilarity_matrix CV_32F dissimilarity matrix.
    /// \return Optimal column index for each row. -1 (stored as
    /// static_cast<size_t>(-1), since the element type is unsigned) means
    /// that there is no column for row.
    ///
    std::vector<size_t> Solve(const cv::Mat &dissimilarity_matrix);

private:
    /// Mark values stored in marked_.
    static constexpr int kStar = 1;
    static constexpr int kPrime = 2;

    cv::Mat dm_;                        ///< Square CV_32F working copy of the cost matrix.
    cv::Mat marked_;                    ///< Square CV_8S matrix of cell marks (0, kStar, kPrime).
    std::vector<cv::Point> points_;     ///< Scratch buffer for the alternating path built in Run().

    std::vector<int> is_row_visited_;   ///< Per-row cover flags.
    std::vector<int> is_col_visited_;   ///< Per-column cover flags.

    int n_;                             ///< Side length of the square working matrices.

    void TrySimpleCase();
    bool CheckIfOptimumIsFound();
    cv::Point FindUncoveredMinValPos();
    void UpdateDissimilarityMatrix(float val);
    int FindInRow(int row, int what);
    int FindInCol(int col, int what);
    void Run();
};
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment