Commit 18b1a4de authored by haoshuang's avatar haoshuang

Add the CUDA implementation of the Hungarian algorithm

parent 6741d9b9
......@@ -15,8 +15,11 @@
#ifdef _KF_IOU_CUDA_
#include "bev_overlap_online.h"
#include "kalman_update_batch_online.h"
#include "hungarian_gpu/hungarian_cuda.h"
#endif
#include <fstream>
#ifdef _USING_NSIGHT_
#include <nvToolsExt.h>
#endif
......@@ -68,6 +71,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
#ifdef _USING_NSIGHT_
nvtxRangePush("Run Predict");
#endif
auto t00 = std::chrono::steady_clock::now();
for (auto& track : m_tracker)
{
track.second->Predict();
......@@ -75,6 +79,11 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
//track.second->GetPredictData(predict);
//SDK_LOG(SDK_INFO, "predict id = %d, data = [%f,%f,%f,%f,%f,%f,%f]",track.first,predict[0], predict[1], predict[2], predict[3], predict[4], predict[5], predict[6]);
}
auto t01 = std::chrono::steady_clock::now();
auto tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t01 - t00);
auto tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "Predict using time is %f ", tdurams);
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
......@@ -98,6 +107,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
#ifdef _USING_NSIGHT_
nvtxRangePush("Run AssociateDetectionsToTrackers");
#endif
auto t02 = std::chrono::steady_clock::now();
detectionsId.resize(detections.size());
// Hash-map between track ID and associated detection bounding box
std::map<uint64_t, int> matched;
......@@ -106,6 +116,10 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
// return values - matched, unmatched_det
AssociateDetectionsToTrackers(detections,_no,_ns, m_tracker, matched, unmatched_det);
auto t03 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t03 - t02);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "Associate using time is %f ", tdurams);
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
......@@ -136,6 +150,9 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
std::shared_ptr<float> R = std::shared_ptr<float>(new float[no * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> HX = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
int bs_i = 0;
auto t04 = std::chrono::steady_clock::now();
for (const auto& match : matched)
{
const auto& id = match.first;
......@@ -163,6 +180,10 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
detectionsId[match.second] = id;
updateId[id] = match.second;
}
auto t05 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t05 - t04);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "organize using time is %f ", tdurams);
//SDK_LOG(SDK_INFO, "Z = [%s]", GetMatrixStr(Z.get(), no, bs).c_str());
//SDK_LOG(SDK_INFO, "X = [%s]", GetMatrixStr(X.get(), ns, bs).c_str());
//SDK_LOG(SDK_INFO, "P = [%s]", GetMatrixStr(P.get(), ns * ns, bs).c_str());
......@@ -173,6 +194,10 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
nvtxRangePush("kalman_update_batch");
#endif
kalman_update_batch(Z.get(), X.get(), P.get(), R.get(), HX.get(), bs, ns, no);
auto t06 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t06 - t05);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "kalman_update_batch using time is %f ", tdurams);
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
......@@ -192,6 +217,8 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
auto t07 = std::chrono::steady_clock::now();
/*** Create new tracks for unmatched detections ***/
for (const auto& det : unmatched_det)
{
......@@ -221,6 +248,10 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
it++;
}
}
auto t08 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t08 - t07);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "create_delete using time is %f ", tdurams);
return 0;
}
......@@ -283,6 +314,7 @@ void BaseTracker<T>::AssociateDetectionsToTrackers(const std::vector<std::vector
}
else
{
auto t09 = std::chrono::steady_clock::now();
std::vector<std::vector<float> > tracker_states;
std::vector<int> tracker_type;
for (auto& iter : tracks)
......@@ -328,6 +360,11 @@ void BaseTracker<T>::AssociateDetectionsToTrackers(const std::vector<std::vector
tracker_ptr.get()[i * tra_size + j] = tracker_states[i][j];
}
}
auto t10 = std::chrono::steady_clock::now();
auto tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t10 - t09);
auto tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "asso1 using time is %f ", tdurams);
//std::string dete_str = GetMatrixStr(detect_ptr.get(), measure_size, detections.size());
//std::string track_str = GetMatrixStr(tracker_states, tracker_states.size(), tra_size);
//SDK_LOG(SDK_INFO, "detections = [%s]",dete_str.c_str());
......@@ -337,6 +374,10 @@ void BaseTracker<T>::AssociateDetectionsToTrackers(const std::vector<std::vector
nvtxRangePush("bev_overlap");
#endif
bev_overlap(detections.size(), detect_ptr.get(), tracker_states.size(), tracker_ptr.get(), iou_ptr.get());
auto t11 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t11 - t10);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "bev_overlap using time is %f ", tdurams);
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
......@@ -359,7 +400,41 @@ void BaseTracker<T>::AssociateDetectionsToTrackers(const std::vector<std::vector
// Find association
//std::string str = GetMatrixStr(iou_matrix, detections.size(), tracks.size());
//SDK_LOG(SDK_INFO, "iou_matrix = [%s]",str.c_str());
HungarianMatching(iou_matrix, detections.size(), tracks.size(), association);
auto t12 = std::chrono::steady_clock::now();
//std::vector<std::vector<float>> cuda_iou_matrix(iou_matrix);
//std::vector<std::vector<float>> cuda_association(association);
//HungarianMatching(iou_matrix, detections.size(), tracks.size(), association);
/*
std::ofstream ofs("/workspace/asso.txt",std::ios_base::out);
for(int ii =0; ii < detections.size(); ii++){
for(int jj =0; jj < tracks.size(); jj++){
ofs<< association[ii][jj] <<" ";
}
ofs<<std::endl;
}
ofs.close();
*/
//Hungarian_Cuda(cuda_iou_matrix, detections.size(), tracks.size(), cuda_association);
Hungarian_Cuda(iou_matrix, detections.size(), tracks.size(), association);
/*
std::ofstream cuda_ofs("/workspace/cuda_asso.txt", std::ios_base::out);
for(int ii =0; ii < detections.size(); ii++){
for(int jj =0; jj < tracks.size(); jj++){
cuda_ofs<< association[ii][jj] <<" ";
}
cuda_ofs<<std::endl;
}
cuda_ofs.close();
exit(-1);
*/
auto t13 = std::chrono::steady_clock::now();
auto tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t13 - t12);
auto tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "HungarianMatching using time is %f ", tdurams);
for (size_t i = 0; i < detections.size(); i++)
{
......@@ -387,6 +462,11 @@ void BaseTracker<T>::AssociateDetectionsToTrackers(const std::vector<std::vector
unmatched_det.push_back(i);
}
}
auto t14 = std::chrono::steady_clock::now();
tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t14 - t13);
tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "asso2 using time is %f ", tdurams);
}
template<class T>
......
This diff is collapsed.
#ifndef _HUNGARIAN_CUDA_H_
#define _HUNGARIAN_CUDA_H_
#include <vector>
// Hungarian_Cuda: Hungarian-algorithm matching between detections and tracks.
//
// Takes the same argument list as the CPU version, which is called as:
//   HungarianMatching(iou_matrix, detections.size(), tracks.size(), association);
//
// Parameters:
//   h_cost      - input IoU matrix, laid out as [det_size, tracks_size].
//                 Passed by value, so an lvalue argument is copied on every call.
//   rows        - number of detections, i.e. detections.size().
//   cols        - number of tracks, i.e. tracks.size().
//   association - output association matrix, laid out as [det_size, tracks_size].
//
// NOTE(review): the name suggests the matching runs on the GPU and is meant as a
// drop-in replacement for HungarianMatching; the implementation is not visible
// from this header - confirm before relying on identical output.
void Hungarian_Cuda(std::vector<std::vector<float>> h_cost, int rows, int cols, std::vector<std::vector<float>>& association);
#endif  // _HUNGARIAN_CUDA_H_
......@@ -520,7 +520,13 @@ void TrackingRos::ThreadTrackingProcess()
#ifdef _USING_NSIGHT_
nvtxRangePush("m_tracker.Run");
#endif
auto t00 = std::chrono::steady_clock::now();
m_tracker.Run(input,7,10, detectionsId, updateId, lostId);
auto t01 = std::chrono::steady_clock::now();
auto tdura = std::chrono::duration_cast<std::chrono::nanoseconds>(t01 - t00);
auto tdurams = tdura.count() * 1e-6;
SDK_LOG(SDK_INFO, "m_tracker.Run using time is %f ", tdurams);
#ifdef _USING_NSIGHT_
nvtxRangePop();
#endif
......
......@@ -28,6 +28,7 @@
#include <iostream>
#include <cmath>
#include <limits>
#include <fstream>
template<typename Data> class Munkres
{
......@@ -65,6 +66,16 @@ public:
matrix.resize(size, size, matrix.max());
}
/*
std::ofstream ofs("/workspace/input.txt",std::ios_base::out);
for(int ii =0; ii < size; ii++){
for(int jj =0; jj < size; jj++){
ofs<< matrix(ii,jj) <<" ";
}
ofs<<std::endl;
}
ofs.close();
*/
// STAR == 1 == starred, PRIME == 2 == primed
mask_matrix.resize(size, size);
......@@ -126,6 +137,29 @@ public:
}
}
/*
std::ofstream out_ofs("/workspace/output.txt",std::ios_base::out);
for(int ii =0; ii < size; ii++){
for(int jj =0; jj < size; jj++){
if ( mask_matrix(ii, jj) == STAR ) {
out_ofs<< ii <<" ";
}
}
}
out_ofs<<std::endl;
for(int ii =0; ii < size; ii++){
for(int jj =0; jj < size; jj++){
if ( mask_matrix(ii, jj) == STAR ) {
out_ofs<< jj <<" ";
}
}
}
out_ofs<<std::endl;
out_ofs.close();
*/
#ifdef DEBUG
std::cout << "Munkres output: " << matrix << std::endl;
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment