Commit 1ad751e6 authored by oscar

Pass in the R matrix

parent a4316e5d
......@@ -130,4 +130,10 @@ float* BaseTrack::GetPredictPtr()
if (kf_ == nullptr)
return nullptr;
return kf_->P_.data();
}
\ No newline at end of file
}
float* BaseTrack::GetRPtr()
{
if (kf_ == nullptr)
return nullptr;
return kf_->R_.data();
}
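A minimal caller sketch (not part of this commit; the helper name is hypothetical): GetRPtr() mirrors GetPredictPtr() and exposes the raw buffer of kf_->R_, assumed to hold the no x no measurement-noise covariance contiguously.

    // Hypothetical helper: copy a track's measurement-noise covariance into a
    // flat host vector. `no` is the measurement dimension (7 in this tracker).
    #include <cstring>
    #include <vector>
    std::vector<float> CopyTrackR(BaseTrack& track, int no)
    {
        std::vector<float> r_host(no * no);
        if (const float* r = track.GetRPtr())      // nullptr while kf_ does not exist yet
            std::memcpy(r_host.data(), r, no * no * sizeof(float));
        return r_host;
    }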
......@@ -41,6 +41,7 @@ public:
float* GetStatesXPtr();
float* GetPredictPtr();
float* GetRPtr();
int coast_cycles_ = 0, hit_streak_ = 0;
......
......@@ -89,6 +89,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
std::shared_ptr<float> Z = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> X = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> P = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> R = std::shared_ptr<float>(new float[no * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> HX = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
int bs_i = 0;
for (const auto& match : matched)
......@@ -108,6 +109,8 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
if (ns == 0)
{
ns = m_tracker[id]->GetStatesNum();
float* _r = m_tracker[id]->GetRPtr();
memcpy(R.get(), _r, no * no * sizeof(float));
}
bs_i++;
m_tracker[id]->UpdateHit();//the variables inside Update still need to be maintained for the GPU path
......@@ -119,7 +122,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
//SDK_LOG(SDK_INFO, "P = [%s]", GetMatrixStr(P.get(), ns*ns, bs).c_str());
//SDK_LOG(SDK_INFO, "HX = [%s]", GetMatrixStr(HX.get(),no,bs).c_str());
#ifdef _KF_IOU_CUDA_
kalman_update_batch(Z.get(), X.get(), P.get(), HX.get(), bs, ns, no);
kalman_update_batch(Z.get(), X.get(), P.get(), R.get(), HX.get(), bs, ns, no);
#endif
//SDK_LOG(SDK_INFO, "after X = [%s]", GetMatrixStr(X.get(), ns, bs).c_str());
//SDK_LOG(SDK_INFO, "after P = [%s]", GetMatrixStr(P.get(), ns * ns, bs).c_str());
......
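Putting the Run() hunks above together, the R handling reads roughly as follows (a recap sketch under one assumption: every track in the batch shares the same measurement-noise model, which is why R is filled only once, from the first matched track):

    // One shared no*no measurement-noise buffer for the whole batch,
    // allocated next to Z, X, P and HX. `first_id` stands for the id of the
    // first matched track (hypothetical name used only in this sketch).
    std::shared_ptr<float> R(new float[no * no], [](float* p) { delete[] p; });
    int ns = m_tracker[first_id]->GetStatesNum();
    if (float* _r = m_tracker[first_id]->GetRPtr())
        memcpy(R.get(), _r, no * no * sizeof(float));   // byte count = no*no floats
    #ifdef _KF_IOU_CUDA_
    kalman_update_batch(Z.get(), X.get(), P.get(), R.get(), HX.get(), bs, ns, no);
    #endif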
......@@ -11,16 +11,30 @@ __global__ void MatSub(float* C, const float* A, const float* B, int bs, int no)
__global__ void KalmanUpdateS2(float* d_S, float* d_P, const int bs, const int ns, const int no){
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= bs) return;
int p_stride = 11;
int s_stride = 8;
for (int i = 0; i < no; i++){
d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + 1;
}
}
// __global__ void KalmanUpdateS2(float* d_S, float* d_P, const int bs, const int ns, const int no){
// int tid = blockDim.x * blockIdx.x + threadIdx.x;
// if (tid >= bs) return;
// int p_stride = 11;
// int s_stride = 8;
// for (int i = 0; i < no; i++){
// d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + 1;
// }
// }
__global__ void KalmanUpdateS2(float* d_S, float* d_P, float* d_R, const int bs, const int ns, const int no){
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= bs) return;
int p_stride = 11;
int s_stride = 8;
for (int i = 0; i < no; i++){
// d_R holds a single shared no*no matrix (only one copy is uploaded by the host), so it is indexed without the tid offset
d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + d_R[i * s_stride];
}
}
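For reference, a plain C++ restatement of what one thread of the new kernel computes (assuming ns = 10 and no = 7, so p_stride = ns + 1 and s_stride = no + 1 step along matrix diagonals, and H selects the first no states): the innovation covariance is kept diagonal, S_ii = P_ii + R_ii, instead of the previous hard-coded P_ii + 1.

    // Host-side reference of KalmanUpdateS2 for a single batch element (sketch).
    void KalmanUpdateS2Reference(float* S, const float* P, const float* R,
                                 int ns, int no)
    {
        const int p_stride = ns + 1;   // diagonal step inside the ns x ns matrix P
        const int s_stride = no + 1;   // diagonal step inside the no x no matrices S and R
        for (int i = 0; i < no; i++)
            S[i * s_stride] = P[i * p_stride] + R[i * s_stride];   // S_ii = P_ii + R_ii
    }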
......@@ -37,7 +51,6 @@ __global__ void KalmanUpdateS3(float* d_K, float* d_P, float* d_S, const int bs,
d_K[tid * ns * no + i * k_stride] = d_P[tid * ns * ns + i * p_stride] * (1 / d_S[tid * no * no + i * s_stride]);
}
else{
d_K[tid * ns * no + no * no + (i - no) * k_stride] = d_P[tid * ns * ns + ns * no + (i - no) * p_stride] * (1 / d_S[tid * no * no + (i - no) * s_stride]);
}
}
......
......@@ -35,6 +35,7 @@
void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
float* d_X, //(bs, ns)
float* d_P, //(bs, ns * ns)
float* d_R, //(bs, no * no)
float* d_HX, //(bs, no)
int bs,
int ns,
......@@ -74,9 +75,9 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
free(tmp_y);
#endif
// step 2 S = H * P * H^T + E
// step 2: S = H * P * H^T + R, where R (a.k.a. E) is the measurement-noise covariance
int blocksPerGrid2 = DIVUP(bs * no, threadsPerBlock);
KalmanUpdateS2<<<blocksPerGrid2, threadsPerBlock>>>(d_S, d_P, bs, ns, no); // (bs, no * no) + (bs, no * no)
KalmanUpdateS2<<<blocksPerGrid2, threadsPerBlock>>>(d_S, d_P, d_R, bs, ns, no); // (bs, no * no) + (bs, no * no)
#ifdef DEBUG
cudaDeviceSynchronize();
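For context, these launcher steps follow the standard Kalman measurement update; step 2 above is the innovation covariance (R is what the old comment called E), and KalmanUpdateS3 below computes the gain from it:

    S = H * P * H^T + R          (innovation covariance, step 2)
    K = P * H^T * S^-1           (Kalman gain, step 3)
    X = X + K * (Z - H * X)      (state update, in place)
    P = (I - K * H) * P          (covariance update, in place)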
......@@ -186,6 +187,7 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
Z: measurement (bs, 7)
X: estimate (bs, 10)
P: uncertainty covariance (bs, 100)
R: measurement noise (49,), one shared no x no matrix for the whole batch
MAKE SURE ALL INPUTS ARE TWO-DIM NUMPY ARRAY
*/
......@@ -312,6 +314,7 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* R, // covariance matrix of observation noise, size = no * no (shared across the batch)
float* HX, // H*X size = bs * no
const int bs,
const int ns, //ns = 10
......@@ -327,6 +330,7 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
int size_ZZ = bs * no * sizeof(float);
int size_XX = bs * ns * sizeof(float);
int size_PP = bs * ns * ns * sizeof(float);
int size_RR = no * no * sizeof(float);
int size_HXX = bs * no * sizeof(float);
// std::cout << "size_HXX: " << size_HXX <<"\n";
......@@ -338,11 +342,13 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
float* device_Z;
float* device_X;
float* device_P;
float* device_R;
float* device_HX;
GPU_CHECK(cudaMalloc(&device_Z, size_ZZ));
GPU_CHECK(cudaMalloc(&device_X, size_XX));
GPU_CHECK(cudaMalloc(&device_P, size_PP));
GPU_CHECK(cudaMalloc(&device_R, size_RR));
GPU_CHECK(cudaMalloc(&device_HX, size_HXX));
GPU_CHECK(cudaMemcpy(device_Z , Z, size_ZZ, cudaMemcpyHostToDevice));
for (int i = 0; i < bs; i++)
......@@ -350,9 +356,10 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
GPU_CHECK(cudaMemcpy(device_X + i*ns, X[i], ns * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P + i*ns*ns, P[i], ns*ns * sizeof(float), cudaMemcpyHostToDevice));
}
GPU_CHECK(cudaMemcpy(device_R, R, size_RR, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_HX, HX, size_HXX, cudaMemcpyHostToDevice));
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_HX, bs, ns, no);
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_R, device_HX, bs, ns, no);
for (int i = 0; i < bs; i++)
{
......@@ -363,6 +370,7 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
GPU_CHECK(cudaFree(device_Z));
GPU_CHECK(cudaFree(device_X));
GPU_CHECK(cudaFree(device_P));
GPU_CHECK(cudaFree(device_R));
GPU_CHECK(cudaFree(device_HX));
}
......
......@@ -6,6 +6,7 @@
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* R, // covariance matrix of observation noise, size = no * no (shared across the batch)
float* HX, // H*X size = bs * no
const int bs,
const int ns, //ns = 10
......
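A hypothetical host-side usage sketch of the extended signature (placeholder values; the include name is assumed): a batch of two tracks with ns = 10 state dimensions and no = 7 measurement dimensions, sharing a single R.

    #include <vector>
    // plus the header above that declares kalman_update_batch

    void ExampleBatchUpdate()
    {
        const int bs = 2, ns = 10, no = 7;
        std::vector<float> Z(bs * no, 0.f), HX(bs * no, 0.f), R(no * no, 0.f);
        std::vector<float> x0(ns, 0.f), x1(ns, 0.f);
        std::vector<float> P0(ns * ns, 0.f), P1(ns * ns, 0.f);
        for (int i = 0; i < no; i++)
            R[i * (no + 1)] = 0.1f;                   // diagonal measurement noise
        for (int i = 0; i < ns; i++)
        {
            P0[i * (ns + 1)] = 1.f;                   // identity prior covariance
            P1[i * (ns + 1)] = 1.f;
        }
        float* X[bs] = { x0.data(), x1.data() };      // per-track state buffers
        float* P[bs] = { P0.data(), P1.data() };      // per-track covariance buffers
        kalman_update_batch(Z.data(), X, P, R.data(), HX.data(), bs, ns, no);
        // X[i] and P[i] are updated in place for each batch element i.
    }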