Commit 1ad751e6 authored by oscar

Pass in the R matrix

parent a4316e5d
......@@ -130,4 +130,10 @@ float* BaseTrack::GetPredictPtr()
if (kf_ == nullptr)
return nullptr;
return kf_->P_.data();
}
\ No newline at end of file
}
float* BaseTrack::GetRPtr()
{
if (kf_ == nullptr)
return nullptr;
return kf_->R_.data();
}
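A minimal caller sketch (not part of this commit; the helper name is hypothetical): GetRPtr() mirrors GetPredictPtr() and exposes the raw buffer of kf_->R_, assumed to hold the no x no measurement-noise covariance contiguously.

    // Hypothetical helper: copy a track's measurement-noise covariance into a
    // flat host vector. `no` is the measurement dimension (7 in this tracker).
    #include <cstring>
    #include <vector>
    std::vector<float> CopyTrackR(BaseTrack& track, int no)
    {
        std::vector<float> r_host(no * no);
        if (const float* r = track.GetRPtr())      // nullptr while kf_ does not exist yet
            std::memcpy(r_host.data(), r, no * no * sizeof(float));
        return r_host;
    }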
......@@ -41,6 +41,7 @@ public:
float* GetStatesXPtr();
float* GetPredictPtr();
float* GetRPtr();
int coast_cycles_ = 0, hit_streak_ = 0;
......
......@@ -89,6 +89,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
std::shared_ptr<float> Z = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> X = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> P = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> R = std::shared_ptr<float>(new float[no * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> HX = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
int bs_i = 0;
for (const auto& match : matched)
......@@ -108,6 +109,8 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
if (ns == 0)
{
ns = m_tracker[id]->GetStatesNum();
float* _r = m_tracker[id]->GetRPtr();
memcpy(R.get(), _r, no * no * sizeof(float));
}
bs_i++;
m_tracker[id]->UpdateHit();//the variables inside Update still need to be maintained for the GPU path
......@@ -119,7 +122,7 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, std:
//SDK_LOG(SDK_INFO, "P = [%s]", GetMatrixStr(P.get(), ns*ns, bs).c_str());
//SDK_LOG(SDK_INFO, "HX = [%s]", GetMatrixStr(HX.get(),no,bs).c_str());
#ifdef _KF_IOU_CUDA_
kalman_update_batch(Z.get(), X.get(), P.get(), HX.get(), bs, ns, no);
kalman_update_batch(Z.get(), X.get(), P.get(), R.get(), HX.get(), bs, ns, no);
#endif
//SDK_LOG(SDK_INFO, "after X = [%s]", GetMatrixStr(X.get(), ns, bs).c_str());
//SDK_LOG(SDK_INFO, "after P = [%s]", GetMatrixStr(P.get(), ns * ns, bs).c_str());
......
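Putting the Run() hunks above together, the R handling reads roughly as follows (a recap sketch under one assumption: every track in the batch shares the same measurement-noise model, which is why R is filled only once, from the first matched track):

    // One shared no*no measurement-noise buffer for the whole batch,
    // allocated next to Z, X, P and HX. `first_id` stands for the id of the
    // first matched track (hypothetical name used only in this sketch).
    std::shared_ptr<float> R(new float[no * no], [](float* p) { delete[] p; });
    int ns = m_tracker[first_id]->GetStatesNum();
    if (float* _r = m_tracker[first_id]->GetRPtr())
        memcpy(R.get(), _r, no * no * sizeof(float));   // byte count = no*no floats
    #ifdef _KF_IOU_CUDA_
    kalman_update_batch(Z.get(), X.get(), P.get(), R.get(), HX.get(), bs, ns, no);
    #endif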
......@@ -11,16 +11,30 @@ __global__ void MatSub(float* C, const float* A, const float* B, int bs, int no)
__global__ void KalmanUpdateS2(float* d_S, float* d_P, const int bs, const int ns, const int no){
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= bs) return;
int p_stride = 11;
int s_stride = 8;
for (int i = 0; i < no; i++){
d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + 1;
}
}
// __global__ void KalmanUpdateS2(float* d_S, float* d_P, const int bs, const int ns, const int no){
// int tid = blockDim.x * blockIdx.x + threadIdx.x;
// if (tid >= bs) return;
// int p_stride = 11;
// int s_stride = 8;
// for (int i = 0; i < no; i++){
// d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + 1;
// }
// }
__global__ void KalmanUpdateS2(float* d_S, float* d_P, float* d_R, const int bs, const int ns, const int no){
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= bs) return;
int p_stride = 11;
int s_stride = 8;
for (int i = 0; i < no; i++){
// d_R holds a single shared no*no matrix (only one copy is uploaded by the host), so it is indexed without the tid offset
d_S[tid * no * no + i * s_stride] = d_P[tid * ns * ns + i * p_stride] + d_R[i * s_stride];
}
}
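For reference, a plain C++ restatement of what one thread of the new kernel computes (assuming ns = 10 and no = 7, so p_stride = ns + 1 and s_stride = no + 1 step along matrix diagonals, and H selects the first no states): the innovation covariance is kept diagonal, S_ii = P_ii + R_ii, instead of the previous hard-coded P_ii + 1.

    // Host-side reference of KalmanUpdateS2 for a single batch element (sketch).
    void KalmanUpdateS2Reference(float* S, const float* P, const float* R,
                                 int ns, int no)
    {
        const int p_stride = ns + 1;   // diagonal step inside the ns x ns matrix P
        const int s_stride = no + 1;   // diagonal step inside the no x no matrices S and R
        for (int i = 0; i < no; i++)
            S[i * s_stride] = P[i * p_stride] + R[i * s_stride];   // S_ii = P_ii + R_ii
    }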
......@@ -37,7 +51,6 @@ __global__ void KalmanUpdateS3(float* d_K, float* d_P, float* d_S, const int bs,
d_K[tid * ns * no + i * k_stride] = d_P[tid * ns * ns + i * p_stride] * (1 / d_S[tid * no * no + i * s_stride]);
}
else{
d_K[tid * ns * no + no * no + (i - no) * k_stride] = d_P[tid * ns * ns + ns * no + (i - no) * p_stride] * (1 / d_S[tid * no * no + (i - no) * s_stride]);
}
}
......
......@@ -35,6 +35,7 @@
void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
float* d_X, //(bs, ns)
float* d_P, //(bs, ns * ns)
float* d_R, //(bs, no * no)
float* d_HX, //(bs, no)
int bs,
int ns,
......@@ -74,9 +75,9 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
free(tmp_y);
#endif
// step 2 S = H * P * H^T + E
// step 2: S = H * P * H^T + R, where R (a.k.a. E) is the measurement-noise covariance
int blocksPerGrid2 = DIVUP(bs * no, threadsPerBlock);
KalmanUpdateS2<<<blocksPerGrid2, threadsPerBlock>>>(d_S, d_P, bs, ns, no); // (bs, no * no) + (bs, no * no)
KalmanUpdateS2<<<blocksPerGrid2, threadsPerBlock>>>(d_S, d_P, d_R, bs, ns, no); // (bs, no * no) + (bs, no * no)
#ifdef DEBUG
cudaDeviceSynchronize();
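For context, these launcher steps follow the standard Kalman measurement update; step 2 above is the innovation covariance (R is what the old comment called E), and KalmanUpdateS3 below computes the gain from it:

    S = H * P * H^T + R          (innovation covariance, step 2)
    K = P * H^T * S^-1           (Kalman gain, step 3)
    X = X + K * (Z - H * X)      (state update, in place)
    P = (I - K * H) * P          (covariance update, in place)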
......@@ -186,6 +187,7 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
Z: measurement (bs, 7)
X: estimate (bs, 10)
P: uncertainty covariance (bs, 100)
R: measurement noise (49,), one shared no x no matrix for the whole batch
MAKE SURE ALL INPUTS ARE TWO-DIM NUMPY ARRAY
*/
......@@ -312,6 +314,7 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* R, // covariance matrix of observation noise, size = no * no (shared across the batch)
float* HX, // H*X size = bs * no
const int bs,
const int ns, //ns = 10
......@@ -327,6 +330,7 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
int size_ZZ = bs * no * sizeof(float);
int size_XX = bs * ns * sizeof(float);
int size_PP = bs * ns * ns * sizeof(float);
int size_RR = no * no * sizeof(float);
int size_HXX = bs * no * sizeof(float);
// std::cout << "size_HXX: " << size_HXX <<"\n";
......@@ -338,11 +342,13 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
float* device_Z;
float* device_X;
float* device_P;
float* device_R;
float* device_HX;
GPU_CHECK(cudaMalloc(&device_Z, size_ZZ));
GPU_CHECK(cudaMalloc(&device_X, size_XX));
GPU_CHECK(cudaMalloc(&device_P, size_PP));
GPU_CHECK(cudaMalloc(&device_R, size_RR));
GPU_CHECK(cudaMalloc(&device_HX, size_HXX));
GPU_CHECK(cudaMemcpy(device_Z , Z, size_ZZ, cudaMemcpyHostToDevice));
for (int i = 0; i < bs; i++)
......@@ -350,9 +356,10 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
GPU_CHECK(cudaMemcpy(device_X + i*ns, X[i], ns * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P + i*ns*ns, P[i], ns*ns * sizeof(float), cudaMemcpyHostToDevice));
}
GPU_CHECK(cudaMemcpy(device_R, R, size_RR, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_HX, HX, size_HXX, cudaMemcpyHostToDevice));
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_HX, bs, ns, no);
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_R, device_HX, bs, ns, no);
for (int i = 0; i < bs; i++)
{
......@@ -363,6 +370,7 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
GPU_CHECK(cudaFree(device_Z));
GPU_CHECK(cudaFree(device_X));
GPU_CHECK(cudaFree(device_P));
GPU_CHECK(cudaFree(device_R));
GPU_CHECK(cudaFree(device_HX));
}
......
......@@ -6,6 +6,7 @@
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* R, // covariance matrix of observation noise, size = no * no (shared across the batch)
float* HX, // H*X size = bs * no
const int bs,
const int ns, //ns = 10
......
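A hypothetical host-side usage sketch of the extended signature (placeholder values; the include name is assumed): a batch of two tracks with ns = 10 state dimensions and no = 7 measurement dimensions, sharing a single R.

    #include <vector>
    // plus the header above that declares kalman_update_batch

    void ExampleBatchUpdate()
    {
        const int bs = 2, ns = 10, no = 7;
        std::vector<float> Z(bs * no, 0.f), HX(bs * no, 0.f), R(no * no, 0.f);
        std::vector<float> x0(ns, 0.f), x1(ns, 0.f);
        std::vector<float> P0(ns * ns, 0.f), P1(ns * ns, 0.f);
        for (int i = 0; i < no; i++)
            R[i * (no + 1)] = 0.1f;                   // diagonal measurement noise
        for (int i = 0; i < ns; i++)
        {
            P0[i * (ns + 1)] = 1.f;                   // identity prior covariance
            P1[i * (ns + 1)] = 1.f;
        }
        float* X[bs] = { x0.data(), x1.data() };      // per-track state buffers
        float* P[bs] = { P0.data(), P1.data() };      // per-track covariance buffers
        kalman_update_batch(Z.data(), X, P, R.data(), HX.data(), bs, ns, no);
        // X[i] and P[i] are updated in place for each batch element i.
    }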