Commit 65308c5b authored by oscar

修改gpu的memcpy问题

parent d5994126
......@@ -113,8 +113,8 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
int ns = 0;
int no = _no;
std::shared_ptr<float> Z = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> X = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float*> P = std::shared_ptr<float*>(new float* [bs], [](float** p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> X = std::shared_ptr<float>(new float[bs * _ns], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> P = std::shared_ptr<float>(new float[bs * _ns * _ns], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> R = std::shared_ptr<float>(new float[no * no], [](float* p) {if (p) delete[] p; p = nullptr; });
std::shared_ptr<float> HX = std::shared_ptr<float>(new float[bs * no], [](float* p) {if (p) delete[] p; p = nullptr; });
int bs_i = 0;
......@@ -128,8 +128,10 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
{
ptr_Z[i] = cre_det[i];
}
X.get()[bs_i] = m_tracker[id]->GetStatesXPtr();
P.get()[bs_i] = m_tracker[id]->GetPredictPtr();
memcpy(X.get() + bs_i * _ns, m_tracker[id]->GetStatesXPtr(), _ns * sizeof(float));
memcpy(P.get() + bs_i * _ns * _ns, m_tracker[id]->GetPredictPtr(), _ns * _ns * sizeof(float));
//X.get()[bs_i] = m_tracker[id]->GetStatesXPtr();
//P.get()[bs_i] = m_tracker[id]->GetPredictPtr();
float* ptr_HX = HX.get() + bs_i * no;
memcpy(ptr_HX, m_tracker[id]->GetStatesXPtr(), no * sizeof(float));
if (ns == 0)
......@@ -159,6 +161,14 @@ int BaseTracker<T>::Run(const std::vector<std::vector<float> >& detections, int
#endif
//SDK_LOG(SDK_INFO, "after X = [%s]", GetMatrixStr(X.get(), ns, bs).c_str());
//SDK_LOG(SDK_INFO, "after P = [%s]", GetMatrixStr(P.get(), ns * ns, bs).c_str());
bs_i = 0;
for (const auto& match : matched)
{
const auto& id = match.first;
memcpy(m_tracker[id]->GetStatesXPtr(), X.get() + bs_i * _ns, _ns * sizeof(float));
memcpy(m_tracker[id]->GetPredictPtr(), P.get() + bs_i * _ns * _ns, _ns * _ns * sizeof(float));
bs_i++;
}
}
}
#ifdef _USING_NSIGHT_
......
......@@ -312,8 +312,8 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
MAKE SURE ALL INPUTS ARE TWO-DIM NUMPY ARRAY
*/
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* X, // in-place update states size = bs * ns
float* P, // in-place update predict size = bs * ns * ns
float* R, //R covariance matrix of observation noise no * no
float* HX, // H*X size = bs * no
const int bs,
......@@ -350,22 +350,16 @@ void kalman_update_batch(float* Z,// measurement size = bs * no
GPU_CHECK(cudaMalloc(&device_P, size_PP));
GPU_CHECK(cudaMalloc(&device_R, size_RR));
GPU_CHECK(cudaMalloc(&device_HX, size_HXX));
GPU_CHECK(cudaMemcpy(device_Z , Z, size_ZZ, cudaMemcpyHostToDevice));
for (int i = 0; i < bs; i++)
{
GPU_CHECK(cudaMemcpy(device_X + i*ns, X[i], ns * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P + i*ns*ns, P[i], ns*ns * sizeof(float), cudaMemcpyHostToDevice));
}
GPU_CHECK(cudaMemcpy(device_Z, Z, size_ZZ, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_X, X, size_XX, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P, P, size_PP, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_R, R, size_RR, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_HX, HX, size_HXX, cudaMemcpyHostToDevice));
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_R, device_HX, bs, ns, no);
for (int i = 0; i < bs; i++)
{
GPU_CHECK(cudaMemcpy(X[i], device_X + i*ns, ns * sizeof(float), cudaMemcpyDeviceToHost));
GPU_CHECK(cudaMemcpy(P[i], device_P + i*ns*ns, ns * ns * sizeof(float), cudaMemcpyDeviceToHost));
}
GPU_CHECK(cudaMemcpy(X, device_X, size_XX, cudaMemcpyDeviceToHost));
GPU_CHECK(cudaMemcpy(P, device_P, size_PP, cudaMemcpyDeviceToHost));
GPU_CHECK(cudaFree(device_Z));
GPU_CHECK(cudaFree(device_X));
......
......@@ -4,8 +4,8 @@
void kalman_update_batch(float* Z,// measurement size = bs * no
float** X, // in-place update states size = bs * ns
float** P, // in-place update predict size = bs * ns * ns
float* X, // in-place update states size = bs * ns
float* P, // in-place update predict size = bs * ns * ns
float* R, //R covariance matrix of observation noise no * no
float* HX, // H*X size = bs * no
const int bs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment