Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
J
jfx_kalman_filter_src
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
oscar
jfx_kalman_filter_src
Commits
4b5f6f63
Commit
4b5f6f63
authored
Dec 15, 2021
by
oscar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改cuda函数
parent
b3d8bc6f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
95 additions
and
26 deletions
+95
-26
bev_overlap_online.cu
BaseTracker/kf_gup/bev_overlap_online.cu
+67
-3
kalman_update_batch_online.cu
BaseTracker/kf_gup/kalman_update_batch_online.cu
+28
-23
No files found.
BaseTracker/kf_gup/bev_overlap_online.cu
View file @
4b5f6f63
...
...
@@ -374,9 +374,73 @@ void map_bev_overlap(const int num_a, pybind11::array_t<float> boxes_a,const int
free(b_row_ptr);
}
// Computes the pairwise BEV (bird's-eye-view) overlap for two sets of boxes
// on the GPU.
//
// boxes_a:     host buffer, num_a boxes, 7 floats per box, row-major
//              (box layout/semantics are defined by boxesoverlapLauncher --
//              TODO(review): confirm field order against the kernel)
// boxes_b:     host buffer, num_b boxes, 7 floats per box, row-major
// ans_overlap: host output buffer of num_a * num_b floats; receives the
//              overlap matrix computed by the launcher
//
// All device memory is allocated and released inside this call; every CUDA
// runtime call is wrapped in GPU_CHECK so failures are reported immediately.
void bev_overlap(const int num_a, float* boxes_a, const int num_b, float* boxes_b, float* ans_overlap) {
    const int size_a = num_a * 7 * sizeof(float);
    const int size_b = num_b * 7 * sizeof(float);
    const int size_ans = num_a * num_b * sizeof(float);

    float* a_gpu = nullptr;
    float* b_gpu = nullptr;
    float* ans_gpu = nullptr;
    GPU_CHECK(cudaMalloc(&a_gpu, size_a));
    GPU_CHECK(cudaMalloc(&b_gpu, size_b));
    GPU_CHECK(cudaMalloc(&ans_gpu, size_ans));

    // Inputs arrive row-major from the caller, so they can be copied directly.
    // (The former ROS build transposed column-major inputs into temporary
    // a_row_ptr/b_row_ptr buffers here; that path was removed, so there are
    // no host-side temporaries to free anymore.)
    GPU_CHECK(cudaMemcpy(a_gpu, boxes_a, size_a, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(b_gpu, boxes_b, size_b, cudaMemcpyHostToDevice));

    boxesoverlapLauncher(num_a, a_gpu, num_b, b_gpu, ans_gpu, num_a * num_b, num_a, num_b);

    // Blocking device-to-host copy; also synchronizes with the kernel above,
    // so any asynchronous launch error surfaces through GPU_CHECK here.
    GPU_CHECK(cudaMemcpy(ans_overlap, ans_gpu, size_ans, cudaMemcpyDeviceToHost));

    GPU_CHECK(cudaFree(a_gpu));
    GPU_CHECK(cudaFree(b_gpu));
    GPU_CHECK(cudaFree(ans_gpu));
}

// Python binding retired by this commit; the C interface above is called
// directly instead.
//PYBIND11_MODULE(juefx_iou, m)
//{
//    m.def("bev_overlap", &map_bev_overlap);
//}
BaseTracker/kf_gup/kalman_update_batch_online.cu
View file @
4b5f6f63
...
...
@@ -309,30 +309,31 @@ void map_kalman_update_batch( pybind11::array_t<float> Z,
MAKE SURE ALL INPUTS ARE TWO-DIM NUMPY ARRAY
*/
void kalman_update_batch(float** Z,
float** X, // in-place update
float** P, // in-place update
float** HX,
void kalman_update_batch(float** Z,
// measurement size = bs * no
float** X, // in-place update
states size = bs * ns
float** P, // in-place update
predict size = bs * ns * ns
float** HX,
// H*X size = bs * no
const int bs,
const int ns,
const int no
) {
const int ns, //ns = 10
const int no // no = 7
)
{
//pybind11::buffer_info ZZ = Z.request();
//pybind11::buffer_info XX = X.request();
//pybind11::buffer_info PP = P.request();
//pybind11::buffer_info HXX = HX.request();
int size_ZZ =
ZZ.shape[0] * ZZ.shape[1]
* sizeof(float);
int size_XX =
XX.shape[0] * XX.shape[1]
* sizeof(float);
int size_PP =
PP.shape[0] * PP.shape[1]
* sizeof(float);
int size_HXX =
HXX.shape[0] * HXX.shape[1]
* sizeof(float);
int size_ZZ =
bs * no
* sizeof(float);
int size_XX =
bs * ns
* sizeof(float);
int size_PP =
bs * ns * ns
* sizeof(float);
int size_HXX =
bs * no
* sizeof(float);
// std::cout << "size_HXX: " << size_HXX <<"\n";
float* host_Z = reinterpret_cast<float*>(ZZ.ptr);
float* host_X = reinterpret_cast<float*>(XX.ptr);
float* host_P = reinterpret_cast<float*>(PP.ptr);
float* host_HX = reinterpret_cast<float*>(HXX.ptr);
//
float* host_Z = reinterpret_cast<float*>(ZZ.ptr);
//
float* host_X = reinterpret_cast<float*>(XX.ptr);
//
float* host_P = reinterpret_cast<float*>(PP.ptr);
//
float* host_HX = reinterpret_cast<float*>(HXX.ptr);
float* device_Z;
float* device_X;
...
...
@@ -343,16 +344,20 @@ void kalman_update_batch(float** Z,
GPU_CHECK(cudaMalloc(&device_X, size_XX));
GPU_CHECK(cudaMalloc(&device_P, size_PP));
GPU_CHECK(cudaMalloc(&device_HX, size_HXX));
GPU_CHECK(cudaMemcpy(device_Z, host_Z, size_ZZ, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_X, host_X, size_XX, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P, host_P, size_PP, cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_HX, host_HX, size_HXX, cudaMemcpyHostToDevice));
for (int i = 0; i < bs; i++)
{
GPU_CHECK(cudaMemcpy(device_Z + i*no, Z[i], no * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_X + i*ns, X[i], ns * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_P + i*ns*ns, P[i], ns*ns * sizeof(float), cudaMemcpyHostToDevice));
GPU_CHECK(cudaMemcpy(device_HX + i*no, HX[i], no * sizeof(float), cudaMemcpyHostToDevice));
}
kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_HX, bs, ns, no);
GPU_CHECK(cudaMemcpy(host_X, device_X, size_XX, cudaMemcpyDeviceToHost));
GPU_CHECK(cudaMemcpy(host_P, device_P, size_PP, cudaMemcpyDeviceToHost));
for (int i = 0; i < bs; i++)
{
GPU_CHECK(cudaMemcpy(X[i], device_X + i*ns, ns * sizeof(float), cudaMemcpyDeviceToHost));
GPU_CHECK(cudaMemcpy(P[i], device_P + i*ns*ns, ns * ns * sizeof(float), cudaMemcpyDeviceToHost));
}
GPU_CHECK(cudaFree(device_Z));
GPU_CHECK(cudaFree(device_X));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment