oscar / jfx_kalman_filter_src / Commits

Commit 59540421
authored Dec 16, 2021 by oscar
Commit modified .cu files

parent f6453873
Showing 2 changed files with 172 additions and 172 deletions (+172, -172)

BaseTracker/kf_gpu/bev_overlap_online.cu (+67, -67)
BaseTracker/kf_gpu/kalman_update_batch_online.cu (+105, -105)
BaseTracker/kf_gpu/bev_overlap_online.cu (view file @ 59540421)
#include <stdio.h>
#include <iostream>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
// #include <pybind11/pybind11.h>
// #include <pybind11/numpy.h>
// #include <pybind11/stl.h>
#include "common.h"
...
...
@@ -309,70 +309,70 @@ void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b
}
void map_bev_overlap(const int num_a, pybind11::array_t<float> boxes_a, const int num_b, pybind11::array_t<float> boxes_b, pybind11::array_t<float> ans_overlap){
    pybind11::buffer_info bx_a = boxes_a.request();
    pybind11::buffer_info bx_b = boxes_b.request();
    pybind11::buffer_info ans = ans_overlap.request();

    int size_a = bx_a.shape[0] * bx_a.shape[1] * sizeof(float);
    int size_b = bx_b.shape[0] * bx_b.shape[1] * sizeof(float);
    int size_ans = ans.shape[0] * ans.shape[1] * sizeof(float);

    float* a_gpu;
    float* b_gpu;
    float* ans_gpu;
    GPU_CHECK(cudaMalloc(&a_gpu, size_a));
    GPU_CHECK(cudaMalloc(&b_gpu, size_b));
    GPU_CHECK(cudaMalloc(&ans_gpu, size_ans));

    // float* a_ptr = reinterpret_cast<float*>(bx_a.ptr);
    const float* a_ptr = reinterpret_cast<const float*>(bx_a.ptr);
    float* b_ptr = reinterpret_cast<float*>(bx_b.ptr);
    float* ans_ptr = reinterpret_cast<float*>(ans.ptr);
    int an = bx_a.shape[0];
    int bn = bx_b.shape[0];

    // The a and b buffers are column-major when built for ROS; convert them to row-major here.
#ifdef ROS
    float* a_row_ptr = (float*)malloc(size_a);
    for (int ii = 0; ii < an; ii++){
        for (int jj = 0; jj < 7; jj++){
            *(a_row_ptr + jj + ii * 7) = *(a_ptr + ii + jj * an);
        }
    }
    float* b_row_ptr = (float*)malloc(size_b);
    for (int ii = 0; ii < bn; ii++){
        for (int jj = 0; jj < 7; jj++){
            *(b_row_ptr + jj + ii * 7) = *(b_ptr + ii + jj * bn);
        }
    }
    GPU_CHECK(cudaMemcpy(a_gpu, a_row_ptr, size_a, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(b_gpu, b_row_ptr, size_b, cudaMemcpyHostToDevice));
#else
    GPU_CHECK(cudaMemcpy(a_gpu, a_ptr, size_a, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(b_gpu, b_ptr, size_b, cudaMemcpyHostToDevice));
#endif
    boxesoverlapLauncher(num_a, a_gpu, num_b, b_gpu, ans_gpu, ans.shape[0] * ans.shape[1], bx_a.shape[0], bx_b.shape[0]);
    GPU_CHECK(cudaMemcpy(ans_ptr, ans_gpu, size_ans, cudaMemcpyDeviceToHost));
    GPU_CHECK(cudaFree(a_gpu));
    GPU_CHECK(cudaFree(b_gpu));
    GPU_CHECK(cudaFree(ans_gpu));
#ifdef ROS
    // a_row_ptr / b_row_ptr only exist in the ROS branch, so the frees must be guarded too,
    // otherwise the non-ROS build fails to compile.
    free(a_row_ptr);
    free(b_row_ptr);
#endif
}
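For reference, the ROS branch above is just a column-major to row-major transpose of an n x 7 box array. A minimal standalone sketch of the same index arithmetic (the helper name is hypothetical, not part of this file):

// Hypothetical helper, same index math as the ROS branch above:
// element (i, j) of an n x 7 array sits at col[j * n + i] in column-major
// storage and at row[i * 7 + j] in row-major storage.
static void boxes_col_to_row_major(const float* col, float* row, int n) {
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < 7; j++) {
            row[i * 7 + j] = col[j * n + i];
        }
    }
}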
// void map_bev_overlap(const int num_a, pybind11::array_t<float> boxes_a,const int num_b, pybind11::array_t<float> boxes_b, pybind11::array_t<float> ans_overlap){
//     pybind11::buffer_info bx_a = boxes_a.request();
//     pybind11::buffer_info bx_b = boxes_b.request();
//     pybind11::buffer_info ans = ans_overlap.request();
//
//     int size_a = bx_a.shape[0] * bx_a.shape[1] * sizeof(float);
//     int size_b = bx_b.shape[0] * bx_b.shape[1] * sizeof(float);
//     int size_ans = ans.shape[0] * ans.shape[1] * sizeof(float);
//
//     float* a_gpu;
//     float* b_gpu;
//     float* ans_gpu;
//     GPU_CHECK(cudaMalloc(&a_gpu, size_a));
//     GPU_CHECK(cudaMalloc(&b_gpu, size_b));
//     GPU_CHECK(cudaMalloc(&ans_gpu, size_ans));
//
//     // float* a_ptr = reinterpret_cast<float*>(bx_a.ptr);
//     const float* a_ptr = reinterpret_cast<const float*>(bx_a.ptr);
//     float* b_ptr = reinterpret_cast<float*>(bx_b.ptr);
//     float* ans_ptr = reinterpret_cast<float*>(ans.ptr);
//     int an = bx_a.shape[0];
//     int bn = bx_b.shape[0];
//
//     // A AND B POINTERS ARE COLUMN-BASED WHEN IN ROS, CONVERTING THIS TO ROW-BASED.
// #ifdef ROS
//     float* a_row_ptr;
//     a_row_ptr = (float*)malloc(size_a);
//     for (int ii = 0; ii < an; ii++){
//         for (int jj = 0; jj < 7; jj++){
//             *(a_row_ptr + jj + ii * 7) = *(a_ptr + ii + jj * an);
//         }
//     }
//     float* b_row_ptr;
//     b_row_ptr = (float*)malloc(size_b);
//     for (int ii = 0; ii < bn; ii++){
//         for (int jj = 0; jj < 7; jj++){
//             *(b_row_ptr + jj + ii * 7) = *(b_ptr + ii + jj * bn);
//         }
//     }
//     GPU_CHECK(cudaMemcpy(a_gpu, a_row_ptr, size_a, cudaMemcpyHostToDevice));
//     GPU_CHECK(cudaMemcpy(b_gpu, b_row_ptr, size_b, cudaMemcpyHostToDevice));
// #else
//     GPU_CHECK(cudaMemcpy(a_gpu, a_ptr, size_a, cudaMemcpyHostToDevice));
//     GPU_CHECK(cudaMemcpy(b_gpu, b_ptr, size_b, cudaMemcpyHostToDevice));
// #endif
//
//     boxesoverlapLauncher(num_a, a_gpu, num_b, b_gpu, ans_gpu, ans.shape[0] * ans.shape[1], bx_a.shape[0], bx_b.shape[0]);
//
//     GPU_CHECK(cudaMemcpy(ans_ptr, ans_gpu, size_ans, cudaMemcpyDeviceToHost));
//
//     GPU_CHECK(cudaFree(a_gpu));
//     GPU_CHECK(cudaFree(b_gpu));
//     GPU_CHECK(cudaFree(ans_gpu));
//
//     free(a_row_ptr);
//     free(b_row_ptr);
// }
void bev_overlap(const int num_a, float* boxes_a, const int num_b, float* boxes_b, float* ans_overlap)
{
...
...
BaseTracker/kf_gpu/kalman_update_batch_online.cu (view file @ 59540421)
...
...
@@ -6,9 +6,9 @@
#include <cuda_runtime.h>
#include <cublas.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
// #include <pybind11/pybind11.h>
// #include <pybind11/numpy.h>
// #include <pybind11/stl.h>
#include "kalman_batch_ops.cu"
#include "common.h"
...
...
@@ -189,108 +189,108 @@ void kalmanUpdateLauncher_batch(float* d_Z, //(bs, no)
MAKE SURE ALL INPUTS ARE TWO-DIM NUMPY ARRAYS
*/
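// Expected shapes (bs = batch size, ns = state dim, no = observation dim).
// Z is (bs, no) per the launcher comment above; the rest are inferred from the
// size computations below and are assumptions, not stated in this diff:
//   X : (bs, ns)      states, updated in place
//   P : (bs, ns * ns) row-flattened covariances, updated in place
//   HX: (bs, no)      predicted measurements H * x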
void map_kalman_update_batch( pybind11::array_t<float> Z,
                              pybind11::array_t<float> X,  // in-place update
                              pybind11::array_t<float> P,  // in-place update
                              pybind11::array_t<float> HX,
                              const int bs,
                              const int ns,
                              const int no
                              ){
    pybind11::buffer_info ZZ = Z.request();
    pybind11::buffer_info XX = X.request();
    pybind11::buffer_info PP = P.request();
    pybind11::buffer_info HXX = HX.request();

    int size_ZZ = ZZ.shape[0] * ZZ.shape[1] * sizeof(float);
    int size_XX = XX.shape[0] * XX.shape[1] * sizeof(float);
    int size_PP = PP.shape[0] * PP.shape[1] * sizeof(float);
    int size_HXX = HXX.shape[0] * HXX.shape[1] * sizeof(float);
    // std::cout << "size_HXX: " << size_HXX <<"\n";

    float* host_Z = reinterpret_cast<float*>(ZZ.ptr);
    float* host_X = reinterpret_cast<float*>(XX.ptr);
    float* host_P = reinterpret_cast<float*>(PP.ptr);
    float* host_HX = reinterpret_cast<float*>(HXX.ptr);

    float* device_Z;
    float* device_X;
    float* device_P;
    float* device_HX;
    GPU_CHECK(cudaMalloc(&device_Z, size_ZZ));
    GPU_CHECK(cudaMalloc(&device_X, size_XX));
    GPU_CHECK(cudaMalloc(&device_P, size_PP));
    GPU_CHECK(cudaMalloc(&device_HX, size_HXX));

    GPU_CHECK(cudaMemcpy(device_Z, host_Z, size_ZZ, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(device_X, host_X, size_XX, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(device_P, host_P, size_PP, cudaMemcpyHostToDevice));
    GPU_CHECK(cudaMemcpy(device_HX, host_HX, size_HXX, cudaMemcpyHostToDevice));

    kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_HX, bs, ns, no);

    // X and P are the only outputs: copying them back leaves the caller's
    // NumPy arrays updated in place, as documented above.
    GPU_CHECK(cudaMemcpy(host_X, device_X, size_XX, cudaMemcpyDeviceToHost));
    GPU_CHECK(cudaMemcpy(host_P, device_P, size_PP, cudaMemcpyDeviceToHost));
    GPU_CHECK(cudaFree(device_Z));
    GPU_CHECK(cudaFree(device_X));
    GPU_CHECK(cudaFree(device_P));
    GPU_CHECK(cudaFree(device_HX));
#ifdef DEBUG
    // NOTE: this block still references H, host_H and size_HH, which are no
    // longer parameters of this function; it will not compile if DEBUG is defined.
    int c_row = no;
    int c_col = ns;
    std::cout << "################################### kalman update gpu host_h before reinterpret_cast: no * ns" << "\n";
    auto a = H.mutable_unchecked<2>();
    for (int i = 0; i < a.shape(0); i++){
        std::cout << "[";
        for (int j = 0; j < a.shape(1); j++){
            std::cout << a(i, j) << ", ";
        }
        std::cout << "],\n";
    }
    std::cout << "++++++++++++++++++++++++++++++++++ kalman update gpu host_h shape: no * ns" << "\n";
    for (int i = 0; i < c_row; i++){
        for (int j = 0; j < c_col; j++){
            std::cout << *(host_H + i * c_col + j) << " ";
        }
        std::cout << "\n";
    }
    // Transpose the column-major host_H into a row-major copy for printing.
    float* tmp;
    tmp = (float*)malloc(size_HH);
    for (int ii = 0; ii < c_row; ii++){
        for (int jj = 0; jj < c_col; jj++){
            *(tmp + jj + ii * c_col) = *(host_H + ii + jj * c_row);
        }
    }
    std::cout << "-------------------to rowMajor host_e_row: " << "\n";
    for (int i = 0; i < c_row; i++){
        for (int j = 0; j < c_col; j++){
            std::cout << *(tmp + i * c_col + j) << " ";
        }
        std::cout << "\n";
    }
    free(tmp);
#endif
// ATTENTION ORDER COULD BE CHANGED IN ROS !
}
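For orientation, the measurement update that a batched Kalman kernel with this signature would be expected to apply per batch element is the standard form below; HX above corresponds to the precomputed H x term, and the handling of H and the measurement noise R happens inside kalman_batch_ops.cu, which is not shown in this diff:

\[
K = P H^{\top} \left( H P H^{\top} + R \right)^{-1}, \qquad
x \leftarrow x + K \, (z - H x), \qquad
P \leftarrow (I - K H) \, P
\]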
// void map_kalman_update_batch( pybind11::array_t<float> Z,
//                               pybind11::array_t<float> X, // in-place update
//                               pybind11::array_t<float> P, // in-place update
//                               pybind11::array_t<float> HX,
//                               const int bs,
//                               const int ns,
//                               const int no
//                               ){
//     pybind11::buffer_info ZZ = Z.request();
//     pybind11::buffer_info XX = X.request();
//     pybind11::buffer_info PP = P.request();
//     pybind11::buffer_info HXX = HX.request();
//
//     int size_ZZ = ZZ.shape[0] * ZZ.shape[1] * sizeof(float);
//     int size_XX = XX.shape[0] * XX.shape[1] * sizeof(float);
//     int size_PP = PP.shape[0] * PP.shape[1] * sizeof(float);
//     int size_HXX = HXX.shape[0] * HXX.shape[1] * sizeof(float);
//     // std::cout << "size_HXX: " << size_HXX <<"\n";
//
//     float* host_Z = reinterpret_cast<float*>(ZZ.ptr);
//     float* host_X = reinterpret_cast<float*>(XX.ptr);
//     float* host_P = reinterpret_cast<float*>(PP.ptr);
//     float* host_HX = reinterpret_cast<float*>(HXX.ptr);
//
//     float* device_Z;
//     float* device_X;
//     float* device_P;
//     float* device_HX;
//
//     GPU_CHECK(cudaMalloc(&device_Z, size_ZZ));
//     GPU_CHECK(cudaMalloc(&device_X, size_XX));
//     GPU_CHECK(cudaMalloc(&device_P, size_PP));
//     GPU_CHECK(cudaMalloc(&device_HX, size_HXX));
//
//     GPU_CHECK(cudaMemcpy(device_Z, host_Z, size_ZZ, cudaMemcpyHostToDevice));
//     GPU_CHECK(cudaMemcpy(device_X, host_X, size_XX, cudaMemcpyHostToDevice));
//     GPU_CHECK(cudaMemcpy(device_P, host_P, size_PP, cudaMemcpyHostToDevice));
//     GPU_CHECK(cudaMemcpy(device_HX, host_HX, size_HXX, cudaMemcpyHostToDevice));
//
//     kalmanUpdateLauncher_batch(device_Z, device_X, device_P, device_HX, bs, ns, no);
//
//     GPU_CHECK(cudaMemcpy(host_X, device_X, size_XX, cudaMemcpyDeviceToHost));
//     GPU_CHECK(cudaMemcpy(host_P, device_P, size_PP, cudaMemcpyDeviceToHost));
//
//     GPU_CHECK(cudaFree(device_Z));
//     GPU_CHECK(cudaFree(device_X));
//     GPU_CHECK(cudaFree(device_P));
//     GPU_CHECK(cudaFree(device_HX));
//
// #ifdef DEBUG
//     int c_row = no;
//     int c_col = ns;
//     std::cout << "################################### kalman update gpu host_h before reinterpret_cast: no * ns" << "\n";
//     auto a = H.mutable_unchecked<2>();
//     for (int i = 0; i < a.shape(0); i++){
//         std::cout << "[";
//         for (int j = 0; j < a.shape(1); j++){
//             std::cout << a(i, j) << ", ";
//         }
//         std::cout << "],\n";
//     }
//
//     std::cout << "++++++++++++++++++++++++++++++++++ kalman update gpu host_h shape: no * ns" << "\n";
//     for (int i = 0; i < c_row; i++){
//         for (int j = 0; j < c_col; j++){
//             std::cout << *(host_H + i * c_col + j) << " ";
//         }
//         std::cout << "\n";
//     }
//
//     float* tmp;
//     tmp = (float*)malloc(size_HH);
//     for (int ii = 0; ii < c_row; ii++){
//         for (int jj = 0; jj < c_col; jj++){
//             *(tmp + jj + ii * c_col) = *(host_H + ii + jj * c_row);
//         }
//     }
//
//     std::cout << "-------------------to rowMajor host_e_row: " << "\n";
//     for (int i = 0; i < c_row; i++){
//         for (int j = 0; j < c_col; j++){
//             std::cout << *(tmp + i * c_col + j) << " ";
//         }
//         std::cout << "\n";
//     }
//     free(tmp);
// #endif
//
//     // ATTENTION ORDER COULD BE CHANGED IN ROS !
// }
//PYBIND11_MODULE(juefx_kalman_multi_shared, m)
//PYBIND11_MODULE(juefx_kalman_multi_1, m)
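The file's actual PYBIND11_MODULE block is elided below; for illustration only, a typical export of these two entry points would look like the following sketch (the module name here is hypothetical, and the real binding may differ):

// Illustration only: hypothetical module name and docstrings.
PYBIND11_MODULE(kalman_gpu_example, m) {
    m.def("bev_overlap", &map_bev_overlap,
          "Pairwise BEV overlap of two (n, 7) float32 box arrays");
    m.def("kalman_update_batch", &map_kalman_update_batch,
          "Batched Kalman update; X and P are modified in place");
}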
...
...