// Element-wise difference C = A - B over the first bs * no floats.
//
// Launch layout: 1D grid of 1D blocks, one thread per element; threads whose
// flat index is >= bs * no do nothing, so the grid may overshoot the data.
//
//   C   output buffer, at least bs * no floats
//   A   minuend, at least bs * no floats
//   B   subtrahend, at least bs * no floats
//   bs, no  only their product (the element count) is used here
__global__ void MatSub(float* C, const float* A, const float* B, int bs, int no)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    const int total = bs * no;

    // Guard the grid tail.
    if (idx >= total) {
        return;
    }

    const float lhs = A[idx];
    const float rhs = B[idx];
    C[idx] = lhs - rhs;
}
// For every batch element, fills the main diagonal of its S matrix from the
// leading diagonal of its P matrix:
//
//     S[i][i] = P[i][i] + 1      for i = 0 .. no-1
//
// Off-diagonal entries of S are not written.
// NOTE(review): the "+ 1" presumably stands for a unit measurement-noise
// term on the diagonal -- confirm against the filter definition.
//
// Launch layout: 1D grid of 1D blocks, one thread per batch element; threads
// with index >= bs exit immediately.
//
//   d_S  output, bs dense no x no matrices stored back to back
//   d_P  input,  bs dense ns x ns matrices stored back to back
//   bs   number of batch elements
//   ns   side length of each P matrix
//   no   side length of each S matrix (the loop reads P[i][i] for i < no,
//        so no must not exceed ns)
__global__ void KalmanUpdateS2(float* d_S, float* d_P, const int bs, const int ns, const int no){
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= bs) return;

    // Distance between consecutive diagonal entries of a dense n x n matrix
    // is n + 1. Deriving the strides from ns / no (rather than fixing them)
    // keeps the indexing consistent with the per-matrix offsets below; for
    // ns = 10, no = 7 these evaluate to 11 and 8.
    const int p_stride = ns + 1;
    const int s_stride = no + 1;

    float* S = d_S + tid * no * no;
    const float* P = d_P + tid * ns * ns;

    for (int i = 0; i < no; i++){
        S[i * s_stride] = P[i * p_stride] + 1.0f;
    }
}
// For every batch element, fills selected entries of its K matrix from P and
// the diagonal of S. Reading each matrix row-major:
//
//     K[i][i]      = P[i][i]      * (1 / S[i][i])    for i = 0 .. no-1
//     K[no+j][j]   = P[no+j][j]   * (1 / S[j][j])    for j = 0 .. ns-no-1
//
// All other entries of K are left untouched.
// NOTE(review): this looks like a gain computation that relies on S being
// diagonal and P having only these two diagonals populated -- confirm with
// the caller.
//
// Launch layout: 1D grid of 1D blocks, one thread per batch element; threads
// with index >= bs exit immediately.
//
//   d_K  output, bs dense ns x no matrices stored back to back
//   d_P  input,  bs dense ns x ns matrices stored back to back
//   d_S  input,  bs dense no x no matrices stored back to back
//   bs   number of batch elements
//   ns   side length of P / row count of K
//   no   side length of S / column count of K
//
// The second loop branch reads S[j][j] for j < ns - no, so the indexing is
// only in range when ns - no <= no.
__global__ void KalmanUpdateS3(float* d_K, float* d_P, float* d_S, const int bs, const int ns, const int no){
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= bs) return;

    // Diagonal step of a dense matrix with `c` columns is c + 1. Derived from
    // ns / no so the strides always agree with the matrix sizes used for the
    // per-batch offsets; for ns = 10, no = 7 they evaluate to 11, 8 and 8.
    const int p_stride = ns + 1;
    const int k_stride = no + 1;
    const int s_stride = no + 1;

    float* K = d_K + tid * ns * no;
    const float* P = d_P + tid * ns * ns;
    const float* S = d_S + tid * no * no;

    for (int i = 0; i < ns; i++){
        if (i < no){
            // Main diagonal of the top no x no part.
            K[i * k_stride] = P[i * p_stride] * (1.0f / S[i * s_stride]);
        }
        else{
            // Diagonal of the block that starts at row `no`, column 0.
            const int j = i - no;
            K[no * no + j * k_stride] = P[ns * no + j * p_stride] * (1.0f / S[j * s_stride]);
        }
    }
}
// For every batch element, accumulates into its X vector using selected
// entries of K and the Y vector. Reading K row-major:
//
//     X[i]      += K[i][i]    * Y[i]     for i = 0 .. no-1
//     X[no+j]   += K[no+j][j] * Y[j]     for j = 0 .. ns-no-1
//
// NOTE(review): presumably the state correction x += K * y restricted to the
// only non-zero entries of K -- confirm with the caller.
//
// Launch layout: 1D grid of 1D blocks, one thread per batch element; threads
// with index >= bs exit immediately.
//
//   d_X  in/out, bs vectors of ns floats stored back to back
//   d_K  input,  bs dense ns x no matrices stored back to back
//   d_Y  input,  bs vectors of no floats stored back to back
//   bs   number of batch elements
//   ns   length of each X vector / row count of K
//   no   length of each Y vector / column count of K
//
// The second loop branch reads Y[j] for j < ns - no, so the indexing is only
// in range when ns - no <= no.
__global__ void KalmanUpdateS4(float* d_X, float* d_K, float* d_Y, const int bs, const int ns, const int no){
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid >= bs) return;

    // Diagonal step of a dense matrix with `no` columns. Derived from `no`
    // so it always matches the ns x no layout used for the per-batch offset;
    // for no = 7 it evaluates to 8.
    const int k_stride = no + 1;

    float* X = d_X + tid * ns;
    const float* K = d_K + tid * ns * no;
    const float* Y = d_Y + tid * no;

    for (int i = 0; i < ns; i++){
        if (i < no){
            // Main diagonal of the top no x no part of K.
            X[i] += K[i * k_stride] * Y[i];
        }
        else{
            // Diagonal of the block of K that starts at row `no`, column 0.
            const int j = i - no;
            X[i] += K[no * no + j * k_stride] * Y[j];
        }
    }
}
__global__ void KalmanUpdateS5(float* d_P, float* d_K, const int bs, const int ns, const int no){
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= bs) return;
int p_stride = 11;
int k_stride = 8;
for (int i = ns - 1; i > -1; i--){
if (i < no){
float IKH_tl = 1 - d_K[tid * ns * no + i * k_stride];
d_P[tid * ns * ns + i * p_stride] *= IKH_tl;
if (i < 3) d_P[tid * ns * ns + no + i * p_stride] *= IKH_tl;
}
else{
float IKH_bl = 0 - d_K[tid * ns * no + no * no + (i - no) * k_stride];
float IKH_br = 1;
float P_bl = d_P[tid * ns * ns + ns * no + (i - no) * p_stride];