Commit 90d534a6 authored by oscar

Commit AB3D's IoU

parent 6f002650
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import numpy as np, copy
from numba import jit
from scipy.spatial import ConvexHull, QhullError
@jit
def poly_area(x,y):
""" Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """
return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))
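# Example (illustrative, not part of the original file): the shoelace
# formula applied to a counter-clockwise unit square.
# >>> poly_area(np.array([0., 1., 1., 0.]), np.array([0., 0., 1., 1.]))
# 1.0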
@jit
def box3d_vol(corners):
''' corners: (8,3) no assumption on axis direction '''
a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2))
b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2))
c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2))
return a*b*c
# note: not jitted, numba cannot compile SciPy's ConvexHull
def convex_hull_intersection(p1, p2):
    """ Compute the intersection of two convex polygons.
    p1, p2 are lists of (x,y) tuples of hull vertices.
    Returns a list of (x,y) vertices of the intersection and its area
    (ConvexHull.volume is the enclosed area for 2D inputs).
    """
    inter_p = polygon_clip(p1, p2)
    if inter_p is not None:
        try:
            hull_inter = ConvexHull(inter_p)
        except QhullError:  # degenerate intersection, e.g. collinear points
            return None, 0.0
        return inter_p, hull_inter.volume
    else:
        return None, 0.0
def polygon_clip(subjectPolygon, clipPolygon):
""" Clip a polygon with another polygon.
Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python
Args:
subjectPolygon: a list of (x,y) 2d points, any polygon.
clipPolygon: a list of (x,y) 2d points, has to be *convex*
Note:
**points have to be counter-clockwise ordered**
Return:
a list of (x,y) vertex point for the intersection polygon.
"""
def inside(p):
return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])
def computeIntersection():
dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
dp = [s[0] - e[0], s[1] - e[1]]
n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
n2 = s[0] * e[1] - s[1] * e[0]
n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]
outputList = subjectPolygon
cp1 = clipPolygon[-1]
for clipVertex in clipPolygon:
cp2 = clipVertex
inputList = outputList
outputList = []
s = inputList[-1]
for subjectVertex in inputList:
e = subjectVertex
if inside(e):
if not inside(s): outputList.append(computeIntersection())
outputList.append(e)
elif inside(s): outputList.append(computeIntersection())
s = e
cp1 = cp2
if len(outputList) == 0: return None
return (outputList)
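# Example (illustrative, not part of the original file): clipping two
# counter-clockwise unit squares offset by 0.5 leaves the 0.5 x 0.5 overlap,
# whose convex-hull "volume" (area, in 2D) is 0.25.
# >>> sq1 = [(0., 0.), (1., 0.), (1., 1.), (0., 1.)]
# >>> sq2 = [(.5, .5), (1.5, .5), (1.5, 1.5), (.5, 1.5)]
# >>> _, area = convex_hull_intersection(sq1, sq2)
# >>> round(area, 2)
# 0.25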
def iou3d(corners1, corners2):
''' Compute 3D bounding box IoU, only working for object parallel to ground
Input:
corners1: numpy array (8,3), assume up direction is negative Y
corners2: numpy array (8,3), assume up direction is negative Y
Output:
iou: 3D bounding box IoU
iou_2d: bird's eye view 2D bounding box IoU
todo (rqi): add more description on corner points' orders.
'''
# corner points are in counter clockwise order
rect1 = [(corners1[i,0], corners1[i,2]) for i in range(3,-1,-1)]
rect2 = [(corners2[i,0], corners2[i,2]) for i in range(3,-1,-1)]
area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1])
area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1])
    _, inter_area = convex_hull_intersection(rect1, rect2)
iou_2d = inter_area/(area1+area2-inter_area)
ymax = min(corners1[0,1], corners2[0,1])
ymin = max(corners1[4,1], corners2[4,1])
inter_vol = inter_area * max(0.0, ymax-ymin)
vol1 = box3d_vol(corners1)
vol2 = box3d_vol(corners2)
iou = inter_vol / (vol1 + vol2 - inter_vol)
return iou, iou_2d
@jit
def roty(t):
''' Rotation about the y-axis. '''
c = np.cos(t)
s = np.sin(t)
return np.array([[c, 0, s],
[0, 1, 0],
[-s, 0, c]])
def convert_3dbox_to_8corner(bbox3d_input):
    ''' Takes an object's 3D box in the [x,y,z,theta,l,w,h] representation and
        converts it to the 8 corners of the 3D box
    Returns:
        corners_3d: (8,3) array in rect camera coord
    '''
# compute rotational matrix around yaw axis
bbox3d = copy.copy(bbox3d_input)
R = roty(bbox3d[3])
# 3d bounding box dimensions
l = bbox3d[4]
w = bbox3d[5]
h = bbox3d[6]
# 3d bounding box corners
    x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]
# rotate and translate 3d bounding box
corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
corners_3d[0,:] = corners_3d[0,:] + bbox3d[0]
corners_3d[1,:] = corners_3d[1,:] + bbox3d[1]
corners_3d[2,:] = corners_3d[2,:] + bbox3d[2]
return np.transpose(corners_3d)
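if __name__ == '__main__':
    # Minimal sanity check (illustrative sketch, not part of the original commit):
    # a box overlapped with a shifted copy of itself should give a partial IoU.
    box = np.array([0.0, 1.5, 10.0, 0.0, 4.0, 2.0, 1.5])   # [x,y,z,theta,l,w,h]
    shifted = box.copy()
    shifted[0] += 1.0   # move 1m along x
    shifted[2] += 0.5   # move 0.5m along z
    iou, iou_2d = iou3d(convert_3dbox_to_8corner(box), convert_3dbox_to_8corner(shifted))
    print(iou, iou_2d)  # expect roughly 0.39 for both (BEV overlap 3.0m x 1.5m, full height overlap)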
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import numpy as np
from filterpy.kalman import KalmanFilter
class KalmanBoxTracker(object):
"""
  This class represents the internal state of an individual tracked object, observed as a bbox.
"""
count = 0
def __init__(self, bbox3D, info):
"""
Initialises a tracker using initial bounding box.
"""
# define constant velocity model
self.kf = KalmanFilter(dim_x=10, dim_z=7)
self.kf.F = np.array([[1,0,0,0,0,0,0,1,0,0], # state transition matrix
[0,1,0,0,0,0,0,0,1,0],
[0,0,1,0,0,0,0,0,0,1],
[0,0,0,1,0,0,0,0,0,0],
[0,0,0,0,1,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0],
[0,0,0,0,0,0,1,0,0,0],
[0,0,0,0,0,0,0,1,0,0],
[0,0,0,0,0,0,0,0,1,0],
[0,0,0,0,0,0,0,0,0,1]])
self.kf.H = np.array([[1,0,0,0,0,0,0,0,0,0], # measurement function,
[0,1,0,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,0,0,0],
[0,0,0,1,0,0,0,0,0,0],
[0,0,0,0,1,0,0,0,0,0],
[0,0,0,0,0,1,0,0,0,0],
[0,0,0,0,0,0,1,0,0,0]])
# # with angular velocity
# self.kf = KalmanFilter(dim_x=11, dim_z=7)
# self.kf.F = np.array([[1,0,0,0,0,0,0,1,0,0,0], # state transition matrix
# [0,1,0,0,0,0,0,0,1,0,0],
# [0,0,1,0,0,0,0,0,0,1,0],
# [0,0,0,1,0,0,0,0,0,0,1],
# [0,0,0,0,1,0,0,0,0,0,0],
# [0,0,0,0,0,1,0,0,0,0,0],
# [0,0,0,0,0,0,1,0,0,0,0],
# [0,0,0,0,0,0,0,1,0,0,0],
# [0,0,0,0,0,0,0,0,1,0,0],
# [0,0,0,0,0,0,0,0,0,1,0],
# [0,0,0,0,0,0,0,0,0,0,1]])
# self.kf.H = np.array([[1,0,0,0,0,0,0,0,0,0,0], # measurement function,
# [0,1,0,0,0,0,0,0,0,0,0],
# [0,0,1,0,0,0,0,0,0,0,0],
# [0,0,0,1,0,0,0,0,0,0,0],
# [0,0,0,0,1,0,0,0,0,0,0],
# [0,0,0,0,0,1,0,0,0,0,0],
# [0,0,0,0,0,0,1,0,0,0,0]])
# self.kf.R[0:,0:] *= 10. # measurement uncertainty
self.kf.P[7:, 7:] *= 1000. # state uncertainty, give high uncertainty to the unobservable initial velocities, covariance matrix
self.kf.P *= 10.
# self.kf.Q[-1,-1] *= 0.01 # process uncertainty
self.kf.Q[7:, 7:] *= 0.01
self.kf.x[:7] = bbox3D.reshape((7, 1))
self.time_since_update = 0
self.id = KalmanBoxTracker.count
KalmanBoxTracker.count += 1
self.history = []
        self.hits = 1                  # total number of hits, including the first detection
        self.hit_streak = 1            # number of consecutive hits, counting the first detection
        self.first_continuing_hit = 1  # length of the initial consecutive-hit run
self.still_first = True
self.age = 0
self.info = info # other info associated
def update(self, bbox3D, info):
"""
Updates the state vector with observed bbox.
"""
self.time_since_update = 0
self.history = []
self.hits += 1
        self.hit_streak += 1           # number of consecutive hits
        if self.still_first:
            self.first_continuing_hit += 1  # number of consecutive hits since the first detection
######################### orientation correction
if self.kf.x[3] >= np.pi: self.kf.x[3] -= np.pi * 2 # make the theta still in the range
if self.kf.x[3] < -np.pi: self.kf.x[3] += np.pi * 2
new_theta = bbox3D[3]
if new_theta >= np.pi: new_theta -= np.pi * 2 # make the theta still in the range
if new_theta < -np.pi: new_theta += np.pi * 2
bbox3D[3] = new_theta
predicted_theta = self.kf.x[3]
        if abs(new_theta - predicted_theta) > np.pi / 2.0 and abs(new_theta - predicted_theta) < np.pi * 3 / 2.0:  # if the difference between the two angles is not acute, flip the box by pi
self.kf.x[3] += np.pi
if self.kf.x[3] > np.pi: self.kf.x[3] -= np.pi * 2 # make the theta still in the range
if self.kf.x[3] < -np.pi: self.kf.x[3] += np.pi * 2
# now the angle is acute: < 90 or > 270, convert the case of > 270 to < 90
if abs(new_theta - self.kf.x[3]) >= np.pi * 3 / 2.0:
if new_theta > 0: self.kf.x[3] += np.pi * 2
else: self.kf.x[3] -= np.pi * 2
######################### # flip
self.kf.update(bbox3D)
        if self.kf.x[3] >= np.pi: self.kf.x[3] -= np.pi * 2  # make the theta still in the range
if self.kf.x[3] < -np.pi: self.kf.x[3] += np.pi * 2
self.info = info
def predict(self):
"""
Advances the state vector and returns the predicted bounding box estimate.
"""
self.kf.predict()
if self.kf.x[3] >= np.pi: self.kf.x[3] -= np.pi * 2
if self.kf.x[3] < -np.pi: self.kf.x[3] += np.pi * 2
self.age += 1
if (self.time_since_update > 0):
self.hit_streak = 0
self.still_first = False
self.time_since_update += 1
self.history.append(self.kf.x)
return self.history[-1]
def get_state(self):
"""
Returns the current bounding box estimate.
"""
return self.kf.x[:7].reshape((7, ))
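if __name__ == '__main__':
    # Minimal usage sketch (illustrative, not part of the original commit):
    # initialise a track from one detection, predict a step, then correct it
    # with a measurement of the same box moved 0.5m along x.
    det = np.array([0.0, 1.5, 10.0, 0.0, 4.0, 2.0, 1.5])   # [x,y,z,theta,l,w,h]
    trk = KalmanBoxTracker(det, info=None)
    trk.predict()
    trk.update(det + np.array([0.5, 0., 0., 0., 0., 0., 0.]), info=None)
    print(trk.get_state())   # x should have moved towards 0.5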
import numpy as np, cv2, os
class Object3d(object):
''' 3d object label '''
def __init__(self, label_file_line):
data = label_file_line.split(' ')
data[1:] = [float(x) for x in data[1:]]
# extract label, truncation, occlusion
self.type = data[0] # 'Car', 'Pedestrian', ...
self.truncation = data[1] # truncated pixel ratio [0..1]
self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
self.alpha = data[3] # object observation angle [-pi..pi]
# extract 2d bounding box in 0-based coordinates
self.xmin = data[4] # left
self.ymin = data[5] # top
self.xmax = data[6] # right
self.ymax = data[7] # bottom
self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax])
# extract 3d bounding box information
self.h = data[8] # box height
self.w = data[9] # box width
self.l = data[10] # box length (in meters)
self.t = (data[11],data[12],data[13]) # location (x,y,z) in camera coord.
self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi]
if len(data) > 15: self.score = float(data[15])
if len(data) > 16: self.id = int(data[16])
def print_object(self):
        print('Type, truncation, occlusion, alpha: %s, %.2f, %d, %f' % (self.type, self.truncation, self.occlusion, self.alpha))
print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % (self.xmin, self.ymin, self.xmax, self.ymax))
print('3d bbox h,w,l: %f, %f, %f' % (self.h, self.w, self.l))
print('3d bbox location, ry: (%f, %f, %f), %f' % (self.t[0], self.t[1], self.t[2], self.ry))
def convert_to_str(self):
return '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' % \
(self.type, self.truncation, self.occlusion, self.alpha, self.xmin, self.ymin, self.xmax, self.ymax,
self.h, self.w, self.l, self.t[0], self.t[1], self.t[2], self.ry)
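# Example (illustrative, not part of the original file): parsing one line of a
# KITTI label file (the numbers below are made up but follow the KITTI layout).
# >>> line = 'Car 0.00 0 -1.58 587.01 173.33 614.12 200.12 1.65 1.67 3.64 -0.65 1.71 46.70 -1.59'
# >>> obj = Object3d(line)
# >>> obj.type, obj.h, obj.t
# ('Car', 1.65, (-0.65, 1.71, 46.7))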
class Calibration(object):
''' Calibration matrices and utils
3d XYZ in <label>.txt are in rect camera coord.
2d box xy are in image2 coord
Points in <lidar>.bin are in Velodyne coord.
y_image2 = P^2_rect * x_rect
y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
x_ref = Tr_velo_to_cam * x_velo
x_rect = R0_rect * x_ref
P^2_rect = [f^2_u, 0, c^2_u, -f^2_u b^2_x;
0, f^2_v, c^2_v, -f^2_v b^2_y;
0, 0, 1, 0]
= K * [1|t]
image2 coord:
----> x-axis (u)
|
|
v y-axis (v)
velodyne coord:
front x, left y, up z
rect/ref camera coord:
right x, down y, front z
Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
TODO(rqi): do matrix multiplication only once for each projection.
'''
def __init__(self, calib_filepath, from_video=False):
if from_video:
calibs = self.read_calib_from_video(calib_filepath)
else:
calibs = self.read_calib_file(calib_filepath)
# Projection matrix from rect camera coord to image2 coord
self.P = calibs['P2']
self.P = np.reshape(self.P, [3,4])
# Rigid transform from Velodyne coord to reference camera coord
self.V2C = calibs['Tr_velo_to_cam']
self.V2C = np.reshape(self.V2C, [3,4])
self.C2V = inverse_rigid_trans(self.V2C)
# Rotation from reference camera coord to rect camera coord
self.R0 = calibs['R0_rect']
self.R0 = np.reshape(self.R0,[3,3])
# Camera intrinsics and extrinsics
self.c_u = self.P[0,2]
self.c_v = self.P[1,2]
self.f_u = self.P[0,0]
self.f_v = self.P[1,1]
self.b_x = self.P[0,3]/(-self.f_u) # relative
self.b_y = self.P[1,3]/(-self.f_v)
def read_calib_file(self, filepath):
''' Read in a calibration file and parse into a dictionary.
Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
'''
data = {}
with open(filepath, 'r') as f:
for line in f.readlines():
line = line.rstrip()
if len(line)==0: continue
key, value = line.split(':', 1)
# The only non-float values in these files are dates, which
# we don't care about anyway
try:
data[key] = np.array([float(x) for x in value.split()])
except ValueError:
pass
return data
def read_calib_from_video(self, calib_root_dir):
        ''' Read the calibration for camera 2 from video calib files.
            Assumes calib_cam_to_cam.txt and calib_velo_to_cam.txt live under calib_root_dir.
        '''
data = {}
cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
Tr_velo_to_cam = np.zeros((3,4))
Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])
Tr_velo_to_cam[:,3] = velo2cam['T']
data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
data['R0_rect'] = cam2cam['R_rect_00']
data['P2'] = cam2cam['P_rect_02']
return data
def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian coordinates
            Output: nx4 points in homogeneous coordinates, obtained by appending a 1
        '''
n = pts_3d.shape[0]
pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
return pts_3d_hom
# ===========================
# ------- 3d to 3d ----------
# ===========================
def project_velo_to_ref(self, pts_3d_velo):
pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
return np.dot(pts_3d_velo, np.transpose(self.V2C))
def project_ref_to_velo(self, pts_3d_ref):
pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
return np.dot(pts_3d_ref, np.transpose(self.C2V))
def project_rect_to_ref(self, pts_3d_rect):
''' Input and Output are nx3 points '''
return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))
def project_ref_to_rect(self, pts_3d_ref):
''' Input and Output are nx3 points '''
return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))
def project_rect_to_velo(self, pts_3d_rect):
''' Input: nx3 points in rect camera coord.
Output: nx3 points in velodyne coord.
'''
pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
return self.project_ref_to_velo(pts_3d_ref)
def project_velo_to_rect(self, pts_3d_velo):
pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
return self.project_ref_to_rect(pts_3d_ref)
# ===========================
# ------- 3d to 2d ----------
# ===========================
def project_rect_to_image(self, pts_3d_rect):
''' Input: nx3 points in rect camera coord.
Output: nx2 points in image2 coord.
'''
pts_3d_rect = self.cart2hom(pts_3d_rect)
pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3
pts_2d[:,0] /= pts_2d[:,2]
pts_2d[:,1] /= pts_2d[:,2]
return pts_2d[:,0:2]
def project_velo_to_image(self, pts_3d_velo):
''' Input: nx3 points in velodyne coord.
Output: nx2 points in image2 coord.
'''
pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
return self.project_rect_to_image(pts_3d_rect)
# ===========================
# ------- 2d to 3d ----------
# ===========================
def project_image_to_rect(self, uv_depth):
''' Input: nx3 first two channels are uv, 3rd channel
is depth in rect camera coord.
Output: nx3 points in rect camera coord.
'''
n = uv_depth.shape[0]
x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
pts_3d_rect = np.zeros((n,3))
pts_3d_rect[:,0] = x
pts_3d_rect[:,1] = y
pts_3d_rect[:,2] = uv_depth[:,2]
return pts_3d_rect
def project_image_to_velo(self, uv_depth):
pts_3d_rect = self.project_image_to_rect(uv_depth)
return self.project_rect_to_velo(pts_3d_rect)
def inverse_rigid_trans(Tr):
    ''' Invert a rigid-body transform matrix (3x4 as [R|t]);
        returns the 3x4 [R'|-R't] (top rows of the 4x4 inverse [R'|-R't; 0|1])
    '''
inv_Tr = np.zeros_like(Tr) # 3x4
inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3])
inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3])
return inv_Tr
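# Quick check (illustrative, not part of the original file): composing [R|t]
# with its inverse must cancel both the rotation and the translation.
# >>> R = np.array([[0., -1., 0.], [1., 0., 0.], [0., 0., 1.]])  # 90 deg about z
# >>> t = np.array([1., 2., 3.])
# >>> inv_Tr = inverse_rigid_trans(np.hstack([R, t.reshape(3, 1)]))
# >>> np.allclose(np.dot(R, inv_Tr[:, :3]), np.eye(3))
# True
# >>> np.allclose(np.dot(R, inv_Tr[:, 3]) + t, np.zeros(3))
# True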
def read_label(label_filename):
lines = [line.rstrip() for line in open(label_filename)]
# if mask: objects = [Object3d_Mask(line) for line in lines]
objects = [Object3d(line) for line in lines]
return objects
def compute_box_3d(obj, P):
''' Takes an object and a projection matrix (P) and projects the 3d
bounding box into the image plane.
Returns:
corners_2d: (8,2) array in left image coord.
        corners_3d: (8,3) array in rect camera coord.
'''
# compute rotational matrix around yaw axis
R = roty(obj.ry)
# 3d bounding box dimensions
    l = obj.l
    w = obj.w
    h = obj.h
# 3d bounding box corners
    x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]
# rotate and translate 3d bounding box
corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
    corners_3d[0,:] = corners_3d[0,:] + obj.t[0]
    corners_3d[1,:] = corners_3d[1,:] + obj.t[1]
    corners_3d[2,:] = corners_3d[2,:] + obj.t[2]
# TODO: bugs when the point is behind the camera, the 2D coordinate is wrong
# only draw 3d bounding box for objs in front of the camera
if np.any(corners_3d[2,:]<0.1):
corners_2d = None
return corners_2d, np.transpose(corners_3d)
    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
return corners_2d, np.transpose(corners_3d)
def project_to_image(pts_3d, P):
''' Project 3d points to image plane.
        Usage: pts_2d = project_to_image(pts_3d, P)
input: pts_3d: nx3 matrix
P: 3x4 projection matrix
output: pts_2d: nx2 matrix
P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
=> normalize projected_pts_2d(2xn)
<=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
=> normalize projected_pts_2d(nx2)
'''
n = pts_3d.shape[0]
pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))
# print(('pts_3d_extend shape: ', pts_3d_extend.shape))
pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3
pts_2d[:,0] /= pts_2d[:,2]
pts_2d[:,1] /= pts_2d[:,2]
return pts_2d[:,0:2]
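# Example (illustrative, not part of the original file; the intrinsics are
# made-up values in the style of KITTI's P2): a point on the optical axis
# projects to the principal point (c_u, c_v).
# >>> P = np.array([[721.5, 0., 609.6, 0.], [0., 721.5, 172.9, 0.], [0., 0., 1., 0.]])
# >>> project_to_image(np.array([[0., 0., 10.]]), P)
# array([[609.6, 172.9]])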
def draw_projected_box3d(image, qs, color=(255,255,255), thickness=4):
''' Draw 3d bounding box in image
qs: (8,2) array of vertices for the 3d box in following order:
1 -------- 0
/| /|
2 -------- 3 .
| | | |
. 5 -------- 4
|/ |/
6 -------- 7
'''
if qs is not None:
qs = qs.astype(np.int32)
for k in range(0,4):
i,j=k,(k+1)%4
image = cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness) # use LINE_AA for opencv3
i,j=k+4,(k+1)%4 + 4
image = cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness)
i,j=k,k+4
image = cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness)
return image
def roty(t):
''' Rotation about the y-axis. '''
c = np.cos(t)
s = np.sin(t)
return np.array([[c, 0, s],
[0, 1, 0],
[-s, 0, c]])
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import numpy as np
# from sklearn.utils.linear_assignment_ import linear_assignment # deprecated
from scipy.optimize import linear_sum_assignment
from AB3DMOT_libs.bbox_utils import convert_3dbox_to_8corner, iou3d
from AB3DMOT_libs.kalman_filter import KalmanBoxTracker
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.01):
"""
    Assigns detections to tracked objects (both represented as 8-corner 3D bounding boxes)
    detections: N x 8 x 3
    trackers: M x 8 x 3
    Returns 3 lists: matches, unmatched_detections and unmatched_trackers
"""
if (len(trackers)==0):
return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 8, 3), dtype=int)
iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)
for d, det in enumerate(detections):
for t, trk in enumerate(trackers):
iou_matrix[d, t] = iou3d(det, trk)[0] # det: 8 x 3, trk: 8 x 3
    # matched_indices = linear_assignment(-iou_matrix)  # Hungarian algorithm, via the deprecated sklearn.utils.linear_assignment_
    row_ind, col_ind = linear_sum_assignment(-iou_matrix)  # Hungarian algorithm
matched_indices = np.stack((row_ind, col_ind), axis=1)
unmatched_detections = []
for d, det in enumerate(detections):
if (d not in matched_indices[:, 0]): unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if (t not in matched_indices[:, 1]): unmatched_trackers.append(t)
    # filter out matches with low IoU
matches = []
for m in matched_indices:
if (iou_matrix[m[0], m[1]] < iou_threshold):
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else: matches.append(m.reshape(1, 2))
if (len(matches) == 0):
matches = np.empty((0, 2),dtype=int)
else: matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
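# Usage sketch (illustrative, not part of the original file): a single
# detection and a single predicted track at nearly the same pose should be
# matched by the Hungarian step.
# >>> box = np.array([0.0, 1.5, 10.0, 0.0, 4.0, 2.0, 1.5])   # [x,y,z,theta,l,w,h]
# >>> dets = np.stack([convert_3dbox_to_8corner(box)])        # 1 x 8 x 3
# >>> trks = np.stack([convert_3dbox_to_8corner(box + 0.1)])  # 1 x 8 x 3
# >>> associate_detections_to_trackers(dets, trks)[0]
# array([[0, 0]])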
class AB3DMOT(object): # A baseline of 3D multi-object tracking
    def __init__(self, max_age=2, min_hits=3):  # max_age keeps a track alive for up to 2 frames without a matched detection, effectively interpolating through short misses
"""
Sets key parameters for SORT
"""
self.max_age = max_age
self.min_hits = min_hits
self.trackers = []
self.frame_count = 0
self.reorder = [3, 4, 5, 6, 2, 1, 0]
self.reorder_back = [6, 5, 4, 0, 1, 2, 3]
def update(self, dets_all):
"""
Params:
dets_all: dict
dets - a numpy array of detections in the format [[h,w,l,x,y,z,theta],...]
info: a array of other info for each det
Requires: this method must be called once for each frame even with empty detections.
Returns the a similar array, where the last column is the object ID.
NOTE: The number of objects returned may differ from the number of detections provided.
"""
dets, info = dets_all['dets'], dets_all['info'] # dets: N x 7, float numpy array
# reorder the data to put x,y,z in front to be compatible with the state transition matrix
# where the constant velocity model is defined in the first three rows of the matrix
dets = dets[:, self.reorder] # reorder the data to [[x,y,z,theta,l,w,h], ...]
self.frame_count += 1
        trks = np.zeros((len(self.trackers), 7))  # M x 7: predicted locations from existing trackers
to_del = []
ret = []
for t, trk in enumerate(trks):
pos = self.trackers[t].predict().reshape((-1, 1))
trk[:] = [pos[0], pos[1], pos[2], pos[3], pos[4], pos[5], pos[6]]
if (np.any(np.isnan(pos))):
to_del.append(t)
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
for t in reversed(to_del):
self.trackers.pop(t)
dets_8corner = [convert_3dbox_to_8corner(det_tmp) for det_tmp in dets]
if len(dets_8corner) > 0: dets_8corner = np.stack(dets_8corner, axis=0)
else: dets_8corner = []
trks_8corner = [convert_3dbox_to_8corner(trk_tmp) for trk_tmp in trks]
if len(trks_8corner) > 0: trks_8corner = np.stack(trks_8corner, axis=0)
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets_8corner, trks_8corner)
# update matched trackers with assigned detections
for t, trk in enumerate(self.trackers):
if t not in unmatched_trks:
                d = matched[np.where(matched[:, 1] == t)[0], 0]  # index of the matched detection
trk.update(dets[d, :][0], info[d, :][0])
# create and initialise new trackers for unmatched detections
        for i in unmatched_dets:  # index of an unmatched detection
trk = KalmanBoxTracker(dets[i, :], info[i, :])
self.trackers.append(trk)
i = len(self.trackers)
for trk in reversed(self.trackers):
d = trk.get_state() # bbox location
d = d[self.reorder_back] # change format from [x,y,z,theta,l,w,h] to [h,w,l,x,y,z,theta]
if ((trk.time_since_update < self.max_age) and (trk.hits >= self.min_hits or self.frame_count <= self.min_hits)):
ret.append(np.concatenate((d, [trk.id + 1], trk.info)).reshape(1, -1)) # +1 as MOT benchmark requires positive
i -= 1
# remove dead tracklet
if (trk.time_since_update >= self.max_age):
self.trackers.pop(i)
if (len(ret) > 0): return np.concatenate(ret) # h,w,l,x,y,z,theta, ID, other info, confidence
return np.empty((0, 15))
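if __name__ == '__main__':
    # End-to-end sketch (illustrative, not part of the original commit): feed
    # the same single detection for two frames; with min_hits=1 the track is
    # reported immediately and keeps the same ID across frames.
    mot = AB3DMOT(max_age=2, min_hits=1)
    det = np.array([[1.5, 2.0, 4.0, 0.0, 1.5, 10.0, 0.0]])   # [h,w,l,x,y,z,theta]
    info = np.zeros((1, 1))
    for frame in range(2):
        results = mot.update({'dets': det.copy(), 'info': info})
        print(frame, results)   # rows are h,w,l,x,y,z,theta, ID, info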
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import yaml
from easydict import EasyDict as edict
def Config(filename):
    with open(filename, 'r') as listfile:
        cfg = edict(yaml.safe_load(listfile))
    with open(filename, 'r') as listfile:
        settings_show = listfile.read().splitlines()
    return cfg, settings_show
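# Usage sketch (illustrative; 'config.yml' is a hypothetical file containing,
# e.g., the line "max_age: 2"):
# >>> cfg, settings_show = Config('config.yml')
# >>> cfg.max_age
# 2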