Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
J
jfxmap_python
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
oscar
jfxmap_python
Commits
90d534a6
Commit
90d534a6
authored
Jan 19, 2022
by
oscar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提交AB3D的iou
parent
6f002650
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
753 additions
and
0 deletions
+753
-0
__init__.py
script/AB3DMOT_libs/__init__.py
+0
-0
bbox_utils.py
script/AB3DMOT_libs/bbox_utils.py
+154
-0
kalman_filter.py
script/AB3DMOT_libs/kalman_filter.py
+138
-0
kitti_utils.py
script/AB3DMOT_libs/kitti_utils.py
+319
-0
model.py
script/AB3DMOT_libs/model.py
+125
-0
utils.py
script/AB3DMOT_libs/utils.py
+17
-0
No files found.
script/AB3DMOT_libs/__init__.py
0 → 100644
View file @
90d534a6
script/AB3DMOT_libs/bbox_utils.py
0 → 100644
View file @
90d534a6
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import copy

import numpy as np
from numba import jit
from scipy.spatial import ConvexHull, QhullError
@jit
def poly_area(x, y):
    """Area of a 2D polygon from its vertex coordinates (shoelace formula).

    x, y: arrays of vertex coordinates, ordered along the polygon boundary.
    Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    """
    shoelace_sum = np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(shoelace_sum)
@jit
def box3d_vol(corners):
    """Volume of a 3D box given as an (8, 3) corner array; no assumption on axis direction.

    The three edge lengths are measured between corner pairs (0,1), (1,2)
    and (0,4), which span the box's three dimensions in this corner ordering.
    """
    edge_a = np.sqrt(np.sum((corners[0, :] - corners[1, :]) ** 2))
    edge_b = np.sqrt(np.sum((corners[1, :] - corners[2, :]) ** 2))
    edge_c = np.sqrt(np.sum((corners[0, :] - corners[4, :]) ** 2))
    return edge_a * edge_b * edge_c
@jit
def convex_hull_intersection(p1, p2):
    """Compute the intersection of two convex polygons and its area.

    p1, p2: lists of (x, y) tuples of hull vertices.
    Returns (points, area): the intersection vertices and the area of their
    convex hull, or (None, 0.0) when the polygons do not overlap or the
    intersection is degenerate.
    """
    inter_p = polygon_clip(p1, p2)
    if inter_p is None:
        return None, 0.0
    try:
        hull_inter = ConvexHull(inter_p)
    except (QhullError, ValueError):
        # Degenerate intersection (fewer than 3 distinct points, or all
        # points collinear): qhull cannot build a 2D hull, so the overlap
        # area is effectively zero. Without this guard the whole IoU
        # computation crashes on touching/abutting boxes.
        return None, 0.0
    # For a 2D input, ConvexHull.volume is the enclosed area.
    return inter_p, hull_inter.volume
def polygon_clip(subjectPolygon, clipPolygon):
    """ Clip a polygon with another polygon (Sutherland-Hodgman).
    Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python

    Args:
        subjectPolygon: a list of (x,y) 2d points, any polygon.
        clipPolygon: a list of (x,y) 2d points, has to be *convex*
    Note:
        **points have to be counter-clockwise ordered**

    Return:
        a list of (x,y) vertex point for the intersection polygon,
        or None if the subject polygon is clipped away entirely.
    """
    def inside(p):
        # True if p lies to the left of the directed clip edge cp1 -> cp2
        # (sign of the 2D cross product); for a counter-clockwise clip
        # polygon "left of the edge" means inside.
        return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])

    def computeIntersection():
        # Intersection of line (s, e) with the clip edge (cp1, cp2),
        # computed via homogeneous line coordinates.
        # NOTE(review): divides by the cross product of the two directions —
        # parallel segments would raise ZeroDivisionError; callers only reach
        # this when s and e straddle the edge, so the lines cannot be parallel.
        dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
        dp = [s[0] - e[0], s[1] - e[1]]
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        n2 = s[0] * e[1] - s[1] * e[0]
        n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
        return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]

    outputList = subjectPolygon
    cp1 = clipPolygon[-1]  # previous clip vertex; edges wrap around

    for clipVertex in clipPolygon:
        cp2 = clipVertex
        # Re-clip the polygon produced by the previous edge against this edge.
        inputList = outputList
        outputList = []
        s = inputList[-1]  # previous subject vertex; edges wrap around

        for subjectVertex in inputList:
            e = subjectVertex
            if inside(e):
                if not inside(s):
                    # edge s->e enters the clip half-plane: keep crossing point
                    outputList.append(computeIntersection())
                outputList.append(e)
            elif inside(s):
                # edge s->e leaves the clip half-plane: keep crossing point only
                outputList.append(computeIntersection())
            s = e
        cp1 = cp2
        if len(outputList) == 0:
            # everything clipped away: no intersection
            return None
    return (outputList)
def iou3d(corners1, corners2):
    ''' Compute 3D bounding box IoU, only working for object parallel to ground

    Input:
        corners1: numpy array (8,3), assume up direction is negative Y
        corners2: numpy array (8,3), assume up direction is negative Y
    Output:
        iou: 3D bounding box IoU
        iou_2d: bird's eye view 2D bounding box IoU
    '''
    # Bird's-eye-view footprints: (x, z) of corners 3..0 — reversing the
    # index order makes the footprint counter-clockwise in the x-z plane,
    # as required by poly_area / polygon_clip.
    rect1 = [(corners1[i, 0], corners1[i, 2]) for i in range(3, -1, -1)]
    rect2 = [(corners2[i, 0], corners2[i, 2]) for i in range(3, -1, -1)]
    footprint1 = np.array(rect1)
    footprint2 = np.array(rect2)
    area1 = poly_area(footprint1[:, 0], footprint1[:, 1])
    area2 = poly_area(footprint2[:, 0], footprint2[:, 1])
    _, inter_area = convex_hull_intersection(rect1, rect2)
    iou_2d = inter_area / (area1 + area2 - inter_area)
    # Vertical overlap along Y: corner 0 lies on one horizontal face and
    # corner 4 on the other; clamp at zero when the boxes do not overlap.
    ymax = min(corners1[0, 1], corners2[0, 1])
    ymin = max(corners1[4, 1], corners2[4, 1])
    overlap_height = max(0.0, ymax - ymin)
    inter_vol = inter_area * overlap_height
    vol1 = box3d_vol(corners1)
    vol2 = box3d_vol(corners2)
    iou = inter_vol / (vol1 + vol2 - inter_vol)
    return iou, iou_2d
@jit
def roty(t):
    """Return the 3x3 rotation matrix for angle t about the y-axis."""
    cos_t = np.cos(t)
    sin_t = np.sin(t)
    return np.array([[cos_t, 0, sin_t],
                     [0, 1, 0],
                     [-sin_t, 0, cos_t]])
def convert_3dbox_to_8corner(bbox3d_input):
    ''' Takes an object's 3D box with the representation of [x,y,z,theta,l,w,h] and
        convert it to the 8 corners of the 3D box

        Returns:
            corners_3d: (8,3) array in in rect camera coord
    '''
    # Work on a copy so the caller's box is not mutated.
    box = copy.copy(bbox3d_input)

    # Rotation around the yaw axis.
    rot = roty(box[3])

    # Box dimensions.
    length = box[4]
    width = box[5]
    height = box[6]
    half_l = length / 2
    half_w = width / 2

    # Corners in the box frame: indices 0-3 on the y=0 face, 4-7 on y=-height.
    x_corners = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    y_corners = [0, 0, 0, 0, -height, -height, -height, -height]
    z_corners = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]

    # Rotate, then translate each axis by the box center.
    corners = np.dot(rot, np.vstack([x_corners, y_corners, z_corners]))
    for axis in range(3):
        corners[axis, :] = corners[axis, :] + box[axis]

    return np.transpose(corners)
\ No newline at end of file
script/AB3DMOT_libs/kalman_filter.py
0 → 100644
View file @
90d534a6
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import
numpy
as
np
from
filterpy.kalman
import
KalmanFilter
class KalmanBoxTracker(object):
    """
    This class represents the internel state of individual tracked objects observed as bbox.

    State vector (10-d): [x, y, z, theta, l, w, h, dx, dy, dz], a constant
    velocity model on the position; the measurement is the 7-d box
    [x, y, z, theta, l, w, h].
    """
    # class-wide counter used to hand out unique tracker ids
    count = 0

    def __init__(self, bbox3D, info):
        """
        Initialises a tracker using initial bounding box.

        bbox3D: 7-element array [x, y, z, theta, l, w, h].
        info:   arbitrary per-detection metadata carried along with the track.
        """
        # define constant velocity model
        self.kf = KalmanFilter(dim_x=10, dim_z=7)
        self.kf.F = np.array([[1, 0, 0, 0, 0, 0, 0, 1, 0, 0],   # state transition matrix
                              [0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
                              [0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
                              [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                              [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                              [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                              [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                              [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                              [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])
        self.kf.H = np.array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],   # measurement function,
                              [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                              [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                              [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                              [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
        # NOTE: an alternative 11-state model with angular velocity
        # (dim_x=11, dim_z=7) existed here as commented-out code; removed.

        # state uncertainty, give high uncertainty to the unobservable
        # initial velocities, covariance matrix
        self.kf.P[7:, 7:] *= 1000.
        self.kf.P *= 10.

        # process uncertainty, kept small for the velocity terms
        self.kf.Q[7:, 7:] *= 0.01
        self.kf.x[:7] = bbox3D.reshape((7, 1))

        self.time_since_update = 0      # frames since the last matched detection
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []               # predicted states since the last update
        self.hits = 1                   # number of total hits including the first detection
        self.hit_streak = 1             # number of continuing hit considering the first detection
        self.first_continuing_hit = 1   # length of the uninterrupted run of hits from birth
        self.still_first = True         # True until the track misses for the first time
        self.age = 0                    # number of predict() calls so far
        self.info = info                # other info associated

    def update(self, bbox3D, info):
        """
        Updates the state vector with observed bbox.

        bbox3D: 7-element array [x, y, z, theta, l, w, h] (theta is
                normalized in place below).
        info:   metadata of the matched detection; replaces self.info.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1            # number of continuing hit
        if self.still_first:
            self.first_continuing_hit += 1   # number of continuing hit in the fist time

        ######################### orientation correction
        # normalize the predicted theta into [-pi, pi)
        if self.kf.x[3] >= np.pi:
            self.kf.x[3] -= np.pi * 2   # make the theta still in the range
        if self.kf.x[3] < -np.pi:
            self.kf.x[3] += np.pi * 2

        # normalize the measured theta into [-pi, pi)
        new_theta = bbox3D[3]
        if new_theta >= np.pi:
            new_theta -= np.pi * 2      # make the theta still in the range
        if new_theta < -np.pi:
            new_theta += np.pi * 2
        bbox3D[3] = new_theta

        predicted_theta = self.kf.x[3]
        # if the angle between the two thetas is not acute, flip the
        # prediction by pi so the filter does not interpolate across a
        # 180-degree ambiguity in box orientation
        if abs(new_theta - predicted_theta) > np.pi / 2.0 and abs(new_theta - predicted_theta) < np.pi * 3 / 2.0:
            self.kf.x[3] += np.pi
            if self.kf.x[3] > np.pi:
                self.kf.x[3] -= np.pi * 2   # make the theta still in the range
            if self.kf.x[3] < -np.pi:
                self.kf.x[3] += np.pi * 2

        # now the angle is acute: < 90 or > 270, convert the case of > 270 to < 90
        if abs(new_theta - self.kf.x[3]) >= np.pi * 3 / 2.0:
            if new_theta > 0:
                self.kf.x[3] += np.pi * 2
            else:
                self.kf.x[3] -= np.pi * 2
        #########################     # flip

        self.kf.update(bbox3D)

        # re-normalize theta after the Kalman update
        if self.kf.x[3] >= np.pi:
            self.kf.x[3] -= np.pi * 2   # make the theta still in the rage
        if self.kf.x[3] < -np.pi:
            self.kf.x[3] += np.pi * 2
        self.info = info

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        self.kf.predict()
        # keep theta in [-pi, pi) after propagation
        if self.kf.x[3] >= np.pi:
            self.kf.x[3] -= np.pi * 2
        if self.kf.x[3] < -np.pi:
            self.kf.x[3] += np.pi * 2

        self.age += 1
        if (self.time_since_update > 0):
            # a miss breaks the hit streak and ends the "first run" phase
            self.hit_streak = 0
            self.still_first = False
        self.time_since_update += 1
        self.history.append(self.kf.x)
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate: [x, y, z, theta, l, w, h].
        """
        return self.kf.x[:7].reshape((7, ))
\ No newline at end of file
script/AB3DMOT_libs/kitti_utils.py
0 → 100644
View file @
90d534a6
import
numpy
as
np
,
cv2
,
os
class Object3d(object):
    ''' 3d object label (one line of a KITTI-format label file). '''

    def __init__(self, label_file_line):
        """Parse one whitespace-separated KITTI label line into typed fields."""
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]

        # extract label, truncation, occlusion
        self.type = data[0]             # 'Car', 'Pedestrian', ...
        self.truncation = data[1]       # truncated pixel ratio [0..1]
        self.occlusion = int(data[2])   # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3]            # object observation angle [-pi..pi]

        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4]             # left
        self.ymin = data[5]             # top
        self.xmax = data[6]             # right
        self.ymax = data[7]             # bottom
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])

        # extract 3d bounding box information
        self.h = data[8]                # box height
        self.w = data[9]                # box width
        self.l = data[10]               # box length (in meters)
        self.t = (data[11], data[12], data[13])   # location (x,y,z) in camera coord.
        self.ry = data[14]              # yaw angle (around Y-axis in camera coordinates) [-pi..pi]

        # optional trailing fields: detection score, then track id
        if len(data) > 15:
            self.score = float(data[15])
        if len(data) > 16:
            self.id = int(data[16])

    def print_object(self):
        """Print all parsed fields in a human-readable form (for debugging)."""
        print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' %
              (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' %
              (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' %
              (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' %
              (self.t[0], self.t[1], self.t[2], self.ry))

    def convert_to_str(self):
        """Serialize back to a KITTI label line (without the optional score/id)."""
        return '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' % \
            (self.type, self.truncation, self.occlusion, self.alpha,
             self.xmin, self.ymin, self.xmax, self.ymax,
             self.h, self.w, self.l, self.t[0], self.t[1], self.t[2], self.ry)
class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf

        TODO(rqi): do matrix multiplication only once for each projection.
    '''
    def __init__(self, calib_filepath, from_video=False):
        # from_video=True reads the two per-sequence calib files instead of
        # the single per-frame calibration file
        if from_video:
            calibs = self.read_calib_from_video(calib_filepath)
        else:
            calibs = self.read_calib_file(calib_filepath)
        # Projection matrix from rect camera coord to image2 coord
        self.P = calibs['P2']
        self.P = np.reshape(self.P, [3, 4])
        # Rigid transform from Velodyne coord to reference camera coord
        self.V2C = calibs['Tr_velo_to_cam']
        self.V2C = np.reshape(self.V2C, [3, 4])
        self.C2V = inverse_rigid_trans(self.V2C)
        # Rotation from reference camera coord to rect camera coord
        self.R0 = calibs['R0_rect']
        self.R0 = np.reshape(self.R0, [3, 3])

        # Camera intrinsics and extrinsics (from P2)
        self.c_u = self.P[0, 2]
        self.c_v = self.P[1, 2]
        self.f_u = self.P[0, 0]
        self.f_v = self.P[1, 1]
        self.b_x = self.P[0, 3] / (-self.f_u)   # relative baseline terms
        self.b_y = self.P[1, 3] / (-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f.readlines():
                line = line.rstrip()
                if len(line) == 0:
                    continue
                key, value = line.split(':', 1)
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass
        return data

    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        # assemble the 3x4 [R|T] velo->cam transform, then flatten to the
        # 12-element layout read_calib_file would have produced
        Tr_velo_to_cam = np.zeros((3, 4))
        Tr_velo_to_cam[0:3, 0:3] = np.reshape(velo2cam['R'], [3, 3])
        Tr_velo_to_cam[:, 3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Oupput: nx4 points in Homogeneous by pending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n, 1))))
        return pts_3d_hom

    # ===========================
    # ------- 3d to 3d ----------
    # ===========================
    def project_velo_to_ref(self, pts_3d_velo):
        # nx3 velodyne points -> nx3 reference-camera points
        pts_3d_velo = self.cart2hom(pts_3d_velo)   # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        # nx3 reference-camera points -> nx3 velodyne points
        pts_3d_ref = self.cart2hom(pts_3d_ref)     # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        '''
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        # nx3 velodyne points -> nx3 rect-camera points (via the ref frame)
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # ===========================
    # ------- 3d to 2d ----------
    # ===========================
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P))   # nx3
        # perspective divide by depth
        pts_2d[:, 0] /= pts_2d[:, 2]
        pts_2d[:, 1] /= pts_2d[:, 2]
        return pts_2d[:, 0:2]

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # ===========================
    # ------- 2d to 3d ----------
    # ===========================
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        # invert the pinhole projection, per axis
        x = ((uv_depth[:, 0] - self.c_u) * uv_depth[:, 2]) / self.f_u + self.b_x
        y = ((uv_depth[:, 1] - self.c_v) * uv_depth[:, 2]) / self.f_v + self.b_y
        pts_3d_rect = np.zeros((n, 3))
        pts_3d_rect[:, 0] = x
        pts_3d_rect[:, 1] = y
        pts_3d_rect[:, 2] = uv_depth[:, 2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        # (u, v, depth) pixels -> nx3 velodyne points
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)
def inverse_rigid_trans(Tr):
    ''' Inverse a rigid body transform matrix (3x4 as [R|t])
        [R'|-R't; 0|1]
    '''
    rot_transposed = np.transpose(Tr[0:3, 0:3])
    inv_Tr = np.zeros_like(Tr)   # 3x4
    inv_Tr[0:3, 0:3] = rot_transposed
    # translation of the inverse transform: -R' t
    inv_Tr[0:3, 3] = np.dot(-rot_transposed, Tr[0:3, 3])
    return inv_Tr
def read_label(label_filename):
    """Parse a KITTI label file into a list of Object3d, one per line.

    label_filename: path to a whitespace-separated KITTI label/tracking file.
    Returns a list of Object3d instances in file order.
    """
    # Use a context manager: the original opened the file without ever
    # closing it, leaking the file handle.
    with open(label_filename) as f:
        lines = [line.rstrip() for line in f]
    objects = [Object3d(line) for line in lines]
    return objects
def compute_box_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        bounding box into the image plane.
        Returns:
            corners_2d: (8,2) array in left image coord. (None when any
                        corner is behind / too close to the camera)
            corners_3d: (8,3) array in in rect camera coord.
    '''
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)

    # 3d bounding box dimensions
    l = obj.l
    w = obj.w
    h = obj.h

    # 3d bounding box corners in the object frame: indices 0-3 lie on the
    # y=0 face and 4-7 on the y=-h face (camera y points down, so -h is up)
    x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]

    # rotate and translate 3d bounding box
    corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners]))
    corners_3d[0, :] = corners_3d[0, :] + obj.t[0]
    corners_3d[1, :] = corners_3d[1, :] + obj.t[1]
    corners_3d[2, :] = corners_3d[2, :] + obj.t[2]

    # TODO: bugs when the point is behind the camera, the 2D coordinate is wrong
    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2, :] < 0.1):
        corners_2d = None
        return corners_2d, np.transpose(corners_3d)

    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P)
    return corners_2d, np.transpose(corners_3d)
def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.

        Usage: pts_2d = projectToImage(pts_3d, P)
          input: pts_3d: nx3 matrix
                 P:      3x4 projection matrix
          output: pts_2d: nx2 matrix

          P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
          => normalize projected_pts_2d(2xn)
    '''
    num_pts = pts_3d.shape[0]
    # append a homogeneous 1 to every point: nx3 -> nx4
    pts_homo = np.hstack((pts_3d, np.ones((num_pts, 1))))
    projected = np.dot(pts_homo, np.transpose(P))   # nx3
    # perspective divide by the depth column
    projected[:, 0] = projected[:, 0] / projected[:, 2]
    projected[:, 1] = projected[:, 1] / projected[:, 2]
    return projected[:, 0:2]
def draw_projected_box3d(image, qs, color=(255, 255, 255), thickness=4):
    ''' Draw 3d bounding box in image
        qs: (8,2) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
        Returns the image (unchanged when qs is None).
    '''
    if qs is None:
        return image
    qs = qs.astype(np.int32)
    for k in range(0, 4):
        # top face edge k -> (k+1)%4
        a, b = k, (k + 1) % 4
        image = cv2.line(image, (qs[a, 0], qs[a, 1]), (qs[b, 0], qs[b, 1]),
                         color, thickness)   # use LINE_AA for opencv3
        # bottom face edge k+4 -> (k+1)%4+4
        a, b = k + 4, (k + 1) % 4 + 4
        image = cv2.line(image, (qs[a, 0], qs[a, 1]), (qs[b, 0], qs[b, 1]),
                         color, thickness)
        # vertical edge k -> k+4
        a, b = k, k + 4
        image = cv2.line(image, (qs[a, 0], qs[a, 1]), (qs[b, 0], qs[b, 1]),
                         color, thickness)
    return image
def roty(t):
    """Return the 3x3 rotation matrix for angle t about the y-axis."""
    cos_t = np.cos(t)
    sin_t = np.sin(t)
    return np.array([[cos_t, 0, sin_t],
                     [0, 1, 0],
                     [-sin_t, 0, cos_t]])
script/AB3DMOT_libs/model.py
0 → 100644
View file @
90d534a6
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import
numpy
as
np
# from sklearn.utils.linear_assignment_ import linear_assignment # deprecated
from
scipy.optimize
import
linear_sum_assignment
from
AB3DMOT_libs.bbox_utils
import
convert_3dbox_to_8corner
,
iou3d
from
AB3DMOT_libs.kalman_filter
import
KalmanBoxTracker
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.01):
    """
    Assigns detections to tracked object (both represented as bounding boxes)

    detections:  N x 8 x 3 corner arrays
    trackers:    M x 8 x 3 corner arrays

    Returns 3 lists of matches, unmatched_detections and unmatched_trackers:
    matches is a K x 2 int array of [detection_index, tracker_index] pairs.
    """
    if len(trackers) == 0:
        # nothing to match against: every detection is unmatched
        return (np.empty((0, 2), dtype=int),
                np.arange(len(detections)),
                np.empty((0, 8, 3), dtype=int))

    # Pairwise 3D IoU between every detection and every tracker.
    iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)
    for det_idx, det in enumerate(detections):        # det: 8 x 3
        for trk_idx, trk in enumerate(trackers):      # trk: 8 x 3
            iou_matrix[det_idx, trk_idx] = iou3d(det, trk)[0]

    # Hungarian algorithm on negated IoU (maximizes total IoU).
    row_ind, col_ind = linear_sum_assignment(-iou_matrix)
    matched_indices = np.stack((row_ind, col_ind), axis=1)

    unmatched_detections = [d for d in range(len(detections))
                            if d not in matched_indices[:, 0]]
    unmatched_trackers = [t for t in range(len(trackers))
                          if t not in matched_indices[:, 1]]

    # filter out matched with low IOU
    matches = []
    for pair in matched_indices:
        if iou_matrix[pair[0], pair[1]] < iou_threshold:
            unmatched_detections.append(pair[0])
            unmatched_trackers.append(pair[1])
        else:
            matches.append(pair.reshape(1, 2))
    if matches:
        matches = np.concatenate(matches, axis=0)
    else:
        matches = np.empty((0, 2), dtype=int)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
class AB3DMOT(object):
    # A baseline of 3D multi-object tracking
    def __init__(self, max_age=2, min_hits=3):
        # max age will preserve the bbox does not appear no more than 2 frames, interpolate the detection
        """
        Sets key parameters for SORT

        max_age:  a track is dropped after this many consecutive frames
                  without a matched detection
        min_hits: a track is only reported once it has at least this many
                  hits (except during the first min_hits frames)
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []       # live KalmanBoxTracker instances
        self.frame_count = 0
        # permutation between the input [h,w,l,x,y,z,theta] layout and the
        # internal [x,y,z,theta,l,w,h] state layout, and its inverse
        self.reorder = [3, 4, 5, 6, 2, 1, 0]
        self.reorder_back = [6, 5, 4, 0, 1, 2, 3]

    def update(self, dets_all):
        """
        Params:
          dets_all: dict
            dets - a numpy array of detections in the format [[h,w,l,x,y,z,theta],...]
            info: a array of other info for each det
        Requires: this method must be called once for each frame even with empty detections.
        Returns the a similar array, where the last column is the object ID.

        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        dets, info = dets_all['dets'], dets_all['info']   # dets: N x 7, float numpy array
        # reorder the data to put x,y,z in front to be compatible with the state transition matrix
        # where the constant velocity model is defined in the first three rows of the matrix
        dets = dets[:, self.reorder]   # reorder the data to [[x,y,z,theta,l,w,h], ...]
        self.frame_count += 1

        # get predicted locations from existing trackers (N x 7)
        trks = np.zeros((len(self.trackers), 7))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict().reshape((-1, 1))
            trk[:] = [pos[0], pos[1], pos[2], pos[3], pos[4], pos[5], pos[6]]
            if (np.any(np.isnan(pos))):
                # mark trackers whose prediction went invalid
                to_del.append(t)
        # drop invalid rows; pop in reverse so indices stay valid
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        # convert both sets of boxes to 8-corner form for IoU association
        dets_8corner = [convert_3dbox_to_8corner(det_tmp) for det_tmp in dets]
        if len(dets_8corner) > 0:
            dets_8corner = np.stack(dets_8corner, axis=0)
        else:
            dets_8corner = []
        trks_8corner = [convert_3dbox_to_8corner(trk_tmp) for trk_tmp in trks]
        if len(trks_8corner) > 0:
            trks_8corner = np.stack(trks_8corner, axis=0)
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets_8corner, trks_8corner)

        # update matched trackers with assigned detections
        for t, trk in enumerate(self.trackers):
            if t not in unmatched_trks:
                d = matched[np.where(matched[:, 1] == t)[0], 0]   # a list of index
                trk.update(dets[d, :][0], info[d, :][0])

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:   # a scalar of index
            trk = KalmanBoxTracker(dets[i, :], info[i, :])
            self.trackers.append(trk)

        # emit confirmed tracks and prune dead ones; iterate from the back so
        # that pop(i) does not shift the indices of trackers not yet visited
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            d = trk.get_state()          # bbox location
            d = d[self.reorder_back]     # change format from [x,y,z,theta,l,w,h] to [h,w,l,x,y,z,theta]
            if ((trk.time_since_update < self.max_age) and (trk.hits >= self.min_hits or self.frame_count <= self.min_hits)):
                ret.append(np.concatenate((d, [trk.id + 1], trk.info)).reshape(1, -1))   # +1 as MOT benchmark requires positive
            i -= 1
            # remove dead tracklet
            if (trk.time_since_update >= self.max_age):
                self.trackers.pop(i)

        if (len(ret) > 0):
            return np.concatenate(ret)   # h,w,l,x,y,z,theta, ID, other info, confidence
        return np.empty((0, 15))
\ No newline at end of file
script/AB3DMOT_libs/utils.py
0 → 100644
View file @
90d534a6
# Author: Xinshuo Weng
# email: xinshuo.weng@gmail.com
import
yaml
from
easydict
import
EasyDict
as
edict
def Config(filename):
    """Load a YAML config file.

    filename: path to a YAML configuration file.
    Returns (cfg, settings_show): cfg is an EasyDict of the parsed YAML;
    settings_show is the raw file split into lines (used for logging the
    configuration verbatim).
    """
    # Read the file once inside a context manager. The original opened the
    # same file twice and parsed one handle while reading the other —
    # parsing the content string yields the identical result with a single
    # read and guaranteed handle cleanup.
    with open(filename, 'r') as config_file:
        content = config_file.read()
    cfg = edict(yaml.safe_load(content))
    settings_show = content.splitlines()
    return cfg, settings_show
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment