Commit 20131189 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

new/improved Python samples by Alexander Mordvintsev

parent 2c2d6fa5
import numpy as np import numpy as np
import cv2 import cv2
import os import os
from contextlib import contextmanager from contextlib import contextmanager
import itertools as it import itertools as it
image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm']
def splitfn(fn): class Bunch(object):
path, fn = os.path.split(fn) def __init__(self, **kw):
name, ext = os.path.splitext(fn) self.__dict__.update(kw)
return path, name, ext def __str__(self):
return str(self.__dict__)
def anorm2(a):
return (a*a).sum(-1) def splitfn(fn):
def anorm(a): path, fn = os.path.split(fn)
return np.sqrt( anorm2(a) ) name, ext = os.path.splitext(fn)
return path, name, ext
def homotrans(H, x, y):
xs = H[0, 0]*x + H[0, 1]*y + H[0, 2] def anorm2(a):
ys = H[1, 0]*x + H[1, 1]*y + H[1, 2] return (a*a).sum(-1)
s = H[2, 0]*x + H[2, 1]*y + H[2, 2] def anorm(a):
return xs/s, ys/s return np.sqrt( anorm2(a) )
def to_rect(a): def homotrans(H, x, y):
a = np.ravel(a) xs = H[0, 0]*x + H[0, 1]*y + H[0, 2]
if len(a) == 2: ys = H[1, 0]*x + H[1, 1]*y + H[1, 2]
a = (0, 0, a[0], a[1]) s = H[2, 0]*x + H[2, 1]*y + H[2, 2]
return np.array(a, np.float64).reshape(2, 2) return xs/s, ys/s
def rect2rect_mtx(src, dst): def to_rect(a):
src, dst = to_rect(src), to_rect(dst) a = np.ravel(a)
cx, cy = (dst[1] - dst[0]) / (src[1] - src[0]) if len(a) == 2:
tx, ty = dst[0] - src[0] * (cx, cy) a = (0, 0, a[0], a[1])
M = np.float64([[ cx, 0, tx], return np.array(a, np.float64).reshape(2, 2)
[ 0, cy, ty],
[ 0, 0, 1]]) def rect2rect_mtx(src, dst):
return M src, dst = to_rect(src), to_rect(dst)
cx, cy = (dst[1] - dst[0]) / (src[1] - src[0])
tx, ty = dst[0] - src[0] * (cx, cy)
def lookat(eye, target, up = (0, 0, 1)): M = np.float64([[ cx, 0, tx],
fwd = np.asarray(target, np.float64) - eye [ 0, cy, ty],
fwd /= anorm(fwd) [ 0, 0, 1]])
right = np.cross(fwd, up) return M
right /= anorm(right)
down = np.cross(fwd, right)
R = np.float64([right, down, fwd]) def lookat(eye, target, up = (0, 0, 1)):
tvec = -np.dot(R, eye) fwd = np.asarray(target, np.float64) - eye
return R, tvec fwd /= anorm(fwd)
right = np.cross(fwd, up)
def mtx2rvec(R): right /= anorm(right)
w, u, vt = cv2.SVDecomp(R - np.eye(3)) down = np.cross(fwd, right)
p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0]) R = np.float64([right, down, fwd])
c = np.dot(vt[0], p) tvec = -np.dot(R, eye)
s = np.dot(vt[1], p) return R, tvec
axis = np.cross(vt[0], vt[1])
return axis * np.arctan2(s, c) def mtx2rvec(R):
w, u, vt = cv2.SVDecomp(R - np.eye(3))
def draw_str(dst, (x, y), s): p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0])
cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA) c = np.dot(vt[0], p)
cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA) s = np.dot(vt[1], p)
axis = np.cross(vt[0], vt[1])
class Sketcher: return axis * np.arctan2(s, c)
def __init__(self, windowname, dests, colors_func):
self.prev_pt = None def draw_str(dst, (x, y), s):
self.windowname = windowname cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA)
self.dests = dests cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA)
self.colors_func = colors_func
self.dirty = False class Sketcher:
self.show() def __init__(self, windowname, dests, colors_func):
cv2.setMouseCallback(self.windowname, self.on_mouse) self.prev_pt = None
self.windowname = windowname
def show(self): self.dests = dests
cv2.imshow(self.windowname, self.dests[0]) self.colors_func = colors_func
self.dirty = False
def on_mouse(self, event, x, y, flags, param): self.show()
pt = (x, y) cv2.setMouseCallback(self.windowname, self.on_mouse)
if event == cv2.EVENT_LBUTTONDOWN:
self.prev_pt = pt def show(self):
if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON: cv2.imshow(self.windowname, self.dests[0])
for dst, color in zip(self.dests, self.colors_func()):
cv2.line(dst, self.prev_pt, pt, color, 5) def on_mouse(self, event, x, y, flags, param):
self.dirty = True pt = (x, y)
self.prev_pt = pt if event == cv2.EVENT_LBUTTONDOWN:
self.show() self.prev_pt = pt
else: if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON:
self.prev_pt = None for dst, color in zip(self.dests, self.colors_func()):
cv2.line(dst, self.prev_pt, pt, color, 5)
self.dirty = True
# palette data from matplotlib/_cm.py self.prev_pt = pt
_jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1), self.show()
(1, 0.5, 0.5)), else:
'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1), self.prev_pt = None
(0.91,0,0), (1, 0, 0)),
'blue': ((0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65,0, 0),
(1, 0, 0))} # palette data from matplotlib/_cm.py
_jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1),
cmap_data = { 'jet' : _jet_data } (1, 0.5, 0.5)),
'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1),
def make_cmap(name, n=256): (0.91,0,0), (1, 0, 0)),
data = cmap_data[name] 'blue': ((0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65,0, 0),
xs = np.linspace(0.0, 1.0, n) (1, 0, 0))}
channels = []
eps = 1e-6 cmap_data = { 'jet' : _jet_data }
for ch_name in ['blue', 'green', 'red']:
ch_data = data[ch_name] def make_cmap(name, n=256):
xp, yp = [], [] data = cmap_data[name]
for x, y1, y2 in ch_data: xs = np.linspace(0.0, 1.0, n)
xp += [x, x+eps] channels = []
yp += [y1, y2] eps = 1e-6
ch = np.interp(xs, xp, yp) for ch_name in ['blue', 'green', 'red']:
channels.append(ch) ch_data = data[ch_name]
return np.uint8(np.array(channels).T*255) xp, yp = [], []
for x, y1, y2 in ch_data:
def nothing(*arg, **kw): xp += [x, x+eps]
pass yp += [y1, y2]
ch = np.interp(xs, xp, yp)
def clock(): channels.append(ch)
return cv2.getTickCount() / cv2.getTickFrequency() return np.uint8(np.array(channels).T*255)
@contextmanager def nothing(*arg, **kw):
def Timer(msg): pass
print msg, '...',
start = clock() def clock():
try: return cv2.getTickCount() / cv2.getTickFrequency()
yield
finally: @contextmanager
print "%.2f ms" % ((clock()-start)*1000) def Timer(msg):
print msg, '...',
class StatValue: start = clock()
def __init__(self, smooth_coef = 0.5): try:
self.value = None yield
self.smooth_coef = smooth_coef finally:
def update(self, v): print "%.2f ms" % ((clock()-start)*1000)
if self.value is None:
self.value = v class StatValue:
else: def __init__(self, smooth_coef = 0.5):
c = self.smooth_coef self.value = None
self.value = c * self.value + (1.0-c) * v self.smooth_coef = smooth_coef
def update(self, v):
class RectSelector: if self.value is None:
def __init__(self, win, callback): self.value = v
self.win = win else:
self.callback = callback c = self.smooth_coef
cv2.setMouseCallback(win, self.onmouse) self.value = c * self.value + (1.0-c) * v
self.drag_start = None
self.drag_rect = None class RectSelector:
def onmouse(self, event, x, y, flags, param): def __init__(self, win, callback):
x, y = np.int16([x, y]) # BUG self.win = win
if event == cv2.EVENT_LBUTTONDOWN: self.callback = callback
self.drag_start = (x, y) cv2.setMouseCallback(win, self.onmouse)
if self.drag_start: self.drag_start = None
if flags & cv2.EVENT_FLAG_LBUTTON: self.drag_rect = None
xo, yo = self.drag_start def onmouse(self, event, x, y, flags, param):
x0, y0 = np.minimum([xo, yo], [x, y]) x, y = np.int16([x, y]) # BUG
x1, y1 = np.maximum([xo, yo], [x, y]) if event == cv2.EVENT_LBUTTONDOWN:
self.drag_rect = None self.drag_start = (x, y)
if x1-x0 > 0 and y1-y0 > 0: if self.drag_start:
self.drag_rect = (x0, y0, x1, y1) if flags & cv2.EVENT_FLAG_LBUTTON:
else: xo, yo = self.drag_start
rect = self.drag_rect x0, y0 = np.minimum([xo, yo], [x, y])
self.drag_start = None x1, y1 = np.maximum([xo, yo], [x, y])
self.drag_rect = None self.drag_rect = None
if rect: if x1-x0 > 0 and y1-y0 > 0:
self.callback(rect) self.drag_rect = (x0, y0, x1, y1)
def draw(self, vis): else:
if not self.drag_rect: rect = self.drag_rect
return False self.drag_start = None
x0, y0, x1, y1 = self.drag_rect self.drag_rect = None
cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) if rect:
return True self.callback(rect)
@property def draw(self, vis):
def dragging(self): if not self.drag_rect:
return self.drag_rect is not None return False
x0, y0, x1, y1 = self.drag_rect
cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
def grouper(n, iterable, fillvalue=None): return True
'''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx''' @property
args = [iter(iterable)] * n def dragging(self):
return it.izip_longest(fillvalue=fillvalue, *args) return self.drag_rect is not None
def mosaic(w, imgs):
'''Make a grid from images. def grouper(n, iterable, fillvalue=None):
'''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx'''
w -- number of grid columns args = [iter(iterable)] * n
imgs -- images (must have same size and format) return it.izip_longest(fillvalue=fillvalue, *args)
'''
imgs = iter(imgs) def mosaic(w, imgs):
img0 = imgs.next() '''Make a grid from images.
pad = np.zeros_like(img0)
imgs = it.chain([img0], imgs) w -- number of grid columns
rows = grouper(w, imgs, pad) imgs -- images (must have same size and format)
return np.vstack(map(np.hstack, rows)) '''
imgs = iter(imgs)
def getsize(img): img0 = imgs.next()
h, w = img.shape[:2] pad = np.zeros_like(img0)
return w, h imgs = it.chain([img0], imgs)
rows = grouper(w, imgs, pad)
def mdot(*args): return np.vstack(map(np.hstack, rows))
return reduce(np.dot, args)
def getsize(img):
    '''Return the size of an image array as (width, height).

    The first two entries of img.shape are taken as (rows, columns) and are
    handed back in swapped order; any further dimensions are ignored.
    '''
    rows, cols = img.shape[:2]
    return cols, rows
def mdot(*args):
    '''Chain np.dot over all arguments, left to right.

    mdot(a, b, c) is np.dot(np.dot(a, b), c). A single argument is returned
    unchanged. Calling it with no arguments raises TypeError, because an empty
    sequence is reduced without an initial value.
    '''
    # functools.reduce exists on Python 2.6+ and 3.x, whereas the bare
    # builtin `reduce` is only available on Python 2.
    from functools import reduce
    return reduce(np.dot, args)
def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
    '''Draw a radius-2 circle on vis at the position of every keypoint.

    vis       -- image passed to cv2.circle (drawn on in place)
    keypoints -- iterable of objects exposing a two-element .pt coordinate
    color     -- colour handed to cv2.circle
    '''
    for keypoint in keypoints:
        px, py = keypoint.pt
        # Coordinates are truncated to integers before drawing.
        center = (int(px), int(py))
        cv2.circle(vis, center, 2, color)
''' '''
Feature homography Feature homography
================== ==================
Example of using features2d framework for interactive video homography matching. Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used. ORB features and FLANN matcher are used. The actual tracking is implemented by
PlaneTracker class in plane_tracker.py
Inspired by http://www.youtube.com/watch?v=-ZNYoL8rzPY
Inspired by http://www.youtube.com/watch?v=-ZNYoL8rzPY
Usage
----- video: http://www.youtube.com/watch?v=FirtmYcC0Vc
feature_homography.py [<video source>]
Usage
Select a textured planar object to track by drawing a box with a mouse. -----
feature_homography.py [<video source>]
'''
Keys:
import numpy as np SPACE - pause video
import cv2
import video Select a textured planar object to track by drawing a box with a mouse.
import common '''
from collections import namedtuple
from common import getsize import numpy as np
import cv2
import video
FLANN_INDEX_KDTREE = 1 import common
FLANN_INDEX_LSH = 6 from common import getsize, draw_keypoints
flann_params= dict(algorithm = FLANN_INDEX_LSH, from plane_tracker import PlaneTracker
table_number = 6, # 12
key_size = 12, # 20
multi_probe_level = 1) #2 class App:
def __init__(self, src):
MIN_MATCH_COUNT = 10 self.cap = video.create_capture(src)
self.frame = None
self.paused = False
ar_verts = np.float32([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0], self.tracker = PlaneTracker()
[0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
[0.5, 0.5, 2]]) cv2.namedWindow('plane')
ar_edges = [(0, 1), (1, 2), (2, 3), (3, 0), self.rect_sel = common.RectSelector('plane', self.on_rect)
(4, 5), (5, 6), (6, 7), (7, 4),
(0, 4), (1, 5), (2, 6), (3, 7), def on_rect(self, rect):
(4, 8), (5, 8), (6, 8), (7, 8)] self.tracker.clear()
self.tracker.add_target(self.frame, rect)
def run(self):
def draw_keypoints(vis, keypoints, color = (0, 255, 255)): while True:
for kp in keypoints: playing = not self.paused and not self.rect_sel.dragging
x, y = kp.pt if playing or self.frame is None:
cv2.circle(vis, (int(x), int(y)), 2, color) ret, frame = self.cap.read()
if not ret:
class App: break
def __init__(self, src): self.frame = np.frame.copy()
self.cap = video.create_capture(src)
self.frame = None w, h = getsize(self.frame)
self.paused = False vis = np.zeros((h, w*2, 3), np.uint8)
self.ref_frame = None vis[:h,:w] = self.frame
if len(self.tracker.targets) > 0:
self.detector = cv2.ORB( nfeatures = 1000 ) target = self.tracker.targets[0]
self.matcher = cv2.FlannBasedMatcher(flann_params, {}) # bug : need to pass empty dict (#1329) vis[:,w:] = target.image
draw_keypoints(vis[:,w:], target.keypoints)
cv2.namedWindow('plane') x0, y0, x1, y1 = target.rect
self.rect_sel = common.RectSelector('plane', self.on_rect) cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)
if playing:
def match_frames(self): tracked = self.tracker.track(self.frame)
if len(self.frame_desc) < MIN_MATCH_COUNT or len(self.frame_desc) < MIN_MATCH_COUNT: if len(tracked) > 0:
return tracked = tracked[0]
cv2.polylines(vis, [np.int32(tracked.quad)], True, (255, 255, 255), 2)
raw_matches = self.matcher.knnMatch(self.frame_desc, k = 2) for (x0, y0), (x1, y1) in zip(np.int32(tracked.p0), np.int32(tracked.p1)):
p0, p1 = [], [] cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
for m in raw_matches: draw_keypoints(vis, self.tracker.frame_points)
if len(m) == 2 and m[0].distance < m[1].distance * 0.75:
m = m[0] self.rect_sel.draw(vis)
p0.append( self.ref_points[m.trainIdx].pt ) # queryIdx cv2.imshow('plane', vis)
p1.append( self.frame_points[m.queryIdx].pt ) ch = cv2.waitKey(1)
p0, p1 = np.float32((p0, p1)) if ch == ord(' '):
if len(p0) < MIN_MATCH_COUNT: self.paused = not self.paused
return if ch == 27:
break
H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 4.0)
status = status.ravel() != 0
if status.sum() < MIN_MATCH_COUNT: if __name__ == '__main__':
return print __doc__
p0, p1 = p0[status], p1[status]
return p0, p1, H import sys
try: video_src = sys.argv[1]
except: video_src = 0
def on_frame(self, vis): App(video_src).run()
match = self.match_frames()
if match is None:
return
w, h = getsize(self.frame)
p0, p1, H = match
for (x0, y0), (x1, y1) in zip(np.int32(p0), np.int32(p1)):
cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
x0, y0, x1, y1 = self.ref_rect
corners0 = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
img_corners = cv2.perspectiveTransform(corners0.reshape(1, -1, 2), H)
cv2.polylines(vis, [np.int32(img_corners)], True, (255, 255, 255), 2)
corners3d = np.hstack([corners0, np.zeros((4, 1), np.float32)])
fx = 0.9
K = np.float64([[fx*w, 0, 0.5*(w-1)],
[0, fx*w, 0.5*(h-1)],
[0.0,0.0, 1.0]])
dist_coef = np.zeros(4)
ret, rvec, tvec = cv2.solvePnP(corners3d, img_corners, K, dist_coef)
verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
for i, j in ar_edges:
(x0, y0), (x1, y1) = verts[i], verts[j]
cv2.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (255, 255, 0), 2)
def on_rect(self, rect):
    '''Make the current frame the reference and index the features inside rect.

    rect is (x0, y0, x1, y1). A copy of self.frame becomes self.ref_frame, the
    keypoints of the current frame that lie inside rect (borders included) are
    stored in self.ref_points, their descriptors in self.ref_descs, and the
    matcher is reset so that it holds only those descriptors.
    '''
    left, top, right, bottom = rect
    self.ref_frame = self.frame.copy()
    self.ref_rect = rect

    kept_points, kept_descs = [], []
    for keypoint, descriptor in zip(self.frame_points, self.frame_desc):
        px, py = keypoint.pt
        if not (left <= px <= right and top <= py <= bottom):
            continue
        kept_points.append(keypoint)
        kept_descs.append(descriptor)

    self.ref_points = kept_points
    self.ref_descs = np.uint8(kept_descs)

    # Drop whatever the matcher held before and register the new reference set.
    self.matcher.clear()
    self.matcher.add([self.ref_descs])
def run(self):
    '''Main loop: grab frames, detect features, draw and handle keys.

    The 'plane' window shows a canvas twice as wide as the frame: the live
    (mirrored) frame on the left and, once a reference has been selected, the
    reference frame on the right. SPACE toggles pause, ESC (27) quits, and the
    loop also ends when the capture stops delivering frames.
    '''
    while True:
        # Frames stop advancing while paused or while a rectangle is dragged.
        playing = not (self.paused or self.rect_sel.dragging)
        if playing or self.frame is None:
            ok, grabbed = self.cap.read()
            if not ok:
                break
            # Mirror left-to-right; copy() turns the flipped view into its own array.
            self.frame = np.fliplr(grabbed).copy()
            points, descs = self.detector.detectAndCompute(self.frame, None)
            self.frame_points = points
            # detectAndCompute returns descs=None if no keypoints were found
            self.frame_desc = [] if descs is None else descs

        w, h = getsize(self.frame)
        vis = np.zeros((h, w*2, 3), np.uint8)
        vis[:h,:w] = self.frame

        if self.ref_frame is not None:
            vis[:h,w:] = self.ref_frame
            x0, y0, x1, y1 = self.ref_rect
            # Shift the selection by w so it lands on the right-hand half.
            top_left = (x0+w, y0)
            bottom_right = (x1+w, y1)
            cv2.rectangle(vis, top_left, bottom_right, (0, 255, 0), 2)
            draw_keypoints(vis[:,w:], self.ref_points)
        draw_keypoints(vis, self.frame_points)

        if playing and self.ref_frame is not None:
            self.on_frame(vis)

        self.rect_sel.draw(vis)
        cv2.imshow('plane', vis)

        key = cv2.waitKey(1)
        if key == ord(' '):
            self.paused = not self.paused
        if key == 27:
            break
if __name__ == '__main__':
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(__doc__)

    import sys
    # Only a missing command-line argument selects the fallback source 0
    # (presumably the first capture device -- confirm against
    # video.create_capture); any other error is left to propagate.
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
'''
Planar augmented reality
==================
This sample shows an example of augmented reality overlay over a planar object
tracked by PlaneTracker from plane_tracker.py. solvePnP function is used to
estimate the tracked object location in 3d space.
video: http://www.youtube.com/watch?v=pzVbhxx6aog
Usage
-----
plane_ar.py [<video source>]
Keys:
SPACE - pause video
c - clear targets
Select a textured planar object to track by drawing a box with a mouse.
Use 'focal' slider to adjust to camera focal length for proper video augmentation.
'''
import numpy as np
import cv2
import video
import common
from plane_tracker import PlaneTracker
# Vertices of the wireframe drawn over a tracked target, in unit coordinates:
# four corners at z=0, the same four corners at z=1, and two extra points at
# z=2 above the middle of the y range. App.draw_overlay scales and offsets
# them to the target rectangle before projecting.
ar_verts = np.float32([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
                       [0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
                       [0, 0.5, 2], [1, 0.5, 2]])
# Pairs of indices into ar_verts; each pair is drawn as one line segment.
ar_edges = [(0, 1), (1, 2), (2, 3), (3, 0),
            (4, 5), (5, 6), (6, 7), (7, 4),
            (0, 4), (1, 5), (2, 6), (3, 7),
            (4, 8), (5, 8), (6, 9), (7, 9), (8, 9)]
class App:
    '''Interactive demo that draws a wireframe over every tracked plane.

    Frames are read from video.create_capture(src). Rectangles selected with
    the mouse in the 'plane' window are registered as PlaneTracker targets,
    and the 'focal' trackbar feeds the camera matrix used for the overlay.
    '''

    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.tracker = PlaneTracker()

        cv2.namedWindow('plane')
        cv2.createTrackbar('focal', 'plane', 25, 50, common.nothing)
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def on_rect(self, rect):
        '''RectSelector callback: track the selected region of the current frame.'''
        self.tracker.add_target(self.frame, rect)

    def run(self):
        '''Main loop. SPACE toggles pause, 'c' clears targets, ESC (27) quits.

        The loop also ends when the capture stops delivering frames.
        '''
        while True:
            # Frames stop advancing while paused or while a rectangle is dragged.
            playing = not (self.paused or self.rect_sel.dragging)
            if playing or self.frame is None:
                ok, grabbed = self.cap.read()
                if not ok:
                    break
                self.frame = grabbed.copy()

            vis = self.frame.copy()
            if playing:
                for hit in self.tracker.track(self.frame):
                    outline = [np.int32(hit.quad)]
                    cv2.polylines(vis, outline, True, (255, 255, 255), 2)
                    for (px, py) in np.int32(hit.p1):
                        cv2.circle(vis, (px, py), 2, (255, 255, 255))
                    self.draw_overlay(vis, hit)

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)

            key = cv2.waitKey(1)
            if key == ord(' '):
                self.paused = not self.paused
            if key == ord('c'):
                self.tracker.clear()
            if key == 27:
                break

    def draw_overlay(self, vis, tracked):
        '''Project the ar_verts/ar_edges wireframe onto vis for one tracked target.

        The pose comes from cv2.solvePnP, which is given the target rectangle
        corners (placed at z=0) and the tracked quad in the frame.
        '''
        left, top, right, bottom = tracked.target.rect
        width, height = right - left, bottom - top
        quad_3d = np.float32([[left, top, 0],
                              [right, top, 0],
                              [right, bottom, 0],
                              [left, bottom, 0]])

        # Trackbar position / 50.0 plus 0.5 gives the focal length as a
        # multiple of the image width.
        fx = 0.5 + cv2.getTrackbarPos('focal', 'plane') / 50.0
        h, w = vis.shape[:2]
        focal = fx*w
        # Same focal length on both axes, principal point at the image centre.
        K = np.float64([[focal, 0, 0.5*(w-1)],
                        [0, focal, 0.5*(h-1)],
                        [0.0,0.0, 1.0]])
        dist_coef = np.zeros(4)
        ret, rvec, tvec = cv2.solvePnP(quad_3d, tracked.quad, K, dist_coef)

        # Stretch the unit wireframe to the target rectangle; its z extent is
        # scaled by -0.3 times the rectangle width.
        scaled = ar_verts * [width, height, -width*0.3] + (left, top, 0)
        projected = cv2.projectPoints(scaled, rvec, tvec, K, dist_coef)[0]
        verts = projected.reshape(-1, 2)
        for i, j in ar_edges:
            (ax, ay), (bx, by) = verts[i], verts[j]
            cv2.line(vis, (int(ax), int(ay)), (int(bx), int(by)), (255, 255, 0), 2)
if __name__ == '__main__':
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(__doc__)

    import sys
    # Only a missing command-line argument selects the fallback source 0
    # (presumably the first capture device -- confirm against
    # video.create_capture); any other error is left to propagate.
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
'''
Multitarget planar tracking
==================
Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used. This sample provides PlaneTracker class
and an example of its usage.
video: http://www.youtube.com/watch?v=pzVbhxx6aog
Usage
-----
plane_tracker.py [<video source>]
Keys:
SPACE - pause video
c - clear targets
Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
from collections import namedtuple
import video
import common
# FLANN index algorithm identifiers; only FLANN_INDEX_LSH is used below.
FLANN_INDEX_KDTREE = 1
FLANN_INDEX_LSH = 6
# Index parameters handed to cv2.FlannBasedMatcher by PlaneTracker.
# NOTE(review): the trailing numbers in the comments look like alternative
# values that were tried -- confirm before relying on them.
flann_params= dict(algorithm = FLANN_INDEX_LSH,
                   table_number = 6, # 12
                   key_size = 12, # 20
                   multi_probe_level = 1) #2
# Minimum number of keypoints / matches / RANSAC inliers PlaneTracker.track
# requires before it reports a target.
MIN_MATCH_COUNT = 10
'''
image - image to track
rect - tracked rectangle (x1, y1, x2, y2)
keypoints - keypoints detected inside rect
descrs - their descriptors
data - some user-provided data
'''
PlanarTarget = namedtuple('PlaneTarget', 'image, rect, keypoints, descrs, data')
'''
target - reference to PlanarTarget
p0 - matched points coords in target image
p1 - matched points coords in input frame
H - homography matrix from p0 to p1
quad - target boundary quad in input frame
'''
TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
class PlaneTracker:
    '''Tracks any number of planar targets with ORB features and a FLANN matcher.

    add_target() registers one descriptor set with the matcher and appends the
    corresponding PlanarTarget to self.targets, and clear() empties both, so
    entry i of self.targets belongs to the i-th descriptor set held by the
    matcher. track() relies on that when it groups matches by m.imgIdx.
    '''

    def __init__(self):
        self.detector = cv2.ORB( nfeatures = 1000 )
        self.matcher = cv2.FlannBasedMatcher(flann_params, {})  # bug : need to pass empty dict (#1329)
        self.targets = []

    def add_target(self, image, rect, data=None):
        '''Add a new tracking target.

        image -- image containing the target
        rect  -- (x0, y0, x1, y1) region of image to track; keypoints on the
                 border count as inside
        data  -- optional user value stored on the resulting PlanarTarget
        '''
        x0, y0, x1, y1 = rect
        raw_points, raw_descrs = self.detect_features(image)
        points, descs = [], []
        for kp, desc in zip(raw_points, raw_descrs):
            x, y = kp.pt
            if x0 <= x <= x1 and y0 <= y <= y1:
                points.append(kp)
                descs.append(desc)
        descs = np.uint8(descs)
        self.matcher.add([descs])
        # Pass `data` through so callers can read it back from target.data.
        target = PlanarTarget(image=image, rect=rect, keypoints=points,
                              descrs=descs, data=data)
        self.targets.append(target)

    def clear(self):
        '''Remove all targets'''
        self.targets = []
        self.matcher.clear()

    def track(self, frame):
        '''Returns a list of detected TrackedTarget objects.

        The list is ordered with the target that kept the most inlier matches
        first. As a side effect the keypoints and descriptors detected in
        frame are stored in self.frame_points and self.frame_descrs.
        '''
        self.frame_points, self.frame_descrs = self.detect_features(frame)
        if len(self.frame_points) < MIN_MATCH_COUNT:
            return []

        # Keep a match only when the best candidate is clearly closer than the
        # second best one (distance below 0.75 of the runner-up).
        candidates = self.matcher.knnMatch(self.frame_descrs, k = 2)
        matches = [m[0] for m in candidates
                   if len(m) == 2 and m[0].distance < m[1].distance * 0.75]
        if len(matches) < MIN_MATCH_COUNT:
            return []

        # One bucket per target, filled according to each match's imgIdx.
        matches_by_id = [[] for _ in self.targets]
        for m in matches:
            matches_by_id[m.imgIdx].append(m)

        tracked = []
        for imgIdx, target_matches in enumerate(matches_by_id):
            if len(target_matches) < MIN_MATCH_COUNT:
                continue
            target = self.targets[imgIdx]
            p0 = [target.keypoints[m.trainIdx].pt for m in target_matches]
            p1 = [self.frame_points[m.queryIdx].pt for m in target_matches]
            p0, p1 = np.float32((p0, p1))
            H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 3.0)
            # Keep only the point pairs RANSAC marked as inliers.
            status = status.ravel() != 0
            if status.sum() < MIN_MATCH_COUNT:
                continue
            p0, p1 = p0[status], p1[status]

            # Map the corners of the target rectangle into the frame.
            x0, y0, x1, y1 = target.rect
            quad = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
            quad = cv2.perspectiveTransform(quad.reshape(1, -1, 2), H).reshape(-1, 2)

            track = TrackedTarget(target=target, p0=p0, p1=p1, H=H, quad=quad)
            tracked.append(track)
        tracked.sort(key = lambda t: len(t.p0), reverse=True)
        return tracked

    def detect_features(self, frame):
        '''detect_features(self, frame) -> keypoints, descrs'''
        keypoints, descrs = self.detector.detectAndCompute(frame, None)
        if descrs is None:  # detectAndCompute returns descs=None if no keypoints were found
            descrs = []
        return keypoints, descrs
class App:
    '''Interactive demo of PlaneTracker on a video source.

    Frames are read from video.create_capture(src); rectangles selected with
    the mouse in the 'plane' window are registered as tracking targets.
    '''

    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.tracker = PlaneTracker()

        cv2.namedWindow('plane')
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def on_rect(self, rect):
        '''RectSelector callback: track the selected region of the current frame.'''
        self.tracker.add_target(self.frame, rect)

    def run(self):
        '''Main loop. SPACE toggles pause, 'c' clears targets, ESC (27) quits.

        The loop also ends when the capture stops delivering frames.
        '''
        while True:
            # Frames stop advancing while paused or while a rectangle is dragged.
            playing = not (self.paused or self.rect_sel.dragging)
            if playing or self.frame is None:
                ok, grabbed = self.cap.read()
                if not ok:
                    break
                self.frame = grabbed.copy()

            vis = self.frame.copy()
            if playing:
                for hit in self.tracker.track(self.frame):
                    # Outline the tracked quad, then mark every matched point.
                    outline = [np.int32(hit.quad)]
                    cv2.polylines(vis, outline, True, (255, 255, 255), 2)
                    for (px, py) in np.int32(hit.p1):
                        cv2.circle(vis, (px, py), 2, (255, 255, 255))

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)

            key = cv2.waitKey(1)
            if key == ord(' '):
                self.paused = not self.paused
            if key == ord('c'):
                self.tracker.clear()
            if key == 27:
                break
if __name__ == '__main__':
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(__doc__)

    import sys
    # Only a missing command-line argument selects the fallback source 0
    # (presumably the first capture device -- confirm against
    # video.create_capture); any other error is left to propagate.
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment