# New/improved Python samples by Alexander Mordvintsev.

import numpy as np
import cv2
import os
from contextlib import contextmanager
import itertools as it
# Raster-image filename extensions (lower-case, leading dot included).
image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm']
def splitfn(fn):
    '''Break a file path into (directory, base name, extension).

    The extension keeps its leading dot, exactly as os.path.splitext
    returns it.
    '''
    directory, filename = os.path.split(fn)
    stem, suffix = os.path.splitext(filename)
    return directory, stem, suffix
def anorm2(a):
    '''Return the sum of squared components of a along its last axis.'''
    squared = a * a
    return squared.sum(-1)
def anorm(a):
    '''Return the Euclidean length of a along its last axis.'''
    squared_length = anorm2(a)
    return np.sqrt(squared_length)
def homotrans(H, x, y):
    '''Map the point(s) (x, y) through the 3x3 homography H.

    Returns the transformed (x, y) after dividing by the homogeneous
    scale factor.
    '''
    scale = H[2, 0]*x + H[2, 1]*y + H[2, 2]
    x_h = H[0, 0]*x + H[0, 1]*y + H[0, 2]
    y_h = H[1, 0]*x + H[1, 1]*y + H[1, 2]
    return x_h/scale, y_h/scale
def to_rect(a):
    '''Normalise a rectangle spec to a 2x2 float64 array of two corner rows.

    A two-element input (w, h) is treated as the rectangle (0, 0, w, h).
    '''
    flat = np.ravel(a)
    if len(flat) == 2:
        width, height = flat
        flat = (0, 0, width, height)
    return np.array(flat, np.float64).reshape(2, 2)
def rect2rect_mtx(src, dst):
    '''Return the 3x3 scale-and-translate matrix mapping rect src onto rect dst.

    Both arguments are normalised with to_rect() first.
    '''
    src_rect = to_rect(src)
    dst_rect = to_rect(dst)
    # Per-axis scale is the ratio of the rectangle extents.
    scale = (dst_rect[1] - dst_rect[0]) / (src_rect[1] - src_rect[0])
    cx, cy = scale
    tx, ty = dst_rect[0] - src_rect[0] * scale
    return np.float64([[cx, 0, tx],
                       [0, cy, ty],
                       [0, 0, 1]])
def lookat(eye, target, up = (0, 0, 1)):
    '''Build a camera pose looking from eye towards target.

    Returns (R, tvec). The rows of R are the unit right, down and forward
    axes; tvec is -R.dot(eye), so that R.dot(eye) + tvec is zero.
    '''
    fwd = np.asarray(target, np.float64) - eye
    fwd /= anorm(fwd)
    right = np.cross(fwd, up)
    right /= anorm(right)
    down = np.cross(fwd, right)
    R = np.float64([right, down, fwd])
    tvec = -np.dot(R, eye)
    return R, tvec
def mtx2rvec(R):
    '''Convert the 3x3 matrix R to a rotation vector (axis scaled by angle).

    R is presumably a rotation matrix; the identity is subtracted from it
    and the result is decomposed with cv2.SVDecomp.
    '''
    w, u, vt = cv2.SVDecomp(R - np.eye(3))
    p = vt[0] + u[:,0]*w[0]    # same as np.dot(R, vt[0])
    c = np.dot(vt[0], p)
    s = np.dot(vt[1], p)
    axis = np.cross(vt[0], vt[1])
    return axis * np.arctan2(s, c)
def draw_str(dst, target, s):
    '''Draw the string s on image dst at target = (x, y).

    The text is drawn twice: a thicker black copy offset by one pixel,
    then the white text on top of it.
    '''
    # Unpacked here rather than in the signature: tuple parameters are
    # Python-2-only syntax. Positional callers are unaffected.
    x, y = target
    cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.CV_AA)
    cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.CV_AA)
class Sketcher:
    '''Freehand mouse drawing onto one or more images shown in a window.

    windowname  -- OpenCV window that receives the mouse events
    dests       -- images to draw on; dests[0] is the one displayed
    colors_func -- callable returning one colour per image in dests
    '''
    def __init__(self, windowname, dests, colors_func):
        self.prev_pt = None
        self.windowname = windowname
        self.dests = dests
        self.colors_func = colors_func
        self.dirty = False    # set once anything has been drawn
        # Show first so the window exists before the callback is attached.
        self.show()
        cv2.setMouseCallback(self.windowname, self.on_mouse)

    def show(self):
        '''Display the first destination image in the window.'''
        cv2.imshow(self.windowname, self.dests[0])

    def on_mouse(self, event, x, y, flags, param):
        '''Mouse callback: draw a segment from the previous point while dragging.'''
        pt = (x, y)
        if event == cv2.EVENT_LBUTTONDOWN:
            self.prev_pt = pt
        if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON:
            for dst, color in zip(self.dests, self.colors_func()):
                cv2.line(dst, self.prev_pt, pt, color, 5)
            self.dirty = True
            self.prev_pt = pt
            self.show()
        else:
            # Button released (or never pressed): end the current stroke.
            self.prev_pt = None
# Palette data taken from matplotlib.
# Every channel is a sequence of (x, y_left, y_right) control points.
_jet_data = {
    'red': (
        (0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89, 1, 1),
        (1, 0.5, 0.5),
    ),
    'green': (
        (0., 0, 0), (0.125, 0, 0), (0.375, 1, 1), (0.64, 1, 1),
        (0.91, 0, 0), (1, 0, 0),
    ),
    'blue': (
        (0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65, 0, 0),
        (1, 0, 0),
    ),
}

# Registry of the palettes available by name.
cmap_data = {'jet': _jet_data}
def make_cmap(name, n=256):
    '''Build an n-entry colour lookup table for the palette cmap_data[name].

    Returns an (n, 3) uint8 array whose columns are ordered blue, green, red.
    Raises KeyError if name is not present in cmap_data.
    '''
    data = cmap_data[name]
    xs = np.linspace(0.0, 1.0, n)
    channels = []
    eps = 1e-6
    for ch_name in ['blue', 'green', 'red']:
        ch_data = data[ch_name]
        xp, yp = [], []
        for x, y1, y2 in ch_data:
            # Each node contributes y1 at x and y2 just after x, so a jump
            # between the two values survives the linear interpolation.
            xp += [x, x+eps]
            yp += [y1, y2]
        ch = np.interp(xs, xp, yp)
        # Without this append the table would always come back empty.
        channels.append(ch)
    return np.uint8(np.array(channels).T*255)
def nothing(*arg, **kw):
    '''Accept any arguments and do nothing (a placeholder callback).'''
    pass
def clock():
    '''Return the OpenCV tick counter divided by the tick frequency.'''
    ticks = cv2.getTickCount()
    return ticks / cv2.getTickFrequency()
@contextmanager
def Timer(msg):
    '''Context manager that prints msg, runs the body, then prints its duration.

    Output has the form "<msg> ... <elapsed> ms" on a single line. The
    elapsed time is printed even when the body raises.
    '''
    import sys
    # Written without a newline so the timing lands on the same line.
    sys.stdout.write('%s ... ' % (msg,))
    start = clock()
    try:
        yield
    finally:
        print("%.2f ms" % ((clock()-start)*1000))
class StatValue:
    '''Exponentially smoothed running value.

    smooth_coef -- weight given to the previous value on each update;
                   the new sample gets weight (1 - smooth_coef)
    '''
    def __init__(self, smooth_coef = 0.5):
        self.value = None    # None until the first update
        self.smooth_coef = smooth_coef

    def update(self, v):
        '''Fold the sample v into the running value.'''
        if self.value is None:
            # The first sample is stored as-is, not blended with itself.
            self.value = v
        else:
            c = self.smooth_coef
            self.value = c * self.value + (1.0-c) * v
class RectSelector:
    '''Rubber-band rectangle selection with the mouse in an OpenCV window.

    win      -- name of the window to attach the mouse callback to
    callback -- called with (x0, y0, x1, y1) when a non-empty drag ends
    '''
    def __init__(self, win, callback):
        self.win = win
        self.callback = callback
        cv2.setMouseCallback(win, self.onmouse)
        self.drag_start = None    # point where the left button went down
        self.drag_rect = None     # current selection, or None

    def onmouse(self, event, x, y, flags, param):
        '''Mouse callback: track the drag and report the rectangle on release.'''
        x, y = np.int16([x, y]) # BUG
        if event == cv2.EVENT_LBUTTONDOWN:
            self.drag_start = (x, y)
        if self.drag_start:
            if flags & cv2.EVENT_FLAG_LBUTTON:
                # Still dragging: normalise so (x0, y0) is the top-left corner.
                xo, yo = self.drag_start
                x0, y0 = np.minimum([xo, yo], [x, y])
                x1, y1 = np.maximum([xo, yo], [x, y])
                self.drag_rect = None
                if x1-x0 > 0 and y1-y0 > 0:
                    self.drag_rect = (x0, y0, x1, y1)
            else:
                # Button released: reset state first, then report the result.
                rect = self.drag_rect
                self.drag_start = None
                self.drag_rect = None
                if rect:
                    self.callback(rect)

    def draw(self, vis):
        '''Draw the current selection on vis; return True if one was drawn.'''
        if not self.drag_rect:
            return False
        x0, y0, x1, y1 = self.drag_rect
        cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
        return True

    @property
    def dragging(self):
        '''True while a non-empty selection rectangle is being dragged.'''
        # A property because callers read it as an attribute (not a call).
        return self.drag_rect is not None
def grouper(n, iterable, fillvalue=None):
    '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx

    Yields n-tuples of consecutive items; the last tuple is padded with
    fillvalue when the input length is not a multiple of n.
    '''
    # izip_longest (Python 2) was renamed zip_longest in Python 3.
    zip_longest = getattr(it, 'zip_longest', None) or it.izip_longest
    # n references to the SAME iterator, so each tuple consumes n items.
    args = [iter(iterable)] * n
    return zip_longest(fillvalue=fillvalue, *args)
def mosaic(w, imgs):
    '''Make a grid from images.

    w    -- number of grid columns
    imgs -- images (must have same size and format)

    The last row is padded with zero images shaped like the first image.
    '''
    imgs = iter(imgs)
    # Peek at the first image to build the padding tile, then put it back.
    img0 = next(imgs)
    pad = np.zeros_like(img0)
    imgs = it.chain([img0], imgs)
    rows = grouper(w, imgs, pad)
    # np.vstack needs a real sequence, not a lazy map object.
    return np.vstack([np.hstack(row) for row in rows])
def getsize(img):
    '''Return the image size as (width, height), read from img.shape.'''
    height, width = img.shape[:2]
    return width, height
def mdot(*args):
    '''Return the chained product np.dot(np.dot(a, b), c)... of all arguments.

    Raises TypeError when called with no arguments.
    '''
    # reduce is a builtin only on Python 2; functools has it on both.
    from functools import reduce
    return reduce(np.dot, args)
'''
Feature homography
==================

Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used.

Inspired by: (link missing in this copy)

Usage
-----
<this script> [<video source>]

Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
import video
import common
from collections import namedtuple
from common import getsize
# FLANN index type id for locality-sensitive hashing.
FLANN_INDEX_LSH = 6

# Index parameters handed to cv2.FlannBasedMatcher; the trailing comments
# record alternative values.
flann_params= dict(algorithm = FLANN_INDEX_LSH,
                   table_number = 6, # 12
                   key_size = 12,     # 20
                   multi_probe_level = 1) #2
# Wireframe model in unit coordinates: vertices 0-3 at z=0, vertices 4-7
# at z=1, and a single apex (vertex 8) at z=2.
ar_verts = np.array([
    [0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
    [0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
    [0.5, 0.5, 2],
], dtype=np.float32)

# Pairs of ar_verts indices joined by a line.
ar_edges = [
    (0, 1), (1, 2), (2, 3), (3, 0),
    (4, 5), (5, 6), (6, 7), (7, 4),
    (0, 4), (1, 5), (2, 6), (3, 7),
    (4, 8), (5, 8), (6, 8), (7, 8),
]
def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
    '''Draw a radius-2 circle on vis at the .pt location of every keypoint.'''
    for kp in keypoints:
        x, y = kp.pt
        # Coordinates are converted to int before being passed to cv2.circle.
        cv2.circle(vis, (int(x), int(y)), 2, color)
# Minimum number of descriptors / matches / RANSAC inliers needed to
# attempt or accept a homography.
MIN_MATCH_COUNT = 10

class App:
    '''Interactive demo: select a planar region, then match it in live video.

    The window shows the mirrored live frame on the left and the reference
    frame on the right. When enough matches are found, the selected
    rectangle and a wireframe model are projected onto the live frame.
    '''
    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.ref_frame = None

        self.detector = cv2.ORB( nfeatures = 1000 )
        self.matcher = cv2.FlannBasedMatcher(flann_params, {})  # bug : need to pass empty dict (#1329)

        # The window must exist before a mouse callback is attached to it.
        cv2.namedWindow('plane')
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def match_frames(self):
        '''Match the current frame against the reference selection.

        Returns (p0, p1, H): matched reference points, matched frame points
        (RANSAC inliers only) and the homography from p0 to p1. Returns None
        when there are too few descriptors, matches or inliers.
        '''
        # Both sides need enough descriptors for matching to be meaningful.
        if len(self.frame_desc) < MIN_MATCH_COUNT or len(self.ref_descs) < MIN_MATCH_COUNT:
            return None

        raw_matches = self.matcher.knnMatch(self.frame_desc, k = 2)
        p0, p1 = [], []
        for m in raw_matches:
            # Ratio test: keep a match only if clearly better than the runner-up.
            if len(m) == 2 and m[0].distance < m[1].distance * 0.75:
                m = m[0]
                p0.append( self.ref_points[m.trainIdx].pt ) # queryIdx
                p1.append( self.frame_points[m.queryIdx].pt )
        p0, p1 = np.float32((p0, p1))
        if len(p0) < MIN_MATCH_COUNT:
            return None

        H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 4.0)
        status = status.ravel() != 0
        if status.sum() < MIN_MATCH_COUNT:
            return None
        p0, p1 = p0[status], p1[status]
        return p0, p1, H

    def on_frame(self, vis):
        '''Draw matches, the tracked outline and the 3D wireframe onto vis.'''
        match = self.match_frames()
        if match is None:
            return
        w, h = getsize(self.frame)
        p0, p1, H = match
        # Reference points live in the right half of vis, hence the +w offset.
        for (x0, y0), (x1, y1) in zip(np.int32(p0), np.int32(p1)):
            cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
        x0, y0, x1, y1 = self.ref_rect
        corners0 = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
        img_corners = cv2.perspectiveTransform(corners0.reshape(1, -1, 2), H)
        cv2.polylines(vis, [np.int32(img_corners)], True, (255, 255, 255), 2)

        # Estimate the plane pose from its four corners with an assumed
        # camera matrix (focal length 0.9 * image width, centred principal point).
        corners3d = np.hstack([corners0, np.zeros((4, 1), np.float32)])
        fx = 0.9
        K = np.float64([[fx*w, 0, 0.5*(w-1)],
                        [0, fx*w, 0.5*(h-1)],
                        [0.0,0.0,      1.0]])
        dist_coef = np.zeros(4)
        ret, rvec, tvec = cv2.solvePnP(corners3d, img_corners, K, dist_coef)
        # Scale the unit model to the selected rectangle and project it.
        verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
        verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
        for i, j in ar_edges:
            (x0, y0), (x1, y1) = verts[i], verts[j]
            cv2.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (255, 255, 0), 2)

    def on_rect(self, rect):
        '''RectSelector callback: make the features inside rect the reference.'''
        x0, y0, x1, y1 = rect
        self.ref_frame = self.frame.copy()
        self.ref_rect = rect
        points, descs = [], []
        for kp, desc in zip(self.frame_points, self.frame_desc):
            x, y = kp.pt
            if x0 <= x <= x1 and y0 <= y <= y1:
                points.append(kp)
                descs.append(desc)
        self.ref_points, self.ref_descs = points, np.uint8(descs)

        # knnMatch() is called with query descriptors only, so the matcher
        # must hold the reference descriptors as its train set.
        self.matcher.clear()
        self.matcher.add([self.ref_descs])

    def run(self):
        '''Main loop: grab, detect, draw and handle keys until ESC or end of video.'''
        while True:
            playing = not self.paused and not self.rect_sel.dragging
            if playing or self.frame is None:
                ret, frame = self.cap.read()
                if not ret:
                    break
                self.frame = np.fliplr(frame).copy()
                self.frame_points, self.frame_desc = self.detector.detectAndCompute(self.frame, None)
                if self.frame_desc is None:  # detectAndCompute returns descs=None if not keypoints found
                    self.frame_desc = []

            w, h = getsize(self.frame)
            vis = np.zeros((h, w*2, 3), np.uint8)
            vis[:h,:w] = self.frame
            if self.ref_frame is not None:
                vis[:h,w:] = self.ref_frame
                x0, y0, x1, y1 = self.ref_rect
                cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)
                draw_keypoints(vis[:,w:], self.ref_points)
            draw_keypoints(vis, self.frame_points)

            if playing and self.ref_frame is not None:
                self.on_frame(vis)

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)
            ch = cv2.waitKey(1)
            if ch == ord(' '):
                self.paused = not self.paused
            if ch == 27:    # ESC
                break
if __name__ == '__main__':
    print(__doc__)

    import sys
    # Use the first command-line argument as the video source; without one,
    # fall back to 0 (presumably the default capture device).
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
'''
Feature homography
==================

Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used. The actual tracking is implemented by
PlaneTracker class in plane_tracker.py

Inspired by: (link missing in this copy)

Usage
-----
<this script> [<video source>]

Keys:
   SPACE  -  pause video

Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
import video
import common
from common import getsize, draw_keypoints
from plane_tracker import PlaneTracker
class App:
    '''Interactive demo: track the first selected target with PlaneTracker.

    The window shows the live frame on the left and the first target's
    image on the right, with match lines between the two halves.
    '''
    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.tracker = PlaneTracker()

        # The window must exist before a mouse callback is attached to it.
        cv2.namedWindow('plane')
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def on_rect(self, rect):
        '''RectSelector callback: register the selection as a tracking target.'''
        self.tracker.add_target(self.frame, rect)

    def run(self):
        '''Main loop: grab, track, draw and handle keys until ESC or end of video.'''
        while True:
            playing = not self.paused and not self.rect_sel.dragging
            if playing or self.frame is None:
                ret, frame = self.cap.read()
                if not ret:
                    break
                self.frame = frame.copy()

            w, h = getsize(self.frame)
            vis = np.zeros((h, w*2, 3), np.uint8)
            vis[:h,:w] = self.frame
            if len(self.tracker.targets) > 0:
                target = self.tracker.targets[0]
                vis[:,w:] = target.image
                draw_keypoints(vis[:,w:], target.keypoints)
                x0, y0, x1, y1 = target.rect
                cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)

            if playing:
                tracked = self.tracker.track(self.frame)
                if len(tracked) > 0:
                    # Only the first result returned by track() is visualised.
                    tracked = tracked[0]
                    cv2.polylines(vis, [np.int32(tracked.quad)], True, (255, 255, 255), 2)
                    # Target points live in the right half, hence the +w offset.
                    for (x0, y0), (x1, y1) in zip(np.int32(tracked.p0), np.int32(tracked.p1)):
                        cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
            draw_keypoints(vis, self.tracker.frame_points)

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)
            ch = cv2.waitKey(1)
            if ch == ord(' '):
                self.paused = not self.paused
            if ch == 27:    # ESC
                break
if __name__ == '__main__':
    print(__doc__)

    import sys
    # Use the first command-line argument as the video source; without one,
    # fall back to 0 (presumably the default capture device).
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
'''
Planar augmented reality
==================

This sample shows an example of augmented reality overlay over a planar object
tracked by PlaneTracker from plane_tracker.py. The solvePnP function is used to
estimate the tracked object location in 3d space.

Usage
-----
<this script> [<video source>]

Keys:
   SPACE  -  pause video
   c      -  clear targets

Select a textured planar object to track by drawing a box with a mouse.
Use 'focal' slider to adjust to camera focal length for proper video augmentation.
'''
import numpy as np
import cv2
import video
import common
from plane_tracker import PlaneTracker
# Wireframe model in unit coordinates: vertices 0-3 at z=0, vertices 4-7
# at z=1, and two ridge vertices (8 and 9) at z=2.
ar_verts = np.array([
    [0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
    [0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
    [0, 0.5, 2], [1, 0.5, 2],
], dtype=np.float32)

# Pairs of ar_verts indices joined by a line.
ar_edges = [
    (0, 1), (1, 2), (2, 3), (3, 0),
    (4, 5), (5, 6), (6, 7), (7, 4),
    (0, 4), (1, 5), (2, 6), (3, 7),
    (4, 8), (5, 8), (6, 9), (7, 9), (8, 9),
]
class App:
    '''Interactive demo: draw a 3D wireframe over every tracked planar target.'''
    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.tracker = PlaneTracker()

        # The window must exist before a trackbar or mouse callback is attached.
        cv2.namedWindow('plane')
        cv2.createTrackbar('focal', 'plane', 25, 50, common.nothing)
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def on_rect(self, rect):
        '''RectSelector callback: register the selection as a tracking target.'''
        self.tracker.add_target(self.frame, rect)

    def run(self):
        '''Main loop: grab, track, draw and handle keys until ESC or end of video.'''
        while True:
            playing = not self.paused and not self.rect_sel.dragging
            if playing or self.frame is None:
                ret, frame = self.cap.read()
                if not ret:
                    break
                self.frame = frame.copy()

            vis = self.frame.copy()
            if playing:
                tracked = self.tracker.track(self.frame)
                for tr in tracked:
                    cv2.polylines(vis, [np.int32(tr.quad)], True, (255, 255, 255), 2)
                    for (x, y) in np.int32(tr.p1):
                        cv2.circle(vis, (x, y), 2, (255, 255, 255))
                    self.draw_overlay(vis, tr)

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)
            ch = cv2.waitKey(1)
            if ch == ord(' '):
                self.paused = not self.paused
            if ch == ord('c'):
                self.tracker.clear()
            if ch == 27:    # ESC
                break

    def draw_overlay(self, vis, tracked):
        '''Project the ar_verts wireframe onto vis using the pose of tracked.'''
        x0, y0, x1, y1 = tracked.target.rect
        quad_3d = np.float32([[x0, y0, 0], [x1, y0, 0], [x1, y1, 0], [x0, y1, 0]])
        # The slider range 0..50 maps to a focal factor of 0.5..1.5.
        fx = 0.5 + cv2.getTrackbarPos('focal', 'plane') / 50.0
        h, w = vis.shape[:2]
        K = np.float64([[fx*w, 0, 0.5*(w-1)],
                        [0, fx*w, 0.5*(h-1)],
                        [0.0,0.0,      1.0]])
        dist_coef = np.zeros(4)
        ret, rvec, tvec = cv2.solvePnP(quad_3d, tracked.quad, K, dist_coef)
        # Scale the unit model to the target rectangle and project it.
        verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
        verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
        for i, j in ar_edges:
            (x0, y0), (x1, y1) = verts[i], verts[j]
            cv2.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (255, 255, 0), 2)
if __name__ == '__main__':
    print(__doc__)

    import sys
    # Use the first command-line argument as the video source; without one,
    # fall back to 0 (presumably the default capture device).
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
'''
Multitarget planar tracking
==================

Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used. This sample provides PlaneTracker class
and an example of its usage.

Usage
-----
<this script> [<video source>]

Keys:
   SPACE  -  pause video
   c      -  clear targets

Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
from collections import namedtuple
import video
import common
# FLANN index type id for locality-sensitive hashing.
FLANN_INDEX_LSH = 6

# Index parameters handed to cv2.FlannBasedMatcher; the trailing comments
# record alternative values.
flann_params= dict(algorithm = FLANN_INDEX_LSH,
                   table_number = 6, # 12
                   key_size = 12,     # 20
                   multi_probe_level = 1) #2
# Description of one registered tracking target:
#   image     - image to track
#   rect      - tracked rectangle (x1, y1, x2, y2)
#   keypoints - keypoints detected inside rect
#   descrs    - their descriptors
#   data      - some user-provided data
# NOTE: the tuple's type name is 'PlaneTarget' while the variable is
# PlanarTarget; it is left unchanged so repr() output stays the same.
PlanarTarget = namedtuple('PlaneTarget', 'image, rect, keypoints, descrs, data')
# Result of locating one target in a frame:
#   target - reference to PlanarTarget
#   p0     - matched points coords in target image
#   p1     - matched points coords in input frame
#   H      - homography matrix from p0 to p1
#   quad   - target boundary quad in input frame
TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
# Minimum number of keypoints / matches / RANSAC inliers needed to
# attempt or accept a homography.
MIN_MATCH_COUNT = 10

class PlaneTracker:
    '''Track several planar targets at once using ORB features and FLANN matching.'''
    def __init__(self):
        self.detector = cv2.ORB( nfeatures = 1000 )
        self.matcher = cv2.FlannBasedMatcher(flann_params, {})  # bug : need to pass empty dict (#1329)
        self.targets = []

    def add_target(self, image, rect, data=None):
        '''Add a new tracking target.

        image -- image containing the target
        rect  -- (x0, y0, x1, y1) region of image to track
        data  -- optional user data stored on the resulting PlanarTarget
        '''
        x0, y0, x1, y1 = rect
        raw_points, raw_descrs = self.detect_features(image)
        points, descs = [], []
        # Keep only the features that fall inside the selected rectangle.
        for kp, desc in zip(raw_points, raw_descrs):
            x, y = kp.pt
            if x0 <= x <= x1 and y0 <= y <= y1:
                points.append(kp)
                descs.append(desc)
        descs = np.uint8(descs)
        # One descriptor set is added per target, so a match's imgIdx is the
        # index of the target in self.targets.
        self.matcher.add([descs])
        target = PlanarTarget(image = image, rect=rect, keypoints = points, descrs=descs, data=data)
        self.targets.append(target)

    def clear(self):
        '''Remove all targets'''
        self.targets = []
        self.matcher.clear()

    def track(self, frame):
        '''Returns a list of detected TrackedTarget objects

        The list is sorted by number of inlier matches, best first. It is
        empty when the frame has too few keypoints or too few good matches.
        '''
        self.frame_points, self.frame_descrs = self.detect_features(frame)
        if len(self.frame_points) < MIN_MATCH_COUNT:
            return []
        matches = self.matcher.knnMatch(self.frame_descrs, k = 2)
        # Ratio test: keep a match only if clearly better than the runner-up.
        matches = [m[0] for m in matches if len(m) == 2 and m[0].distance < m[1].distance * 0.75]
        if len(matches) < MIN_MATCH_COUNT:
            return []
        # Bucket the surviving matches by the target they belong to.
        matches_by_id = [[] for _ in range(len(self.targets))]
        for m in matches:
            matches_by_id[m.imgIdx].append(m)
        tracked = []
        for imgIdx, matches in enumerate(matches_by_id):
            if len(matches) < MIN_MATCH_COUNT:
                continue
            target = self.targets[imgIdx]
            p0 = [target.keypoints[m.trainIdx].pt for m in matches]
            p1 = [self.frame_points[m.queryIdx].pt for m in matches]
            p0, p1 = np.float32((p0, p1))
            H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 3.0)
            status = status.ravel() != 0
            if status.sum() < MIN_MATCH_COUNT:
                continue
            p0, p1 = p0[status], p1[status]

            # Map the target rectangle's corners into the current frame.
            x0, y0, x1, y1 = target.rect
            quad = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
            quad = cv2.perspectiveTransform(quad.reshape(1, -1, 2), H).reshape(-1, 2)

            track = TrackedTarget(target=target, p0=p0, p1=p1, H=H, quad=quad)
            tracked.append(track)
        tracked.sort(key = lambda t: len(t.p0), reverse=True)
        return tracked

    def detect_features(self, frame):
        '''detect_features(self, frame) -> keypoints, descrs'''
        keypoints, descrs = self.detector.detectAndCompute(frame, None)
        if descrs is None:  # detectAndCompute returns descs=None if not keypoints found
            descrs = []
        return keypoints, descrs
class App:
    '''Interactive demo: outline every tracked target and mark its matched points.'''
    def __init__(self, src):
        self.cap = video.create_capture(src)
        self.frame = None
        self.paused = False
        self.tracker = PlaneTracker()

        # The window must exist before a mouse callback is attached to it.
        cv2.namedWindow('plane')
        self.rect_sel = common.RectSelector('plane', self.on_rect)

    def on_rect(self, rect):
        '''RectSelector callback: register the selection as a tracking target.'''
        self.tracker.add_target(self.frame, rect)

    def run(self):
        '''Main loop: grab, track, draw and handle keys until ESC or end of video.'''
        while True:
            playing = not self.paused and not self.rect_sel.dragging
            if playing or self.frame is None:
                ret, frame = self.cap.read()
                if not ret:
                    break
                self.frame = frame.copy()

            vis = self.frame.copy()
            if playing:
                tracked = self.tracker.track(self.frame)
                for tr in tracked:
                    cv2.polylines(vis, [np.int32(tr.quad)], True, (255, 255, 255), 2)
                    for (x, y) in np.int32(tr.p1):
                        cv2.circle(vis, (x, y), 2, (255, 255, 255))

            self.rect_sel.draw(vis)
            cv2.imshow('plane', vis)
            ch = cv2.waitKey(1)
            if ch == ord(' '):
                self.paused = not self.paused
            if ch == ord('c'):
                self.tracker.clear()
            if ch == 27:    # ESC
                break
if __name__ == '__main__':
    print(__doc__)

    import sys
    # Use the first command-line argument as the video source; without one,
    # fall back to 0 (presumably the default capture device).
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0
    App(video_src).run()
