Commit 20131189 authored by Vadim Pisarevsky

new/improved Python samples by Alexander Mordvintsev

parent 2c2d6fa5
@@ -6,6 +6,12 @@ import itertools as it
image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm']
class Bunch(object):
def __init__(self, **kw):
self.__dict__.update(kw)
def __str__(self):
return str(self.__dict__)
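# Illustrative sketch, not part of the commit: Bunch turns keyword arguments
# into attributes, giving a lightweight ad-hoc record.
opts = Bunch(win = 'plane', flip = True)
print opts.win, opts.flip    # -> plane True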
def splitfn(fn):
path, fn = os.path.split(fn)
name, ext = os.path.splitext(fn)
@@ -198,3 +204,9 @@ def getsize(img):
def mdot(*args):
return reduce(np.dot, args)
def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
for kp in keypoints:
x, y = kp.pt
cv2.circle(vis, (int(x), int(y)), 2, color)
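# Illustrative sketch, not part of the commit: mdot chains matrix products,
# and draw_keypoints marks detected features on a BGR image ('board.jpg' is a
# hypothetical input).
M = mdot(np.eye(3), np.eye(3), np.eye(3))    # same as A.dot(B).dot(C)
img = cv2.imread('board.jpg')
kps = cv2.ORB().detect(img, None)
draw_keypoints(img, kps)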
@@ -3,128 +3,44 @@ Feature homography
==================
Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used.
ORB features and FLANN matcher are used. The actual tracking is implemented by
the PlaneTracker class in plane_tracker.py
Inspired by http://www.youtube.com/watch?v=-ZNYoL8rzPY
video: http://www.youtube.com/watch?v=FirtmYcC0Vc
Usage
-----
feature_homography.py [<video source>]
Select a textured planar object to track by drawing a box with a mouse.
Keys:
SPACE - pause video
Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
import video
import common
from collections import namedtuple
from common import getsize
FLANN_INDEX_KDTREE = 1
FLANN_INDEX_LSH = 6
flann_params= dict(algorithm = FLANN_INDEX_LSH,
table_number = 6, # 12
key_size = 12, # 20
multi_probe_level = 1) #2
MIN_MATCH_COUNT = 10
ar_verts = np.float32([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
[0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
[0.5, 0.5, 2]])
ar_edges = [(0, 1), (1, 2), (2, 3), (3, 0),
(4, 5), (5, 6), (6, 7), (7, 4),
(0, 4), (1, 5), (2, 6), (3, 7),
(4, 8), (5, 8), (6, 8), (7, 8)]
from common import getsize, draw_keypoints
from plane_tracker import PlaneTracker
def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
for kp in keypoints:
x, y = kp.pt
cv2.circle(vis, (int(x), int(y)), 2, color)
class App:
def __init__(self, src):
self.cap = video.create_capture(src)
self.frame = None
self.paused = False
self.ref_frame = None
self.detector = cv2.ORB( nfeatures = 1000 )
self.matcher = cv2.FlannBasedMatcher(flann_params, {}) # bug : need to pass empty dict (#1329)
self.tracker = PlaneTracker()
cv2.namedWindow('plane')
self.rect_sel = common.RectSelector('plane', self.on_rect)
def match_frames(self):
if len(self.frame_desc) < MIN_MATCH_COUNT or len(self.ref_descs) < MIN_MATCH_COUNT:
return
raw_matches = self.matcher.knnMatch(self.frame_desc, k = 2)
p0, p1 = [], []
for m in raw_matches:
if len(m) == 2 and m[0].distance < m[1].distance * 0.75:
m = m[0]
p0.append( self.ref_points[m.trainIdx].pt ) # queryIdx
p1.append( self.frame_points[m.queryIdx].pt )
p0, p1 = np.float32((p0, p1))
if len(p0) < MIN_MATCH_COUNT:
return
H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 4.0)
status = status.ravel() != 0
if status.sum() < MIN_MATCH_COUNT:
return
p0, p1 = p0[status], p1[status]
return p0, p1, H
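# Illustrative sketch, not part of the commit: the heart of match_frames is
# Lowe's ratio test followed by RANSAC homography fitting. The same filtering,
# standalone (the function name and the 0.75 threshold are this sketch's choices):
def filter_matches(matcher, desc1, desc2, ratio = 0.75):
    good = []
    for pair in matcher.knnMatch(desc1, desc2, k = 2):
        if len(pair) == 2 and pair[0].distance < pair[1].distance * ratio:
            good.append(pair[0])
    return good
# The surviving point pairs are then passed to cv2.findHomography(p0, p1,
# cv2.RANSAC, 4.0), whose status mask selects the inliers.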
def on_frame(self, vis):
match = self.match_frames()
if match is None:
return
w, h = getsize(self.frame)
p0, p1, H = match
for (x0, y0), (x1, y1) in zip(np.int32(p0), np.int32(p1)):
cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
x0, y0, x1, y1 = self.ref_rect
corners0 = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
img_corners = cv2.perspectiveTransform(corners0.reshape(1, -1, 2), H)
cv2.polylines(vis, [np.int32(img_corners)], True, (255, 255, 255), 2)
corners3d = np.hstack([corners0, np.zeros((4, 1), np.float32)])
fx = 0.9
K = np.float64([[fx*w, 0, 0.5*(w-1)],
[0, fx*w, 0.5*(h-1)],
[0.0,0.0, 1.0]])
dist_coef = np.zeros(4)
ret, rvec, tvec = cv2.solvePnP(corners3d, img_corners, K, dist_coef)
verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
for i, j in ar_edges:
(x0, y0), (x1, y1) = verts[i], verts[j]
cv2.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (255, 255, 0), 2)
def on_rect(self, rect):
x0, y0, x1, y1 = rect
self.ref_frame = self.frame.copy()
self.ref_rect = rect
points, descs = [], []
for kp, desc in zip(self.frame_points, self.frame_desc):
x, y = kp.pt
if x0 <= x <= x1 and y0 <= y <= y1:
points.append(kp)
descs.append(desc)
self.ref_points, self.ref_descs = points, np.uint8(descs)
self.matcher.clear()
self.matcher.add([self.ref_descs])
self.tracker.clear()
self.tracker.add_target(self.frame, rect)
def run(self):
while True:
@@ -133,23 +49,26 @@ class App:
ret, frame = self.cap.read()
if not ret:
break
self.frame = np.fliplr(frame).copy()
self.frame_points, self.frame_desc = self.detector.detectAndCompute(self.frame, None)
if self.frame_desc is None: # detectAndCompute returns descs=None if no keypoints were found
self.frame_desc = []
self.frame = frame.copy()
w, h = getsize(self.frame)
vis = np.zeros((h, w*2, 3), np.uint8)
vis[:h,:w] = self.frame
if self.ref_frame is not None:
vis[:h,w:] = self.ref_frame
x0, y0, x1, y1 = self.ref_rect
if len(self.tracker.targets) > 0:
target = self.tracker.targets[0]
vis[:,w:] = target.image
draw_keypoints(vis[:,w:], target.keypoints)
x0, y0, x1, y1 = target.rect
cv2.rectangle(vis, (x0+w, y0), (x1+w, y1), (0, 255, 0), 2)
draw_keypoints(vis[:,w:], self.ref_points)
draw_keypoints(vis, self.frame_points)
if playing and self.ref_frame is not None:
self.on_frame(vis)
if playing:
tracked = self.tracker.track(self.frame)
if len(tracked) > 0:
tracked = tracked[0]
cv2.polylines(vis, [np.int32(tracked.quad)], True, (255, 255, 255), 2)
for (x0, y0), (x1, y1) in zip(np.int32(tracked.p0), np.int32(tracked.p1)):
cv2.line(vis, (x0+w, y0), (x1, y1), (0, 255, 0))
draw_keypoints(vis, self.tracker.frame_points)
self.rect_sel.draw(vis)
cv2.imshow('plane', vis)
@@ -159,6 +78,7 @@ class App:
if ch == 27:
break
if __name__ == '__main__':
print __doc__
......
'''
Planar augmented reality
==================
This sample shows an example of an augmented reality overlay over a planar object
tracked by PlaneTracker from plane_tracker.py. The solvePnP function is used to
estimate the tracked object's location in 3D space.
video: http://www.youtube.com/watch?v=pzVbhxx6aog
Usage
-----
plane_ar.py [<video source>]
Keys:
SPACE - pause video
c - clear targets
Select a textured planar object to track by drawing a box with a mouse.
Use the 'focal' slider to adjust the camera focal length for proper video augmentation.
'''
import numpy as np
import cv2
import video
import common
from plane_tracker import PlaneTracker
ar_verts = np.float32([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0],
[0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1],
[0, 0.5, 2], [1, 0.5, 2]])
ar_edges = [(0, 1), (1, 2), (2, 3), (3, 0),
(4, 5), (5, 6), (6, 7), (7, 4),
(0, 4), (1, 5), (2, 6), (3, 7),
(4, 8), (5, 8), (6, 9), (7, 9), (8, 9)]
class App:
def __init__(self, src):
self.cap = video.create_capture(src)
self.frame = None
self.paused = False
self.tracker = PlaneTracker()
cv2.namedWindow('plane')
cv2.createTrackbar('focal', 'plane', 25, 50, common.nothing)
self.rect_sel = common.RectSelector('plane', self.on_rect)
def on_rect(self, rect):
self.tracker.add_target(self.frame, rect)
def run(self):
while True:
playing = not self.paused and not self.rect_sel.dragging
if playing or self.frame is None:
ret, frame = self.cap.read()
if not ret:
break
self.frame = frame.copy()
vis = self.frame.copy()
if playing:
tracked = self.tracker.track(self.frame)
for tr in tracked:
cv2.polylines(vis, [np.int32(tr.quad)], True, (255, 255, 255), 2)
for (x, y) in np.int32(tr.p1):
cv2.circle(vis, (x, y), 2, (255, 255, 255))
self.draw_overlay(vis, tr)
self.rect_sel.draw(vis)
cv2.imshow('plane', vis)
ch = cv2.waitKey(1)
if ch == ord(' '):
self.paused = not self.paused
if ch == ord('c'):
self.tracker.clear()
if ch == 27:
break
def draw_overlay(self, vis, tracked):
x0, y0, x1, y1 = tracked.target.rect
quad_3d = np.float32([[x0, y0, 0], [x1, y0, 0], [x1, y1, 0], [x0, y1, 0]])
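# map the 'focal' trackbar (0..50) to a relative focal length fx in [0.5, 1.5],
# expressed in units of the image width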
fx = 0.5 + cv2.getTrackbarPos('focal', 'plane') / 50.0
h, w = vis.shape[:2]
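# assume an uncalibrated pinhole camera: focal length fx*w pixels,
# principal point at the image center, no lens distortion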
K = np.float64([[fx*w, 0, 0.5*(w-1)],
[0, fx*w, 0.5*(h-1)],
[0.0,0.0, 1.0]])
dist_coef = np.zeros(4)
ret, rvec, tvec = cv2.solvePnP(quad_3d, tracked.quad, K, dist_coef)
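# stretch the unit-size wireframe over the selected rect; Z is scaled by
# -0.3*(rect width) so the model rises out of the plane towards the camera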
verts = ar_verts * [(x1-x0), (y1-y0), -(x1-x0)*0.3] + (x0, y0, 0)
verts = cv2.projectPoints(verts, rvec, tvec, K, dist_coef)[0].reshape(-1, 2)
for i, j in ar_edges:
(x0, y0), (x1, y1) = verts[i], verts[j]
cv2.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (255, 255, 0), 2)
if __name__ == '__main__':
print __doc__
import sys
try: video_src = sys.argv[1]
except: video_src = 0
App(video_src).run()
'''
Multitarget planar tracking
==================
Example of using features2d framework for interactive video homography matching.
ORB features and FLANN matcher are used. This sample provides the PlaneTracker
class and an example of its usage.
video: http://www.youtube.com/watch?v=pzVbhxx6aog
Usage
-----
plane_tracker.py [<video source>]
Keys:
SPACE - pause video
c - clear targets
Select a textured planar object to track by drawing a box with a mouse.
'''
import numpy as np
import cv2
from collections import namedtuple
import video
import common
FLANN_INDEX_KDTREE = 1
FLANN_INDEX_LSH = 6
flann_params= dict(algorithm = FLANN_INDEX_LSH,
table_number = 6, # 12
key_size = 12, # 20
multi_probe_level = 1) #2
MIN_MATCH_COUNT = 10
'''
image - image to track
rect - tracked rectangle (x1, y1, x2, y2)
keypoints - keypoints detected inside rect
descrs - their descriptors
data - some user-provided data
'''
PlanarTarget = namedtuple('PlaneTarget', 'image, rect, keypoints, descrs, data')
'''
target - reference to PlanarTarget
p0 - matched points coords in target image
p1 - matched points coords in input frame
H - homography matrix from p0 to p1
quad - target boundary quad in input frame
'''
TrackedTarget = namedtuple('TrackedTarget', 'target, p0, p1, H, quad')
class PlaneTracker:
def __init__(self):
self.detector = cv2.ORB( nfeatures = 1000 )
self.matcher = cv2.FlannBasedMatcher(flann_params, {}) # bug : need to pass empty dict (#1329)
self.targets = []
def add_target(self, image, rect, data=None):
'''Add a new tracking target.'''
x0, y0, x1, y1 = rect
raw_points, raw_descrs = self.detect_features(image)
points, descs = [], []
for kp, desc in zip(raw_points, raw_descrs):
x, y = kp.pt
if x0 <= x <= x1 and y0 <= y <= y1:
points.append(kp)
descs.append(desc)
descs = np.uint8(descs)
self.matcher.add([descs])
target = PlanarTarget(image = image, rect=rect, keypoints = points, descrs=descs, data=data)
self.targets.append(target)
def clear(self):
'''Remove all targets'''
self.targets = []
self.matcher.clear()
def track(self, frame):
'''Returns a list of detected TrackedTarget objects'''
self.frame_points, self.frame_descrs = self.detect_features(frame)
if len(self.frame_points) < MIN_MATCH_COUNT:
return []
matches = self.matcher.knnMatch(self.frame_descrs, k = 2)
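# Lowe's ratio test: keep the best match only if it is clearly closer than the second best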
matches = [m[0] for m in matches if len(m) == 2 and m[0].distance < m[1].distance * 0.75]
if len(matches) < MIN_MATCH_COUNT:
return []
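# group the surviving matches by training image index (one image per added target)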
matches_by_id = [[] for _ in xrange(len(self.targets))]
for m in matches:
matches_by_id[m.imgIdx].append(m)
tracked = []
for imgIdx, matches in enumerate(matches_by_id):
if len(matches) < MIN_MATCH_COUNT:
continue
target = self.targets[imgIdx]
p0 = [target.keypoints[m.trainIdx].pt for m in matches]
p1 = [self.frame_points[m.queryIdx].pt for m in matches]
p0, p1 = np.float32((p0, p1))
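# fit a homography with RANSAC; the status mask flags the inlier matches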
H, status = cv2.findHomography(p0, p1, cv2.RANSAC, 3.0)
status = status.ravel() != 0
if status.sum() < MIN_MATCH_COUNT:
continue
p0, p1 = p0[status], p1[status]
x0, y0, x1, y1 = target.rect
quad = np.float32([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
quad = cv2.perspectiveTransform(quad.reshape(1, -1, 2), H).reshape(-1, 2)
track = TrackedTarget(target=target, p0=p0, p1=p1, H=H, quad=quad)
tracked.append(track)
tracked.sort(key = lambda t: len(t.p0), reverse=True)
return tracked
def detect_features(self, frame):
'''detect_features(self, frame) -> keypoints, descrs'''
keypoints, descrs = self.detector.detectAndCompute(frame, None)
if descrs is None: # detectAndCompute returns descs=None if no keypoints were found
descrs = []
return keypoints, descrs
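# Illustrative sketch, not part of the commit: minimal PlaneTracker usage
# ('magazine.jpg' and the rectangle are hypothetical stand-ins).
tracker = PlaneTracker()
ref = cv2.imread('magazine.jpg')
tracker.add_target(ref, (50, 50, 300, 400))    # rect is (x0, y0, x1, y1)
ret, frame = cv2.VideoCapture(0).read()
if ret:
    for t in tracker.track(frame):             # one TrackedTarget per detection
        print t.target.rect, t.quad            # quad: target outline in the frame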
class App:
def __init__(self, src):
self.cap = video.create_capture(src)
self.frame = None
self.paused = False
self.tracker = PlaneTracker()
cv2.namedWindow('plane')
self.rect_sel = common.RectSelector('plane', self.on_rect)
def on_rect(self, rect):
self.tracker.add_target(self.frame, rect)
def run(self):
while True:
playing = not self.paused and not self.rect_sel.dragging
if playing or self.frame is None:
ret, frame = self.cap.read()
if not ret:
break
self.frame = frame.copy()
vis = self.frame.copy()
if playing:
tracked = self.tracker.track(self.frame)
for tr in tracked:
cv2.polylines(vis, [np.int32(tr.quad)], True, (255, 255, 255), 2)
for (x, y) in np.int32(tr.p1):
cv2.circle(vis, (x, y), 2, (255, 255, 255))
self.rect_sel.draw(vis)
cv2.imshow('plane', vis)
ch = cv2.waitKey(1)
if ch == ord(' '):
self.paused = not self.paused
if ch == ord('c'):
self.tracker.clear()
if ch == 27:
break
if __name__ == '__main__':
print __doc__
import sys
try: video_src = sys.argv[1]
except: video_src = 0
App(video_src).run()