-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathrectangle_tracker.py
More file actions
356 lines (293 loc) · 13.7 KB
/
rectangle_tracker.py
File metadata and controls
356 lines (293 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
"""
Notes:
1) This algorithm is primarily designed for a rectangular piece of
paper lying flat on a flat surface, but may work in other situations
assuming that the paper's corners are not obstructed.
2) The camera's view of the paper must be unobstructed in the first frame.
"""
# Basic Dependencies
from __future__ import division, print_function
from math import ceil, acos
from time import time
# External Dependencies
import numpy as np
from numpy.linalg import norm
import cv2
# Default User Parameters
VIDEO_FILE_LOCATION = 'sample.avi'  # path of the video file to analyze
FPS_INTERVAL = 10 # updates FPS estimate after this many frames
MHI_DURATION = 10 # max frames remembered by motion history (not used below -- TODO confirm)
FRAME_WIDTH, FRAME_HEIGHT = 640, 424  # requested capture resolution (pixels)
PAPER_RATIO = 11/8.5 # height/width of paper (US letter, portrait)
ROT180 = False # If paper is upside down, change this
REDUCE_DISPLAY_SIZE = False # Use if output window is too big for your screen
# Internal Parameters
tol_corner_movement = 1  # pixels a corner must move to count as "moved"
obst_tol = 10 # edge-length change (pixels) treated as an obstruction
closing_iterations = 10  # iterations of morphological closing on the binary image
show_thresholding = False # Use to display thresholding
def rotate180(im):
    """Return a copy of `im` rotated by 180 degrees."""
    # flipCode=-1 flips about both axes, which is equivalent to a 180° turn
    flipped = cv2.flip(im, -1)
    return flipped
def persTransform(pts, H):
    """Apply the 3x3 perspective transform `H` to the 2D points `pts`.

    Returns the transformed points as an (N, 2) float32 array.
    """
    # cv2.perspectiveTransform wants shape (N, 1, 2); cast to float64 first
    pts_arr = np.array(pts, dtype=float).reshape(len(pts), 1, 2)
    warped = cv2.perspectiveTransform(pts_arr, H)
    return np.array(warped.reshape(len(pts), 2), dtype='float32')
def affTransform(pts, A):
    """Apply the affine transform `A` to the 2D points `pts`.

    Returns the transformed points as an (N, 2) float32 array.
    """
    # cv2.transform wants shape (N, 1, 2); cast to float64 first
    pts_arr = np.array(pts, dtype=float).reshape(len(pts), 1, 2)
    moved = cv2.transform(pts_arr, A)
    return np.array(moved.reshape(len(pts), 2), dtype='float32')
def draw_polygon(im, vertices, vertex_colors=None, edge_colors=None,
                 alter_input_image=False, draw_edges=True, draw_vertices=True,
                 display=False, title='', pause=False):
    """Return `im` (or a copy) with the closed polygon `vertices` drawn on it.

    Args:
        im: BGR image to draw on.
        vertices: sequence of (x, y) points, or None to draw nothing.
        vertex_colors: per-vertex BGR colors (defaults to blue for all).
        edge_colors: per-edge BGR colors (defaults to blue for all).
        alter_input_image: if True, draw on `im` in place instead of a copy.
        draw_edges: draw the polygon edges.
        draw_vertices: draw filled circles at the vertices.
        display: show the result in a window named `title`.
        pause: together with `display`, block until a key is pressed.

    Returns:
        The image with the polygon drawn on it.
    """
    _default_vertex_color = (255, 0, 0)
    _default_edge_color = (255, 0, 0)
    if not alter_input_image:
        im2 = im.copy()
    else:
        im2 = im
    if vertices is not None:
        N = len(vertices)
        # BUG FIX: OpenCV drawing functions require integer pixel
        # coordinates; the subpixel (float32) corners produced by
        # cornerSubPix crash cv2.circle/cv2.line, so round them here.
        vertices = [tuple(int(round(c)) for c in v) for v in vertices]
        if vertex_colors is None:
            vertex_colors = [_default_vertex_color] * N
        if edge_colors is None:
            edge_colors = [_default_edge_color] * N
        for i in range(N):
            startpt = vertices[(i - 1) % N]
            endpt = vertices[i]
            if draw_vertices:
                cv2.circle(im2, startpt, 3, vertex_colors[(i - 1) % N], -1)
            if draw_edges:
                cv2.line(im2, startpt, endpt, edge_colors[(i - 1) % N], 2)
    if display:
        cv2.imshow(title, im2)
        # Note: `0xFF == ord('q')`is apparently necessary for 64bit machines
        if pause and cv2.waitKey(0) & 0xFF == ord('q'):
            pass
    return im2
def run_main():
    """Track a rectangular sheet of paper through a video feed.

    Reads frames from VIDEO_FILE_LOCATION, locates the paper's four
    corners, compensates for obstructed corners using the previous
    frame's homography, and displays the segmented frame side-by-side
    with the rectified (perspective-corrected) paper.  Press 'q' to quit.
    """
    # Initialize some variables
    frame = None
    old_homog = None      # image plane -> paper plane homography (prev frame)
    old_inv_homog = None  # paper plane -> image plane homography (prev frame)
    corner_history = []
    last_unob_corners = None  # most recent fully-unobstructed corner set
    video_feed = cv2.VideoCapture(VIDEO_FILE_LOCATION)
    # NOTE(review): cv2.cv constants are the legacy OpenCV 2.x API; on
    # OpenCV 3+ these would be cv2.CAP_PROP_FRAME_WIDTH/_HEIGHT.
    video_feed.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH, FRAME_WIDTH)
    video_feed.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT)
    frame_count = 0
    fps_time = time()
    while True:
        # corner colors: red by default, green when flagged as obstructed
        c_colors = [(0, 0, 255)] * 4
        # grab current frame from video_feed
        _, frame = video_feed.read()
        # BUG FIX: end-of-feed was detected via a bare `except:` around
        # cvtColor, which also swallowed unrelated errors -- test explicitly
        if frame is None:
            print("\nVideo feed ended.\n")
            break
        # Report FPS
        # BUG FIX: was `frame_count % 10`, ignoring FPS_INTERVAL
        if not (frame_count % FPS_INTERVAL):
            fps = FPS_INTERVAL/(time() - fps_time)
            print('Frame:', frame_count, ' | FPS:', fps)
            fps_time = time()
        frame_count += 1
        # Convert to grayscale
        gray_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # get binary thresholding of image
        gray_smooth = cv2.GaussianBlur(gray_img, (15, 15), 0)
        _, bin_img = cv2.threshold(gray_smooth, 100, 255, cv2.THRESH_BINARY)
        # morphological closing (fills small dark holes in the paper region)
        kernel = np.ones((3, 3), np.uint8)
        bin_img = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE,
                                   kernel, iterations=closing_iterations)
        # Find corners. To do this:
        # 1) Find the largest (area) contour in frame (after thresholding)
        # 2) get contour's convex hull,
        # 3) reduce degree of convex hull with Douglas-Peucker algorithm,
        # 4) refine corners with subpixel corner finder
        # step 1
        contours, _ = cv2.findContours(bin_img, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        biggest_contour = max(contours, key=cv2.contourArea)
        # step 2
        hull = cv2.convexHull(biggest_contour)
        epsilon = 0.05 * cv2.arcLength(biggest_contour, True)
        # step 3
        hull = cv2.approxPolyDP(hull, epsilon, True)
        # step 4
        hull = np.float32(hull)
        method = cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT
        criteria = (method, 1000, 1e-4)
        cv2.cornerSubPix(gray_img, hull, (5, 5), (-1, -1), criteria)
        # NOTE(review): the code below assumes the hull reduced to exactly
        # four points -- TODO confirm approxPolyDP always yields 4 here
        corners = [pt[0] for pt in hull]
        # Find the top-left corner (minimal x + y) and use it to label corners
        # Note: currently corners are in CW order
        # Note: ordering will be checked below against expected corners
        # BUG FIX: np.argmin was handed a generator expression, which it
        # cannot reduce over -- materialize the key values as a list.
        tl_index = np.argmin([c[0] + c[1] for c in corners])
        tl = corners[tl_index]
        bl = corners[(tl_index - 1) % 4]
        br = corners[(tl_index - 2) % 4]
        tr = corners[(tl_index - 3) % 4]
        # reformat and ensure that ordering is as expected below
        corners = np.float32([[c[0], c[1]] for c in [tl, bl, br, tr]])
        # IMPORTANT ASSUMPTIONS on paper tracking (used in code block below):
        # 1) if any one point is stationary from previous frame, then all
        # are stationary with probability 1.
        # 2) if any corners are obstructed, assume the paper is still flat
        # against the same plane as it was in the previous frame.
        # I.e. the transformation from previous frame to this frame should
        # be of the form of a translation and a rotation in said plane.
        # 3) see code comments for additional assumptions, haha, sorry
        def get_edge_lengths(topl, botl, botr, topr):
            """Takes in list of four corners, returns four edge lengths
            in order top, right, bottom, left."""
            tbrl = [topr - topl, topr - botr, botr - botl, botl - topl]
            return [norm(edge) for edge in tbrl]
        if not corner_history:
            # first frame: by assumption (paper unobstructed) trust it fully
            last_unob_corners = corners
        else:
            # determine expected corner locations and edge lengths
            expected_corners = last_unob_corners
            # NOTE(review): the original branched on `last_unob_corners is
            # None` here, but both branches computed the same value.
            expected_lengths = get_edge_lengths(*last_unob_corners)
            # check ordering: try all four cyclic relabelings of the found
            # corners and keep the one closest to the expected corners
            def cyclist(lst, k):
                """Return `lst` cyclically shifted left by `k`."""
                if k:
                    return [lst[(i + k) % len(lst)] for i in range(len(lst))]
                return lst
            def _order_dist(offset):
                """Distance of a cyclic relabeling from the expected corners."""
                offset_corners = corners[cyclist(range(4), offset)]
                return norm(expected_corners - offset_corners), offset_corners
            # BUG FIX: explicit key avoids comparing the (ambiguous) array
            # second elements when two distances tie
            corners = min((_order_dist(k) for k in range(4)),
                          key=lambda pair: pair[0])[1]
            # Look for obstructions by looking for changes in edge lengths
            # Note: these lengths are not perspective invariant
            # TODO: checking by Hessian may be a better method
            new_lengths = get_edge_lengths(*corners)
            top_is_bad, rgt_is_bad, bot_is_bad, lft_is_bad = \
                [abs(l0 - l1) > obst_tol for l1, l0 in
                 zip(new_lengths, expected_lengths)]
            # a corner is presumed obstructed when both adjacent edges changed
            tl_ob = top_is_bad and lft_is_bad
            bl_ob = bot_is_bad and lft_is_bad
            br_ob = bot_is_bad and rgt_is_bad
            tr_ob = top_is_bad and rgt_is_bad
            is_obstr = [tl_ob, bl_ob, br_ob, tr_ob]
            ob_indices = [i for i, c in enumerate(is_obstr) if c]
            ob_corner_ct = sum(is_obstr)
            c_colors = [(0, 255, 0) if b else (0, 0, 255) for b in is_obstr]
            # Find difference of corners from expected location
            diffs = [norm(c - ec) for c, ec in zip(corners, expected_corners)]
            has_moved = [d > tol_corner_movement for d in diffs]
            # Check if paper has likely moved
            if sum(has_moved) < 4:
                # By assumption (1), if any corner is stationary the paper
                # has not moved, so snap back to the last unobstructed set.
                corners = last_unob_corners
            else:
                if sum(has_moved) == 1:
                    # NOTE(review): unreachable -- this else branch only runs
                    # when all four corners moved (sum(has_moved) == 4)
                    bad_corner_idx = np.argmax(diffs)
                    corners[bad_corner_idx] = expected_corners[bad_corner_idx]
                else:  # find paper's affine transformation in expected plane
                    print("frame={} | ob_corner_ct={}"
                          "".format(frame_count, ob_corner_ct))
                    if ob_corner_ct in (1, 2, 3):
                        # split corners into obstructed/unobstructed groups,
                        # mapped into the paper plane via the old homography
                        # BUG FIX: materialize zips so they can be iterated
                        # twice (zip is a one-shot iterator on Python 3)
                        eco = list(zip(expected_corners, is_obstr))
                        exp_unob = np.float32([c for c, b in eco if not b])
                        exp_ob = np.float32([c for c, b in eco if b])
                        co = list(zip(corners, is_obstr))
                        new_unob = np.float32([c for c, b in co if not b])
                        exp_unob_pp = persTransform(exp_unob, old_homog)
                        exp_ob_pp = persTransform(exp_ob, old_homog)
                        new_unob_pp = persTransform(new_unob, old_homog)
                    # check for obstructions
                    if ob_corner_ct == 0:  # yay! no obstructed corners!
                        pass
                    elif ob_corner_ct == 1:
                        # Find the affine transformation in the paper's plane
                        # from expected locations of the three unobstructed
                        # corners to the found locations, then use this to
                        # estimate the obstructed corner's location
                        A = cv2.getAffineTransform(exp_unob_pp, new_unob_pp)
                        new_ob_pp = affTransform(exp_ob_pp, A)
                        new_ob = persTransform(new_ob_pp, old_inv_homog)
                        corners[np.ix_(ob_indices)] = new_ob
                    elif ob_corner_ct == 2:
                        # Align the line between the good corners
                        # with the same line w.r.t the old corners
                        p1, q1 = new_unob_pp[0], new_unob_pp[1]
                        p0, q0 = exp_unob_pp[0], exp_unob_pp[1]
                        u0 = (q0 - p0) / norm(q0 - p0)
                        u1 = (q1 - p1) / norm(q1 - p1)
                        # BUG FIX: clamp to [-1, 1] so rounding error cannot
                        # push the dot product outside acos's domain
                        angle = acos(np.clip(np.dot(u0, u1), -1, 1))  # unsigned
                        trans = p1 - p0
                        # Find rotation that moves u0 to u1
                        # BUG FIX: getRotationMatrix2D takes the angle in
                        # degrees; it was being passed radians
                        rotat = cv2.getRotationMatrix2D(tuple(p1),
                                                        np.degrees(angle), 1)
                        rotat = rotat[:, :2]  # keep only the linear 2x2 part
                        # Expensive sign check for angle (could be improved)
                        if norm(np.dot(u0, rotat) - u1) > norm(np.dot(u1, rotat) - u0):
                            rotat = np.linalg.inv(rotat)
                        # transform the old coords of the hidden corners
                        # and map them back to the image plane
                        exp_ob_pp += trans
                        new_ob_pp = affTransform(exp_ob_pp, rotat)
                        new_ob = persTransform(new_ob_pp, old_inv_homog)
                        corners[np.ix_(ob_indices)] = new_ob
                    elif ob_corner_ct in (3, 4):
                        # too little information -- fall back to expectation
                        print("Uh oh, {} corners obstructed..."
                              "".format(ob_corner_ct))
                        corners = expected_corners
                    else:
                        raise Exception("This should never happen.")
        # Homography: map the found corners onto an axis-aligned rectangle
        w = max(abs(br[0] - bl[0]),
                abs(tr[0] - tl[0]))  # width of paper in pixels
        h = PAPER_RATIO * w
        corners_pp = np.float32([[0, 0], [0, h], [w, h], [w, 0]])
        homog, mask = cv2.findHomography(corners, corners_pp)
        inv_homog, inv_mask = cv2.findHomography(corners_pp, corners)
        paper = cv2.warpPerspective(frame, homog, (int(ceil(w)), int(ceil(h))))
        if ROT180:
            paper = rotate180(paper)
        # Draw detected paper boundary on frame
        segmented_frame = draw_polygon(frame, corners, c_colors)
        # Resize paper to match the frame height for side-by-side display
        h = segmented_frame.shape[0]
        paper_w = int(round(h*paper.shape[1]/paper.shape[0]))
        resized_paper = cv2.resize(paper, (paper_w, h))
        # Display
        big_img = np.hstack((segmented_frame, resized_paper))
        if show_thresholding:
            bin_img = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2BGR)
            big_img = np.hstack((big_img, bin_img))
        if REDUCE_DISPLAY_SIZE:
            reduced_size = tuple(np.array(big_img.shape[:2][::-1])//2)
            # BUG FIX: the reduced image was computed but never displayed
            big_img = cv2.resize(big_img, reduced_size)
        cv2.imshow('', big_img)
        # Updates for next iteration
        corner_history.append(corners)
        old_homog = homog
        old_inv_homog = inv_homog
        # this is apparently necessary for 64bit machines
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_feed.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # try/finally replaces the bare `except:` + manual re-raise: windows
    # are torn down on any exit path and exceptions still propagate.
    # (destroyAllWindows is harmless if run_main already called it.)
    try:
        run_main()
    finally:
        cv2.destroyAllWindows()