Use a hash table to index existing vtables (#5314)

* Use a hash table to index existing vtables.
  This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatoric explosion'. This fixes issue #5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark.
  The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches where the number of logical layouts is 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)

Use a hash table to index existing vtables (#5314)
* Use a hash table to index existing vtables.
  This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatoric explosion'. This fixes issue #5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark.
  The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches where the number of logical layouts is 1, 10, 100, and 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
e47ca7ab · Malthe Borch · Wouter van Oortmerssen · d79f4e97 · e47ca7ab · e47ca7ab
Commit e47ca7ab authored May 06, 2019 by Malthe Borch Committed by Wouter van Oortmerssen May 06, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 68 additions and 61 deletions

builder.py python/flatbuffers/builder.py +33 -41

py_test.py tests/py_test.py +35 -20

No files found.
--- a/python/flatbuffers/builder.py
+++ b/python/flatbuffers/builder.py
@@ -94,7 +94,7 @@ class Builder(object):
    It holds the following internal state:
        - Bytes: an array of bytes.
        - current_vtable: a list of integers.
-        - vtables: a list of vtable entries (i.e. a list of list of integers).
+        - vtables: a hash of vtable entries.
    Attributes:
      Bytes: The internal `bytearray` for the Builder.
@@ -129,7 +129,7 @@ class Builder(object):
        self.head = UOffsetTFlags.py_type(initialSize)
        self.minalign = 1
        self.objectEnd = None
-        self.vtables = []
+        self.vtables = {}
        self.nested = False
        ## @endcond
        self.finished = False
@@ -191,52 +191,45 @@ class Builder(object):
        self.PrependSOffsetTRelative(0)
        objectOffset = self.Offset()
-        existingVtable = None
+        vtKey = []
-        # Trim trailing 0 offsets.
+        trim = True
-        while self.current_vtable and self.current_vtable[-1] == 0:
+        for elem in reversed(self.current_vtable):
-            self.current_vtable.pop()
+            if elem == 0:
+                if trim:
-        # Search backwards through existing vtables, because similar vtables
+                    continue
-        # are likely to have been recently appended. See
+            else:
-        # BenchmarkVtableDeduplication for a case in which this heuristic
+                elem = objectOffset - elem
-        # saves about 30% of the time used in writing objects with duplicate
+                trim = False
-        # tables.
+            vtKey.append(elem)
-        i = len(self.vtables) - 1
-        while i >= 0:
+        vtKey = tuple(vtKey)
-            # Find the other vtable, which is associated with `i`:
+        vt2Offset = self.vtables.get(vtKey)
-            vt2Offset = self.vtables[i]
+        if vt2Offset is None:
-            vt2Start = len(self.Bytes) - vt2Offset
-            vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
-            metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
-            vt2End = vt2Start + vt2Len
-            vt2 = self.Bytes[vt2Start+metadata:vt2End]
-            # Compare the other vtable to the one under consideration.
-            # If they are equal, store the offset and break:
-            if vtableEqual(self.current_vtable, objectOffset, vt2):
-                existingVtable = vt2Offset
-                break
-            i -= 1
-        if existingVtable is None:
            # Did not find a vtable, so write this one to the buffer.
            # Write out the current vtable in reverse , because
            # serialization occurs in last-first order:
            i = len(self.current_vtable) - 1
+            trailing = 0
+            trim = True
            while i >= 0:
                off = 0
-                if self.current_vtable[i] != 0:
+                elem = self.current_vtable[i]
+                i -= 1
+                if elem == 0:
+                    if trim:
+                        trailing += 1
+                        continue
+                else:
                    # Forward reference to field;
                    # use 32bit number to ensure no overflow:
-                    off = objectOffset - self.current_vtable[i]
+                    off = objectOffset - elem
+                    trim = False
                self.PrependVOffsetT(off)
-                i -= 1
            # The two metadata fields are written last.
@@ -245,7 +238,7 @@ class Builder(object):
            self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
            # Second, store the vtable bytesize:
-            vBytes = len(self.current_vtable) + VtableMetadataFields
+            vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
            vBytes *= N.VOffsetTFlags.bytewidth
            self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
@@ -257,17 +250,16 @@ class Builder(object):
            # Finally, store this vtable in memory for future
            # deduplication:
-            self.vtables.append(self.Offset())
+            self.vtables[vtKey] = self.Offset()
        else:
            # Found a duplicate vtable.
            objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
            self.head = UOffsetTFlags.py_type(objectStart)
            # Write the offset to the found vtable in the
            # already-allocated SOffsetT at the beginning of this object:
            encode.Write(packer.soffset, self.Bytes, self.Head(),
-                         SOffsetTFlags.py_type(existingVtable - objectOffset))
+                         SOffsetTFlags.py_type(vt2Offset - objectOffset))
        self.current_vtable = None
        return objectOffset

--- a/tests/py_test.py
+++ b/tests/py_test.py
@@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
 import ctypes
 from collections import defaultdict
 import math
+import random
 import timeit
 import unittest
@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
    When count is large (as in long benchmarks), memory usage may be high.
    '''
-    prePop = 10
+    for prePop in (1, 10, 100, 1000):
-    builder = flatbuffers.Builder(0)
+        builder = flatbuffers.Builder(0)
+        n = 1 + int(math.log(prePop, 1.5))
-    # pre-populate some vtables:
-    for i in compat_range(prePop):
+        # generate some layouts:
-        builder.StartObject(i)
+        layouts = set()
-        for j in compat_range(i):
+        r = list(compat_range(n))
-            builder.PrependInt16Slot(j, j, 0)
+        while len(layouts) < prePop:
-        builder.EndObject()
+            layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
-    # benchmark deduplication of a new vtable:
+        layouts = list(layouts)
-    def f():
-        builder.StartObject(prePop)
+        # pre-populate vtables:
-        for j in compat_range(prePop):
+        for layout in layouts:
-            builder.PrependInt16Slot(j, j, 0)
+            builder.StartObject(n)
-        builder.EndObject()
+            for j in layout:
+                builder.PrependInt16Slot(j, j, 0)
-    duration = timeit.timeit(stmt=f, number=count)
+            builder.EndObject()
-    rate = float(count) / duration
-    print(('vtable deduplication rate: %.2f/sec' % rate))
+        # benchmark deduplication of a new vtable:
+        def f():
+            layout = random.choice(layouts)
+            builder.StartObject(n)
+            for j in layout:
+                builder.PrependInt16Slot(j, j, 0)
+            builder.EndObject()
+        duration = timeit.timeit(stmt=f, number=count)
+        rate = float(count) / duration
+        print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
+            prePop,
+            len(builder.vtables),
+            rate))
+        )
 def BenchmarkCheckReadBuffer(count, buf, off):