Commit e47ca7ab authored by Malthe Borch, committed by Wouter van Oortmerssen

Use a hash table to index existing vtables (#5314)

* Use a hash table to index existing vtables

This allows for quick deduplication even in situations where there
might be thousands of vtables due to 'combinatorial explosion'.

This fixes issue #5301.

* Refactor 0-offset trimming

* Improve deduplication benchmark

The routine now generates a set of realistic logical layouts and
uses a timer function that randomly picks a layout for each iteration.

The benchmark runs in batches, with the number of logical layouts set to 1, 10, 100, and 1000 in turn.

(Note that due to alignment, the actual number of vtables is usually slightly
higher.)
parent d79f4e97
...@@ -94,7 +94,7 @@ class Builder(object): ...@@ -94,7 +94,7 @@ class Builder(object):
It holds the following internal state: It holds the following internal state:
- Bytes: an array of bytes. - Bytes: an array of bytes.
- current_vtable: a list of integers. - current_vtable: a list of integers.
- vtables: a list of vtable entries (i.e. a list of list of integers). - vtables: a hash of vtable entries.
Attributes: Attributes:
Bytes: The internal `bytearray` for the Builder. Bytes: The internal `bytearray` for the Builder.
...@@ -129,7 +129,7 @@ class Builder(object): ...@@ -129,7 +129,7 @@ class Builder(object):
self.head = UOffsetTFlags.py_type(initialSize) self.head = UOffsetTFlags.py_type(initialSize)
self.minalign = 1 self.minalign = 1
self.objectEnd = None self.objectEnd = None
self.vtables = [] self.vtables = {}
self.nested = False self.nested = False
## @endcond ## @endcond
self.finished = False self.finished = False
...@@ -191,52 +191,45 @@ class Builder(object): ...@@ -191,52 +191,45 @@ class Builder(object):
self.PrependSOffsetTRelative(0) self.PrependSOffsetTRelative(0)
objectOffset = self.Offset() objectOffset = self.Offset()
existingVtable = None
vtKey = []
# Trim trailing 0 offsets. trim = True
while self.current_vtable and self.current_vtable[-1] == 0: for elem in reversed(self.current_vtable):
self.current_vtable.pop() if elem == 0:
if trim:
# Search backwards through existing vtables, because similar vtables continue
# are likely to have been recently appended. See else:
# BenchmarkVtableDeduplication for a case in which this heuristic elem = objectOffset - elem
# saves about 30% of the time used in writing objects with duplicate trim = False
# tables.
vtKey.append(elem)
i = len(self.vtables) - 1
while i >= 0: vtKey = tuple(vtKey)
# Find the other vtable, which is associated with `i`: vt2Offset = self.vtables.get(vtKey)
vt2Offset = self.vtables[i] if vt2Offset is None:
vt2Start = len(self.Bytes) - vt2Offset
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
vt2End = vt2Start + vt2Len
vt2 = self.Bytes[vt2Start+metadata:vt2End]
# Compare the other vtable to the one under consideration.
# If they are equal, store the offset and break:
if vtableEqual(self.current_vtable, objectOffset, vt2):
existingVtable = vt2Offset
break
i -= 1
if existingVtable is None:
# Did not find a vtable, so write this one to the buffer. # Did not find a vtable, so write this one to the buffer.
# Write out the current vtable in reverse , because # Write out the current vtable in reverse , because
# serialization occurs in last-first order: # serialization occurs in last-first order:
i = len(self.current_vtable) - 1 i = len(self.current_vtable) - 1
trailing = 0
trim = True
while i >= 0: while i >= 0:
off = 0 off = 0
if self.current_vtable[i] != 0: elem = self.current_vtable[i]
i -= 1
if elem == 0:
if trim:
trailing += 1
continue
else:
# Forward reference to field; # Forward reference to field;
# use 32bit number to ensure no overflow: # use 32bit number to ensure no overflow:
off = objectOffset - self.current_vtable[i] off = objectOffset - elem
trim = False
self.PrependVOffsetT(off) self.PrependVOffsetT(off)
i -= 1
# The two metadata fields are written last. # The two metadata fields are written last.
...@@ -245,7 +238,7 @@ class Builder(object): ...@@ -245,7 +238,7 @@ class Builder(object):
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize)) self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
# Second, store the vtable bytesize: # Second, store the vtable bytesize:
vBytes = len(self.current_vtable) + VtableMetadataFields vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
vBytes *= N.VOffsetTFlags.bytewidth vBytes *= N.VOffsetTFlags.bytewidth
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes)) self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
...@@ -257,17 +250,16 @@ class Builder(object): ...@@ -257,17 +250,16 @@ class Builder(object):
# Finally, store this vtable in memory for future # Finally, store this vtable in memory for future
# deduplication: # deduplication:
self.vtables.append(self.Offset()) self.vtables[vtKey] = self.Offset()
else: else:
# Found a duplicate vtable. # Found a duplicate vtable.
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset) objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
self.head = UOffsetTFlags.py_type(objectStart) self.head = UOffsetTFlags.py_type(objectStart)
# Write the offset to the found vtable in the # Write the offset to the found vtable in the
# already-allocated SOffsetT at the beginning of this object: # already-allocated SOffsetT at the beginning of this object:
encode.Write(packer.soffset, self.Bytes, self.Head(), encode.Write(packer.soffset, self.Bytes, self.Head(),
SOffsetTFlags.py_type(existingVtable - objectOffset)) SOffsetTFlags.py_type(vt2Offset - objectOffset))
self.current_vtable = None self.current_vtable = None
return objectOffset return objectOffset
......
...@@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2] ...@@ -21,6 +21,7 @@ PY_VERSION = sys.version_info[:2]
import ctypes import ctypes
from collections import defaultdict from collections import defaultdict
import math import math
import random
import timeit import timeit
import unittest import unittest
...@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count): ...@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
When count is large (as in long benchmarks), memory usage may be high. When count is large (as in long benchmarks), memory usage may be high.
''' '''
prePop = 10 for prePop in (1, 10, 100, 1000):
builder = flatbuffers.Builder(0) builder = flatbuffers.Builder(0)
n = 1 + int(math.log(prePop, 1.5))
# pre-populate some vtables:
for i in compat_range(prePop): # generate some layouts:
builder.StartObject(i) layouts = set()
for j in compat_range(i): r = list(compat_range(n))
builder.PrependInt16Slot(j, j, 0) while len(layouts) < prePop:
builder.EndObject() layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
# benchmark deduplication of a new vtable: layouts = list(layouts)
def f():
builder.StartObject(prePop) # pre-populate vtables:
for j in compat_range(prePop): for layout in layouts:
builder.PrependInt16Slot(j, j, 0) builder.StartObject(n)
builder.EndObject() for j in layout:
builder.PrependInt16Slot(j, j, 0)
duration = timeit.timeit(stmt=f, number=count) builder.EndObject()
rate = float(count) / duration
print(('vtable deduplication rate: %.2f/sec' % rate)) # benchmark deduplication of a new vtable:
def f():
layout = random.choice(layouts)
builder.StartObject(n)
for j in layout:
builder.PrependInt16Slot(j, j, 0)
builder.EndObject()
duration = timeit.timeit(stmt=f, number=count)
rate = float(count) / duration
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
prePop,
len(builder.vtables),
rate))
)
def BenchmarkCheckReadBuffer(count, buf, off): def BenchmarkCheckReadBuffer(count, buf, off):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment