Commit 90eae25e authored by Kenton Varda

Improve how struct lists work.

parent e9fa0685
@@ -79,30 +79,58 @@ struct Descriptor {
enum class FieldSize: uint8_t {
// TODO: Rename to FieldLayout or maybe ValueLayout.
BIT = 0,
BYTE = 1,
TWO_BYTES = 2,
FOUR_BYTES = 3,
EIGHT_BYTES = 4,
REFERENCE = 5, // Indicates that the field lives in the reference segment, not the data segment.
KEY_REFERENCE = 6, // A 64-bit key, 64-bit reference pair. Valid only in lists.
STRUCT = 7 // An arbitrary-sized inlined struct. Used only for list elements, not struct
// fields, since a struct cannot embed another struct inline.
VOID = 0,
BIT = 1,
BYTE = 2,
TWO_BYTES = 3,
FOUR_BYTES = 4,
EIGHT_BYTES = 5,
REFERENCE = 6, // Indicates that the field lives in the reference segment, not the data segment.
INLINE_COMPOSITE = 7
// A composite type of fixed width. This serves two purposes:
// 1) For lists of composite types where all the elements would have the exact same width,
// allocating a list of references which in turn point at the elements would waste space. We
// can avoid a layer of indirection by placing all the elements in a flat sequence, and only
// indicating the element properties (e.g. field count for structs) once.
//
// Specifically, a list reference indicating INLINE_COMPOSITE element size actually points to
// a "tag" describing one element. This tag is formatted like a wire reference, but the
// "offset" instead stores the element count of the list. The flat list of elements appears
// immediately after the tag. In the list reference itself, the element count is replaced with
// a word count for the whole list (excluding tag). This allows the tag and elements to be
// precached in a single step rather than two sequential steps.
//
// It is NOT intended to be possible to substitute an INLINE_COMPOSITE list for a REFERENCE
// list or vice-versa without breaking recipients. Recipients expect one or the other
// depending on the message definition.
//
// However, it IS allowed to substitute an INLINE_COMPOSITE list -- specifically, of structs --
// when a list of primitives was expected, or vice versa, with the assumption that the first
// field of the struct (field number zero) corresponds to the element type. This allows a list
// of primitives to be upgraded to a list of structs, avoiding the need to use parallel arrays
// when you realize that you need to attach some extra information to each element of some
// primitive list.
//
// 2) For struct fields of composite types where the field's total size is known at compile
// time, we can embed the field directly into the parent struct to avoid indirection through
// a reference. However, this means that the field size can never change -- e.g. if it is a
// struct, new fields cannot be added to it. It's unclear whether this is really useful, so at
// this time it is not supported.
};
typedef decltype(BITS / ELEMENTS) BitsPerElement;
namespace internal {
static constexpr BitsPerElement BITS_PER_ELEMENT_TABLE[] = {
static constexpr BitsPerElement BITS_PER_ELEMENT_TABLE[8] = {
0 * BITS / ELEMENTS,
1 * BITS / ELEMENTS,
8 * BITS / ELEMENTS,
16 * BITS / ELEMENTS,
32 * BITS / ELEMENTS,
64 * BITS / ELEMENTS,
64 * BITS / ELEMENTS,
128 * BITS / ELEMENTS,
0 * BITS / ELEMENTS
};
}
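The INLINE_COMPOSITE comment above packs the whole layout into prose, so a short sketch of the arithmetic it implies may help. Everything here is illustrative: ToyListRef and ToyTag are invented stand-ins rather than the commit's actual wire reference type, plain integers replace the unit-tagged WordCount/ElementCount quantities used throughout the diff, and the fields are taken from the comment, not from the encoding code.

#include <cstdint>

// Invented stand-in for a list reference whose element size code is INLINE_COMPOSITE.
struct ToyListRef {
  uint32_t target;        // word position of the referenced object (the tag)
  uint32_t countOrWords;  // normally the element count; here the word count of the
                          // flat element area, excluding the tag
  uint8_t elementSize;    // one of the FieldSize codes above
};

// Invented stand-in for the tag: shaped like a reference, but its "offset" slot
// carries the element count, and it states the element properties once for all
// elements in the flat list.
struct ToyTag {
  uint32_t elementCount;
  uint16_t fieldCount;
  uint16_t wordsPerElement;
};

// Words to fetch for the entire list object: tag plus flat elements, known from
// the reference alone.
inline uint32_t wordsToPrecache(const ToyListRef& ref) {
  return 1 /* tag */ + ref.countOrWords;
}

// Word position of element i: skip the tag, then stride by the per-element width
// stated once in the tag.
inline uint32_t elementWordPos(const ToyListRef& ref, const ToyTag& tag, uint32_t i) {
  return ref.target + 1 + i * tag.wordsPerElement;
}

Carrying a word count in the reference (instead of an element count) is what enables the "single step" mentioned above: a reader can bounds-check and fetch the tag plus the whole element area before it knows anything about the element shape.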
@@ -190,7 +218,7 @@ struct FieldDescriptor {
// If the field is a reference field (size == REFERENCE), then this is the index within the
// reference array at which the field is located.
//
// A value of INVALID_FIELD_OFFSET means that this is a void field.
// For void fields, the offset is irrelevant and may be INVALID_FIELD_OFFSET.
ByteCount16 unionTagOffset;
// Offset within the data segment at which a union tag exists deciding whether this field is
......
@@ -53,7 +53,7 @@ MallocMessage::MallocMessage(WordCount preferredSegmentSize)
MallocMessage::~MallocMessage() {}
SegmentReader* MallocMessage::tryGetSegment(SegmentId id) {
if (id.value > segments.size()) {
if (id.value >= segments.size()) {
return nullptr;
} else {
return segments[id.value].get();
......
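The one-character change above fixes an off-by-one: with three segments the valid ids are 0, 1, and 2, yet the old `>` comparison let id.value == segments.size() through to an out-of-bounds index. A standalone sketch of the corrected check, with a plain vector and integer id standing in for the real segment table and SegmentId:

#include <cstdint>
#include <memory>
#include <vector>

struct SegmentReader {};  // placeholder for the real class

SegmentReader* tryGetSegment(const std::vector<std::unique_ptr<SegmentReader>>& segments,
                             uint32_t id) {
  if (id >= segments.size()) {  // '>' would wrongly accept id == segments.size()
    return nullptr;
  }
  return segments[id].get();
}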
@@ -427,6 +427,8 @@ constexpr auto BITS_PER_REFERENCE = 64 * BITS / REFERENCES;
constexpr auto BYTES_PER_REFERENCE = 8 * BYTES / REFERENCES;
constexpr auto WORDS_PER_REFERENCE = 1 * WORDS / REFERENCES;
constexpr WordCount REFERENCE_SIZE_IN_WORDS = 1 * REFERENCES * WORDS_PER_REFERENCE;
template <typename T>
inline constexpr decltype(BYTES / ELEMENTS) bytesPerElement() {
return sizeof(T) * BYTES / ELEMENTS;
......
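bytesPerElement<T>() returns a quantity tagged with BYTES / ELEMENTS units rather than a bare integer, which is what lets bounds checks such as offset * bytesPerElement<T>() < dataSize * BYTES_PER_WORD (see StructReader::getDataField further down) type-check dimensionally. The library's own unit machinery is not shown on this page, so the following is only a toy model of the idea, with invented names:

#include <cstdint>

template <typename Num, typename Den>
struct Per {};                        // phantom "Num per Den" unit
struct ByteUnit {};
struct WordUnit {};
struct ElementUnit {};

template <typename Unit>
struct Quantity { uint64_t value; };  // a number carrying a unit tag

// elements * (bytes / element) = bytes
inline Quantity<ByteUnit> operator*(Quantity<ElementUnit> n,
                                    Quantity<Per<ByteUnit, ElementUnit>> rate) {
  return Quantity<ByteUnit>{n.value * rate.value};
}
// words * (bytes / word) = bytes
inline Quantity<ByteUnit> operator*(Quantity<WordUnit> n,
                                    Quantity<Per<ByteUnit, WordUnit>> rate) {
  return Quantity<ByteUnit>{n.value * rate.value};
}
inline bool operator<(Quantity<ByteUnit> a, Quantity<ByteUnit> b) {
  return a.value < b.value;
}

// Same shape as bytesPerElement<T>() above: sizeof(T) bytes per element.
template <typename T>
constexpr Quantity<Per<ByteUnit, ElementUnit>> toyBytesPerElement() {
  return Quantity<Per<ByteUnit, ElementUnit>>{sizeof(T)};
}

// Mirrors the bounds check in StructReader::getDataField: an offset in elements
// and a size in words can only be compared after both are converted to bytes.
inline bool toyInBounds(Quantity<ElementUnit> offset, Quantity<WordUnit> dataSize) {
  Quantity<Per<ByteUnit, WordUnit>> bytesPerWord{8};
  return offset * toyBytesPerElement<uint32_t>() < dataSize * bytesPerWord;
}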
@@ -150,6 +150,59 @@ static void setupStruct(StructBuilder builder) {
}
}
static void checkStruct(StructBuilder builder) {
EXPECT_EQ(0x1011121314151617ull, builder.getDataField<uint64_t>(0 * ELEMENTS));
EXPECT_EQ(0x20212223u, builder.getDataField<uint32_t>(2 * ELEMENTS));
EXPECT_EQ(0x3031u, builder.getDataField<uint16_t>(6 * ELEMENTS));
EXPECT_EQ(0x40u, builder.getDataField<uint8_t>(14 * ELEMENTS));
EXPECT_FALSE(builder.getDataField<bool>(120 * ELEMENTS));
EXPECT_FALSE(builder.getDataField<bool>(121 * ELEMENTS));
EXPECT_TRUE (builder.getDataField<bool>(122 * ELEMENTS));
EXPECT_FALSE(builder.getDataField<bool>(123 * ELEMENTS));
EXPECT_TRUE (builder.getDataField<bool>(124 * ELEMENTS));
EXPECT_TRUE (builder.getDataField<bool>(125 * ELEMENTS));
EXPECT_TRUE (builder.getDataField<bool>(126 * ELEMENTS));
EXPECT_FALSE(builder.getDataField<bool>(127 * ELEMENTS));
{
StructBuilder subStruct = builder.getStructField(
0 * REFERENCES, FieldNumber(1), 1 * WORDS, 0 * REFERENCES);
EXPECT_EQ(123u, subStruct.getDataField<uint32_t>(0 * ELEMENTS));
}
{
ListBuilder list = builder.getListField(1 * REFERENCES, FieldSize::FOUR_BYTES);
ASSERT_EQ(3 * ELEMENTS, list.size());
EXPECT_EQ(200, list.getDataElement<int32_t>(0 * ELEMENTS));
EXPECT_EQ(201, list.getDataElement<int32_t>(1 * ELEMENTS));
EXPECT_EQ(202, list.getDataElement<int32_t>(2 * ELEMENTS));
}
{
ListBuilder list = builder.getListField(2 * REFERENCES, FieldSize::INLINE_COMPOSITE);
ASSERT_EQ(4 * ELEMENTS, list.size());
for (int i = 0; i < 4; i++) {
StructBuilder element = list.getStructElement(i * ELEMENTS, 2 * WORDS / ELEMENTS, 1 * WORDS);
EXPECT_EQ(300 + i, element.getDataField<int32_t>(0 * ELEMENTS));
EXPECT_EQ(400 + i,
element.getStructField(0 * REFERENCES, FieldNumber(1), 1 * WORDS, 0 * REFERENCES)
.getDataField<int32_t>(0 * ELEMENTS));
}
}
{
ListBuilder list = builder.getListField(3 * REFERENCES, FieldSize::REFERENCE);
ASSERT_EQ(5 * ELEMENTS, list.size());
for (uint i = 0; i < 5; i++) {
ListBuilder element = list.getListElement(i * REFERENCES, FieldSize::TWO_BYTES);
ASSERT_EQ((i + 1) * ELEMENTS, element.size());
for (uint j = 0; j <= i; j++) {
EXPECT_EQ(500u + j, element.getDataElement<uint16_t>(j * ELEMENTS));
}
}
}
}
static void checkStruct(StructReader reader) {
EXPECT_EQ(0x1011121314151617ull, reader.getDataField<uint64_t>(0 * ELEMENTS, 1616));
EXPECT_EQ(0x20212223u, reader.getDataField<uint32_t>(2 * ELEMENTS, 1616));
@@ -181,7 +234,7 @@ static void checkStruct(StructReader reader) {
{
// TODO: Use valid default value.
ListReader list = reader.getListField(2 * REFERENCES, FieldSize::STRUCT, nullptr);
ListReader list = reader.getListField(2 * REFERENCES, FieldSize::INLINE_COMPOSITE, nullptr);
ASSERT_EQ(4 * ELEMENTS, list.size());
for (int i = 0; i < 4; i++) {
StructReader element = list.getStructElement(i * ELEMENTS, nullptr);
@@ -206,7 +259,7 @@ static void checkStruct(StructReader reader) {
}
}
TEST(WireFormat, StructRoundTrip) {
TEST(WireFormat, StructRoundTrip_OneSegment) {
std::unique_ptr<MessageBuilder> message = newMallocMessage(512 * WORDS);
SegmentBuilder* segment = message->getSegmentWithAvailable(1 * WORDS);
word* rootLocation = segment->allocate(1 * WORDS);
@@ -233,12 +286,13 @@ TEST(WireFormat, StructRoundTrip) {
// 34
EXPECT_EQ(34 * WORDS, segment->getSize());
checkStruct(builder);
checkStruct(builder.asReader());
checkStruct(StructReader::readRootTrusted(segment->getStartPtr(), nullptr));
checkStruct(StructReader::readRoot(segment->getStartPtr(), nullptr, segment, 4));
}
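The 34-word total can be reconstructed from the object sizes used in setupStruct()/checkStruct(); the breakdown below is an added illustration, not part of the commit. No extra words are needed here because everything lives in a single segment:

constexpr unsigned ONE_SEGMENT_TOTAL =
    1                     // root reference
    + 2 + 4               // root struct: 2 data words + 4 reference words
    + 1                   // sub-struct: 1 data word, 0 references
    + 2                   // 3 x int32 list: 12 bytes -> 2 words
    + 1 + 4 * 2           // INLINE_COMPOSITE struct list: tag + 4 elements x 2 words
    + 4 * 1               // each element's 1-word sub-struct
    + 5                   // list of lists: 5 reference words
    + 1 + 1 + 1 + 1 + 2;  // uint16 sublists of 1..5 elements
static_assert(ONE_SEGMENT_TOTAL == 34, "matches the EXPECT_EQ above");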
TEST(WireFormat, StructRoundTrip_MultipleSegments) {
TEST(WireFormat, StructRoundTrip_OneSegmentPerAllocation) {
std::unique_ptr<MessageBuilder> message = newMallocMessage(1 * WORDS);
SegmentBuilder* segment = message->getSegmentWithAvailable(1 * WORDS);
word* rootLocation = segment->allocate(1 * WORDS);
@@ -269,7 +323,36 @@ TEST(WireFormat, StructRoundTrip_MultipleSegments) {
EXPECT_EQ( 2 * WORDS, message->getSegment(SegmentId(13))->getSize()); // list list sublist 4
EXPECT_EQ( 3 * WORDS, message->getSegment(SegmentId(14))->getSize()); // list list sublist 5
checkStruct(builder);
checkStruct(builder.asReader());
checkStruct(StructReader::readRoot(segment->getStartPtr(), nullptr, segment, 4));
}
TEST(WireFormat, StructRoundTrip_MultipleSegmentsWithMultipleAllocations) {
std::unique_ptr<MessageBuilder> message = newMallocMessage(8 * WORDS);
SegmentBuilder* segment = message->getSegmentWithAvailable(1 * WORDS);
word* rootLocation = segment->allocate(1 * WORDS);
StructBuilder builder =
StructBuilder::initRoot(segment, rootLocation, FieldNumber(16), 2 * WORDS, 4 * REFERENCES);
setupStruct(builder);
// Verify that we made 6 segments.
ASSERT_TRUE(message->tryGetSegment(SegmentId(5)) != nullptr);
EXPECT_EQ(nullptr, message->tryGetSegment(SegmentId(6)));
// Check that each segment has the expected size. Recall that each object will be prefixed by an
// extra word if its parent is in a different segment.
EXPECT_EQ( 8 * WORDS, message->getSegment(SegmentId(0))->getSize()); // root ref + struct + sub
EXPECT_EQ( 3 * WORDS, message->getSegment(SegmentId(1))->getSize()); // 3-element int32 list
EXPECT_EQ(10 * WORDS, message->getSegment(SegmentId(2))->getSize()); // struct list
EXPECT_EQ( 8 * WORDS, message->getSegment(SegmentId(3))->getSize()); // struct list substructs
EXPECT_EQ( 8 * WORDS, message->getSegment(SegmentId(4))->getSize()); // list list + sublist 1,2
EXPECT_EQ( 7 * WORDS, message->getSegment(SegmentId(5))->getSize()); // list list sublist 3,4,5
checkStruct(builder);
checkStruct(builder.asReader());
checkStruct(StructReader::readRoot(segment->getStartPtr(), nullptr, segment, 4));
}
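The per-segment figures combine the same component sizes with the rule stated in the comment above: one extra word whenever an object's parent lives in a different segment. As before, this breakdown is an added illustration, not part of the commit:

constexpr unsigned EXTRA = 1;  // prefix word for an object whose parent is in another segment

constexpr unsigned SEG0 = 1 + (2 + 4) + 1;      // root ref + root struct + sub-struct
constexpr unsigned SEG1 = EXTRA + 2;            // 3 x int32 list
constexpr unsigned SEG2 = EXTRA + 1 + 4 * 2;    // struct list: tag + 4 x 2-word elements
constexpr unsigned SEG3 = 4 * (EXTRA + 1);      // four 1-word substructs, parents in segment 2
constexpr unsigned SEG4 = (EXTRA + 5) + 1 + 1;  // list of lists + sublists 1 and 2 (same segment)
constexpr unsigned SEG5 = (EXTRA + 1) + (EXTRA + 1) + (EXTRA + 2);  // sublists 3, 4, 5

static_assert(SEG0 == 8 && SEG1 == 3 && SEG2 == 10 &&
              SEG3 == 8 && SEG4 == 8 && SEG5 == 7,
              "matches the EXPECT_EQ sizes above");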
} // namespace
......
@@ -132,8 +132,8 @@ private:
class StructReader {
public:
inline StructReader()
: segment(nullptr), ptr(nullptr), fieldCount(0), dataSize(0), referenceCount(0),
bit0Offset(0 * BITS), recursionLimit(0) {}
: segment(nullptr), data(nullptr), references(nullptr), fieldCount(0), dataSize(0),
referenceCount(0), bit0Offset(0 * BITS), recursionLimit(0) {}
static StructReader readRootTrusted(const word* location, const word* defaultValue);
static StructReader readRoot(const word* location, const word* defaultValue,
@@ -167,17 +167,8 @@ public:
private:
SegmentReader* segment; // Memory segment in which the struct resides.
const void* ptr;
// ptr[0] points to the location between the struct's data and reference segments.
// ptr[1] points to the end of the *default* data segment.
// We put these in an array so we can choose between them without a branch.
// These pointers are not necessarily word-aligned -- they are aligned as well as necessary for
// the data they might point at. So if the struct has only one field that we know of, and it is
// of type Int16, then the pointers only need to be 16-bit aligned. Or if the struct has fields
// of type Int16 and Int64 (in that order), but the struct reference on the wire self-reported
// as having only one field (therefore, only the Int16), then ptr[0] need only be 16-bit aligned
// while ptr[1] must be 64-bit aligned. This relaxation of alignment is needed to handle the
// case where a list of primitives is upgraded to a list of structs.
const void* data;
const WireReference* references;
FieldNumber fieldCount; // Number of fields the struct is reported to have.
WordCount8 dataSize; // Size of data segment.
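The removed ptr[] comment's point about relaxed alignment is easiest to see with the list-upgrade case it mentions: when a list of Int16 values is read as a list of single-field structs, element i's data begins i * 2 bytes into the list and is therefore only 2-byte aligned. The snippet below is not the commit's code, just an illustration of why the data member presumably stays a const void* rather than a const word*:

#include <cstdint>

// Byte address of element i's data when a primitive list of the given element
// width is reinterpreted as a list of single-field structs.
inline const void* upgradedElementData(const void* listStart, uint32_t i,
                                       uint32_t bytesPerElement /* e.g. 2 for Int16 */) {
  return reinterpret_cast<const uint8_t*>(listStart) + i * bytesPerElement;
}
// For odd i with 2-byte elements the result is not word-aligned, so a StructReader
// built over it cannot assume word alignment of its data pointer.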
@@ -192,11 +183,12 @@ private:
// Limits the depth of message structures to guard against stack-overflow-based DoS attacks.
// Once this reaches zero, further pointers will be pruned.
inline StructReader(SegmentReader* segment, const void* ptr, FieldNumber fieldCount,
WordCount dataSize, WireReferenceCount referenceCount,
inline StructReader(SegmentReader* segment, const void* data, const WireReference* references,
FieldNumber fieldCount, WordCount dataSize, WireReferenceCount referenceCount,
BitCount bit0Offset, int recursionLimit)
: segment(segment), ptr(ptr), fieldCount(fieldCount), dataSize(dataSize),
referenceCount(referenceCount), bit0Offset(bit0Offset), recursionLimit(recursionLimit) {}
: segment(segment), data(data), references(references), fieldCount(fieldCount),
dataSize(dataSize), referenceCount(referenceCount), bit0Offset(bit0Offset),
recursionLimit(recursionLimit) {}
friend class ListReader;
friend class StructBuilder;
@@ -354,7 +346,7 @@ inline void StructBuilder::setDataField<bool>(ElementCount offset, bool value) c
template <typename T>
T StructReader::getDataField(ElementCount offset, typename NoInfer<T>::Type defaultValue) const {
if (offset * bytesPerElement<T>() < dataSize * BYTES_PER_WORD) {
return reinterpret_cast<const WireValue<T>*>(ptr)[offset / ELEMENTS].get();
return reinterpret_cast<const WireValue<T>*>(data)[offset / ELEMENTS].get();
} else {
return defaultValue;
}
@@ -368,7 +360,7 @@ inline bool StructReader::getDataField<bool>(ElementCount offset, bool defaultVa
if (boffset == 0 * BITS) boffset = bit0Offset;
if (boffset < dataSize * BITS_PER_WORD) {
const byte* b = reinterpret_cast<const byte*>(ptr) + boffset / BITS_PER_BYTE;
const byte* b = reinterpret_cast<const byte*>(data) + boffset / BITS_PER_BYTE;
return (*reinterpret_cast<const uint8_t*>(b) & (1 << (boffset % BITS_PER_BYTE / BITS))) != 0;
} else {
return defaultValue;
@@ -381,7 +373,7 @@ T StructReader::getDataFieldCheckingNumber(
// Intentionally use & rather than && to reduce branches.
if ((fieldNumber < fieldCount) &
(offset * bytesPerElement<T>() < dataSize * BYTES_PER_WORD)) {
return reinterpret_cast<const WireValue<T>*>(ptr)[offset / ELEMENTS].get();
return reinterpret_cast<const WireValue<T>*>(data)[offset / ELEMENTS].get();
} else {
return defaultValue;
}
@@ -397,7 +389,7 @@ inline bool StructReader::getDataFieldCheckingNumber<bool>(
// Intentionally use & rather than && to reduce branches.
if ((fieldNumber < fieldCount) & (boffset < dataSize * BITS_PER_WORD)) {
const byte* b = reinterpret_cast<const byte*>(ptr) + boffset / BITS_PER_BYTE;
const byte* b = reinterpret_cast<const byte*>(data) + boffset / BITS_PER_BYTE;
return (*reinterpret_cast<const uint8_t*>(b) & (1 << (boffset % BITS_PER_BYTE / BITS))) != 0;
} else {
return defaultValue;
......