Commit bc965cd8 authored by Kenton Varda's avatar Kenton Varda

Make it possible for a message builder to incorporate a byte array not owned by…

Make it possible for a message builder to incorporate a byte array not owned by it without copying.  Useful for injecting large data blobs into a message without copying their contents.  Access this functionality through the Orphanage interface.
parent 8caf313b
......@@ -47,6 +47,13 @@ void ReadLimiter::unread(WordCount64 amount) {
}
}
// Failure path for checkWritable(): invoked when a Builder is requested for a segment that is
// flagged read-only. Reports the problem through KJ_FAIL_REQUIRE with a message explaining that
// externally referenced data may only be accessed through Readers.
void SegmentBuilder::throwNotWritable() {
KJ_FAIL_REQUIRE(
"Tried to form a Builder to an external data segment referenced by the MessageBuilder. "
"When you use Orphanage::reference*(), you are not allowed to obtain Builders to the "
"referenced data, only Readers, because that data is const.");
}
// =======================================================================================
ReaderArena::ReaderArena(MessageReader* message)
......@@ -137,29 +144,48 @@ BuilderArena::AllocateResult BuilderArena::allocate(WordCount amount) {
// pointers to this segment yet, so it should be fine.
kj::dtor(segment0);
kj::ctor(segment0, this, SegmentId(0), ptr, &this->dummyLimiter);
segmentWithSpace = &segment0;
return AllocateResult { &segment0, segment0.allocate(amount) };
} else {
// Check if there is space in the first segment.
word* attempt = segment0.allocate(amount);
if (attempt != nullptr) {
return AllocateResult { &segment0, attempt };
}
// Need to fall back to additional segments.
MultiSegmentState* segmentState;
KJ_IF_MAYBE(s, moreSegments) {
if (segmentWithSpace != nullptr) {
// Check if there is space in an existing segment.
// TODO(perf): Check for available space in more than just the last segment. We don't
// want this to be O(n), though, so we'll need to maintain some sort of table. Complicating
// matters, we want SegmentBuilders::allocate() to be fast, so we can't update any such
// table when allocation actually happens. Instead, we could have a priority queue based
// on the last-known available size, and then re-check the size when we pop segments off it
// and shove them to the back of the queue if they have become too small.
attempt = s->get()->builders.back()->allocate(amount);
word* attempt = segmentWithSpace->allocate(amount);
if (attempt != nullptr) {
return AllocateResult { s->get()->builders.back().get(), attempt };
return AllocateResult { segmentWithSpace, attempt };
}
}
// Need to allocate a new segment.
SegmentBuilder* result = addSegmentInternal(message->allocateSegment(amount / WORDS));
// Check this new segment first the next time we need to allocate.
segmentWithSpace = result;
// Allocating from the new segment is guaranteed to succeed since we made it big enough.
return AllocateResult { result, result->allocate(amount) };
}
}
// Adds `content` to the arena as an additional segment and returns its SegmentBuilder. All of the
// work is delegated to addSegmentInternal(); because the array is passed as pointer-to-const, the
// segment is presumably constructed through SegmentBuilder's const-array constructor and thus
// marked read-only.
SegmentBuilder* BuilderArena::addExternalSegment(kj::ArrayPtr<const word> content) {
return addSegmentInternal(content);
}
template <typename T>
SegmentBuilder* BuilderArena::addSegmentInternal(kj::ArrayPtr<T> content) {
// This check should never fail in practice, since you can't get an Orphanage without allocating
// the root segment.
KJ_REQUIRE(segment0.getArena() != nullptr,
"Can't allocate external segments before allocating the root segment.");
MultiSegmentState* segmentState;
KJ_IF_MAYBE(s, moreSegments) {
segmentState = *s;
} else {
auto newSegmentState = kj::heap<MultiSegmentState>();
......@@ -168,8 +194,7 @@ BuilderArena::AllocateResult BuilderArena::allocate(WordCount amount) {
}
kj::Own<SegmentBuilder> newBuilder = kj::heap<SegmentBuilder>(
this, SegmentId(segmentState->builders.size() + 1),
message->allocateSegment(amount / WORDS), &this->dummyLimiter);
this, SegmentId(segmentState->builders.size() + 1), content, &this->dummyLimiter);
SegmentBuilder* result = newBuilder.get();
segmentState->builders.add(kj::mv(newBuilder));
......@@ -177,9 +202,7 @@ BuilderArena::AllocateResult BuilderArena::allocate(WordCount amount) {
// getSegmentsForOutput(), which callers might reasonably expect is a thread-safe method.
segmentState->forOutput.resize(segmentState->builders.size() + 1);
// Allocating from the new segment is guaranteed to succeed since we made it big enough.
return AllocateResult { result, result->allocate(amount) };
}
return result;
}
kj::ArrayPtr<const kj::ArrayPtr<const word>> BuilderArena::getSegmentsForOutput() {
......
......@@ -135,9 +135,19 @@ class SegmentBuilder: public SegmentReader {
public:
inline SegmentBuilder(BuilderArena* arena, SegmentId id, kj::ArrayPtr<word> ptr,
ReadLimiter* readLimiter);
inline SegmentBuilder(BuilderArena* arena, SegmentId id, kj::ArrayPtr<const word> ptr,
ReadLimiter* readLimiter);
inline SegmentBuilder(BuilderArena* arena, SegmentId id, decltype(nullptr),
ReadLimiter* readLimiter);
KJ_ALWAYS_INLINE(word* allocate(WordCount amount));
inline word* getPtrUnchecked(WordCount offset);
KJ_ALWAYS_INLINE(void checkWritable());
// Throw an exception if the segment is read-only (meaning it is a reference to external data).
KJ_ALWAYS_INLINE(word* getPtrUnchecked(WordCount offset));
// Get a writable pointer into the segment. This does NOT itself check whether the segment is
// read-only (i.e. a reference to external immutable data); call checkWritable() or isWritable()
// first if the segment might be read-only.
inline BuilderArena* getArena();
......@@ -145,11 +155,17 @@ public:
inline void reset();
inline bool isWritable() { return !readOnly; }
private:
word* pos;
// Pointer to the current end point of the segment, i.e. the location where the next object
// should be allocated.
bool readOnly;
void throwNotWritable();
KJ_DISALLOW_COPY(SegmentBuilder);
};
......@@ -238,6 +254,17 @@ public:
// the arena is guaranteed to succeed. Therefore callers should try to allocate from a specific
// segment first if there is one, then fall back to the arena.
SegmentBuilder* addExternalSegment(kj::ArrayPtr<const word> content);
// Add a new segment to the arena which points to some existing memory region. The segment is
// assumed to be completely full; the arena will never allocate from it. In fact, the segment
// is considered read-only. Any attempt to get a Builder pointing into this segment will throw
// an exception. Readers are allowed, however.
//
// This can be used to inject some external data into a message without a copy, e.g. embedding a
// large mmap'd file into a message as `Data` without forcing that data to actually be read in
// from disk (until the message itself is written out). `Orphanage` provides the public API for
// this feature.
uint injectCap(kj::Own<ClientHook>&& cap);
// Add the capability to the message and return its index. If the same ClientHook is injected
// twice, this may return the same index both times, but in this case dropCap() needs to be
......@@ -264,6 +291,14 @@ private:
kj::Vector<kj::ArrayPtr<const word>> forOutput;
};
kj::Maybe<kj::Own<MultiSegmentState>> moreSegments;
SegmentBuilder* segmentWithSpace = nullptr;
// When allocating, look for space in this segment first before resorting to allocating a new
// segment. This is not necessarily the last segment because addExternalSegment() may add a
// segment that is already-full, in which case we don't update this pointer.
template <typename T> // Can be `word` or `const word`.
SegmentBuilder* addSegmentInternal(kj::ArrayPtr<T> content);
};
// =======================================================================================
......@@ -316,7 +351,17 @@ inline void SegmentReader::unread(WordCount64 amount) { readLimiter->unread(amou
inline SegmentBuilder::SegmentBuilder(
BuilderArena* arena, SegmentId id, kj::ArrayPtr<word> ptr, ReadLimiter* readLimiter)
: SegmentReader(arena, id, ptr, readLimiter), pos(ptr.begin()) {}
: SegmentReader(arena, id, ptr, readLimiter), pos(ptr.begin()), readOnly(false) {}
// Constructs a builder over external memory that is const. The segment is flagged read-only and
// `pos` is placed at the end of the array, so no free space remains between `pos` and the end of
// the segment.
inline SegmentBuilder::SegmentBuilder(
BuilderArena* arena, SegmentId id, kj::ArrayPtr<const word> ptr, ReadLimiter* readLimiter)
: SegmentReader(arena, id, ptr, readLimiter),
// const_cast is safe here because the member won't ever be dereferenced because it appears
// to point to the end of the segment anyway.
pos(const_cast<word*>(ptr.end())),
readOnly(true) {}
// Constructs a builder with no backing memory: the SegmentReader base receives a null array and
// `pos` is null. The segment is not flagged read-only.
inline SegmentBuilder::SegmentBuilder(BuilderArena* arena, SegmentId id, decltype(nullptr),
ReadLimiter* readLimiter)
: SegmentReader(arena, id, nullptr, readLimiter), pos(nullptr), readOnly(false) {}
inline word* SegmentBuilder::allocate(WordCount amount) {
if (intervalLength(pos, ptr.end()) < amount) {
......@@ -330,9 +375,11 @@ inline word* SegmentBuilder::allocate(WordCount amount) {
}
}
inline void SegmentBuilder::checkWritable() {
  // A read-only segment must never be handed out through a Builder. The failure case is expected
  // to be rare, so it is delegated to throwNotWritable().
  if (KJ_UNLIKELY(readOnly)) {
    throwNotWritable();
  }
}
inline word* SegmentBuilder::getPtrUnchecked(WordCount offset) {
// Returns a mutable pointer `offset` words past the start of the segment. No writability check
// is performed here. The const_cast is only safe to write through when the segment is writable:
// a segment constructed from a `kj::ArrayPtr<const word>` wraps const memory and is flagged
// read-only, so callers must consult isWritable() / checkWritable() before modifying the result.
return const_cast<word*>(ptr.begin() + offset);
}
......
......@@ -376,7 +376,7 @@ struct WireHelpers {
}
}
static KJ_ALWAYS_INLINE(word* followFars(
static KJ_ALWAYS_INLINE(word* followFarsNoWritableCheck(
WirePointer*& ref, word* refTarget, SegmentBuilder*& segment)) {
// If `ref` is a far pointer, follow it. On return, `ref` will have been updated to point at
// a WirePointer that contains the type information about the target object, and a pointer to
......@@ -407,6 +407,13 @@ struct WireHelpers {
}
}
static KJ_ALWAYS_INLINE(word* followFars(
    WirePointer*& ref, word* refTarget, SegmentBuilder*& segment)) {
  // Resolves far pointers exactly like followFarsNoWritableCheck(), then additionally requires
  // that the segment the pointer finally lands in is writable. Code that must tolerate read-only
  // segments calls followFarsNoWritableCheck() directly instead.
  word* const target = followFarsNoWritableCheck(ref, refTarget, segment);
  segment->checkWritable();
  return target;
}
static KJ_ALWAYS_INLINE(const word* followFars(
const WirePointer*& ref, const word* refTarget, SegmentReader*& segment)) {
// Like the other followFars() but operates on readers.
......@@ -456,6 +463,9 @@ struct WireHelpers {
// Zero out the pointed-to object. Use when the pointer is about to be overwritten making the
// target object no longer reachable.
// We shouldn't zero out external data linked into the message.
if (!segment->isWritable()) return;
switch (ref->kind()) {
case WirePointer::STRUCT:
case WirePointer::LIST:
......@@ -463,17 +473,21 @@ struct WireHelpers {
break;
case WirePointer::FAR: {
segment = segment->getArena()->getSegment(ref->farRef.segmentId.get());
if (segment->isWritable()) { // Don't zero external data.
WirePointer* pad =
reinterpret_cast<WirePointer*>(segment->getPtrUnchecked(ref->farPositionInSegment()));
if (ref->isDoubleFar()) {
segment = segment->getArena()->getSegment(pad->farRef.segmentId.get());
if (segment->isWritable()) {
zeroObject(segment, pad + 1, segment->getPtrUnchecked(pad->farPositionInSegment()));
}
memset(pad, 0, sizeof(WirePointer) * 2);
} else {
zeroObject(segment, pad);
memset(pad, 0, sizeof(WirePointer));
}
}
break;
}
case WirePointer::OTHER:
......@@ -487,6 +501,9 @@ struct WireHelpers {
}
static void zeroObject(SegmentBuilder* segment, WirePointer* tag, word* ptr) {
// We shouldn't zero out external data linked into the message.
if (!segment->isWritable()) return;
switch (tag->kind()) {
case WirePointer::STRUCT: {
WirePointer* pointerSection =
......@@ -566,10 +583,12 @@ struct WireHelpers {
// do not zero the object body. Used when upgrading.
if (ref->kind() == WirePointer::FAR) {
word* pad = segment->getArena()->getSegment(ref->farRef.segmentId.get())
->getPtrUnchecked(ref->farPositionInSegment());
SegmentBuilder* padSegment = segment->getArena()->getSegment(ref->farRef.segmentId.get());
if (padSegment->isWritable()) { // Don't zero external data.
word* pad = padSegment->getPtrUnchecked(ref->farPositionInSegment());
memset(pad, 0, sizeof(WirePointer) * (1 + ref->isDoubleFar()));
}
}
memset(ref, 0, sizeof(*ref));
}
......@@ -1768,7 +1787,7 @@ struct WireHelpers {
location = reinterpret_cast<word*>(ref); // dummy so that it is non-null
} else {
WirePointer* refCopy = ref;
location = followFars(refCopy, ref->target(), segment);
location = followFarsNoWritableCheck(refCopy, ref->target(), segment);
}
OrphanBuilder result(ref, segment, location);
......@@ -2591,6 +2610,25 @@ OrphanBuilder OrphanBuilder::copy(BuilderArena* arena, kj::Own<ClientHook> copyF
return result;
}
OrphanBuilder OrphanBuilder::referenceExternalData(BuilderArena* arena, Data::Reader data) {
  // Creates an orphan whose content is `data` itself rather than a copy: the bytes are registered
  // with the arena as an external segment and the orphan's tag describes them as a byte list.

  // The blob becomes a segment of whole words, so its start address must be suitably aligned.
  KJ_REQUIRE(reinterpret_cast<uintptr_t>(data.begin()) % sizeof(void*) == 0,
             "Cannot referenceExternalData() that is not aligned.");

  // View the blob as words, rounding its byte length up to a whole number of words.
  const word* const firstWord = reinterpret_cast<const word*>(data.begin());
  const auto paddedLength = WireHelpers::roundBytesUpToWords(data.size() * BYTES);
  kj::ArrayPtr<const word> externalWords(firstWord, paddedLength / WORDS);

  OrphanBuilder orphan;

  // The tag records the exact byte count, not the padded word count.
  auto tag = orphan.tagAsPtr();
  tag->setKindForOrphan(WirePointer::LIST);
  tag->listRef.set(FieldSize::BYTE, data.size() * ELEMENTS);

  orphan.segment = arena->addExternalSegment(externalWords);

  // Dropping const is acceptable here: writability of the segment is checked when a builder is
  // requested for this data.
  orphan.location = const_cast<word*>(externalWords.begin());

  return orphan;
}
StructBuilder OrphanBuilder::asStruct(StructSize size) {
KJ_DASSERT(tagAsPtr()->isNull() == (location == nullptr));
......
......@@ -719,6 +719,8 @@ public:
static OrphanBuilder copy(BuilderArena* arena, Data::Reader copyFrom);
static OrphanBuilder copy(BuilderArena* arena, kj::Own<ClientHook> copyFrom);
static OrphanBuilder referenceExternalData(BuilderArena* arena, Data::Reader data);
OrphanBuilder& operator=(const OrphanBuilder& other) = delete;
inline OrphanBuilder& operator=(OrphanBuilder&& other);
......@@ -752,14 +754,16 @@ private:
// Contains an encoded WirePointer representing this object. WirePointer is defined in
// layout.c++, but fits in a word.
//
// If the pointer is a FAR pointer, then the tag is a complete pointer, `location` is null, and
// `segment` is any arbitrary segment in the message. Otherwise, the tag's offset is garbage,
// `location` points at the actual object, and `segment` points at the segment where `location`
// resides.
// This may be a FAR pointer. Even in that case, `location` points to the eventual destination
// of that far pointer. The reason we keep the far pointer around rather than just making `tag`
// represent the final destination is because if the eventual adopter of the pointer is not in
// the target's segment then it may be useful to reuse the far pointer landing pad.
//
// If `tag` is not a far pointer, its offset is garbage; only `location` points to the actual
// target.
SegmentBuilder* segment;
// Segment in which the object resides, or an arbitrary segment in the message if the tag is a
// FAR pointer.
// Segment in which the object resides.
word* location;
// Pointer to the object, or nullptr if the pointer is null. For capabilities, we make this
......
......@@ -208,7 +208,7 @@ public:
Orphanage getOrphanage();
private:
void* arenaSpace[18];
void* arenaSpace[19];
// Space in which we can construct a BuilderArena. We don't use BuilderArena directly here
// because we don't want clients to have to #include arena.h, which itself includes a bunch of
// big STL headers. We don't use a pointer to a BuilderArena because that would require an
......
......@@ -881,6 +881,114 @@ TEST(Orphans, DisownNull) {
}
}
// End-to-end check of Orphanage::referenceExternalData(): the external buffer becomes a second
// message segment, is reachable through Readers both as an orphan and after adoption, refuses to
// yield Builders, and is never overwritten -- not even when the orphan is finally abandoned.
TEST(Orphans, ReferenceExternalData) {
MallocMessageBuilder builder;
// The anonymous union gives `data` the alignment of `word`.
union {
word align;
byte data[50];
};
// Fill with a recognizable pattern so that any zeroing of the buffer can be detected at the end.
memset(data, 0x55, sizeof(data));
auto orphan = builder.getOrphanage().referenceExternalData(Data::Builder(data, sizeof(data)));
// Data was added as a new segment.
{
auto segments = builder.getSegmentsForOutput();
ASSERT_EQ(2, segments.size());
EXPECT_EQ(data, reinterpret_cast<const byte*>(segments[1].begin()));
}
// Can't get builder because it's read-only.
EXPECT_ANY_THROW(orphan.get());
// Can get reader.
{
auto reader = orphan.getReader();
EXPECT_EQ(data, reader.begin());
EXPECT_EQ(sizeof(data), reader.size());
}
// Adopt into message tree.
auto root = builder.getRoot<TestAllTypes>();
root.adoptDataField(kj::mv(orphan));
// Can't get child builder.
EXPECT_ANY_THROW(root.getDataField());
// Can get child reader.
{
auto reader = root.asReader().getDataField();
EXPECT_EQ(data, reader.begin());
EXPECT_EQ(sizeof(data), reader.size());
}
// Back to orphan.
orphan = root.disownDataField();
// Now the orphan may be pointing to a far pointer landing pad, so check that it still does the
// right things.
// Can't get builder because it's read-only.
EXPECT_ANY_THROW(orphan.get());
// Can get reader.
{
auto reader = orphan.getReader();
EXPECT_EQ(data, reader.begin());
EXPECT_EQ(sizeof(data), reader.size());
}
// Finally, let's abandon the orphan and check that this doesn't zero out the data.
// Assigning a fresh, empty Orphan replaces (and thereby abandons) the previous one.
orphan = Orphan<Data>();
for (byte b: data) {
EXPECT_EQ(0x55, b);
}
}
TEST(Orphans, ReferenceExternalData_NoZeroOnSet) {
  // Overwriting a field that refers to an external blob via setFoo() must leave the blob's bytes
  // untouched.

  // The anonymous union gives `blob` the alignment of `word`.
  union {
    word align;
    byte blob[50];
  };
  memset(blob, 0x55, sizeof(blob));

  MallocMessageBuilder message;
  auto root = message.getRoot<TestAllTypes>();

  // Link the external blob into the message tree...
  root.adoptDataField(
      message.getOrphanage().referenceExternalData(Data::Builder(blob, sizeof(blob))));

  // ...then replace the field's value, which discards the reference to the blob.
  root.setDataField(Data::Builder());

  // Every byte must still hold the fill pattern.
  for (byte value: blob) {
    EXPECT_EQ(0x55, value);
  }
}
TEST(Orphans, ReferenceExternalData_NoZeroImmediateAbandon) {
  // An orphan referencing an external blob that is dropped right away -- never adopted into the
  // message tree -- must not cause the blob to be zeroed.

  // The anonymous union gives `blob` the alignment of `word`.
  union {
    word align;
    byte blob[50];
  };
  memset(blob, 0x55, sizeof(blob));

  MallocMessageBuilder message;

  // The returned orphan is a discarded temporary, so it is abandoned at the end of this statement.
  message.getOrphanage().referenceExternalData(Data::Builder(blob, sizeof(blob)));

  // Every byte must still hold the fill pattern.
  for (byte value: blob) {
    EXPECT_EQ(0x55, value);
  }
}
} // namespace
} // namespace _ (private)
} // namespace capnp
......@@ -120,6 +120,28 @@ public:
// Allocate a new orphaned object (struct, list, or blob) and initialize it as a copy of the
// given object.
Orphan<Data> referenceExternalData(Data::Reader data) const;
// Creates an Orphan<Data> that points at an existing region of memory (e.g. from another message)
// without copying it. There are some SEVERE restrictions on how this can be used:
// - The memory must remain valid until the `MessageBuilder` is destroyed (even if the orphan is
// abandoned).
// - Because the data is const, you will not be allowed to obtain a `Data::Builder`
// for this blob. Any call which would return such a builder will throw an exception. You
// can, however, obtain a Reader, e.g. via orphan.getReader() or from a parent Reader (once
// the orphan is adopted). It is your responsibility to make sure your code can deal with
// these problems when using this optimization; if you can't, allocate a copy instead.
// - `data.begin()` must be aligned to a machine word boundary (32-bit or 64-bit depending on
// the CPU). Any pointer returned by malloc() as well as any data blob obtained from another
// Cap'n Proto message satisfies this.
// - If `data.size()` is not a multiple of 8, extra bytes past data.end() up until the next 8-byte
// boundary will be visible in the raw message when it is written out. Thus, there must be no
// secrets in these bytes. Data blobs obtained from other Cap'n Proto messages should be safe
// as these bytes should be zero (unless the sender had the same problem).
//
// The array will actually become one of the message's segments. The data can thus be adopted
// into the message tree without copying it. This is particularly useful when referencing very
// large blobs, such as whole mmap'd files.
private:
_::BuilderArena* arena;
......@@ -303,6 +325,10 @@ inline Orphan<FromReader<Reader>> Orphanage::newOrphanCopy(Reader& copyFrom) con
return newOrphanCopy(kj::implicitCast<const Reader&>(copyFrom));
}
// Public entry point for referencing external data without copying: wraps the OrphanBuilder
// produced by _::OrphanBuilder::referenceExternalData() for this orphanage's arena in an
// Orphan<Data>.
inline Orphan<Data> Orphanage::referenceExternalData(Data::Reader data) const {
return Orphan<Data>(_::OrphanBuilder::referenceExternalData(arena, data));
}
} // namespace capnp
#endif // CAPNP_ORPHAN_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment