Commit 3aa2b2aa authored by Kenton Varda

Add officially-sanctioned way to read from unaligned buffers. Fixes #313.

parent 4fe15bf5
......@@ -3101,11 +3101,6 @@ StructReader ListReader::getStructElement(ElementCount index) const {
const WirePointer* structPointers =
reinterpret_cast<const WirePointer*>(structData + structDataSize / BITS_PER_BYTE);
// This check should pass if there are no bugs in the list pointer validation code.
KJ_DASSERT(structPointerCount == ZERO * POINTERS ||
(uintptr_t)structPointers % sizeof(void*) == 0,
"Pointer section of struct list element not aligned.");
KJ_DASSERT(indexBit % BITS_PER_BYTE == ZERO * BITS);
return StructReader(
segment, capTable, structData, structPointers,
......@@ -3486,6 +3481,8 @@ OrphanBuilder OrphanBuilder::concat(
}
OrphanBuilder OrphanBuilder::referenceExternalData(BuilderArena* arena, Data::Reader data) {
// TODO(someday): We now allow unaligned segments on architectures that support it. We could
// consider relaxing this check as well?
KJ_REQUIRE(reinterpret_cast<uintptr_t>(data.begin()) % sizeof(void*) == 0,
"Cannot referenceExternalData() that is not aligned.");
......
......@@ -105,6 +105,10 @@ public:
virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
// Gets the segment with the given ID, or returns null if no such segment exists. This method
// will be called at most once for each segment ID.
//
// The returned array must be aligned properly for the host architecture. This means that on
// x86/x64, alignment is optional, though recommended for performance, whereas on many other
// architectures, alignment is required.
inline const ReaderOptions& getOptions();
// Get the options passed to the constructor.
......@@ -192,6 +196,10 @@ public:
// allocateSegment() is responsible for zeroing the memory before returning. This is required
// because otherwise the Cap'n Proto implementation would have to zero the memory anyway, and
// many allocators are able to provide already-zero'd memory more efficiently.
//
// The returned array must be aligned properly for the host architecture. This means that on
// x86/x64, alignment is optional, though recommended for performance, whereas on many other
// architectures, alignment is required.
template <typename RootType>
typename RootType::Builder initRoot();
......
......@@ -102,6 +102,20 @@ TEST(Serialize, FlatArray) {
EXPECT_EQ(serializedWithSuffix.end() - 5, reader.getEnd());
}
#if __i386__ || __x86_64__ || __aarch64__ || _MSC_VER
// Try unaligned: parse the same serialized message from a buffer whose start is shifted by one
// byte, so the word pointer handed to the reader is deliberately misaligned (this assumes the
// heap allocation itself starts on a word boundary). Only compiled on the platforms named in the
// enclosing #if.
{
// Allocate one spare byte so the whole message still fits after shifting the start by one.
auto bytes = kj::heapArray<byte>(serializedWithSuffix.size() * sizeof(word) + 1);
// View the region starting at byte offset 1 as an array of words of the original length.
auto unalignedWords = kj::arrayPtr(
reinterpret_cast<word*>(bytes.begin() + 1), serializedWithSuffix.size());
// Copy byte-wise; the destination is not word-aligned.
memcpy(unalignedWords.asBytes().begin(), serializedWithSuffix.asBytes().begin(),
serializedWithSuffix.asBytes().size());
// Use the reader variant that skips the alignment check.
UnalignedFlatArrayMessageReader reader(unalignedWords);
checkTestMessage(reader.getRoot<TestAllTypes>());
// Same expectation as the aligned case: the message ends 5 words before the end of the buffer.
EXPECT_EQ(unalignedWords.end() - 5, reader.getEnd());
}
#endif
{
MallocMessageBuilder builder2;
auto remaining = initMessageBuilderFromFlatArrayCopy(serializedWithSuffix, builder2);
......
......@@ -26,7 +26,7 @@
namespace capnp {
FlatArrayMessageReader::FlatArrayMessageReader(
UnalignedFlatArrayMessageReader::UnalignedFlatArrayMessageReader(
kj::ArrayPtr<const word> array, ReaderOptions options)
: MessageReader(options), end(array.end()) {
if (array.size() < 1) {
......@@ -98,7 +98,7 @@ size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> array) {
return totalSize;
}
kj::ArrayPtr<const word> FlatArrayMessageReader::getSegment(uint id) {
kj::ArrayPtr<const word> UnalignedFlatArrayMessageReader::getSegment(uint id) {
if (id == 0) {
return segment0;
} else if (id <= moreSegments.size()) {
......@@ -108,6 +108,15 @@ kj::ArrayPtr<const word> FlatArrayMessageReader::getSegment(uint id) {
}
}
kj::ArrayPtr<const word> FlatArrayMessageReader::checkAlignment(kj::ArrayPtr<const word> array) {
// Requires that `array` begins at an address that is a multiple of sizeof(void*), then returns
// `array` unchanged. Returning the argument lets the check be applied inline where the array is
// forwarded to the base-class constructor.
//
// Only the start address is inspected; the array's size and contents are not examined here.
KJ_REQUIRE((uintptr_t)array.begin() % sizeof(void*) == 0,
"Input to FlatArrayMessageReader is not aligned. If your architecture supports unaligned "
"access (e.g. x86/x64/modern ARM), you may use UnalignedFlatArrayMessageReader instead, "
"though this may harm performance.");
return array;
}
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
kj::ArrayPtr<const word> array, MessageBuilder& target, ReaderOptions options) {
FlatArrayMessageReader reader(array, options);
......
......@@ -49,7 +49,30 @@
namespace capnp {
class FlatArrayMessageReader: public MessageReader {
class UnalignedFlatArrayMessageReader: public MessageReader {
// Like FlatArrayMessageReader, but skips checking that the array is properly-aligned.
//
// WARNING: This only works on architectures that support unaligned reads, like x86/x64 and
// modern ARM. Unaligned access may incur a performance penalty on these platforms. On many
// other platforms, the program will simply crash on unaligned reads. Also note that unaligned
// data access may be considered undefined behavior by compilers; use at your own risk. If at
// all possible, try to ensure your data ends up in aligned buffers rather than rely on this
// class.
public:
UnalignedFlatArrayMessageReader(
kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
// Parses a message from the flat array `array`, applying `options`. No alignment check is
// performed on `array`.
//
// NOTE(review): presumably, as with FlatArrayMessageReader, the array must remain valid until
// the reader is destroyed -- confirm against the constructor implementation.
kj::ArrayPtr<const word> getSegment(uint id) override;
// Returns the segment with the given ID. Segment 0 is kept in `segment0`; higher IDs are looked
// up in `moreSegments`.
const word* getEnd() const { return end; }
// Get a pointer just past the end of the message as determined by reading the message header.
// This could be before the end of the input array, e.g. when extra data has been appended after
// the message.
private:
// Optimize for single-segment case.
kj::ArrayPtr<const word> segment0;
// Segments with ID >= 1, if any.
kj::Array<kj::ArrayPtr<const word>> moreSegments;
// Pointer just past the end of the message; exposed through getEnd().
const word* end;
};
class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader {
// Parses a message from a flat array. Note that it makes sense to use this together with mmap()
// for extremely fast parsing.
......@@ -57,19 +80,14 @@ public:
FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
// The array must remain valid until the MessageReader is destroyed.
kj::ArrayPtr<const word> getSegment(uint id) override;
const word* getEnd() const { return end; }
const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); }
// Get a pointer just past the end of the message as determined by reading the message header.
// This could actually be before the end of the input array. This pointer is useful e.g. if
// you know that the input array has extra stuff appended after the message and you want to
// get at it.
private:
// Optimize for single-segment case.
kj::ArrayPtr<const word> segment0;
kj::Array<kj::ArrayPtr<const word>> moreSegments;
const word* end;
static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array);
};
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
......@@ -215,6 +233,14 @@ void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segme
// =======================================================================================
// inline stuff
inline FlatArrayMessageReader::FlatArrayMessageReader(
    kj::ArrayPtr<const word> array, ReaderOptions options)
    // Forward to the unaligned reader. When KJ_DEBUG is defined the array is first passed through
    // checkAlignment(), which rejects misaligned input; otherwise it is forwarded unchecked.
    : UnalignedFlatArrayMessageReader(
#ifdef KJ_DEBUG
          checkAlignment(array),
#else
          array,
#endif
          options) {}
inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
  // Convenience overload: fetch the builder's output segments and delegate to the overload that
  // flattens a segment table.
  auto segments = builder.getSegmentsForOutput();
  return messageToFlatArray(segments);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment