serialize.h 12.6 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
Kenton Varda's avatar
Kenton Varda committed
3
//
Kenton Varda's avatar
Kenton Varda committed
4 5 6 7 8 9
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
Kenton Varda's avatar
Kenton Varda committed
10
//
Kenton Varda's avatar
Kenton Varda committed
11 12
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
Kenton Varda's avatar
Kenton Varda committed
13
//
Kenton Varda's avatar
Kenton Varda committed
14 15 16 17 18 19 20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
Kenton Varda's avatar
Kenton Varda committed
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

// This file implements a simple serialization format for Cap'n Proto messages.  The format
// is as follows:
//
// * 32-bit little-endian segment count, minus one since there is always at least one segment
//     (4 bytes).
// * 32-bit little-endian size of each segment (4*(segment count) bytes).
// * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even
//     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
// * Data from each segment, in order (8*sum(segment sizes) bytes)
//
// This format has some important properties:
// - It is self-delimiting, so multiple messages may be written to a stream without any external
//   delimiter.
// - The total size and position of each segment can be determined by reading only the first part
//   of the message, allowing lazy and random-access reading of the segment data.
// - A message is always at least 8 bytes.
// - A single-segment message can be read entirely in two system calls with no buffering.
// - A multi-segment message can be read entirely in three system calls with no buffering.
// - The format is appropriate for mmap()ing since all data is aligned.

41
#pragma once
Kenton Varda's avatar
Kenton Varda committed
42

43
#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
44 45 46
#pragma GCC system_header
#endif

Kenton Varda's avatar
Kenton Varda committed
47
#include "message.h"
48
#include <kj/io.h>
Kenton Varda's avatar
Kenton Varda committed
49

50
namespace capnp {
Kenton Varda's avatar
Kenton Varda committed
51

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
class UnalignedFlatArrayMessageReader: public MessageReader {
  // Like FlatArrayMessageReader, but skips checking that the array is properly-aligned.
  //
  // (FlatArrayMessageReader itself only routes the array through its alignment check when
  // KJ_DEBUG is defined; see its inline constructor at the bottom of this header.)
  //
  // WARNING: This only works on architectures that support unaligned reads, like x86/x64 and
  //   modern ARM. Unaligned access may incur a performance penalty on these platforms. On many
  //   other platforms, the program will simply crash on unaligned reads. Also note that unaligned
  //   data access may be considered undefined behavior by compilers; use at your own risk. If at
  //   all possible, try to ensure your data ends up in aligned buffers rather than rely on this
  //   class.

public:
  UnalignedFlatArrayMessageReader(
      kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
  // Reads the message stored in `array`. `options` defaults to a default-constructed
  // ReaderOptions.
  // NOTE(review): presumably, as documented for FlatArrayMessageReader, the array must remain
  // valid for as long as this reader is in use -- confirm against the implementation.

  kj::ArrayPtr<const word> getSegment(uint id) override;
  // Implements MessageReader.

  const word* getEnd() const { return end; }
  // Returns the stored `end` pointer. FlatArrayMessageReader::getEnd() forwards here and documents
  // the value as the position just past the end of the message as determined from the header.

private:
  // Optimize for single-segment case.
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
  // Value handed back by getEnd().
  const word* end;
};

class FlatArrayMessageReader: public UnalignedFlatArrayMessageReader {
Kenton Varda's avatar
Kenton Varda committed
76 77 78 79
  // Parses a message from a flat array.  Note that it makes sense to use this together with mmap()
  // for extremely fast parsing.

public:
80
  FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
Kenton Varda's avatar
Kenton Varda committed
81 82
  // The array must remain valid until the MessageReader is destroyed.

83
  const word* getEnd() const { return UnalignedFlatArrayMessageReader::getEnd(); }
84 85 86 87 88
  // Get a pointer just past the end of the message as determined by reading the message header.
  // This could actually be before the end of the input array.  This pointer is useful e.g. if
  // you know that the input array has extra stuff appended after the message and you want to
  // get at it.

Kenton Varda's avatar
Kenton Varda committed
89
private:
90
  static kj::ArrayPtr<const word> checkAlignment(kj::ArrayPtr<const word> array);
Kenton Varda's avatar
Kenton Varda committed
91 92
};

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
    kj::ArrayPtr<const word> array, MessageBuilder& target,
    ReaderOptions options = ReaderOptions());
// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// `array` holds the serialized message, possibly followed by further data; `target` receives the
// copy. NOTE(review): `options` is presumably handed to the `FlatArrayMessageReader` -- confirm.
//
// Returns an ArrayPtr containing any words left over in the array after consuming the whole
// message. This is useful when reading multiple messages that have been concatenated. See also
// FlatArrayMessageReader::getEnd().
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

108
kj::Array<word> messageToFlatArray(MessageBuilder& builder);
// Constructs a flat array containing the entire content of the given message.
//
// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
// deleted. For example:
//
//     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
//     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
//     write(fd, bytes.begin(), bytes.size());
Kenton Varda's avatar
Kenton Varda committed
118

119
kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of messageToFlatArray that takes a raw segment array.

122 123 124 125 126 127
size_t computeSerializedSizeInWords(MessageBuilder& builder);
// Returns the size, in words, that will be needed to serialize the message, including the header.
// Defined inline in this header: it forwards `builder.getSegmentsForOutput()` to the raw-segment
// overload.

size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of computeSerializedSizeInWords that takes a raw segment array. The result is in words
// and includes the header.

128 129 130 131 132 133 134 135
size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
// Given a prefix of a serialized message, try to determine the expected total size of the message,
// in words. The returned size is based on the information known so far; it may be an underestimate
// if the prefix doesn't contain the full segment table.
//
// If the returned value is greater than `messagePrefix.size()`, then the message is not yet
// complete and the app cannot parse it yet. If the returned value is less than or equal to
// `messagePrefix.size()`, then the returned value is the exact total size of the message; any
// remaining bytes are part of the next message.
//
// This function is useful when reading messages from a stream in an asynchronous way, but when
// using the full KJ async infrastructure would be too difficult. Each time bytes are received,
// use this function to determine if an entire message is ready to be parsed.

Kenton Varda's avatar
Kenton Varda committed
142 143 144
// =======================================================================================

class InputStreamMessageReader: public MessageReader {
145 146 147
  // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
  // for a subclass specific to file descriptors.

Kenton Varda's avatar
Kenton Varda committed
148
public:
149
  InputStreamMessageReader(kj::InputStream& inputStream,
Kenton Varda's avatar
Kenton Varda committed
150
                           ReaderOptions options = ReaderOptions(),
151
                           kj::ArrayPtr<word> scratchSpace = nullptr);
152
  ~InputStreamMessageReader() noexcept(false);
153

Kenton Varda's avatar
Kenton Varda committed
154
  // implements MessageReader ----------------------------------------
155
  kj::ArrayPtr<const word> getSegment(uint id) override;
Kenton Varda's avatar
Kenton Varda committed
156 157

private:
158
  kj::InputStream& inputStream;
159
  byte* readPos;
Kenton Varda's avatar
Kenton Varda committed
160 161

  // Optimize for single-segment case.
162 163
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
Kenton Varda's avatar
Kenton Varda committed
164

165
  kj::Array<word> ownedSpace;
166
  // Only if scratchSpace wasn't big enough.
167 168

  kj::UnwindDetector unwindDetector;
Kenton Varda's avatar
Kenton Varda committed
169 170
};

171 172 173 174 175 176 177 178 179 180 181
void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
                     ReaderOptions options = ReaderOptions(),
                     kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `InputStreamMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// NOTE(review): `options` and `scratchSpace` mirror the `InputStreamMessageReader` constructor
// parameters and are presumably forwarded to it -- confirm.
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

182
void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
// Write the message to the given output stream. Defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.

185
void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given output stream.

// =======================================================================================
// Specializations for reading from / writing to file descriptors.

191
class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
  // A MessageReader that reads from a stream-based file descriptor.
  //
  // kj::FdInputStream is listed first among the bases so that it is constructed before
  // InputStreamMessageReader, whose constructor is handed `*this` as its input stream.

public:
  StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read message from a file descriptor, without taking ownership of the descriptor.

  StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, taking ownership of the descriptor.

  ~StreamFdMessageReader() noexcept(false);
  // Declared noexcept(false), matching the InputStreamMessageReader destructor.
};

208 209 210 211 212 213 214 215 216 217 218
void readMessageCopyFromFd(int fd, MessageBuilder& target,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `StreamFdMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// NOTE(review): `fd` is a plain int, so presumably the non-owning `StreamFdMessageReader`
// constructor is used and the descriptor is left open -- confirm.
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

Kenton Varda's avatar
Kenton Varda committed
219 220 221 222 223 224 225
void writeMessageToFd(int fd, MessageBuilder& builder);
// Write the message to the given file descriptor. Defined inline in this header: it forwards
// `builder.getSegmentsForOutput()` to the raw-segment overload.
//
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

226
void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Write the segment array to the given file descriptor.
//
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

// =======================================================================================
// inline stuff

236 237 238 239 240 241 242 243
// Hands the array to the unaligned base reader.  Only builds with KJ_DEBUG defined pass it
// through checkAlignment() first; all other builds forward it unchecked.
inline FlatArrayMessageReader::FlatArrayMessageReader(
    kj::ArrayPtr<const word> array, ReaderOptions options)
    : UnalignedFlatArrayMessageReader(
#ifdef KJ_DEBUG
          checkAlignment(array),
#else
          array,
#endif
          options) {}

244
inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
  // Delegate to the raw-segment overload using the builder's output segments.
  return messageToFlatArray(builder.getSegmentsForOutput());
}

248 249 250 251
inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
  // Measure the builder's output segments via the raw-segment overload.
  auto outputSegments = builder.getSegmentsForOutput();
  return computeSerializedSizeInWords(outputSegments);
}

252
inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
  // Delegate to the raw-segment overload using the builder's output segments.
  writeMessage(output, builder.getSegmentsForOutput());
}

inline void writeMessageToFd(int fd, MessageBuilder& builder) {
  // Write the builder's output segments via the raw-segment overload.
  auto outputSegments = builder.getSegmentsForOutput();
  writeMessageToFd(fd, outputSegments);
}

260
}  // namespace capnp