serialize.h 11.4 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
Kenton Varda's avatar
Kenton Varda committed
3
//
Kenton Varda's avatar
Kenton Varda committed
4 5 6 7 8 9
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
Kenton Varda's avatar
Kenton Varda committed
10
//
Kenton Varda's avatar
Kenton Varda committed
11 12
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
Kenton Varda's avatar
Kenton Varda committed
13
//
Kenton Varda's avatar
Kenton Varda committed
14 15 16 17 18 19 20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
Kenton Varda's avatar
Kenton Varda committed
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

// This file implements a simple serialization format for Cap'n Proto messages.  The format
// is as follows:
//
// * 32-bit little-endian segment count (4 bytes).
// * 32-bit little-endian size of each segment (4*(segment count) bytes).
// * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes).  (I.e., if there are an even
//     number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
// * Data from each segment, in order (8*sum(segment sizes) bytes)
//
// This format has some important properties:
// - It is self-delimiting, so multiple messages may be written to a stream without any external
//   delimiter.
// - The total size and position of each segment can be determined by reading only the first part
//   of the message, allowing lazy and random-access reading of the segment data.
// - A message is always at least 8 bytes.
// - A single-segment message can be read entirely in two system calls with no buffering.
// - A multi-segment message can be read entirely in three system calls with no buffering.
// - The format is appropriate for mmap()ing since all data is aligned.

Kenton Varda's avatar
Kenton Varda committed
41 42
#ifndef CAPNP_SERIALIZE_H_
#define CAPNP_SERIALIZE_H_
Kenton Varda's avatar
Kenton Varda committed
43

44
#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
45 46 47
#pragma GCC system_header
#endif

Kenton Varda's avatar
Kenton Varda committed
48
#include "message.h"
49
#include <kj/io.h>
Kenton Varda's avatar
Kenton Varda committed
50

51
namespace capnp {
Kenton Varda's avatar
Kenton Varda committed
52 53 54 55 56 57

class FlatArrayMessageReader: public MessageReader {
  // Parses a message from a flat array.  Note that it makes sense to use this together with mmap()
  // for extremely fast parsing.

public:
58
  FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
Kenton Varda's avatar
Kenton Varda committed
59 60
  // The array must remain valid until the MessageReader is destroyed.

61
  kj::ArrayPtr<const word> getSegment(uint id) override;
Kenton Varda's avatar
Kenton Varda committed
62

63 64 65 66 67 68
  const word* getEnd() const { return end; }
  // Get a pointer just past the end of the message as determined by reading the message header.
  // This could actually be before the end of the input array.  This pointer is useful e.g. if
  // you know that the input array has extra stuff appended after the message and you want to
  // get at it.

Kenton Varda's avatar
Kenton Varda committed
69 70
private:
  // Optimize for single-segment case.
71 72
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
73
  const word* end;
Kenton Varda's avatar
Kenton Varda committed
74 75
};

76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
    kj::ArrayPtr<const word> array, MessageBuilder& target,
    ReaderOptions options = ReaderOptions());
// Convenience function which reads a message using `FlatArrayMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// Returns an ArrayPtr containing any words left over in the array after consuming the whole
// message. This is useful when reading multiple messages that have been concatenated. See also
// FlatArrayMessageReader::getEnd().
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

91
kj::Array<word> messageToFlatArray(MessageBuilder& builder);
Kenton Varda's avatar
Kenton Varda committed
92
// Constructs a flat array containing the entire content of the given message.
93
//
Kenton Varda's avatar
Kenton Varda committed
94 95
// To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
// `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
96 97 98
// deleted. For example:
//
//     kj::Array<capnp::word> words = messageToFlatArray(myMessage);
Kenton Varda's avatar
Kenton Varda committed
99
//     kj::ArrayPtr<kj::byte> bytes = words.asBytes();
100
//     write(fd, bytes.begin(), bytes.size());
Kenton Varda's avatar
Kenton Varda committed
101

102
kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
Kenton Varda's avatar
Kenton Varda committed
103 104
// Version of messageToFlatArray that takes a raw segment array.

105 106 107 108 109 110
size_t computeSerializedSizeInWords(MessageBuilder& builder);
// Returns the size, in words, that will be needed to serialize the message, including the header.

size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
// Version of computeSerializedSizeInWords that takes a raw segment array.

111 112 113 114 115 116 117 118
size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
// Given a prefix of a serialized message, try to determine the expected total size of the message,
// in words. The returned size is based on the information known so far; it may be an underestimate
// if the prefix doesn't contain the full segment table.
//
// If the returned value is greater than `messagePrefix.size()`, then the message is not yet
// complete and the app cannot parse it yet. If the returned value is less than or equal to
// `messagePrefix.size()`, then the returned value is the exact total size of the message; any
Andrew Murray's avatar
Andrew Murray committed
119
// remaining bytes are part of the next message.
120 121 122 123 124
//
// This function is useful when reading messages from a stream in an asynchronous way, but when
// using the full KJ async infrastructure would be too difficult. Each time bytes are received,
// use this function to determine if an entire message is ready to be parsed.

Kenton Varda's avatar
Kenton Varda committed
125 126 127
// =======================================================================================

class InputStreamMessageReader: public MessageReader {
128 129 130
  // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
  // for a subclass specific to file descriptors.

Kenton Varda's avatar
Kenton Varda committed
131
public:
132
  InputStreamMessageReader(kj::InputStream& inputStream,
Kenton Varda's avatar
Kenton Varda committed
133
                           ReaderOptions options = ReaderOptions(),
134
                           kj::ArrayPtr<word> scratchSpace = nullptr);
135
  ~InputStreamMessageReader() noexcept(false);
136

Kenton Varda's avatar
Kenton Varda committed
137
  // implements MessageReader ----------------------------------------
138
  kj::ArrayPtr<const word> getSegment(uint id) override;
Kenton Varda's avatar
Kenton Varda committed
139 140

private:
141
  kj::InputStream& inputStream;
142
  byte* readPos;
Kenton Varda's avatar
Kenton Varda committed
143 144

  // Optimize for single-segment case.
145 146
  kj::ArrayPtr<const word> segment0;
  kj::Array<kj::ArrayPtr<const word>> moreSegments;
Kenton Varda's avatar
Kenton Varda committed
147

148
  kj::Array<word> ownedSpace;
149
  // Only if scratchSpace wasn't big enough.
150 151

  kj::UnwindDetector unwindDetector;
Kenton Varda's avatar
Kenton Varda committed
152 153
};

154 155 156 157 158 159 160 161 162 163 164
void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
                     ReaderOptions options = ReaderOptions(),
                     kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `InputStreamMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

165
void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
Kenton Varda's avatar
Kenton Varda committed
166 167
// Write the message to the given output stream.

168
void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
Kenton Varda's avatar
Kenton Varda committed
169 170 171 172 173
// Write the segment array to the given output stream.

// =======================================================================================
// Specializations for reading from / writing to file descriptors.

174
class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
  // A MessageReader that reads from a stream-based file descriptor.
  //
  // The base class order matters: kj::FdInputStream is listed first so that it is constructed
  // before InputStreamMessageReader, whose constructor is handed `*this` as its input stream.

public:
  StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, without taking ownership of the descriptor.

  StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
                        kj::ArrayPtr<word> scratchSpace = nullptr)
      : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
  // Read a message from a file descriptor, taking ownership of the descriptor.

  ~StreamFdMessageReader() noexcept(false);
  // Declared noexcept(false): this destructor is permitted to throw.
};

191 192 193 194 195 196 197 198 199 200 201
void readMessageCopyFromFd(int fd, MessageBuilder& target,
                           ReaderOptions options = ReaderOptions(),
                           kj::ArrayPtr<word> scratchSpace = nullptr);
// Convenience function which reads a message using `StreamFdMessageReader` then copies the
// content into the target `MessageBuilder`, verifying that the message structure is valid
// (although not necessarily that it matches the desired schema).
//
// (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
// of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
// safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)

Kenton Varda's avatar
Kenton Varda committed
202 203 204 205 206 207 208
void writeMessageToFd(int fd, MessageBuilder& builder);
// Write the message to the given file descriptor.
//
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

209
void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
Kenton Varda's avatar
Kenton Varda committed
210 211 212 213 214 215 216 217 218
// Write the segment array to the given file descriptor.
//
// This function throws an exception on any I/O error.  If your code is not exception-safe, be sure
// you catch this exception at the call site.  If throwing an exception is not acceptable, you
// can implement your own OutputStream with arbitrary error handling and then use writeMessage().

// =======================================================================================
// inline stuff

219
inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
  // Forwards to the raw-segment overload, passing the builder's output segments.
  return messageToFlatArray(builder.getSegmentsForOutput());
}

223 224 225 226
inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
  // Measure the builder's output segments using the raw-segment overload.
  auto outputSegments = builder.getSegmentsForOutput();
  return computeSerializedSizeInWords(outputSegments);
}

227
inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
  // Forwards to the raw-segment overload, passing the builder's output segments.
  writeMessage(output, builder.getSegmentsForOutput());
}

inline void writeMessageToFd(int fd, MessageBuilder& builder) {
  // Write the builder's output segments to `fd` using the raw-segment overload.
  auto outputSegments = builder.getSegmentsForOutput();
  writeMessageToFd(fd, outputSegments);
}

235
}  // namespace capnp
Kenton Varda's avatar
Kenton Varda committed
236 237

#endif  // CAPNP_SERIALIZE_H_