message.h 18.8 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Kenton Varda's avatar
Kenton Varda committed
24
#include <kj/common.h>
25
#include <kj/memory.h>
26
#include <kj/mutex.h>
27
#include "common.h"
28
#include "layout.h"
Kenton Varda's avatar
Kenton Varda committed
29

30 31
#include "list.h"  // TODO(cleanup):  For FromReader.  Move elsewhere?

Kenton Varda's avatar
Kenton Varda committed
32 33
#ifndef CAPNP_MESSAGE_H_
#define CAPNP_MESSAGE_H_
Kenton Varda's avatar
Kenton Varda committed
34

35
namespace capnp {
Kenton Varda's avatar
Kenton Varda committed
36

37
namespace _ {  // private
  // Forward declarations of the arena types that MessageReader and MessageBuilder construct
  // in-place inside their `arenaSpace` members.  This header only names pointers to these types,
  // so the full definitions are not needed here.
  class ReaderArena;
  class BuilderArena;
}

42
class StructSchema;
Kenton Varda's avatar
Kenton Varda committed
43

44
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
45

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
struct ReaderOptions {
  // Limits applied while a message is being read.

  uint64_t traversalLimitInWords{8 * 1024 * 1024};
  // Upper bound on the total number of words that may be traversed.  Traversal is charged each
  // time a new struct or list is obtained, e.g. from a get() accessor, so calling the getter for
  // the same sub-struct repeatedly counts it repeatedly.  Once the limit is reached, an error is
  // reported.
  //
  // The limit is a security measure.  An attacker can construct a message in which several
  // pointers point at the same location.  That is technically invalid, but hard to detect.  Such
  // a message is small on the wire yet appears much larger when traversed, which could exhaust
  // server resources and lead to denial-of-service.
  //
  // It makes sense to choose a traversal limit much larger than the underlying message.  Combined
  // with sensible coding practices (e.g. avoiding repeated calls to sub-object getters, which is
  // expensive anyway), this should give adequate protection without inconvenience.
  //
  // The default limit is 64 MiB.  That may or may not be a sensible number for any given use
  // case, but it probably at least prevents easy exploitation while also avoiding problems in
  // most typical cases.

  uint nestingLimit{64};
  // Upper bound on how deeply nested a message structure may be, e.g. structs containing other
  // structs or lists of structs.
  //
  // This limit, too, exists for security reasons.  Recursive data structures are commonly
  // traversed with recursive code, so an attacker could easily cause a stack overflow by sending
  // a very-deeply-nested (or even cyclic) message, without the message even being very large.
  // The default of 64 is probably low enough to prevent any chance of stack overflow, yet high
  // enough that it is never a problem in practice.
};

class MessageReader {
82 83 84 85 86 87 88 89 90 91
  // Abstract interface for an object used to read a Cap'n Proto message.  Subclasses of
  // MessageReader are responsible for reading the raw, flat message content.  Callers should
  // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
  // representing the root of the message, then use that to traverse the message content.
  //
  // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
  // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
  // `serialize.h`), which reads the message from a file descriptor.  One might implement other
  // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.

Kenton Varda's avatar
Kenton Varda committed
92
public:
93 94 95 96 97
  MessageReader(ReaderOptions options);
  // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
  // default value of "ReaderOptions()".  The base class constructor doesn't have a default value
  // in order to remind subclasses that they really need to give the user a way to provide this.

98
  virtual ~MessageReader() noexcept(false);
Kenton Varda's avatar
Kenton Varda committed
99

100
  virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
101
  // Gets the segment with the given ID, or returns null if no such segment exists.
102 103 104
  //
  // Normally getSegment() will only be called once for each segment ID.  Subclasses can call
  // reset() to clear the segment table and start over with new segments.
105

106 107
  inline const ReaderOptions& getOptions();
  // Get the options passed to the constructor.
108

109 110
  template <typename RootType>
  typename RootType::Reader getRoot();
Kenton Varda's avatar
Kenton Varda committed
111 112 113
  // Get the root struct of the message, interpreting it as the given struct type.

  template <typename RootType>
114 115
  typename RootType::Reader getRoot(StructSchema schema);
  // Dynamically interpret the root struct of the message using the given schema.
116
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
117
  // use this.
118

119 120 121 122 123 124 125 126 127
private:
  ReaderOptions options;

  // Space in which we can construct a ReaderArena.  We don't use ReaderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a ReaderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
  void* arenaSpace[15];
  bool allocatedArena;
Kenton Varda's avatar
Kenton Varda committed
128

129 130
  _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); }
  _::StructReader getRootInternal();
131
};
132

133
class MessageBuilder {
134 135 136 137 138 139 140 141 142 143 144
  // Abstract interface for an object used to allocate and build a message.  Subclasses of
  // MessageBuilder are responsible for allocating the space in which the message will be written.
  // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
  // tricky things like allocate messages in shared memory or mmap()ed files.
  //
  // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack)
  // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
  // That, in turn, can be used to fill in the message content.  When done, you can call
  // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
  // message.

145
public:
146
  MessageBuilder();
147
  virtual ~MessageBuilder() noexcept(false);
Kenton Varda's avatar
Kenton Varda committed
148

149
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
150 151 152
  // Allocates an array of at least the given number of words, throwing an exception or crashing if
  // this is not possible.  It is expected that this method will usually return more space than
  // requested, and the caller should use that extra space as much as possible before allocating
153
  // more.  The returned space remains valid at least until the MessageBuilder is destroyed.
154 155 156
  //
  // Cap'n Proto will only call this once at a time, so the subclass need not worry about
  // thread-safety.
157 158 159

  template <typename RootType>
  typename RootType::Builder initRoot();
Kenton Varda's avatar
Kenton Varda committed
160 161
  // Initialize the root struct of the message as the given struct type.

162 163 164 165
  template <typename Reader>
  void setRoot(Reader&& value);
  // Set the root struct to a deep copy of the given struct.

166 167
  template <typename RootType>
  typename RootType::Builder getRoot();
Kenton Varda's avatar
Kenton Varda committed
168 169 170
  // Get the root struct of the message, interpreting it as the given struct type.

  template <typename RootType>
171 172
  typename RootType::Builder getRoot(StructSchema schema);
  // Dynamically interpret the root struct of the message using the given schema.
173
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
174 175 176
  // use this.

  template <typename RootType>
177 178
  typename RootType::Builder initRoot(StructSchema schema);
  // Dynamically init the root struct of the message using the given schema.
179
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
180
  // use this.
181

182
  kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
183

184 185
  Orphanage getOrphanage();

186
private:
187
  void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
188 189 190 191
  // Space in which we can construct a BuilderArena.  We don't use BuilderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a BuilderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
192

193
  bool allocatedArena = false;
194 195 196 197 198
  // We have to initialize the arena lazily because when we do so we want to allocate the root
  // pointer immediately, and this will allocate a segment, which requires a virtual function
  // call on the MessageBuilder.  We can't do such a call in the constructor since the subclass
  // isn't constructed yet.  This is kind of annoying because it means that getOrphanage() is
  // not thread-safe, but that shouldn't be a huge deal...
199

200 201 202 203 204
  _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); }
  _::SegmentBuilder* getRootSegment();
  _::StructBuilder initRoot(_::StructSize size);
  void setRootInternal(_::StructReader reader);
  _::StructBuilder getRoot(_::StructSize size);
Kenton Varda's avatar
Kenton Varda committed
205 206
};

207
template <typename RootType>
208
typename RootType::Reader readMessageUnchecked(const word* data);
209 210 211 212 213
// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY.  DO NOT
// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
//
// Given a pointer to a known-valid message located in a single contiguous memory segment,
214 215 216
// returns a reader for that message.  No bounds-checking will be done while traversing this
// message.  Use this only if you have already verified that all pointers are valid and in-bounds,
// and there are no far pointers in the message.
217
//
218 219 220 221 222
// To create a message that can be passed to this function, build a message using a MallocAllocator
// whose preferred segment size is larger than the message size.  This guarantees that the message
// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
// array.  That word array is your message; you may pass a pointer to its first word into
// readMessageUnchecked() to read the message.
223 224 225 226 227
//
// This can be particularly handy for embedding messages in generated code:  you can
// embed the raw bytes (using AlignedData) then make a Reader for it using this.  This is the way
// default values are embedded in code generated by the Cap'n Proto compiler.  E.g., if you have
// a message MyMessage, you can read its default value like so:
228 229 230
//    MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words);
//
// To sanitize a message from an untrusted source such that it can be safely passed to
231 232 233
// readMessageUnchecked(), use copyToUnchecked().

template <typename Reader>
234
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
235 236 237
// Copy the content of the given reader into the given buffer, such that it can safely be passed to
// readMessageUnchecked().  The buffer's size must be exactly reader.totalSizeInWords() + 1,
// otherwise an exception will be thrown.
Kenton Varda's avatar
Kenton Varda committed
238

239
template <typename Type>
static typename Type::Reader defaultValue();
// Get a default instance of the given struct or list type.
//
// NOTE(review): declared `static` at namespace scope, so every translation unit that uses it gets
// its own internal-linkage instantiation.
//
// TODO(cleanup):  Find a better home for this function?

245
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
246

247 248 249 250
class SegmentArrayMessageReader: public MessageReader {
  // A simple MessageReader that reads from an array of word arrays representing all segments.
  // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
  // (although it would probably make more sense to call builder.getRoot().asReader() in that case).
Kenton Varda's avatar
Kenton Varda committed
251

252
public:
253
  SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
254 255 256
                            ReaderOptions options = ReaderOptions());
  // Creates a message pointing at the given segment array, without taking ownership of the
  // segments.  All arrays passed in must remain valid until the MessageReader is destroyed.
Kenton Varda's avatar
Kenton Varda committed
257

258
  KJ_DISALLOW_COPY(SegmentArrayMessageReader);
259
  ~SegmentArrayMessageReader() noexcept(false);
260

261
  virtual kj::ArrayPtr<const word> getSegment(uint id) override;
262

263
private:
264
  kj::ArrayPtr<const kj::ArrayPtr<const word>> segments;
265
};
266

267
enum class AllocationStrategy: uint8_t {
  // Policy a builder uses to decide how much space to allocate for each segment.

  FIXED_SIZE,
  // The builder will prefer to allocate the same amount of space for each segment with no
  // heuristic growth.  It will still allocate larger segments when the preferred size is too small
  // for some single object.  This mode is generally not recommended, but can be particularly useful
  // for testing in order to force a message to allocate a predictable number of segments.  Note
  // that you can force every single object in the message to be located in a separate segment by
  // using this mode with firstSegmentWords = 0.

  GROW_HEURISTICALLY
  // The builder will heuristically decide how much space to allocate for each segment.  Each
  // allocated segment will be progressively larger than the previous segments on the assumption
  // that message sizes are exponentially distributed.  The total number of segments that will be
  // allocated for a message of size n is O(log n).
};
Kenton Varda's avatar
Kenton Varda committed
282

283 284
constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024;
constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY =
    AllocationStrategy::GROW_HEURISTICALLY;
// Suggested values for a builder's first-segment size (in words) and for its allocation strategy.
Kenton Varda's avatar
Kenton Varda committed
285

286 287 288 289
class MallocMessageBuilder: public MessageBuilder {
  // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments.  This
  // implementation should be reasonable for any case that doesn't require writing the message to
  // a specific location in memory.
290

291
public:
292
  explicit MallocMessageBuilder(uint firstSegmentWords = 1024,
293 294 295 296 297 298 299 300 301 302 303 304 305
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // Creates a BuilderContext which allocates at least the given number of words for the first
  // segment, and then uses the given strategy to decide how much to allocate for subsequent
  // segments.  When choosing a value for firstSegmentWords, consider that:
  // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
  //    if most messages fit in a single segment.
  // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
  //    more space than you need.  It only becomes problematic if you are allocating many messages
  //    in parallel and thus use lots of memory, or if you allocate so much extra space that just
  //    zeroing it out becomes a bottleneck.
  // The defaults have been chosen to be reasonable for most people, so don't change them unless you
  // have reason to believe you need to.

306
  explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
307 308 309 310
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // This version always returns the given array for the first segment, and then proceeds with the
  // allocation strategy.  This is useful for optimization when building lots of small messages in
  // a tight loop:  you can reuse the space for the first segment.
Kenton Varda's avatar
Kenton Varda committed
311 312 313
  //
  // firstSegment MUST be zero-initialized.  MallocMessageBuilder's destructor will write new zeros
  // over any space that was used so that it can be reused.
314

315
  KJ_DISALLOW_COPY(MallocMessageBuilder);
316
  virtual ~MallocMessageBuilder() noexcept(false);
317

318
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
319 320 321 322 323

private:
  uint nextSize;
  AllocationStrategy allocationStrategy;

324
  bool ownFirstSegment;
325 326
  bool returnedFirstSegment;

327 328 329
  void* firstSegment;

  struct MoreSegments;
330
  kj::Maybe<kj::Own<MoreSegments>> moreSegments;
Kenton Varda's avatar
Kenton Varda committed
331 332
};

333 334 335 336 337
class FlatMessageBuilder: public MessageBuilder {
  // A message builder implementation which allocates from a single flat array, throwing an
  // exception if it runs out of space.

public:
338 339
  explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
  KJ_DISALLOW_COPY(FlatMessageBuilder);
340
  virtual ~FlatMessageBuilder() noexcept(false);
341 342 343 344

  void requireFilled();
  // Throws an exception if the flat array is not exactly full.

345
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
346 347

private:
348
  kj::ArrayPtr<word> array;
349 350 351
  bool allocated;
};

Kenton Varda's avatar
Kenton Varda committed
352
// =======================================================================================
353
// implementation details
Kenton Varda's avatar
Kenton Varda committed
354

355 356
inline const ReaderOptions& MessageReader::getOptions() {
  // Returns a reference to the stored options member; no copy is made.
  return options;
}

359
template <typename RootType>
360
inline typename RootType::Reader MessageReader::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
361
  static_assert(kind<RootType>() == Kind::STRUCT, "Root type must be a Cap'n Proto struct type.");
362
  return typename RootType::Reader(getRootInternal());
Kenton Varda's avatar
Kenton Varda committed
363 364
}

365
template <typename RootType>
366
inline typename RootType::Builder MessageBuilder::initRoot() {
Kenton Varda's avatar
Kenton Varda committed
367
  static_assert(kind<RootType>() == Kind::STRUCT, "Root type must be a Cap'n Proto struct type.");
368
  return typename RootType::Builder(initRoot(_::structSize<RootType>()));
Kenton Varda's avatar
Kenton Varda committed
369 370
}

371 372 373 374 375 376 377
template <typename Reader>
inline void MessageBuilder::setRoot(Reader&& value) {
  typedef FromReader<Reader> RootType;
  static_assert(kind<RootType>() == Kind::STRUCT, "Root type must be a Cap'n Proto struct type.");
  setRootInternal(value._reader);
}

378
template <typename RootType>
379
inline typename RootType::Builder MessageBuilder::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
380
  static_assert(kind<RootType>() == Kind::STRUCT, "Root type must be a Cap'n Proto struct type.");
381
  return typename RootType::Builder(getRoot(_::structSize<RootType>()));
Kenton Varda's avatar
Kenton Varda committed
382 383
}

384
template <typename RootType>
385
typename RootType::Reader readMessageUnchecked(const word* data) {
386
  return typename RootType::Reader(_::StructReader::readRootUnchecked(data));
387 388
}

389
template <typename Reader>
390
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
391
  FlatMessageBuilder builder(uncheckedBuffer);
Kenton Varda's avatar
Kenton Varda committed
392
  builder.setRoot(kj::fwd<Reader>(reader));
393 394 395
  builder.requireFilled();
}

396 397 398
template <typename Type>
static typename Type::Reader defaultValue() {
  // TODO(soon):  Correctly handle lists.  Maybe primitives too?
399
  return typename Type::Reader(_::StructReader());
400 401
}

402
}  // namespace capnp
Kenton Varda's avatar
Kenton Varda committed
403

Kenton Varda's avatar
Kenton Varda committed
404
#endif  // CAPNP_MESSAGE_H_