// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

Kenton Varda's avatar
Kenton Varda committed
22
#include <kj/common.h>
23
#include <kj/memory.h>
24
#include <kj/mutex.h>
25
#include "common.h"
26
#include "layout.h"
27
#include "any.h"
Kenton Varda's avatar
Kenton Varda committed
28

Kenton Varda's avatar
Kenton Varda committed
29 30
#ifndef CAPNP_MESSAGE_H_
#define CAPNP_MESSAGE_H_
Kenton Varda's avatar
Kenton Varda committed
31

32 33 34 35
#if defined(__GNUC__) && !CAPNP_HEADER_WARNINGS
#pragma GCC system_header
#endif

36
namespace capnp {
Kenton Varda's avatar
Kenton Varda committed
37

38
namespace _ {  // private
  // Forward-declared so that this header only needs pointers to the arena types.
  class ReaderArena;
  class BuilderArena;
}

// Forward declarations of types that are only named (not defined) in this header.
class StructSchema;
class Orphanage;
template <typename T>
class Orphan;
Kenton Varda's avatar
Kenton Varda committed
47

48
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
struct ReaderOptions {
  // Options controlling how data is read.

  uint64_t traversalLimitInWords = 8 * 1024 * 1024;
  // Limits how many total words of data are allowed to be traversed.  Traversal is counted when
  // a new struct or list builder is obtained, e.g. from a get() accessor.  This means that calling
  // the getter for the same sub-struct multiple times will cause it to be double-counted.  Once
  // the traversal limit is reached, an error will be reported.
  //
  // This limit exists for security reasons.  It is possible for an attacker to construct a message
  // in which multiple pointers point at the same location.  This is technically invalid, but hard
  // to detect.  Using such a message, an attacker could cause a message which is small on the wire
  // to appear much larger when actually traversed, possibly exhausting server resources leading to
  // denial-of-service.
  //
  // It makes sense to set a traversal limit that is much larger than the underlying message.
  // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters
  // multiple times, which is expensive anyway), this should provide adequate protection without
  // inconvenience.
  //
  // The default limit is 64 MiB.  This may or may not be a sensible number for any given use case,
  // but probably at least prevents easy exploitation while also avoiding causing problems in most
  // typical cases.

  int nestingLimit = 64;
  // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or
  // lists of structs.
  //
  // Like the traversal limit, this limit exists for security reasons.  Since it is common to use
  // recursive code to traverse recursive data structures, an attacker could easily cause a stack
  // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even
  // being very large.  The default limit of 64 is probably low enough to prevent any chance of
  // stack overflow, yet high enough that it is never a problem in practice.
};

class MessageReader {
86 87 88 89 90 91 92 93 94 95
  // Abstract interface for an object used to read a Cap'n Proto message.  Subclasses of
  // MessageReader are responsible for reading the raw, flat message content.  Callers should
  // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
  // representing the root of the message, then use that to traverse the message content.
  //
  // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
  // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
  // `serialize.h`), which reads the message from a file descriptor.  One might implement other
  // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.

Kenton Varda's avatar
Kenton Varda committed
96
public:
97 98 99 100 101
  MessageReader(ReaderOptions options);
  // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
  // default value of "ReaderOptions()".  The base class constructor doesn't have a default value
  // in order to remind subclasses that they really need to give the user a way to provide this.

102
  virtual ~MessageReader() noexcept(false);
Kenton Varda's avatar
Kenton Varda committed
103

104
  virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
105 106
  // Gets the segment with the given ID, or returns null if no such segment exists. This method
  // will be called at most once for each segment ID.
107

108 109
  inline const ReaderOptions& getOptions();
  // Get the options passed to the constructor.
110

111 112
  template <typename RootType>
  typename RootType::Reader getRoot();
Kenton Varda's avatar
Kenton Varda committed
113 114
  // Get the root struct of the message, interpreting it as the given struct type.

Kenton Varda's avatar
Kenton Varda committed
115 116 117
  template <typename RootType, typename SchemaType>
  typename RootType::Reader getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
118
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
119
  // use this.
120

121 122 123 124 125 126 127
private:
  ReaderOptions options;

  // Space in which we can construct a ReaderArena.  We don't use ReaderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a ReaderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
128
  void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
129
  bool allocatedArena;
Kenton Varda's avatar
Kenton Varda committed
130

131
  _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); }
132
  AnyPointer::Reader getRootInternal();
133
};
134

135
class MessageBuilder {
136 137 138 139 140 141 142 143 144 145 146
  // Abstract interface for an object used to allocate and build a message.  Subclasses of
  // MessageBuilder are responsible for allocating the space in which the message will be written.
  // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
  // tricky things like allocate messages in shared memory or mmap()ed files.
  //
  // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack)
  // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
  // That, in turn, can be used to fill in the message content.  When done, you can call
  // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
  // message.

147
public:
148
  MessageBuilder();
149
  virtual ~MessageBuilder() noexcept(false);
150
  KJ_DISALLOW_COPY(MessageBuilder);
Kenton Varda's avatar
Kenton Varda committed
151

152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
  struct SegmentInit {
    kj::ArrayPtr<word> space;

    size_t wordsUsed;
    // Number of words in `space` which are used; the rest are free space in which additional
    // objects may be allocated.
  };

  explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments);
  // Create a MessageBuilder backed by existing memory. This is an advanced interface that most
  // people should not use. THIS METHOD IS INSECURE; see below.
  //
  // This allows a MessageBuilder to be constructed to modify an in-memory message without first
  // making a copy of the content. This is especially useful in conjunction with mmap().
  //
  // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself
  // only need outlive the constructor.
  //
  // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that
  //   the input message is valid. This constructor is designed to be used with data you control,
  //   e.g. an mmap'd file which is owned and accessed by only one program. When reading data you
  //   do not trust, you *must* load it into a Reader and then copy into a Builder as a means of
  //   validating the content.
  //
  // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is
  //   currently in use by another MessageBuilder or MessageReader. Other readers/builders will
  //   not observe changes to the segment sizes nor newly-allocated segments caused by allocating
  //   new objects in this message.

181
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
182 183 184
  // Allocates an array of at least the given number of words, throwing an exception or crashing if
  // this is not possible.  It is expected that this method will usually return more space than
  // requested, and the caller should use that extra space as much as possible before allocating
185
  // more.  The returned space remains valid at least until the MessageBuilder is destroyed.
186 187 188
  //
  // Cap'n Proto will only call this once at a time, so the subclass need not worry about
  // thread-safety.
189 190 191

  template <typename RootType>
  typename RootType::Builder initRoot();
Kenton Varda's avatar
Kenton Varda committed
192 193
  // Initialize the root struct of the message as the given struct type.

194 195 196 197
  template <typename Reader>
  void setRoot(Reader&& value);
  // Set the root struct to a deep copy of the given struct.

198 199
  template <typename RootType>
  typename RootType::Builder getRoot();
Kenton Varda's avatar
Kenton Varda committed
200 201
  // Get the root struct of the message, interpreting it as the given struct type.

Kenton Varda's avatar
Kenton Varda committed
202 203 204
  template <typename RootType, typename SchemaType>
  typename RootType::Builder getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
205
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
206 207
  // use this.

Kenton Varda's avatar
Kenton Varda committed
208 209 210
  template <typename RootType, typename SchemaType>
  typename RootType::Builder initRoot(SchemaType schema);
  // Dynamically init the root struct of the message using the given schema (a StructSchema).
211
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
212
  // use this.
213

214 215 216 217
  template <typename T>
  void adoptRoot(Orphan<T>&& orphan);
  // Like setRoot() but adopts the orphan without copying.

218
  kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
219 220
  // Get the raw data that makes up the message.

221 222
  Orphanage getOrphanage();

223
private:
224
  void* arenaSpace[21];
225 226 227 228
  // Space in which we can construct a BuilderArena.  We don't use BuilderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a BuilderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
229

230
  bool allocatedArena = false;
231 232 233 234 235
  // We have to initialize the arena lazily because when we do so we want to allocate the root
  // pointer immediately, and this will allocate a segment, which requires a virtual function
  // call on the MessageBuilder.  We can't do such a call in the constructor since the subclass
  // isn't constructed yet.  This is kind of annoying because it means that getOrphanage() is
  // not thread-safe, but that shouldn't be a huge deal...
236

237
  _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); }
238
  _::SegmentBuilder* getRootSegment();
239
  AnyPointer::Builder getRootInternal();
Kenton Varda's avatar
Kenton Varda committed
240 241
};

242
template <typename RootType>
243
typename RootType::Reader readMessageUnchecked(const word* data);
244 245 246 247 248
// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY.  DO NOT
// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
//
// Given a pointer to a known-valid message located in a single contiguous memory segment,
249 250 251
// returns a reader for that message.  No bounds-checking will be done while traversing this
// message.  Use this only if you have already verified that all pointers are valid and in-bounds,
// and there are no far pointers in the message.
252
//
253 254 255 256 257
// To create a message that can be passed to this function, build a message using a MallocAllocator
// whose preferred segment size is larger than the message size.  This guarantees that the message
// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
// array.  That word array is your message; you may pass a pointer to its first word into
// readMessageUnchecked() to read the message.
258 259 260 261 262
//
// This can be particularly handy for embedding messages in generated code:  you can
// embed the raw bytes (using AlignedData) then make a Reader for it using this.  This is the way
// default values are embedded in code generated by the Cap'n Proto compiler.  E.g., if you have
// a message MyMessage, you can read its default value like so:
263 264 265
//    MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words);
//
// To sanitize a message from an untrusted source such that it can be safely passed to
266 267 268
// readMessageUnchecked(), use copyToUnchecked().

template <typename Reader>
269
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
270 271
// Copy the content of the given reader into the given buffer, such that it can safely be passed to
// readMessageUnchecked().  The buffer's size must be exactly reader.totalSizeInWords() + 1,
272
// otherwise an exception will be thrown.  The buffer must be zero'd before calling.
Kenton Varda's avatar
Kenton Varda committed
273

274
template <typename Type>
static typename Type::Reader defaultValue();
// Get a default instance of the given struct or list type.
//
// TODO(cleanup):  Find a better home for this function?

280
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
281

282 283 284 285
class SegmentArrayMessageReader: public MessageReader {
  // A simple MessageReader that reads from an array of word arrays representing all segments.
  // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
  // (although it would probably make more sense to call builder.getRoot().asReader() in that case).
Kenton Varda's avatar
Kenton Varda committed
286

287
public:
288
  SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
289 290 291
                            ReaderOptions options = ReaderOptions());
  // Creates a message pointing at the given segment array, without taking ownership of the
  // segments.  All arrays passed in must remain valid until the MessageReader is destroyed.
Kenton Varda's avatar
Kenton Varda committed
292

293
  KJ_DISALLOW_COPY(SegmentArrayMessageReader);
294
  ~SegmentArrayMessageReader() noexcept(false);
295

296
  virtual kj::ArrayPtr<const word> getSegment(uint id) override;
297

298
private:
299
  kj::ArrayPtr<const kj::ArrayPtr<const word>> segments;
300
};
301

302
enum class AllocationStrategy: uint8_t {
  // Policy used by MallocMessageBuilder to size the segments it allocates.

  FIXED_SIZE,
  // The builder will prefer to allocate the same amount of space for each segment with no
  // heuristic growth.  It will still allocate larger segments when the preferred size is too small
  // for some single object.  This mode is generally not recommended, but can be particularly useful
  // for testing in order to force a message to allocate a predictable number of segments.  Note
  // that you can force every single object in the message to be located in a separate segment by
  // using this mode with firstSegmentWords = 0.

  GROW_HEURISTICALLY
  // The builder will heuristically decide how much space to allocate for each segment.  Each
  // allocated segment will be progressively larger than the previous segments on the assumption
  // that message sizes are exponentially distributed.  The total number of segments that will be
  // allocated for a message of size n is O(log n).
};
Kenton Varda's avatar
Kenton Varda committed
317

318 319
// Default arguments for MallocMessageBuilder's constructors: the first segment size (in words)
// and the strategy used to size subsequent segments.
constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024;
constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY;
Kenton Varda's avatar
Kenton Varda committed
320

321 322 323 324
class MallocMessageBuilder: public MessageBuilder {
  // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments.  This
  // implementation should be reasonable for any case that doesn't require writing the message to
  // a specific location in memory.
325

326
public:
Kenton Varda's avatar
Kenton Varda committed
327
  explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS,
328 329 330 331 332 333 334 335 336 337 338 339 340
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // Creates a BuilderContext which allocates at least the given number of words for the first
  // segment, and then uses the given strategy to decide how much to allocate for subsequent
  // segments.  When choosing a value for firstSegmentWords, consider that:
  // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
  //    if most messages fit in a single segment.
  // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
  //    more space than you need.  It only becomes problematic if you are allocating many messages
  //    in parallel and thus use lots of memory, or if you allocate so much extra space that just
  //    zeroing it out becomes a bottleneck.
  // The defaults have been chosen to be reasonable for most people, so don't change them unless you
  // have reason to believe you need to.

341
  explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
342 343 344 345
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // This version always returns the given array for the first segment, and then proceeds with the
  // allocation strategy.  This is useful for optimization when building lots of small messages in
  // a tight loop:  you can reuse the space for the first segment.
Kenton Varda's avatar
Kenton Varda committed
346 347 348
  //
  // firstSegment MUST be zero-initialized.  MallocMessageBuilder's destructor will write new zeros
  // over any space that was used so that it can be reused.
349

350
  KJ_DISALLOW_COPY(MallocMessageBuilder);
351
  virtual ~MallocMessageBuilder() noexcept(false);
352

353
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
354 355 356 357 358

private:
  uint nextSize;
  AllocationStrategy allocationStrategy;

359
  bool ownFirstSegment;
360 361
  bool returnedFirstSegment;

362 363 364
  void* firstSegment;

  struct MoreSegments;
365
  kj::Maybe<kj::Own<MoreSegments>> moreSegments;
Kenton Varda's avatar
Kenton Varda committed
366 367
};

368
class FlatMessageBuilder: public MessageBuilder {
369 370 371 372 373 374 375 376 377 378 379
  // THIS IS NOT THE CLASS YOU'RE LOOKING FOR.
  //
  // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder`
  // and pass the scratch space to its constructor.  It will then only fall back to malloc() if
  // the scratch space is not large enough.
  //
  // Do NOT use this class unless you really know what you're doing.  This class is problematic
  // because it requires advance knowledge of the size of your message, which is usually impossible
  // to determine without actually building the message.  The class was created primarily to
  // implement `copyToUnchecked()`, which itself exists only to support other internal parts of
  // the Cap'n Proto implementation.
380 381

public:
382 383
  explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
  KJ_DISALLOW_COPY(FlatMessageBuilder);
384
  virtual ~FlatMessageBuilder() noexcept(false);
385 386 387 388

  void requireFilled();
  // Throws an exception if the flat array is not exactly full.

389
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
390 391

private:
392
  kj::ArrayPtr<word> array;
393 394 395
  bool allocated;
};

Kenton Varda's avatar
Kenton Varda committed
396
// =======================================================================================
397
// implementation details
Kenton Varda's avatar
Kenton Varda committed
398

399 400
// Returns a reference to the options stored in this reader.
inline const ReaderOptions& MessageReader::getOptions() {
  return options;
}

403
template <typename RootType>
404
inline typename RootType::Reader MessageReader::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
405
  return getRootInternal().getAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
406 407
}

408
template <typename RootType>
409
inline typename RootType::Builder MessageBuilder::initRoot() {
Kenton Varda's avatar
Kenton Varda committed
410
  return getRootInternal().initAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
411 412
}

413 414
template <typename Reader>
inline void MessageBuilder::setRoot(Reader&& value) {
Kenton Varda's avatar
Kenton Varda committed
415
  getRootInternal().setAs<FromReader<Reader>>(value);
416 417
}

418
template <typename RootType>
419
inline typename RootType::Builder MessageBuilder::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
420
  return getRootInternal().getAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
421 422
}

423 424
// Hands the orphan over to the root pointer, moving it rather than copying.
template <typename T>
void MessageBuilder::adoptRoot(Orphan<T>&& orphan) {
  getRootInternal().adopt(kj::mv(orphan));
}

// Schema-driven root accessor: interprets the untyped root pointer as RootType using `schema`.
template <typename RootType, typename SchemaType>
typename RootType::Reader MessageReader::getRoot(SchemaType schema) {
  auto rootPointer = getRootInternal();
  return rootPointer.getAs<RootType>(schema);
}

// Schema-driven root accessor: interprets the untyped root pointer as RootType using `schema`.
template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) {
  auto rootPointer = getRootInternal();
  return rootPointer.getAs<RootType>(schema);
}

// Schema-driven root initializer: initializes the untyped root pointer as RootType using `schema`.
template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) {
  return getRootInternal().initAs<RootType>(schema);
}

443
template <typename RootType>
444
typename RootType::Reader readMessageUnchecked(const word* data) {
445
  return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>();
446 447
}

448
template <typename Reader>
449
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
450
  FlatMessageBuilder builder(uncheckedBuffer);
Kenton Varda's avatar
Kenton Varda committed
451
  builder.setRoot(kj::fwd<Reader>(reader));
452 453 454
  builder.requireFilled();
}

455
}  // namespace capnp
Kenton Varda's avatar
Kenton Varda committed
456

Kenton Varda's avatar
Kenton Varda committed
457
#endif  // CAPNP_MESSAGE_H_