message.h 20.6 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Kenton Varda's avatar
Kenton Varda committed
24
#include <kj/common.h>
25
#include <kj/memory.h>
26
#include <kj/mutex.h>
27
#include "common.h"
28
#include "layout.h"
29
#include "any.h"
Kenton Varda's avatar
Kenton Varda committed
30

Kenton Varda's avatar
Kenton Varda committed
31 32
#ifndef CAPNP_MESSAGE_H_
#define CAPNP_MESSAGE_H_
Kenton Varda's avatar
Kenton Varda committed
33

34
namespace capnp {
Kenton Varda's avatar
Kenton Varda committed
35

36
// Forward declarations of types that this header only names (via pointers, return types or
// template parameters); their definitions live in other headers.
namespace _ {  // private
  class ReaderArena;
  class BuilderArena;
}

class StructSchema;
class Orphanage;
template <typename T>
class Orphan;
Kenton Varda's avatar
Kenton Varda committed
45

46
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
47

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
struct ReaderOptions {
  // Options controlling how data is read.

  uint64_t traversalLimitInWords = 8 * 1024 * 1024;
  // Limits how many total words of data are allowed to be traversed.  Traversal is counted when
  // a new struct or list builder is obtained, e.g. from a get() accessor.  This means that calling
  // the getter for the same sub-struct multiple times will cause it to be double-counted.  Once
  // the traversal limit is reached, an error will be reported.
  //
  // This limit exists for security reasons.  It is possible for an attacker to construct a message
  // in which multiple pointers point at the same location.  This is technically invalid, but hard
  // to detect.  Using such a message, an attacker could cause a message which is small on the wire
  // to appear much larger when actually traversed, possibly exhausting server resources leading to
  // denial-of-service.
  //
  // It makes sense to set a traversal limit that is much larger than the underlying message.
  // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters
  // multiple times, which is expensive anyway), this should provide adequate protection without
  // inconvenience.
  //
  // The default limit is 64 MiB.  This may or may not be a sensible number for any given use case,
  // but probably at least prevents easy exploitation while also avoiding causing problems in most
  // typical cases.

  int nestingLimit = 64;
  // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or
  // lists of structs.
  //
  // Like the traversal limit, this limit exists for security reasons.  Since it is common to use
  // recursive code to traverse recursive data structures, an attacker could easily cause a stack
  // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even
  // being very large.  The default limit of 64 is probably low enough to prevent any chance of
  // stack overflow, yet high enough that it is never a problem in practice.
};

class MessageReader {
84 85 86 87 88 89 90 91 92 93
  // Abstract interface for an object used to read a Cap'n Proto message.  Subclasses of
  // MessageReader are responsible for reading the raw, flat message content.  Callers should
  // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
  // representing the root of the message, then use that to traverse the message content.
  //
  // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
  // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
  // `serialize.h`), which reads the message from a file descriptor.  One might implement other
  // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.

Kenton Varda's avatar
Kenton Varda committed
94
public:
95 96 97 98 99
  MessageReader(ReaderOptions options);
  // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
  // default value of "ReaderOptions()".  The base class constructor doesn't have a default value
  // in order to remind subclasses that they really need to give the user a way to provide this.

100
  virtual ~MessageReader() noexcept(false);
Kenton Varda's avatar
Kenton Varda committed
101

102
  virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
103
  // Gets the segment with the given ID, or returns null if no such segment exists.
104 105 106
  //
  // Normally getSegment() will only be called once for each segment ID.  Subclasses can call
  // reset() to clear the segment table and start over with new segments.
107

108 109
  inline const ReaderOptions& getOptions();
  // Get the options passed to the constructor.
110

111 112
  template <typename RootType>
  typename RootType::Reader getRoot();
Kenton Varda's avatar
Kenton Varda committed
113 114
  // Get the root struct of the message, interpreting it as the given struct type.

Kenton Varda's avatar
Kenton Varda committed
115 116 117
  template <typename RootType, typename SchemaType>
  typename RootType::Reader getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
118
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
119
  // use this.
120

121 122 123 124 125 126 127 128 129 130
  void initCapTable(kj::Array<kj::Maybe<kj::Own<ClientHook>>> capTable);
  // Sets the table of capabilities embedded in this message.  Capability pointers found in the
  // message content contain indexes into this table.  You must call this before attempting to
  // read any capability pointers (interface pointers) from the message.  The table is not passed
  // to the constructor because often (as in the RPC system) the cap table is actually constructed
  // based on a list read from the message itself.
  //
  // You must link against libcapnp-rpc to call this method (the rest of MessageBuilder is in
  // regular libcapnp).

131 132 133 134 135 136 137
private:
  ReaderOptions options;

  // Space in which we can construct a ReaderArena.  We don't use ReaderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a ReaderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
138
  void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
139
  bool allocatedArena;
Kenton Varda's avatar
Kenton Varda committed
140

141
  _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); }
142
  AnyPointer::Reader getRootInternal();
143
};
144

145
class MessageBuilder {
146 147 148 149 150 151 152 153 154 155 156
  // Abstract interface for an object used to allocate and build a message.  Subclasses of
  // MessageBuilder are responsible for allocating the space in which the message will be written.
  // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
  // tricky things like allocate messages in shared memory or mmap()ed files.
  //
  // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack)
  // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
  // That, in turn, can be used to fill in the message content.  When done, you can call
  // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
  // message.

157
public:
158
  MessageBuilder();
159
  virtual ~MessageBuilder() noexcept(false);
160
  KJ_DISALLOW_COPY(MessageBuilder);
Kenton Varda's avatar
Kenton Varda committed
161

162
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
163 164 165
  // Allocates an array of at least the given number of words, throwing an exception or crashing if
  // this is not possible.  It is expected that this method will usually return more space than
  // requested, and the caller should use that extra space as much as possible before allocating
166
  // more.  The returned space remains valid at least until the MessageBuilder is destroyed.
167 168 169
  //
  // Cap'n Proto will only call this once at a time, so the subclass need not worry about
  // thread-safety.
170 171 172

  template <typename RootType>
  typename RootType::Builder initRoot();
Kenton Varda's avatar
Kenton Varda committed
173 174
  // Initialize the root struct of the message as the given struct type.

175 176 177 178
  template <typename Reader>
  void setRoot(Reader&& value);
  // Set the root struct to a deep copy of the given struct.

179 180
  template <typename RootType>
  typename RootType::Builder getRoot();
Kenton Varda's avatar
Kenton Varda committed
181 182
  // Get the root struct of the message, interpreting it as the given struct type.

Kenton Varda's avatar
Kenton Varda committed
183 184 185
  template <typename RootType, typename SchemaType>
  typename RootType::Builder getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
186
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
187 188
  // use this.

Kenton Varda's avatar
Kenton Varda committed
189 190 191
  template <typename RootType, typename SchemaType>
  typename RootType::Builder initRoot(SchemaType schema);
  // Dynamically init the root struct of the message using the given schema (a StructSchema).
192
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
Kenton Varda's avatar
Kenton Varda committed
193
  // use this.
194

195 196 197 198
  template <typename T>
  void adoptRoot(Orphan<T>&& orphan);
  // Like setRoot() but adopts the orphan without copying.

199
  kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
200 201 202 203 204 205 206
  // Get the raw data that makes up the message.

  kj::ArrayPtr<kj::Maybe<kj::Own<ClientHook>>> getCapTable();
  // Get the table of capabilities (interface pointers) that have been added to this message.
  // When you later parse this message, you must call `initCapTable()` on the `MessageReader` and
  // give it an equivalent set of capabilities, otherwise cap pointers in the message will be
  // unusable.
207

208 209
  Orphanage getOrphanage();

210
private:
Kenton Varda's avatar
Kenton Varda committed
211
  void* arenaSpace[20];
212 213 214 215
  // Space in which we can construct a BuilderArena.  We don't use BuilderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a BuilderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
216

217
  bool allocatedArena = false;
218 219 220 221 222
  // We have to initialize the arena lazily because when we do so we want to allocate the root
  // pointer immediately, and this will allocate a segment, which requires a virtual function
  // call on the MessageBuilder.  We can't do such a call in the constructor since the subclass
  // isn't constructed yet.  This is kind of annoying because it means that getOrphanage() is
  // not thread-safe, but that shouldn't be a huge deal...
223

224
  _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); }
225
  _::SegmentBuilder* getRootSegment();
226
  AnyPointer::Builder getRootInternal();
Kenton Varda's avatar
Kenton Varda committed
227 228
};

229
template <typename RootType>
230
typename RootType::Reader readMessageUnchecked(const word* data);
231 232 233 234 235
// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY.  DO NOT
// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
//
// Given a pointer to a known-valid message located in a single contiguous memory segment,
236 237 238
// returns a reader for that message.  No bounds-checking will be done while traversing this
// message.  Use this only if you have already verified that all pointers are valid and in-bounds,
// and there are no far pointers in the message.
239
//
240 241 242 243 244
// To create a message that can be passed to this function, build a message using a MallocAllocator
// whose preferred segment size is larger than the message size.  This guarantees that the message
// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
// array.  That word array is your message; you may pass a pointer to its first word into
// readMessageUnchecked() to read the message.
245 246 247 248 249
//
// This can be particularly handy for embedding messages in generated code:  you can
// embed the raw bytes (using AlignedData) then make a Reader for it using this.  This is the way
// default values are embedded in code generated by the Cap'n Proto compiler.  E.g., if you have
// a message MyMessage, you can read its default value like so:
250 251 252
//    MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words);
//
// To sanitize a message from an untrusted source such that it can be safely passed to
253 254 255
// readMessageUnchecked(), use copyToUnchecked().

template <typename Reader>
256
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
257 258
// Copy the content of the given reader into the given buffer, such that it can safely be passed to
// readMessageUnchecked().  The buffer's size must be exactly reader.totalSizeInWords() + 1,
259
// otherwise an exception will be thrown.  The buffer must be zero'd before calling.
Kenton Varda's avatar
Kenton Varda committed
260

261
template <typename Type>
static typename Type::Reader defaultValue();
// Get a default instance of the given struct or list type.
//
// TODO(cleanup):  Find a better home for this function?

267
// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
268

269 270 271 272
class SegmentArrayMessageReader: public MessageReader {
  // A simple MessageReader that reads from an array of word arrays representing all segments.
  // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
  // (although it would probably make more sense to call builder.getRoot().asReader() in that case).
Kenton Varda's avatar
Kenton Varda committed
273

274
public:
275
  SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
276 277 278
                            ReaderOptions options = ReaderOptions());
  // Creates a message pointing at the given segment array, without taking ownership of the
  // segments.  All arrays passed in must remain valid until the MessageReader is destroyed.
Kenton Varda's avatar
Kenton Varda committed
279

280
  KJ_DISALLOW_COPY(SegmentArrayMessageReader);
281
  ~SegmentArrayMessageReader() noexcept(false);
282

283
  virtual kj::ArrayPtr<const word> getSegment(uint id) override;
284

285
private:
286
  kj::ArrayPtr<const kj::ArrayPtr<const word>> segments;
287
};
288

289
enum class AllocationStrategy: uint8_t {
  // Selects how a builder sizes each segment it allocates.

  FIXED_SIZE,
  // The builder will prefer to allocate the same amount of space for each segment with no
  // heuristic growth.  It will still allocate larger segments when the preferred size is too small
  // for some single object.  This mode is generally not recommended, but can be particularly useful
  // for testing in order to force a message to allocate a predictable number of segments.  Note
  // that you can force every single object in the message to be located in a separate segment by
  // using this mode with firstSegmentWords = 0.

  GROW_HEURISTICALLY
  // The builder will heuristically decide how much space to allocate for each segment.  Each
  // allocated segment will be progressively larger than the previous segments on the assumption
  // that message sizes are exponentially distributed.  The total number of segments that will be
  // allocated for a message of size n is O(log n).
};
Kenton Varda's avatar
Kenton Varda committed
304

305 306
// Defaults for MallocMessageBuilder's constructor parameters: the size, in words, requested for
// the first segment, and the strategy used to size the segments after it.
constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS{1024};
constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY{
    AllocationStrategy::GROW_HEURISTICALLY};
Kenton Varda's avatar
Kenton Varda committed
307

308 309 310 311
class MallocMessageBuilder: public MessageBuilder {
  // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments.  This
  // implementation should be reasonable for any case that doesn't require writing the message to
  // a specific location in memory.
312

313
public:
Kenton Varda's avatar
Kenton Varda committed
314
  explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS,
315 316 317 318 319 320 321 322 323 324 325 326 327
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // Creates a BuilderContext which allocates at least the given number of words for the first
  // segment, and then uses the given strategy to decide how much to allocate for subsequent
  // segments.  When choosing a value for firstSegmentWords, consider that:
  // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
  //    if most messages fit in a single segment.
  // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
  //    more space than you need.  It only becomes problematic if you are allocating many messages
  //    in parallel and thus use lots of memory, or if you allocate so much extra space that just
  //    zeroing it out becomes a bottleneck.
  // The defaults have been chosen to be reasonable for most people, so don't change them unless you
  // have reason to believe you need to.

328
  explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
329 330 331 332
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // This version always returns the given array for the first segment, and then proceeds with the
  // allocation strategy.  This is useful for optimization when building lots of small messages in
  // a tight loop:  you can reuse the space for the first segment.
Kenton Varda's avatar
Kenton Varda committed
333 334 335
  //
  // firstSegment MUST be zero-initialized.  MallocMessageBuilder's destructor will write new zeros
  // over any space that was used so that it can be reused.
336

337
  KJ_DISALLOW_COPY(MallocMessageBuilder);
338
  virtual ~MallocMessageBuilder() noexcept(false);
339

340
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
341 342 343 344 345

private:
  uint nextSize;
  AllocationStrategy allocationStrategy;

346
  bool ownFirstSegment;
347 348
  bool returnedFirstSegment;

349 350 351
  void* firstSegment;

  struct MoreSegments;
352
  kj::Maybe<kj::Own<MoreSegments>> moreSegments;
Kenton Varda's avatar
Kenton Varda committed
353 354
};

355
class FlatMessageBuilder: public MessageBuilder {
356 357 358 359 360 361 362 363 364 365 366
  // THIS IS NOT THE CLASS YOU'RE LOOKING FOR.
  //
  // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder`
  // and pass the scratch space to its constructor.  It will then only fall back to malloc() if
  // the scratch space is not large enough.
  //
  // Do NOT use this class unless you really know what you're doing.  This class is problematic
  // because it requires advance knowledge of the size of your message, which is usually impossible
  // to determine without actually building the message.  The class was created primarily to
  // implement `copyToUnchecked()`, which itself exists only to support other internal parts of
  // the Cap'n Proto implementation.
367 368

public:
369 370
  explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
  KJ_DISALLOW_COPY(FlatMessageBuilder);
371
  virtual ~FlatMessageBuilder() noexcept(false);
372 373 374 375

  void requireFilled();
  // Throws an exception if the flat array is not exactly full.

376
  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;
377 378

private:
379
  kj::ArrayPtr<word> array;
380 381 382
  bool allocated;
};

Kenton Varda's avatar
Kenton Varda committed
383
// =======================================================================================
384
// implementation details
Kenton Varda's avatar
Kenton Varda committed
385

386 387
// Returns a reference to the stored `options` member.
inline const ReaderOptions& MessageReader::getOptions() {
  return options;
}

390
template <typename RootType>
391
inline typename RootType::Reader MessageReader::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
392
  return getRootInternal().getAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
393 394
}

395
template <typename RootType>
396
inline typename RootType::Builder MessageBuilder::initRoot() {
Kenton Varda's avatar
Kenton Varda committed
397
  return getRootInternal().initAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
398 399
}

400 401
template <typename Reader>
inline void MessageBuilder::setRoot(Reader&& value) {
Kenton Varda's avatar
Kenton Varda committed
402
  getRootInternal().setAs<FromReader<Reader>>(value);
403 404
}

405
template <typename RootType>
406
inline typename RootType::Builder MessageBuilder::getRoot() {
Kenton Varda's avatar
Kenton Varda committed
407
  return getRootInternal().getAs<RootType>();
Kenton Varda's avatar
Kenton Varda committed
408 409
}

410 411
// Moves the orphan into the root pointer obtained from getRootInternal().
template <typename T>
void MessageBuilder::adoptRoot(Orphan<T>&& orphan) {
  // This function returns void, so the call is made as a plain statement rather than being
  // written as `return <void expression>;`.
  getRootInternal().adopt(kj::mv(orphan));
}

template <typename RootType, typename SchemaType>
typename RootType::Reader MessageReader::getRoot(SchemaType schema) {
  return getRootInternal().getAs<RootType>(schema);
}

template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) {
  return getRootInternal().getAs<RootType>(schema);
}

// Schema-driven variant of initRoot(): initializes the untyped root as RootType using `schema`.
template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) {
  return getRootInternal().initAs<RootType>(schema);
}

430
template <typename RootType>
431
typename RootType::Reader readMessageUnchecked(const word* data) {
432
  return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>();
433 434
}

435
template <typename Reader>
436
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
437
  FlatMessageBuilder builder(uncheckedBuffer);
Kenton Varda's avatar
Kenton Varda committed
438
  builder.setRoot(kj::fwd<Reader>(reader));
439 440 441
  builder.requireFilled();
}

442
}  // namespace capnp
Kenton Varda's avatar
Kenton Varda committed
443

Kenton Varda's avatar
Kenton Varda committed
444
#endif  // CAPNP_MESSAGE_H_