// Copyright (c) 2013-2016 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include <kj/common.h>
#include <kj/memory.h>
#include <kj/mutex.h>
#include <kj/debug.h>
#include "common.h"
#include "layout.h"
#include "any.h"

#ifndef CAPNP_MESSAGE_H_
#define CAPNP_MESSAGE_H_

#if defined(__GNUC__) && !defined(CAPNP_HEADER_WARNINGS)
#pragma GCC system_header
#endif

namespace capnp {

namespace _ {  // private
  class ReaderArena;
  class BuilderArena;
}

class StructSchema;
class Orphanage;
template <typename T>
class Orphan;

// =======================================================================================

struct ReaderOptions {
  // Options controlling how data is read.

  uint64_t traversalLimitInWords = 8 * 1024 * 1024;
  // Limits how many total words of data are allowed to be traversed.  Traversal is counted when
  // a new struct or list builder is obtained, e.g. from a get() accessor.  This means that calling
  // the getter for the same sub-struct multiple times will cause it to be double-counted.  Once
  // the traversal limit is reached, an error will be reported.
  //
  // This limit exists for security reasons.  It is possible for an attacker to construct a message
  // in which multiple pointers point at the same location.  This is technically invalid, but hard
  // to detect.  Using such a message, an attacker could cause a message which is small on the wire
  // to appear much larger when actually traversed, possibly exhausting server resources leading to
  // denial-of-service.
  //
  // It makes sense to set a traversal limit that is much larger than the underlying message.
  // Together with sensible coding practices (e.g. trying to avoid calling sub-object getters
  // multiple times, which is expensive anyway), this should provide adequate protection without
  // inconvenience.
  //
  // The default limit is 64 MiB.  This may or may not be a sensible number for any given use case,
  // but probably at least prevents easy exploitation while also avoiding causing problems in most
  // typical cases.

  int nestingLimit = 64;
  // Limits how deeply-nested a message structure can be, e.g. structs containing other structs or
  // lists of structs.
  //
  // Like the traversal limit, this limit exists for security reasons.  Since it is common to use
  // recursive code to traverse recursive data structures, an attacker could easily cause a stack
  // overflow by sending a very-deeply-nested (or even cyclic) message, without the message even
  // being very large.  The default limit of 64 is probably low enough to prevent any chance of
  // stack overflow, yet high enough that it is never a problem in practice.
};

class MessageReader {
  // Abstract interface for an object used to read a Cap'n Proto message.  Subclasses of
  // MessageReader are responsible for reading the raw, flat message content.  Callers should
  // usually call `messageReader.getRoot<MyStructType>()` to get a `MyStructType::Reader`
  // representing the root of the message, then use that to traverse the message content.
  //
  // Some common subclasses of `MessageReader` include `SegmentArrayMessageReader`, whose
  // constructor accepts pointers to the raw data, and `StreamFdMessageReader` (from
  // `serialize.h`), which reads the message from a file descriptor.  One might implement other
  // subclasses to handle things like reading from shared memory segments, mmap()ed files, etc.

public:
  MessageReader(ReaderOptions options);
  // It is suggested that subclasses take ReaderOptions as a constructor parameter, but give it a
  // default value of "ReaderOptions()".  The base class constructor doesn't have a default value
  // in order to remind subclasses that they really need to give the user a way to provide this.

  virtual ~MessageReader() noexcept(false);

  virtual kj::ArrayPtr<const word> getSegment(uint id) = 0;
  // Gets the segment with the given ID, or returns null if no such segment exists. This method
  // will be called at most once for each segment ID.

  inline const ReaderOptions& getOptions();
  // Get the options passed to the constructor.

  template <typename RootType>
  typename RootType::Reader getRoot();
  // Get the root struct of the message, interpreting it as the given struct type.

  template <typename RootType, typename SchemaType>
  typename RootType::Reader getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
  // use this.

  bool isCanonical();
  // Returns whether the message encoded in the reader is in canonical form.

private:
  ReaderOptions options;

  // Space in which we can construct a ReaderArena.  We don't use ReaderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a ReaderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.
  void* arenaSpace[15 + sizeof(kj::MutexGuarded<void*>) / sizeof(void*)];
  bool allocatedArena;

  _::ReaderArena* arena() { return reinterpret_cast<_::ReaderArena*>(arenaSpace); }
  AnyPointer::Reader getRootInternal();
};

class MessageBuilder {
  // Abstract interface for an object used to allocate and build a message.  Subclasses of
  // MessageBuilder are responsible for allocating the space in which the message will be written.
  // The most common subclass is `MallocMessageBuilder`, but other subclasses may be used to do
  // tricky things like allocate messages in shared memory or mmap()ed files.
  //
  // Creating a new message ususually means allocating a new MessageBuilder (ideally on the stack)
  // and then calling `messageBuilder.initRoot<MyStructType>()` to get a `MyStructType::Builder`.
  // That, in turn, can be used to fill in the message content.  When done, you can call
  // `messageBuilder.getSegmentsForOutput()` to get a list of flat data arrays containing the
  // message.

public:
  MessageBuilder();
  virtual ~MessageBuilder() noexcept(false);
  KJ_DISALLOW_COPY(MessageBuilder);

  struct SegmentInit {
    kj::ArrayPtr<word> space;

    size_t wordsUsed;
    // Number of words in `space` which are used; the rest are free space in which additional
    // objects may be allocated.
  };

  explicit MessageBuilder(kj::ArrayPtr<SegmentInit> segments);
  // Create a MessageBuilder backed by existing memory. This is an advanced interface that most
  // people should not use. THIS METHOD IS INSECURE; see below.
  //
  // This allows a MessageBuilder to be constructed to modify an in-memory message without first
  // making a copy of the content. This is especially useful in conjunction with mmap().
  //
  // The contents of each segment must outlive the MessageBuilder, but the SegmentInit array itself
  // only need outlive the constructor.
  //
  // SECURITY: Do not use this in conjunction with untrusted data. This constructor assumes that
  //   the input message is valid. This constructor is designed to be used with data you control,
  //   e.g. an mmap'd file which is owned and accessed by only one program. When reading data you
  //   do not trust, you *must* load it into a Reader and then copy into a Builder as a means of
  //   validating the content.
  //
  // WARNING: It is NOT safe to initialize a MessageBuilder in this way from memory that is
  //   currently in use by another MessageBuilder or MessageReader. Other readers/builders will
  //   not observe changes to the segment sizes nor newly-allocated segments caused by allocating
  //   new objects in this message.

  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) = 0;
  // Allocates an array of at least the given number of words, throwing an exception or crashing if
  // this is not possible.  It is expected that this method will usually return more space than
  // requested, and the caller should use that extra space as much as possible before allocating
  // more.  The returned space remains valid at least until the MessageBuilder is destroyed.
  //
  // Cap'n Proto will only call this once at a time, so the subclass need not worry about
  // thread-safety.

  template <typename RootType>
  typename RootType::Builder initRoot();
  // Initialize the root struct of the message as the given struct type.

  template <typename Reader>
  void setRoot(Reader&& value);
  // Set the root struct to a deep copy of the given struct.

  template <typename RootType>
  typename RootType::Builder getRoot();
  // Get the root struct of the message, interpreting it as the given struct type.

  template <typename RootType, typename SchemaType>
  typename RootType::Builder getRoot(SchemaType schema);
  // Dynamically interpret the root struct of the message using the given schema (a StructSchema).
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
  // use this.

  template <typename RootType, typename SchemaType>
  typename RootType::Builder initRoot(SchemaType schema);
  // Dynamically init the root struct of the message using the given schema (a StructSchema).
  // RootType in this case must be DynamicStruct, and you must #include <capnp/dynamic.h> to
  // use this.

  template <typename T>
  void adoptRoot(Orphan<T>&& orphan);
  // Like setRoot() but adopts the orphan without copying.

  kj::ArrayPtr<const kj::ArrayPtr<const word>> getSegmentsForOutput();
  // Get the raw data that makes up the message.

  Orphanage getOrphanage();

  bool isCanonical();
  // Check whether the message builder is in canonical form

private:
  void* arenaSpace[22];
  // Space in which we can construct a BuilderArena.  We don't use BuilderArena directly here
  // because we don't want clients to have to #include arena.h, which itself includes a bunch of
  // big STL headers.  We don't use a pointer to a BuilderArena because that would require an
  // extra malloc on every message which could be expensive when processing small messages.

  bool allocatedArena = false;
  // We have to initialize the arena lazily because when we do so we want to allocate the root
  // pointer immediately, and this will allocate a segment, which requires a virtual function
  // call on the MessageBuilder.  We can't do such a call in the constructor since the subclass
  // isn't constructed yet.  This is kind of annoying because it means that getOrphanage() is
  // not thread-safe, but that shouldn't be a huge deal...

  _::BuilderArena* arena() { return reinterpret_cast<_::BuilderArena*>(arenaSpace); }
  _::SegmentBuilder* getRootSegment();
  AnyPointer::Builder getRootInternal();
};

template <typename RootType>
typename RootType::Reader readMessageUnchecked(const word* data);
// IF THE INPUT IS INVALID, THIS MAY CRASH, CORRUPT MEMORY, CREATE A SECURITY HOLE IN YOUR APP,
// MURDER YOUR FIRST-BORN CHILD, AND/OR BRING ABOUT ETERNAL DAMNATION ON ALL OF HUMANITY.  DO NOT
// USE UNLESS YOU UNDERSTAND THE CONSEQUENCES.
//
// Given a pointer to a known-valid message located in a single contiguous memory segment,
// returns a reader for that message.  No bounds-checking will be done while traversing this
// message.  Use this only if you have already verified that all pointers are valid and in-bounds,
// and there are no far pointers in the message.
//
// To create a message that can be passed to this function, build a message using a MallocAllocator
// whose preferred segment size is larger than the message size.  This guarantees that the message
// will be allocated as a single segment, meaning getSegmentsForOutput() returns a single word
// array.  That word array is your message; you may pass a pointer to its first word into
// readMessageUnchecked() to read the message.
//
// This can be particularly handy for embedding messages in generated code:  you can
// embed the raw bytes (using AlignedData) then make a Reader for it using this.  This is the way
// default values are embedded in code generated by the Cap'n Proto compiler.  E.g., if you have
// a message MyMessage, you can read its default value like so:
//    MyMessage::Reader reader = Message<MyMessage>::readMessageUnchecked(MyMessage::DEFAULT.words);
//
// To sanitize a message from an untrusted source such that it can be safely passed to
// readMessageUnchecked(), use copyToUnchecked().

template <typename Reader>
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer);
// Copy the content of the given reader into the given buffer, such that it can safely be passed to
// readMessageUnchecked().  The buffer's size must be exactly reader.totalSizeInWords() + 1,
// otherwise an exception will be thrown.  The buffer must be zero'd before calling.

template <typename RootType>
typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data);
// Interprets the given data as a single, data-only struct. Only primitive fields (booleans,
// numbers, and enums) will be readable; all pointers will be null. This is useful if you want
// to use Cap'n Proto as a language/platform-neutral way to pack some bits.
//
// The input is a word array rather than a byte array to enforce alignment. If you have a byte
// array which you know is word-aligned (or if your platform supports unaligned reads and you don't
// mind the performance penalty), then you can use `reinterpret_cast` to convert a byte array into
// a word array:
//
//     kj::arrayPtr(reinterpret_cast<const word*>(bytes.begin()),
//                  reinterpret_cast<const word*>(bytes.end()))

template <typename BuilderType>
typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder);
// Given a struct builder, get the underlying data section as a word array, suitable for passing
// to `readDataStruct()`.
//
// Note that you may call `.toBytes()` on the returned value to convert to `ArrayPtr<const byte>`.

template <typename Type>
static typename Type::Reader defaultValue();
// Get a default instance of the given struct or list type.
//
// TODO(cleanup):  Find a better home for this function?

// =======================================================================================

class SegmentArrayMessageReader: public MessageReader {
  // A simple MessageReader that reads from an array of word arrays representing all segments.
  // In particular you can read directly from the output of MessageBuilder::getSegmentsForOutput()
  // (although it would probably make more sense to call builder.getRoot().asReader() in that case).

public:
  SegmentArrayMessageReader(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments,
                            ReaderOptions options = ReaderOptions());
  // Creates a message pointing at the given segment array, without taking ownership of the
  // segments.  All arrays passed in must remain valid until the MessageReader is destroyed.

  KJ_DISALLOW_COPY(SegmentArrayMessageReader);
  ~SegmentArrayMessageReader() noexcept(false);

  virtual kj::ArrayPtr<const word> getSegment(uint id) override;

private:
  kj::ArrayPtr<const kj::ArrayPtr<const word>> segments;
};

enum class AllocationStrategy: uint8_t {
  FIXED_SIZE,
  // The builder will prefer to allocate the same amount of space for each segment with no
  // heuristic growth.  It will still allocate larger segments when the preferred size is too small
  // for some single object.  This mode is generally not recommended, but can be particularly useful
  // for testing in order to force a message to allocate a predictable number of segments.  Note
  // that you can force every single object in the message to be located in a separate segment by
  // using this mode with firstSegmentWords = 0.

  GROW_HEURISTICALLY
  // The builder will heuristically decide how much space to allocate for each segment.  Each
  // allocated segment will be progressively larger than the previous segments on the assumption
  // that message sizes are exponentially distributed.  The total number of segments that will be
  // allocated for a message of size n is O(log n).
};

constexpr uint SUGGESTED_FIRST_SEGMENT_WORDS = 1024;
constexpr AllocationStrategy SUGGESTED_ALLOCATION_STRATEGY = AllocationStrategy::GROW_HEURISTICALLY;

class MallocMessageBuilder: public MessageBuilder {
  // A simple MessageBuilder that uses malloc() (actually, calloc()) to allocate segments.  This
  // implementation should be reasonable for any case that doesn't require writing the message to
  // a specific location in memory.

public:
  explicit MallocMessageBuilder(uint firstSegmentWords = SUGGESTED_FIRST_SEGMENT_WORDS,
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // Creates a BuilderContext which allocates at least the given number of words for the first
  // segment, and then uses the given strategy to decide how much to allocate for subsequent
  // segments.  When choosing a value for firstSegmentWords, consider that:
  // 1) Reading and writing messages gets slower when multiple segments are involved, so it's good
  //    if most messages fit in a single segment.
  // 2) Unused bytes will not be written to the wire, so generally it is not a big deal to allocate
  //    more space than you need.  It only becomes problematic if you are allocating many messages
  //    in parallel and thus use lots of memory, or if you allocate so much extra space that just
  //    zeroing it out becomes a bottleneck.
  // The defaults have been chosen to be reasonable for most people, so don't change them unless you
  // have reason to believe you need to.

  explicit MallocMessageBuilder(kj::ArrayPtr<word> firstSegment,
      AllocationStrategy allocationStrategy = SUGGESTED_ALLOCATION_STRATEGY);
  // This version always returns the given array for the first segment, and then proceeds with the
  // allocation strategy.  This is useful for optimization when building lots of small messages in
  // a tight loop:  you can reuse the space for the first segment.
  //
  // firstSegment MUST be zero-initialized.  MallocMessageBuilder's destructor will write new zeros
  // over any space that was used so that it can be reused.

  KJ_DISALLOW_COPY(MallocMessageBuilder);
  virtual ~MallocMessageBuilder() noexcept(false);

  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;

private:
  uint nextSize;
  AllocationStrategy allocationStrategy;

  bool ownFirstSegment;
  bool returnedFirstSegment;

  void* firstSegment;

  struct MoreSegments;
  kj::Maybe<kj::Own<MoreSegments>> moreSegments;
};

class FlatMessageBuilder: public MessageBuilder {
  // THIS IS NOT THE CLASS YOU'RE LOOKING FOR.
  //
  // If you want to write a message into already-existing scratch space, use `MallocMessageBuilder`
  // and pass the scratch space to its constructor.  It will then only fall back to malloc() if
  // the scratch space is not large enough.
  //
  // Do NOT use this class unless you really know what you're doing.  This class is problematic
  // because it requires advance knowledge of the size of your message, which is usually impossible
  // to determine without actually building the message.  The class was created primarily to
  // implement `copyToUnchecked()`, which itself exists only to support other internal parts of
  // the Cap'n Proto implementation.

public:
  explicit FlatMessageBuilder(kj::ArrayPtr<word> array);
  KJ_DISALLOW_COPY(FlatMessageBuilder);
  virtual ~FlatMessageBuilder() noexcept(false);

  void requireFilled();
  // Throws an exception if the flat array is not exactly full.

  virtual kj::ArrayPtr<word> allocateSegment(uint minimumSize) override;

private:
  kj::ArrayPtr<word> array;
  bool allocated;
};

// =======================================================================================
// implementation details

inline const ReaderOptions& MessageReader::getOptions() {
  return options;
}

template <typename RootType>
inline typename RootType::Reader MessageReader::getRoot() {
  return getRootInternal().getAs<RootType>();
}

template <typename RootType>
inline typename RootType::Builder MessageBuilder::initRoot() {
  return getRootInternal().initAs<RootType>();
}

template <typename Reader>
inline void MessageBuilder::setRoot(Reader&& value) {
  getRootInternal().setAs<FromReader<Reader>>(value);
}

template <typename RootType>
inline typename RootType::Builder MessageBuilder::getRoot() {
  return getRootInternal().getAs<RootType>();
}

template <typename T>
void MessageBuilder::adoptRoot(Orphan<T>&& orphan) {
  return getRootInternal().adopt(kj::mv(orphan));
}

template <typename RootType, typename SchemaType>
typename RootType::Reader MessageReader::getRoot(SchemaType schema) {
  return getRootInternal().getAs<RootType>(schema);
}

template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::getRoot(SchemaType schema) {
  return getRootInternal().getAs<RootType>(schema);
}

template <typename RootType, typename SchemaType>
typename RootType::Builder MessageBuilder::initRoot(SchemaType schema) {
  return getRootInternal().initAs<RootType>(schema);
}

template <typename RootType>
typename RootType::Reader readMessageUnchecked(const word* data) {
  return AnyPointer::Reader(_::PointerReader::getRootUnchecked(data)).getAs<RootType>();
}

template <typename Reader>
void copyToUnchecked(Reader&& reader, kj::ArrayPtr<word> uncheckedBuffer) {
  FlatMessageBuilder builder(uncheckedBuffer);
  builder.setRoot(kj::fwd<Reader>(reader));
  builder.requireFilled();
}

template <typename RootType>
typename RootType::Reader readDataStruct(kj::ArrayPtr<const word> data) {
  return typename RootType::Reader(_::StructReader(data));
}

template <typename BuilderType>
typename kj::ArrayPtr<const word> writeDataStruct(BuilderType builder) {
  auto bytes = _::PointerHelpers<FromBuilder<BuilderType>>::getInternalBuilder(kj::mv(builder))
      .getDataSectionAsBlob();
  return kj::arrayPtr(reinterpret_cast<word*>(bytes.begin()),
                      reinterpret_cast<word*>(bytes.end()));
}

template <typename Type>
static typename Type::Reader defaultValue() {
  return typename Type::Reader(_::StructReader());
}

template <typename T>
kj::Array<word> canonicalize(T&& reader) {
    return _::PointerHelpers<FromReader<T>>::getInternalReader(reader).canonicalize();
}

}  // namespace capnp

#endif  // CAPNP_MESSAGE_H_