Implement adoptWithCaveats() for struct lists, and some parser tweaks.

0757e6ef · Kenton Varda · ce4162e8 · 0757e6ef · 0757e6ef · 0757e6ef
Commit 0757e6ef authored Jul 12, 2013 by Kenton Varda
8 changed files
--- a/c++/src/capnp/compiler/lexer-test.c++
+++ b/c++/src/capnp/compiler/lexer-test.c++
--- a/c++/src/capnp/compiler/lexer.c++
+++ b/c++/src/capnp/compiler/lexer.c++
@@ -42,7 +42,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
  KJ_IF_MAYBE(output, parseOutput) {
    auto l = result.initStatements(output->size());
    for (uint i = 0; i < output->size(); i++) {
-      l[i].adoptStatement(kj::mv((*output)[i]));
+      l.adoptWithCaveats(i, kj::mv((*output)[i]));
    }
    return true;
  } else {
@@ -64,7 +64,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
  KJ_IF_MAYBE(output, parseOutput) {
    auto l = result.initTokens(output->size());
    for (uint i = 0; i < output->size(); i++) {
-      l[i].adoptToken(kj::mv((*output)[i]));
+      l.adoptWithCaveats(i, kj::mv((*output)[i]));
    }
    return true;
  } else {
@@ -85,13 +85,13 @@ Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
  return tb.getBody();
 }

-void buildTokenSequenceList(List<List<TokenPointer>>::Builder builder,
+void buildTokenSequenceList(List<List<Token>>::Builder builder,
                            kj::Array<kj::Array<Orphan<Token>>>&& items) {
  for (uint i = 0; i < items.size(); i++) {
    auto& item = items[i];
    auto itemBuilder = builder.init(i, item.size());
    for (uint j = 0; j < item.size(); j++) {
-      itemBuilder[j].adoptToken(kj::mv(item[j]));
+      itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
    }
  }
 }
@@ -101,16 +101,14 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
  for (auto& line: comment) {
    size += line.size() + 1;  // include newline
  }
-  if (size > 0) {
-    Text::Builder builder = statement.initDocComment(size);
-    char* pos = builder.begin();
-    for (auto& line: comment) {
-      memcpy(pos, line.begin(), line.size());
-      pos += line.size();
-      *pos++ = '\n';
-    }
-    KJ_ASSERT(pos == builder.end());
+  Text::Builder builder = statement.initDocComment(size);
+  char* pos = builder.begin();
+  for (auto& line: comment) {
+    memcpy(pos, line.begin(), line.size());
+    pos += line.size();
+    *pos++ = '\n';
  }
+  KJ_ASSERT(pos == builder.end());
 }

 constexpr auto discardComment =
@@ -131,16 +129,16 @@ constexpr auto newline = p::oneOf(
    p::exactChar<'\n'>(),
    sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));

-constexpr auto docComment = sequence(
+constexpr auto docComment = p::optional(p::sequence(
    discardLineWhitespace,
    p::discard(p::optional(newline)),
-    p::many(p::sequence(discardLineWhitespace, saveComment)));
+    p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
 // Parses a set of comment lines preceded by at most one newline and with no intervening blank
 // lines.

 }  // namespace

-Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
+Lexer::Lexer(Orphanage orphanageParam): orphanage(orphanageParam) {

  // Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
  // for us to use parsers.tokenSequence even though we haven't yet constructed it.
@@ -148,7 +146,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {

  auto& commaDelimitedList = arena.copy(p::transform(
      p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
-      [&](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
+      [this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
          -> kj::Array<kj::Array<Orphan<Token>>> {
        if (first == nullptr && rest == nullptr) {
          // Completely empty list.
@@ -165,39 +163,39 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {

  auto& token = arena.copy(p::oneOf(
      p::transformWithLocation(p::identifier,
-          [&](Location loc, kj::String name) -> Orphan<Token> {
+          [this](Location loc, kj::String name) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIdentifier(name);
            return t;
          }),
      p::transformWithLocation(p::doubleQuotedString,
-          [&](Location loc, kj::String text) -> Orphan<Token> {
+          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setStringLiteral(text);
            return t;
          }),
      p::transformWithLocation(p::integer,
-          [&](Location loc, uint64_t i) -> Orphan<Token> {
+          [this](Location loc, uint64_t i) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIntegerLiteral(i);
            return t;
          }),
      p::transformWithLocation(p::number,
-          [&](Location loc, double x) -> Orphan<Token> {
+          [this](Location loc, double x) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setFloatLiteral(x);
            return t;
          }),
      p::transformWithLocation(
          p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
-          [&](Location loc, kj::String text) -> Orphan<Token> {
+          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setOperator(text);
            return t;
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
-          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
+          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
@@ -205,7 +203,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
-          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
+          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
@@ -219,34 +217,46 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {

  auto& statementEnd = arena.copy(p::oneOf(
      transform(p::sequence(p::exactChar<';'>(), docComment),
-          [&](kj::Array<kj::String>&& comment) -> Orphan<Statement> {
+          [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
-            attachDocComment(builder, kj::mv(comment));
+            KJ_IF_MAYBE(c, comment) {
+              attachDocComment(builder, kj::mv(*c));
+            }
            builder.getBlock().setNone();
            return result;
          }),
      transform(
-          p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>()),
-          [&](kj::Array<kj::String>&& comment, kj::Array<Orphan<Statement>>&& statements)
+          p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
+                      docComment),
+          [this](kj::Maybe<kj::Array<kj::String>>&& comment,
+                 kj::Array<Orphan<Statement>>&& statements,
+                 kj::Maybe<kj::Array<kj::String>>&& lateComment)
              -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
-            attachDocComment(builder, kj::mv(comment));
+            KJ_IF_MAYBE(c, comment) {
+              attachDocComment(builder, kj::mv(*c));
+            } else KJ_IF_MAYBE(c, lateComment) {
+              attachDocComment(builder, kj::mv(*c));
+            }
            auto list = builder.getBlock().initStatements(statements.size());
            for (uint i = 0; i < statements.size(); i++) {
-              list[i].adoptStatement(kj::mv(statements[i]));
+              list.adoptWithCaveats(i, kj::mv(statements[i]));
            }
            return result;
          })
      ));

-  auto& statement = arena.copy(p::transform(p::sequence(tokenSequence, statementEnd),
-      [&](kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
-        auto tokensBuilder = statement.get().initTokens(tokens.size());
+  auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
+      [this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
+        auto builder = statement.get();
+        auto tokensBuilder = builder.initTokens(tokens.size());
        for (uint i = 0; i < tokens.size(); i++) {
-          tokensBuilder[i].adoptToken(kj::mv(tokens[i]));
+          tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
        }
+        builder.setStartByte(loc.begin());
+        builder.setEndByte(loc.end());
        return kj::mv(statement);
      }));


--- a/c++/src/capnp/compiler/lexer.capnp
+++ b/c++/src/capnp/compiler/lexer.capnp
+# Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 @0xa73956d2621fc3ee;

 using Cxx = import "/capnp/c++.capnp";
@@ -11,48 +34,35 @@ struct Token {
    integerLiteral @3 :UInt64;
    floatLiteral @4 :Float64;
    operator @5 :Text;
-    parenthesizedList @6 :List(List(TokenPointer));
-    bracketedList @7 :List(List(TokenPointer));
+    parenthesizedList @6 :List(List(Token));
+    bracketedList @7 :List(List(Token));
  }

  startByte @8 :UInt32;
  endByte @9 :UInt32;
 }

-struct TokenPointer {
-  # Hack to deal with the fact that struct lists cannot adopt elements.
-  #
-  # TODO(cleanup):  Find a better approach.
-
-  token @0 :Token;
-}
-
 struct Statement {
-  tokens @0 :List(TokenPointer);
+  tokens @0 :List(Token);
  block @1 union {
    none @2 :Void;
-    statements @3 :List(StatementPointer);
+    statements @3 :List(Statement);
  }

  docComment @4 :Text;
-}
-
-struct StatementPointer {
-  # Hack to deal with the fact that struct lists cannot adopt elements.
-  #
-  # TODO(cleanup):  Find a better approach.

-  statement @0 :Statement;
+  startByte @5 :UInt32;
+  endByte @6 :UInt32;
 }

 struct LexedTokens {
  # Lexer output when asked to parse tokens that don't form statements.

-  tokens @0 :List(TokenPointer);
+  tokens @0 :List(Token);
 }

 struct LexedStatements {
  # Lexer output when asked to parse statements.

-  statements @0 :List(StatementPointer);
+  statements @0 :List(Statement);
 }
--- a/c++/src/capnp/layout.c++
+++ b/c++/src/capnp/layout.c++
@@ -698,6 +698,10 @@ struct WireHelpers {
    // mistakenly thinks the source location still owns the object.  transferPointer() doesn't do
    // this zeroing itself because many callers transfer several pointers in a loop then zero out
    // the whole section.
+
+    KJ_DASSERT(dst->isNull());
+    // We expect the caller to ensure the target is already null so that nothing is leaked.
+
    if (src->isNull()) {
      memset(dst, 0, sizeof(WirePointer));
    } else if (src->kind() == WirePointer::FAR) {
@@ -2034,6 +2038,45 @@ OrphanBuilder StructBuilder::disown(WirePointerCount ptrIndex) {
  return WireHelpers::disown(segment, pointers + ptrIndex);
 }

+void StructBuilder::transferContentFrom(StructBuilder other) {
+  // Determine the amount of data the builders have in common.
+  BitCount sharedDataSize = kj::min(dataSize, other.dataSize);
+
+  if (dataSize > sharedDataSize) {
+    // Since the target is larger than the source, make sure to zero out the extra bits that the
+    // source doesn't have.
+    if (dataSize == 1 * BITS) {
+      setDataField<bool>(0 * ELEMENTS, false);
+    } else {
+      byte* unshared = reinterpret_cast<byte*>(data) + sharedDataSize / BITS_PER_BYTE / BYTES;
+      memset(unshared, 0, (dataSize - sharedDataSize) / BITS_PER_BYTE / BYTES);
+    }
+  }
+
+  // Copy over the shared part.
+  if (sharedDataSize == 1 * BITS) {
+    setDataField<bool>(0 * ELEMENTS, other.getDataField<bool>(0 * ELEMENTS));
+  } else {
+    memcpy(data, other.data, sharedDataSize / BITS_PER_BYTE / BYTES);
+  }
+
+  // Zero out all pointers in the target.
+  for (uint i = 0; i < pointerCount / POINTERS; i++) {
+    WireHelpers::zeroObject(segment, pointers + i);
+  }
+
+  // Transfer the pointers.
+  WirePointerCount sharedPointerCount = kj::min(pointerCount, other.pointerCount);
+  for (uint i = 0; i < sharedPointerCount / POINTERS; i++) {
+    WireHelpers::transferPointer(segment, pointers + i, other.segment, other.pointers + i);
+  }
+
+  // Zero out the pointers that were transferred in the source because it no longer has ownership.
+  // If the source had any extra pointers that the destination didn't have space for, we
+  // intentionally leave them be, so that they'll be cleaned up later.
+  memset(other.pointers, 0, sharedPointerCount * BYTES_PER_POINTER / BYTES);
+}
+
 bool StructBuilder::isPointerFieldNull(WirePointerCount ptrIndex) {
  return (pointers + ptrIndex)->isNull();
 }

--- a/c++/src/capnp/layout.h
+++ b/c++/src/capnp/layout.h
@@ -364,6 +364,11 @@ public:
  // Detach the given pointer field from this object.  The pointer becomes null, and the child
  // object is returned as an orphan.

+  void transferContentFrom(StructBuilder other);
+  // Adopt all pointers from `other`, and also copy all data.  If `other`'s sections are larger
+  // than this, the extra data is not transferred, meaning there is a risk of data loss when
+  // transferring from messages built with future versions of the protocol.
+
  bool isPointerFieldNull(WirePointerCount ptrIndex);

  StructReader asReader() const;

--- a/c++/src/capnp/list.h
+++ b/c++/src/capnp/list.h
@@ -249,6 +249,24 @@ struct List<T, Kind::STRUCT> {
      return typename T::Builder(builder.getStructElement(index * ELEMENTS));
    }

+    inline void adoptWithCaveats(uint index, Orphan<T>&& orphan) {
+      // Mostly behaves like you'd expect `adopt` to behave, but with two caveats originating from
+      // the fact that structs in a struct list are allocated inline rather than by pointer:
+      // * This actually performs a shallow copy, effectively adopting each of the orphan's
+      //   children rather than adopting the orphan itself.  The orphan ends up being discarded,
+      //   possibly wasting space in the message object.
+      // * If the orphan is larger than the target struct -- say, because the orphan was built
+      //   using a newer version of the schema that has additional fields -- it will be truncated,
+      //   losing data.
+
+      // We pass a zero-valued StructSize to asStruct() because we do not want the struct to be
+      // expanded under any circumstances.  We're just going to throw it away anyway, and
+      // transferContentFrom() already carefully compares the struct sizes before transferring.
+      builder.getStructElement(index * ELEMENTS).transferContentFrom(
+          orphan.builder.asStruct(_::StructSize(
+              0 * WORDS, 0 * POINTERS, _::FieldSize::VOID)));
+    }
+
    // There are no init(), set(), adopt(), or disown() methods for lists of structs because the
    // elements of the list are inlined and are initialized when the list is initialized.  This
    // means that init() would be redundant, and set() would risk data loss if the input struct

--- a/c++/src/kj/parse/common-test.c++
+++ b/c++/src/kj/parse/common-test.c++
@@ -344,6 +344,37 @@ TEST(CommonParsers, TransformParser) {
  }
 }

+TEST(CommonParsers, TransformOrRejectParser) {
+  auto parser = transformOrReject(many(any),
+      [](Array<char> chars) -> Maybe<int> {
+        if (heapString(chars) == "foo") {
+          return 123;
+        } else {
+          return nullptr;
+        }
+      });
+
+  {
+    StringPtr text = "foo";
+    Input input(text.begin(), text.end());
+    Maybe<int> result = parser(input);
+    KJ_IF_MAYBE(i, result) {
+      EXPECT_EQ(123, *i);
+    } else {
+      ADD_FAILURE() << "Expected 123, got null.";
+    }
+    EXPECT_TRUE(input.atEnd());
+  }
+
+  {
+    StringPtr text = "bar";
+    Input input(text.begin(), text.end());
+    Maybe<int> result = parser(input);
+    EXPECT_TRUE(result == nullptr);
+    EXPECT_TRUE(input.atEnd());
+  }
+}
+
 TEST(CommonParsers, References) {
  struct TransformFunc {
    int value;

--- a/c++/src/kj/parse/common.h
+++ b/c++/src/kj/parse/common.h
@@ -71,11 +71,11 @@ public:
  }

  bool atEnd() { return pos == end; }
-  const Element& current() {
+  auto current() -> decltype(*instance<Iterator>()) {
    KJ_IREQUIRE(!atEnd());
    return *pos;
  }
-  const Element& consume() {
+  auto consume() -> decltype(*instance<Iterator>()) {
    KJ_IREQUIRE(!atEnd());
    return *pos++;
  }
@@ -274,7 +274,6 @@ private:
 template <typename SubParser, typename Result>
 constexpr ConstResult_<SubParser, Result> constResult(SubParser&& subParser, Result&& result) {
  // Constructs a parser which returns exactly `result` if `subParser` is successful.
-
  return ConstResult_<SubParser, Result>(kj::fwd<SubParser>(subParser), kj::fwd<Result>(result));
 }

@@ -571,6 +570,27 @@ private:
  TransformFunc transform;
 };

+template <typename SubParser, typename TransformFunc>
+class TransformOrReject_ {
+public:
+  explicit constexpr TransformOrReject_(SubParser&& subParser, TransformFunc&& transform)
+      : subParser(kj::fwd<SubParser>(subParser)), transform(kj::fwd<TransformFunc>(transform)) {}
+
+  template <typename Input>
+  decltype(kj::apply(instance<TransformFunc&>(), instance<OutputType<SubParser, Input>&&>()))
+      operator()(Input& input) const {
+    KJ_IF_MAYBE(subResult, subParser(input)) {
+      return kj::apply(transform, kj::mv(*subResult));
+    } else {
+      return nullptr;
+    }
+  }
+
+private:
+  SubParser subParser;
+  TransformFunc transform;
+};
+
 template <typename SubParser, typename TransformFunc>
 class TransformWithLocation_ {
 public:
@@ -606,12 +626,21 @@ constexpr Transform_<SubParser, TransformFunc> transform(
      kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
 }

+template <typename SubParser, typename TransformFunc>
+constexpr TransformOrReject_<SubParser, TransformFunc> transformOrReject(
+    SubParser&& subParser, TransformFunc&& functor) {
+  // Like `transform()` except that `functor` returns a `Maybe`.  If it returns null, parsing fails,
+  // otherwise the parser's result is the content of the `Maybe`.
+  return TransformOrReject_<SubParser, TransformFunc>(
+      kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
+}
+
 template <typename SubParser, typename TransformFunc>
 constexpr TransformWithLocation_<SubParser, TransformFunc> transformWithLocation(
    SubParser&& subParser, TransformFunc&& functor) {
-  // Constructs a parser which executes some other parser and then transforms the result by invoking
-  // `functor` on it.  Typically `functor` is a lambda.  It is invoked using `kj::apply`,
-  // meaning tuples will be unpacked as arguments.
+  // Like `transform` except that `functor` also takes a `Span` as its first parameter specifying
+  // the location of the parsed content.  The span's position type is whatever the parser input's
+  // getPosition() returns.
  return TransformWithLocation_<SubParser, TransformFunc>(
      kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
 }
@@ -650,6 +679,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit
  // `condition` on the result to check if it is valid.  Typically, `condition` is a lambda
  // returning true or false.  Like with `transform()`, `condition` is invoked using `kj::apply`
  // to unpack tuples.
+  //
+  // TODO(soon):  Remove in favor of transformOrReject()?
  return AcceptIf_<SubParser, Condition>(
      kj::fwd<SubParser>(subParser), kj::fwd<Condition>(condition));
 }