Commit 0757e6ef
Authored Jul 12, 2013 by Kenton Varda
Parent: ce4162e8

Implement adoptWithCaveats() for struct lists, and some parser tweaks.
Showing 8 changed files with 336 additions and 157 deletions:
- c++/src/capnp/compiler/lexer-test.c++ (+134 / -103)
- c++/src/capnp/compiler/lexer.c++ (+37 / -27)
- c++/src/capnp/compiler/lexer.capnp (+31 / -21)
- c++/src/capnp/layout.c++ (+43 / -0)
- c++/src/capnp/layout.h (+5 / -0)
- c++/src/capnp/list.h (+18 / -0)
- c++/src/kj/parse/common-test.c++ (+31 / -0)
- c++/src/kj/parse/common.h (+37 / -6)
c++/src/capnp/compiler/lexer-test.c++

```diff
@@ -59,54 +59,54 @@ kj::String doLex(kj::StringPtr constText) {
 TEST(Lexer, Tokens) {
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = identifier('foo'), endByte = 3)), "
-        "(token = (body = identifier('bar'), startByte = 4, endByte = 7))"
+        "(body = identifier('foo'), endByte = 3), "
+        "(body = identifier('bar'), startByte = 4, endByte = 7)"
       "])",
       doLex<LexedTokens>("foo bar").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = identifier('foo'), endByte = 3)), "
-        "(token = (body = identifier('bar'), startByte = 15, endByte = 18))"
+        "(body = identifier('foo'), endByte = 3), "
+        "(body = identifier('bar'), startByte = 15, endByte = 18)"
       "])",
       doLex<LexedTokens>("foo # comment\nbar").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = stringLiteral('foo '), startByte = 2, endByte = 11)), "
-        "(token = (body = integerLiteral(123), startByte = 12, endByte = 15)), "
-        "(token = (body = floatLiteral(2.75), startByte = 16, endByte = 20)), "
-        "(token = (body = floatLiteral(60000), startByte = 21, endByte = 24)), "
-        "(token = (body = operator('+'), startByte = 25, endByte = 26)), "
-        "(token = (body = operator('-='), startByte = 27, endByte = 29))"
+        "(body = stringLiteral('foo '), startByte = 2, endByte = 11), "
+        "(body = integerLiteral(123), startByte = 12, endByte = 15), "
+        "(body = floatLiteral(2.75), startByte = 16, endByte = 20), "
+        "(body = floatLiteral(60000), startByte = 21, endByte = 24), "
+        "(body = operator('+'), startByte = 25, endByte = 26), "
+        "(body = operator('-='), startByte = 27, endByte = 29)"
       "])",
       doLex<LexedTokens>("  'foo\\x20' 123 2.75 6e4 + -=  ").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = parenthesizedList(["
+        "(body = parenthesizedList(["
           "["
-            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
-            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
+            "(body = identifier('foo'), startByte = 1, endByte = 4), "
+            "(body = identifier('bar'), startByte = 5, endByte = 8)"
           "], ["
-            "(token = (body = identifier('baz'), startByte = 10, endByte = 13)), "
-            "(token = (body = identifier('qux'), startByte = 14, endByte = 17))"
+            "(body = identifier('baz'), startByte = 10, endByte = 13), "
+            "(body = identifier('qux'), startByte = 14, endByte = 17)"
           "], ["
-            "(token = (body = identifier('corge'), startByte = 19, endByte = 24)), "
-            "(token = (body = identifier('grault'), startByte = 25, endByte = 31))"
+            "(body = identifier('corge'), startByte = 19, endByte = 24), "
+            "(body = identifier('grault'), startByte = 25, endByte = 31)"
           "]"
-        "]), endByte = 32))"
+        "]), endByte = 32)"
       "])",
       doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = parenthesizedList(["
+        "(body = parenthesizedList(["
           "["
-            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
-            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
+            "(body = identifier('foo'), startByte = 1, endByte = 4), "
+            "(body = identifier('bar'), startByte = 5, endByte = 8)"
           "]"
-        "]), endByte = 9))"
+        "]), endByte = 9)"
       "])",
       doLex<LexedTokens>("(foo bar)").cStr());
@@ -114,50 +114,50 @@ TEST(Lexer, Tokens) {
   // list.
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = parenthesizedList([]), endByte = 4))"
+        "(body = parenthesizedList([]), endByte = 4)"
       "])",
       doLex<LexedTokens>("(  )").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = bracketedList(["
+        "(body = bracketedList(["
           "["
-            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
-            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
+            "(body = identifier('foo'), startByte = 1, endByte = 4), "
+            "(body = identifier('bar'), startByte = 5, endByte = 8)"
           "], ["
-            "(token = (body = identifier('baz'), startByte = 10, endByte = 13)), "
-            "(token = (body = identifier('qux'), startByte = 14, endByte = 17))"
+            "(body = identifier('baz'), startByte = 10, endByte = 13), "
+            "(body = identifier('qux'), startByte = 14, endByte = 17)"
           "], ["
-            "(token = (body = identifier('corge'), startByte = 19, endByte = 24)), "
-            "(token = (body = identifier('grault'), startByte = 25, endByte = 31))"
+            "(body = identifier('corge'), startByte = 19, endByte = 24), "
+            "(body = identifier('grault'), startByte = 25, endByte = 31)"
           "]"
-        "]), endByte = 32))"
+        "]), endByte = 32)"
       "])",
       doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = bracketedList(["
+        "(body = bracketedList(["
           "["
-            "(token = (body = identifier('foo'), startByte = 1, endByte = 4))"
+            "(body = identifier('foo'), startByte = 1, endByte = 4)"
           "], ["
-            "(token = (body = parenthesizedList(["
+            "(body = parenthesizedList(["
               "["
-                "(token = (body = identifier('bar'), startByte = 7, endByte = 10))"
+                "(body = identifier('bar'), startByte = 7, endByte = 10)"
               "], ["
-                "(token = (body = identifier('baz'), startByte = 12, endByte = 15))"
+                "(body = identifier('baz'), startByte = 12, endByte = 15)"
               "]"
-            "]), startByte = 6, endByte = 16))"
+            "]), startByte = 6, endByte = 16)"
           "]"
-        "]), endByte = 17)), "
-        "(token = (body = identifier('qux'), startByte = 18, endByte = 21))"
+        "]), endByte = 17), "
+        "(body = identifier('qux'), startByte = 18, endByte = 21)"
       "])",
       doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());
 
   EXPECT_STREQ(
       "(tokens = ["
-        "(token = (body = identifier('foo'), endByte = 3)), "
-        "(token = (body = identifier('bar'), startByte = 7, endByte = 10))"
+        "(body = identifier('foo'), endByte = 3), "
+        "(body = identifier('bar'), startByte = 7, endByte = 10)"
       "])",
       doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
 }
@@ -165,45 +165,46 @@ TEST(Lexer, Tokens) {
 TEST(Lexer, Statements) {
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = (tokens = ["
-          "(token = (body = identifier('foo'), endByte = 3)), "
-          "(token = (body = identifier('bar'), startByte = 4, endByte = 7))"
-        "]))"
+        "(tokens = ["
+          "(body = identifier('foo'), endByte = 3), "
+          "(body = identifier('bar'), startByte = 4, endByte = 7)"
+        "], endByte = 8)"
       "])",
       doLex<LexedStatements>("foo bar;").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = (tokens = ["
-          "(token = (body = identifier('foo'), endByte = 3))"
-        "])), "
-        "(statement = (tokens = ["
-          "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
-        "])), "
-        "(statement = (tokens = ["
-          "(token = (body = identifier('baz'), startByte = 10, endByte = 13))"
-        "]))"
+        "(tokens = ["
+          "(body = identifier('foo'), endByte = 3)"
+        "], endByte = 4), "
+        "(tokens = ["
+          "(body = identifier('bar'), startByte = 5, endByte = 8)"
+        "], startByte = 5, endByte = 9), "
+        "(tokens = ["
+          "(body = identifier('baz'), startByte = 10, endByte = 13)"
+        "], startByte = 10, endByte = 14)"
       "])",
       doLex<LexedStatements>("foo; bar; baz; ").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
           "block = statements(["
-            "(statement = (tokens = ["
-              "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
-            "])), "
-            "(statement = (tokens = ["
-              "(token = (body = identifier('baz'), startByte = 10, endByte = 13))"
-            "]))"
-          "]))"
-        "), "
-        "(statement = (tokens = ["
-          "(token = (body = identifier('qux'), startByte = 16, endByte = 19))"
-        "]))"
+            "(tokens = ["
+              "(body = identifier('bar'), startByte = 5, endByte = 8)"
+            "], startByte = 5, endByte = 9), "
+            "(tokens = ["
+              "(body = identifier('baz'), startByte = 10, endByte = 13)"
+            "], startByte = 10, endByte = 14)"
+          "]), "
+          "endByte = 15"
+        "), "
+        "(tokens = ["
+          "(body = identifier('qux'), startByte = 16, endByte = 19)"
+        "], startByte = 16, endByte = 20)"
      "])",
       doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
 }
@@ -211,90 +212,120 @@ TEST(Lexer, Statements) {
 TEST(Lexer, DocComments) {
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
-          "docComment = 'blah blah\\n'"
-        "))"
+          "docComment = 'blah blah\\n', "
+          "endByte = 16"
+        ")"
       "])",
       doLex<LexedStatements>("foo; # blah blah").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
-          "docComment = 'blah blah\\n'"
-        "))"
+          "docComment = 'blah blah\\n', "
+          "endByte = 15"
+        ")"
       "])",
       doLex<LexedStatements>("foo; #blah blah").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
-          "docComment = ' blah blah\\n'"
-        "))"
+          "docComment = ' blah blah\\n', "
+          "endByte = 17"
+        ")"
       "])",
       doLex<LexedStatements>("foo; #  blah blah").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
-          "docComment = 'blah blah\\n'"
-        "))"
+          "docComment = 'blah blah\\n', "
+          "endByte = 16"
+        ")"
       "])",
       doLex<LexedStatements>("foo;\n# blah blah").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
-          "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
-          "]"
-        "))"
+        "("
+          "tokens = ["
+            "(body = identifier('foo'), endByte = 3)"
+          "], "
+          "endByte = 4"
+        ")"
       "])",
       doLex<LexedStatements>("foo;\n\n# blah blah").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
-          "docComment = 'bar baz\\nqux corge\\n'"
-        "))"
+          "docComment = 'bar baz\\nqux corge\\n', "
+          "endByte = 30"
+        ")"
       "])",
       doLex<LexedStatements>("foo;\n# bar baz\n# qux corge\n\n# grault\n# garply").cStr());
 
   EXPECT_STREQ(
       "(statements = ["
-        "(statement = ("
+        "("
           "tokens = ["
-            "(token = (body = identifier('foo'), endByte = 3))"
+            "(body = identifier('foo'), endByte = 3)"
           "], "
           "block = statements(["
-            "(statement = (tokens = ["
-              "(token = (body = identifier('bar'), startByte = 17, endByte = 20))"
-            "], docComment = 'hi\\n')), "
-            "(statement = (tokens = ["
-              "(token = (body = identifier('baz'), startByte = 28, endByte = 31))"
-            "]))"
+            "(tokens = ["
+              "(body = identifier('bar'), startByte = 17, endByte = 20)"
+            "], docComment = 'hi\\n', startByte = 17, endByte = 27), "
+            "(tokens = ["
+              "(body = identifier('baz'), startByte = 28, endByte = 31)"
+            "], startByte = 28, endByte = 32)"
           "]), "
-          "docComment = 'blah blah\\n'"
-        ")), "
-        "(statement = (tokens = ["
-          "(token = (body = identifier('qux'), startByte = 44, endByte = 47))"
-        "]))"
+          "docComment = 'blah blah\\n', "
+          "endByte = 44"
+        "), "
+        "(tokens = ["
+          "(body = identifier('qux'), startByte = 44, endByte = 47)"
+        "], startByte = 44, endByte = 48)"
       "])",
       doLex<LexedStatements>("foo {# blah blah\nbar; # hi\nbaz;} # ignored\nqux;").cStr());
 
+  EXPECT_STREQ(
+      "(statements = ["
+        "("
+          "tokens = ["
+            "(body = identifier('foo'), endByte = 3)"
+          "], "
+          "block = statements(["
+            "(tokens = ["
+              "(body = identifier('bar'), startByte = 5, endByte = 8)"
+            "], startByte = 5, endByte = 9), "
+            "(tokens = ["
+              "(body = identifier('baz'), startByte = 10, endByte = 13)"
+            "], startByte = 10, endByte = 14)"
+          "]), "
+          "docComment = 'late comment\\n', "
+          "endByte = 31"
+        "), "
+        "(tokens = ["
+          "(body = identifier('qux'), startByte = 31, endByte = 34)"
+        "], startByte = 31, endByte = 35)"
+      "])",
+      doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
 }
 
 }  // namespace
```
c++/src/capnp/compiler/lexer.c++

```diff
@@ -42,7 +42,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
   KJ_IF_MAYBE(output, parseOutput) {
     auto l = result.initStatements(output->size());
     for (uint i = 0; i < output->size(); i++) {
-      l[i].adoptStatement(kj::mv((*output)[i]));
+      l.adoptWithCaveats(i, kj::mv((*output)[i]));
     }
     return true;
   } else {
@@ -64,7 +64,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
   KJ_IF_MAYBE(output, parseOutput) {
     auto l = result.initTokens(output->size());
     for (uint i = 0; i < output->size(); i++) {
-      l[i].adoptToken(kj::mv((*output)[i]));
+      l.adoptWithCaveats(i, kj::mv((*output)[i]));
     }
     return true;
   } else {
@@ -85,13 +85,13 @@ Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
   return tb.getBody();
 }
 
-void buildTokenSequenceList(List<List<TokenPointer>>::Builder builder,
+void buildTokenSequenceList(List<List<Token>>::Builder builder,
                             kj::Array<kj::Array<Orphan<Token>>>&& items) {
   for (uint i = 0; i < items.size(); i++) {
     auto& item = items[i];
     auto itemBuilder = builder.init(i, item.size());
     for (uint j = 0; j < item.size(); j++) {
-      itemBuilder[j].adoptToken(kj::mv(item[j]));
+      itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
     }
   }
 }
@@ -101,7 +101,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
   for (auto& line: comment) {
     size += line.size() + 1;  // include newline
   }
-  if (size > 0) {
   Text::Builder builder = statement.initDocComment(size);
   char* pos = builder.begin();
   for (auto& line: comment) {
@@ -110,7 +109,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
     *pos++ = '\n';
   }
   KJ_ASSERT(pos == builder.end());
-  }
 }
 
 constexpr auto discardComment =
@@ -131,16 +129,16 @@ constexpr auto newline = p::oneOf(
     p::exactChar<'\n'>(),
     sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
 
-constexpr auto docComment = sequence(
-    discardLineWhitespace,
-    p::discard(p::optional(newline)),
-    p::many(p::sequence(discardLineWhitespace, saveComment)));
+constexpr auto docComment = p::optional(p::sequence(
+    discardLineWhitespace,
+    p::discard(p::optional(newline)),
+    p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
 // Parses a set of comment lines preceded by at most one newline and with no intervening blank
 // lines.
 
 }  // namespace
 
-Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
+Lexer::Lexer(Orphanage orphanageParam): orphanage(orphanageParam) {
   // Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
   // for us to use parsers.tokenSequence even though we haven't yet constructed it.
@@ -148,7 +146,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
   auto& commaDelimitedList = arena.copy(p::transform(
       p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
-      [&](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
+      [this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
           -> kj::Array<kj::Array<Orphan<Token>>> {
         if (first == nullptr && rest == nullptr) {
           // Completely empty list.
@@ -165,39 +163,39 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
   auto& token = arena.copy(p::oneOf(
       p::transformWithLocation(p::identifier,
-          [&](Location loc, kj::String name) -> Orphan<Token> {
+          [this](Location loc, kj::String name) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIdentifier(name);
            return t;
          }),
       p::transformWithLocation(p::doubleQuotedString,
-          [&](Location loc, kj::String text) -> Orphan<Token> {
+          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setStringLiteral(text);
            return t;
          }),
       p::transformWithLocation(p::integer,
-          [&](Location loc, uint64_t i) -> Orphan<Token> {
+          [this](Location loc, uint64_t i) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIntegerLiteral(i);
            return t;
          }),
       p::transformWithLocation(p::number,
-          [&](Location loc, double x) -> Orphan<Token> {
+          [this](Location loc, double x) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setFloatLiteral(x);
            return t;
          }),
       p::transformWithLocation(p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
-          [&](Location loc, kj::String text) -> Orphan<Token> {
+          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setOperator(text);
            return t;
          }),
       p::transformWithLocation(
           sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
-          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
+          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
@@ -205,7 +203,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
          }),
       p::transformWithLocation(
           sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
-          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
+          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
@@ -219,34 +217,46 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
   auto& statementEnd = arena.copy(p::oneOf(
       transform(p::sequence(p::exactChar<';'>(), docComment),
-          [&](kj::Array<kj::String>&& comment) -> Orphan<Statement> {
+          [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
-            attachDocComment(builder, kj::mv(comment));
+            KJ_IF_MAYBE(c, comment) {
+              attachDocComment(builder, kj::mv(*c));
+            }
            builder.getBlock().setNone();
            return result;
          }),
       transform(
-          p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>()),
-          [&](kj::Array<kj::String>&& comment, kj::Array<Orphan<Statement>>&& statements)
+          p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
+                      docComment),
+          [this](kj::Maybe<kj::Array<kj::String>>&& comment,
+                 kj::Array<Orphan<Statement>>&& statements,
+                 kj::Maybe<kj::Array<kj::String>>&& lateComment)
              -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
-            attachDocComment(builder, kj::mv(comment));
+            KJ_IF_MAYBE(c, comment) {
+              attachDocComment(builder, kj::mv(*c));
+            } else KJ_IF_MAYBE(c, lateComment) {
+              attachDocComment(builder, kj::mv(*c));
+            }
            auto list = builder.getBlock().initStatements(statements.size());
            for (uint i = 0; i < statements.size(); i++) {
-              list[i].adoptStatement(kj::mv(statements[i]));
+              list.adoptWithCaveats(i, kj::mv(statements[i]));
            }
            return result;
          })
       ));
 
-  auto& statement = arena.copy(p::transform(
-      p::sequence(tokenSequence, statementEnd),
-      [&](kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
-        auto tokensBuilder = statement.get().initTokens(tokens.size());
+  auto& statement = arena.copy(p::transformWithLocation(
+      p::sequence(tokenSequence, statementEnd),
+      [this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
+        auto builder = statement.get();
+        auto tokensBuilder = builder.initTokens(tokens.size());
        for (uint i = 0; i < tokens.size(); i++) {
-          tokensBuilder[i].adoptToken(kj::mv(tokens[i]));
+          tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
        }
+        builder.setStartByte(loc.begin());
+        builder.setEndByte(loc.end());
        return kj::mv(statement);
      }));
```
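The practical effect of the reworked docComment parser (now an `optional` of `oneOrMore` comment lines, plus the new trailing-comment branch on blocks) is easiest to see in the inputs the test file above feeds the lexer. A summary drawn from those tests, not new behavior:

```cpp
// Doc-comment attachment after this commit, per the DocComments tests:
//   "foo; # blah blah"     -> docComment = "blah blah\n"  (same-line comment)
//   "foo;\n# blah blah"    -> docComment = "blah blah\n"  (one newline: still attached)
//   "foo;\n\n# blah blah"  -> no docComment               (a blank line detaches it)
//   "foo {bar; baz;}\n# late comment\nqux;"
//                          -> "late comment\n" attaches to the block statement,
//                             via the new trailing-docComment ("lateComment") branch.
```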
c++/src/capnp/compiler/lexer.capnp

```capnp
# Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@0xa73956d2621fc3ee;

using Cxx = import "/capnp/c++.capnp";
```

```diff
@@ -11,48 +34,35 @@ struct Token {
     integerLiteral @3 :UInt64;
     floatLiteral @4 :Float64;
     operator @5 :Text;
-    parenthesizedList @6 :List(List(TokenPointer));
-    bracketedList @7 :List(List(TokenPointer));
+    parenthesizedList @6 :List(List(Token));
+    bracketedList @7 :List(List(Token));
   }
 
   startByte @8 :UInt32;
   endByte @9 :UInt32;
 }
 
-struct TokenPointer {
-  # Hack to deal with the fact that struct lists cannot adopt elements.
-  #
-  # TODO(cleanup):  Find a better approach.
-
-  token @0 :Token;
-}
-
 struct Statement {
-  tokens @0 :List(TokenPointer);
+  tokens @0 :List(Token);
   block @1 union {
     none @2 :Void;
-    statements @3 :List(StatementPointer);
+    statements @3 :List(Statement);
   }
   docComment @4 :Text;
-}
-
-struct StatementPointer {
-  # Hack to deal with the fact that struct lists cannot adopt elements.
-  #
-  # TODO(cleanup):  Find a better approach.
 
-  statement @0 :Statement;
+  startByte @5 :UInt32;
+  endByte @6 :UInt32;
 }
 
 struct LexedTokens {
   # Lexer output when asked to parse tokens that don't form statements.
 
-  tokens @0 :List(TokenPointer);
+  tokens @0 :List(Token);
 }
 
 struct LexedStatements {
   # Lexer output when asked to parse statements.
 
-  statements @0 :List(StatementPointer);
+  statements @0 :List(Statement);
 }
```
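The deleted `TokenPointer`/`StatementPointer` structs existed only because `adopt()` needs a pointer field to aim at, and struct-list elements are allocated inline with no pointer of their own. A sketch of the old call pattern next to the new one, condensed from the lexer.c++ hunks above:

```cpp
// Old: wrap each element in a one-pointer struct so the orphan can be adopted.
List<TokenPointer>::Builder oldList = result.initTokens(n);
oldList[i].adoptToken(kj::mv(orphan));         // adopts via TokenPointer.token

// New: the list holds Tokens inline; adoptWithCaveats() shallow-copies the
// orphan's content into the inline slot instead of adopting a pointer.
List<Token>::Builder newList = result.initTokens(n);
newList.adoptWithCaveats(i, kj::mv(orphan));
```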
c++/src/capnp/layout.c++

```diff
@@ -698,6 +698,10 @@ struct WireHelpers {
     // mistakenly thinks the source location still owns the object.  transferPointer() doesn't do
     // this zeroing itself because many callers transfer several pointers in a loop then zero out
     // the whole section.
 
+    KJ_DASSERT(dst->isNull());
+    // We expect the caller to ensure the target is already null so won't leak.
+
     if (src->isNull()) {
       memset(dst, 0, sizeof(WirePointer));
     } else if (src->kind() == WirePointer::FAR) {
@@ -2034,6 +2038,45 @@ OrphanBuilder StructBuilder::disown(WirePointerCount ptrIndex) {
   return WireHelpers::disown(segment, pointers + ptrIndex);
 }
 
+void StructBuilder::transferContentFrom(StructBuilder other) {
+  // Determine the amount of data the builders have in common.
+  BitCount sharedDataSize = kj::min(dataSize, other.dataSize);
+
+  if (dataSize > sharedDataSize) {
+    // Since the target is larger than the source, make sure to zero out the extra bits that the
+    // source doesn't have.
+    if (dataSize == 1 * BITS) {
+      setDataField<bool>(0 * ELEMENTS, false);
+    } else {
+      byte* unshared = reinterpret_cast<byte*>(data) + sharedDataSize / BITS_PER_BYTE / BYTES;
+      memset(unshared, 0, (dataSize - sharedDataSize) / BITS_PER_BYTE / BYTES);
+    }
+  }
+
+  // Copy over the shared part.
+  if (sharedDataSize == 1 * BITS) {
+    setDataField<bool>(0 * ELEMENTS, other.getDataField<bool>(0 * ELEMENTS));
+  } else {
+    memcpy(data, other.data, sharedDataSize / BITS_PER_BYTE / BYTES);
+  }
+
+  // Zero out all pointers in the target.
+  for (uint i = 0; i < pointerCount / POINTERS; i++) {
+    WireHelpers::zeroObject(segment, pointers + i);
+  }
+
+  // Transfer the pointers.
+  WirePointerCount sharedPointerCount = kj::min(pointerCount, other.pointerCount);
+  for (uint i = 0; i < sharedPointerCount / POINTERS; i++) {
+    WireHelpers::transferPointer(segment, pointers + i, other.segment, other.pointers + i);
+  }
+
+  // Zero out the pointers that were transferred in the source because it no longer has ownership.
+  // If the source had any extra pointers that the destination didn't have space for, we
+  // intentionally leave them be, so that they'll be cleaned up later.
+  memset(other.pointers, 0, sharedPointerCount * BYTES_PER_POINTER / BYTES);
+}
+
+bool StructBuilder::isPointerFieldNull(WirePointerCount ptrIndex) {
+  return (pointers + ptrIndex)->isNull();
+}
```
c++/src/capnp/layout.h

```diff
@@ -364,6 +364,11 @@ public:
   // Detach the given pointer field from this object.  The pointer becomes null, and the child
   // object is returned as an orphan.
 
+  void transferContentFrom(StructBuilder other);
+  // Adopt all pointers from `other`, and also copy all data.  If `other`'s sections are larger
+  // than this, the extra data is not transferred, meaning there is a risk of data loss when
+  // transferring from messages built with future versions of the protocol.
+  bool isPointerFieldNull(WirePointerCount ptrIndex);
 
   StructReader asReader() const;
```
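A worked reading of the `transferContentFrom()` doc comment above, with illustrative sizes that are not from the commit:

```cpp
// Suppose the target struct has 128 bits of data and 2 pointers, while the
// source has 192 bits and 3 pointers (e.g. built with a newer schema):
//   data:     min(128, 192) = 128 bits are copied; the source's last 64 bits
//             are silently dropped (the documented data-loss risk).
//   pointers: min(2, 3) = 2 pointers are transferred (moved, then zeroed in
//             the source); the source's 3rd pointer is left in place so the
//             discarded source object still cleans it up later.
// In the opposite direction (bigger target), the target's extra data bits are
// zeroed first, so no stale bytes leak through.
```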
c++/src/capnp/list.h

```diff
@@ -249,6 +249,24 @@ struct List<T, Kind::STRUCT> {
     return typename T::Builder(builder.getStructElement(index * ELEMENTS));
   }
+
+  inline void adoptWithCaveats(uint index, Orphan<T>&& orphan) {
+    // Mostly behaves like you'd expect `adopt` to behave, but with two caveats originating from
+    // the fact that structs in a struct list are allocated inline rather than by pointer:
+    // * This actually performs a shallow copy, effectively adopting each of the orphan's
+    //   children rather than adopting the orphan itself.  The orphan ends up being discarded,
+    //   possibly wasting space in the message object.
+    // * If the orphan is larger than the target struct -- say, because the orphan was built
+    //   using a newer version of the schema that has additional fields -- it will be truncated,
+    //   losing data.
+
+    // We pass a zero-valued StructSize to asStruct() because we do not want the struct to be
+    // expanded under any circumstances.  We're just going to throw it away anyway, and
+    // transferContentFrom() already carefully compares the struct sizes before transferring.
+    builder.getStructElement(index * ELEMENTS).transferContentFrom(
+        orphan.builder.asStruct(_::StructSize(0 * WORDS, 0 * POINTERS, _::FieldSize::VOID)));
+  }
 
   // There are no init(), set(), adopt(), or disown() methods for lists of structs because the
   // elements of the list are inlined and are initialized when the list is initialized.  This
   // means that init() would be redundant, and set() would risk data loss if the input struct
```
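A hedged usage sketch of the new method through the public API. The `AddressBook`/`Person` schema, its generated header, and the `initPeople()` accessor are hypothetical stand-ins for illustration; only `adoptWithCaveats()` itself comes from this commit:

```cpp
#include <capnp/message.h>
#include "addressbook.capnp.h"  // hypothetical generated header

void example() {
  capnp::MallocMessageBuilder message;
  capnp::Orphanage orphanage = message.getOrphanage();

  // Build the element off to the side, before its final slot exists.
  capnp::Orphan<Person> orphan = orphanage.newOrphan<Person>();
  orphan.get().setName("Alice");

  // Struct-list elements are inline, so this shallow-copies the orphan's
  // content into slot 0 (truncating if the orphan is larger than the slot).
  capnp::List<Person>::Builder people = message.initRoot<AddressBook>().initPeople(1);
  people.adoptWithCaveats(0, kj::mv(orphan));
}
```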
c++/src/kj/parse/common-test.c++

```diff
@@ -344,6 +344,37 @@ TEST(CommonParsers, TransformParser) {
   }
 }
 
+TEST(CommonParsers, TransformOrRejectParser) {
+  auto parser = transformOrReject(many(any),
+      [](Array<char> chars) -> Maybe<int> {
+        if (heapString(chars) == "foo") {
+          return 123;
+        } else {
+          return nullptr;
+        }
+      });
+
+  {
+    StringPtr text = "foo";
+    Input input(text.begin(), text.end());
+    Maybe<int> result = parser(input);
+    KJ_IF_MAYBE(i, result) {
+      EXPECT_EQ(123, *i);
+    } else {
+      ADD_FAILURE() << "Expected 123, got null.";
+    }
+    EXPECT_TRUE(input.atEnd());
+  }
+
+  {
+    StringPtr text = "bar";
+    Input input(text.begin(), text.end());
+    Maybe<int> result = parser(input);
+    EXPECT_TRUE(result == nullptr);
+    EXPECT_TRUE(input.atEnd());
+  }
+}
+
 TEST(CommonParsers, References) {
   struct TransformFunc {
     int value;
```
c++/src/kj/parse/common.h

```diff
@@ -71,11 +71,11 @@ public:
   }
 
   bool atEnd() { return pos == end; }
 
-  const Element& current() {
+  auto current() -> decltype(*instance<Iterator>()) {
     KJ_IREQUIRE(!atEnd());
     return *pos;
   }
 
-  const Element& consume() {
+  auto consume() -> decltype(*instance<Iterator>()) {
     KJ_IREQUIRE(!atEnd());
     return *pos++;
   }
```
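On the `current()`/`consume()` signature change, a hedged reading of the diff: the old `const Element&` return type baked in the assumption that dereferencing the iterator yields a const reference, while the trailing-return `decltype` form simply propagates whatever `*pos` actually produces. Illustrative consequence, not stated in the commit:

```cpp
// With Iterator = const char*, decltype(*instance<Iterator>()) is const char&,
// so behavior is unchanged for the common case.
// With a hypothetical generator-style iterator whose operator*() returns char
// by value, the old signature would have bound a const reference to a
// temporary; the new one simply returns char by value.
```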
@@ -274,7 +274,6 @@ private:
template
<
typename
SubParser
,
typename
Result
>
constexpr
ConstResult_
<
SubParser
,
Result
>
constResult
(
SubParser
&&
subParser
,
Result
&&
result
)
{
// Constructs a parser which returns exactly `result` if `subParser` is successful.
return
ConstResult_
<
SubParser
,
Result
>
(
kj
::
fwd
<
SubParser
>
(
subParser
),
kj
::
fwd
<
Result
>
(
result
));
}
...
...
```diff
@@ -571,6 +570,27 @@ private:
   TransformFunc transform;
 };
 
+template <typename SubParser, typename TransformFunc>
+class TransformOrReject_ {
+public:
+  explicit constexpr TransformOrReject_(SubParser&& subParser, TransformFunc&& transform)
+      : subParser(kj::fwd<SubParser>(subParser)), transform(kj::fwd<TransformFunc>(transform)) {}
+
+  template <typename Input>
+  decltype(kj::apply(instance<TransformFunc&>(), instance<OutputType<SubParser, Input>&&>()))
+      operator()(Input& input) const {
+    KJ_IF_MAYBE(subResult, subParser(input)) {
+      return kj::apply(transform, kj::mv(*subResult));
+    } else {
+      return nullptr;
+    }
+  }
+
+private:
+  SubParser subParser;
+  TransformFunc transform;
+};
+
 template <typename SubParser, typename TransformFunc>
 class TransformWithLocation_ {
 public:
```
```diff
@@ -606,12 +626,21 @@ constexpr Transform_<SubParser, TransformFunc> transform(
       kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
 }
 
+template <typename SubParser, typename TransformFunc>
+constexpr TransformOrReject_<SubParser, TransformFunc> transformOrReject(
+    SubParser&& subParser, TransformFunc&& functor) {
+  // Like `transform()` except that `functor` returns a `Maybe`.  If it returns null, parsing fails,
+  // otherwise the parser's result is the content of the `Maybe`.
+  return TransformOrReject_<SubParser, TransformFunc>(
+      kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
+}
+
 template <typename SubParser, typename TransformFunc>
 constexpr TransformWithLocation_<SubParser, TransformFunc> transformWithLocation(
     SubParser&& subParser, TransformFunc&& functor) {
-  // Constructs a parser which executes some other parser and then transforms the result by
-  // invoking `functor` on it.  Typically `functor` is a lambda.  It is invoked using `kj::apply`,
-  // meaning tuples will be unpacked as arguments.
+  // Like `transform` except that `functor` also takes a `Span` as its first parameter specifying
+  // the location of the parsed content.  The span's position type is whatever the parser input's
+  // getPosition() returns.
   return TransformWithLocation_<SubParser, TransformFunc>(
       kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
 }
```
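A minimal usage sketch of the new combinator; the TransformOrRejectParser test added above exercises the same pattern, and `p::integer` is this library's existing integer parser (with `p` aliased to `kj::parse`, as in lexer.c++):

```cpp
// Accept an integer literal only when it fits the range we care about.
// Returning nullptr rejects the input: the parse fails as if nothing matched.
auto smallNumber = p::transformOrReject(p::integer,
    [](uint64_t value) -> kj::Maybe<int> {
      if (value > 9999) {
        return nullptr;
      }
      return static_cast<int>(value);
    });
```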
```diff
@@ -650,6 +679,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit
   // `condition` on the result to check if it is valid.  Typically, `condition` is a lambda
   // returning true or false.  Like with `transform()`, `condition` is invoked using `kj::apply`
   // to unpack tuples.
+  //
+  // TODO(soon):  Remove in favor of transformOrReject()?
   return AcceptIf_<SubParser, Condition>(
       kj::fwd<SubParser>(subParser), kj::fwd<Condition>(condition));
 }
```