Commit 0757e6ef authored by Kenton Varda's avatar Kenton Varda

Implement adoptWithCaveats() for struct lists, and some parser tweaks.

parent ce4162e8
...@@ -59,54 +59,54 @@ kj::String doLex(kj::StringPtr constText) { ...@@ -59,54 +59,54 @@ kj::String doLex(kj::StringPtr constText) {
TEST(Lexer, Tokens) { TEST(Lexer, Tokens) {
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = identifier('foo'), endByte = 3)), " "(body = identifier('foo'), endByte = 3), "
"(token = (body = identifier('bar'), startByte = 4, endByte = 7))" "(body = identifier('bar'), startByte = 4, endByte = 7)"
"])", "])",
doLex<LexedTokens>("foo bar").cStr()); doLex<LexedTokens>("foo bar").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = identifier('foo'), endByte = 3)), " "(body = identifier('foo'), endByte = 3), "
"(token = (body = identifier('bar'), startByte = 15, endByte = 18))" "(body = identifier('bar'), startByte = 15, endByte = 18)"
"])", "])",
doLex<LexedTokens>("foo # comment\n bar").cStr()); doLex<LexedTokens>("foo # comment\n bar").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = stringLiteral('foo '), startByte = 2, endByte = 11)), " "(body = stringLiteral('foo '), startByte = 2, endByte = 11), "
"(token = (body = integerLiteral(123), startByte = 12, endByte = 15)), " "(body = integerLiteral(123), startByte = 12, endByte = 15), "
"(token = (body = floatLiteral(2.75), startByte = 16, endByte = 20)), " "(body = floatLiteral(2.75), startByte = 16, endByte = 20), "
"(token = (body = floatLiteral(60000), startByte = 21, endByte = 24)), " "(body = floatLiteral(60000), startByte = 21, endByte = 24), "
"(token = (body = operator('+'), startByte = 25, endByte = 26)), " "(body = operator('+'), startByte = 25, endByte = 26), "
"(token = (body = operator('-='), startByte = 27, endByte = 29))" "(body = operator('-='), startByte = 27, endByte = 29)"
"])", "])",
doLex<LexedTokens>(" 'foo\\x20' 123 2.75 6e4 + -= ").cStr()); doLex<LexedTokens>(" 'foo\\x20' 123 2.75 6e4 + -= ").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = parenthesizedList([" "(body = parenthesizedList(["
"[" "["
"(token = (body = identifier('foo'), startByte = 1, endByte = 4)), " "(body = identifier('foo'), startByte = 1, endByte = 4), "
"(token = (body = identifier('bar'), startByte = 5, endByte = 8))" "(body = identifier('bar'), startByte = 5, endByte = 8)"
"], [" "], ["
"(token = (body = identifier('baz'), startByte = 10, endByte = 13)), " "(body = identifier('baz'), startByte = 10, endByte = 13), "
"(token = (body = identifier('qux'), startByte = 14, endByte = 17))" "(body = identifier('qux'), startByte = 14, endByte = 17)"
"], [" "], ["
"(token = (body = identifier('corge'), startByte = 19, endByte = 24)), " "(body = identifier('corge'), startByte = 19, endByte = 24), "
"(token = (body = identifier('grault'), startByte = 25, endByte = 31))" "(body = identifier('grault'), startByte = 25, endByte = 31)"
"]" "]"
"]), endByte = 32))" "]), endByte = 32)"
"])", "])",
doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr()); doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = parenthesizedList([" "(body = parenthesizedList(["
"[" "["
"(token = (body = identifier('foo'), startByte = 1, endByte = 4)), " "(body = identifier('foo'), startByte = 1, endByte = 4), "
"(token = (body = identifier('bar'), startByte = 5, endByte = 8))" "(body = identifier('bar'), startByte = 5, endByte = 8)"
"]" "]"
"]), endByte = 9))" "]), endByte = 9)"
"])", "])",
doLex<LexedTokens>("(foo bar)").cStr()); doLex<LexedTokens>("(foo bar)").cStr());
...@@ -114,50 +114,50 @@ TEST(Lexer, Tokens) { ...@@ -114,50 +114,50 @@ TEST(Lexer, Tokens) {
// list. // list.
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = parenthesizedList([]), endByte = 4))" "(body = parenthesizedList([]), endByte = 4)"
"])", "])",
doLex<LexedTokens>("( )").cStr()); doLex<LexedTokens>("( )").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = bracketedList([" "(body = bracketedList(["
"[" "["
"(token = (body = identifier('foo'), startByte = 1, endByte = 4)), " "(body = identifier('foo'), startByte = 1, endByte = 4), "
"(token = (body = identifier('bar'), startByte = 5, endByte = 8))" "(body = identifier('bar'), startByte = 5, endByte = 8)"
"], [" "], ["
"(token = (body = identifier('baz'), startByte = 10, endByte = 13)), " "(body = identifier('baz'), startByte = 10, endByte = 13), "
"(token = (body = identifier('qux'), startByte = 14, endByte = 17))" "(body = identifier('qux'), startByte = 14, endByte = 17)"
"], [" "], ["
"(token = (body = identifier('corge'), startByte = 19, endByte = 24)), " "(body = identifier('corge'), startByte = 19, endByte = 24), "
"(token = (body = identifier('grault'), startByte = 25, endByte = 31))" "(body = identifier('grault'), startByte = 25, endByte = 31)"
"]" "]"
"]), endByte = 32))" "]), endByte = 32)"
"])", "])",
doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr()); doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = bracketedList([" "(body = bracketedList(["
"[" "["
"(token = (body = identifier('foo'), startByte = 1, endByte = 4))" "(body = identifier('foo'), startByte = 1, endByte = 4)"
"], [" "], ["
"(token = (body = parenthesizedList([" "(body = parenthesizedList(["
"[" "["
"(token = (body = identifier('bar'), startByte = 7, endByte = 10))" "(body = identifier('bar'), startByte = 7, endByte = 10)"
"], [" "], ["
"(token = (body = identifier('baz'), startByte = 12, endByte = 15))" "(body = identifier('baz'), startByte = 12, endByte = 15)"
"]" "]"
"]), startByte = 6, endByte = 16))" "]), startByte = 6, endByte = 16)"
"]" "]"
"]), endByte = 17)), " "]), endByte = 17), "
"(token = (body = identifier('qux'), startByte = 18, endByte = 21))" "(body = identifier('qux'), startByte = 18, endByte = 21)"
"])", "])",
doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr()); doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(tokens = [" "(tokens = ["
"(token = (body = identifier('foo'), endByte = 3)), " "(body = identifier('foo'), endByte = 3), "
"(token = (body = identifier('bar'), startByte = 7, endByte = 10))" "(body = identifier('bar'), startByte = 7, endByte = 10)"
"])", "])",
doLex<LexedTokens>("foo\n\r\t\vbar").cStr()); doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
} }
...@@ -165,45 +165,46 @@ TEST(Lexer, Tokens) { ...@@ -165,45 +165,46 @@ TEST(Lexer, Tokens) {
TEST(Lexer, Statements) { TEST(Lexer, Statements) {
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('foo'), endByte = 3)), " "(body = identifier('foo'), endByte = 3), "
"(token = (body = identifier('bar'), startByte = 4, endByte = 7))" "(body = identifier('bar'), startByte = 4, endByte = 7)"
"]))" "], endByte = 8)"
"])", "])",
doLex<LexedStatements>("foo bar;").cStr()); doLex<LexedStatements>("foo bar;").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"])), " "], endByte = 4), "
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('bar'), startByte = 5, endByte = 8))" "(body = identifier('bar'), startByte = 5, endByte = 8)"
"])), " "], startByte = 5, endByte = 9), "
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('baz'), startByte = 10, endByte = 13))" "(body = identifier('baz'), startByte = 10, endByte = 13)"
"]))" "], startByte = 10, endByte = 14)"
"])", "])",
doLex<LexedStatements>("foo; bar; baz; ").cStr()); doLex<LexedStatements>("foo; bar; baz; ").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"block = statements([" "block = statements(["
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('bar'), startByte = 5, endByte = 8))" "(body = identifier('bar'), startByte = 5, endByte = 8)"
"])), " "], startByte = 5, endByte = 9), "
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('baz'), startByte = 10, endByte = 13))" "(body = identifier('baz'), startByte = 10, endByte = 13)"
"]))" "], startByte = 10, endByte = 14)"
"]))" "]), "
"endByte = 15"
"), " "), "
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('qux'), startByte = 16, endByte = 19))" "(body = identifier('qux'), startByte = 16, endByte = 19)"
"]))" "], startByte = 16, endByte = 20)"
"])", "])",
doLex<LexedStatements>("foo {bar; baz;} qux;").cStr()); doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
} }
...@@ -211,90 +212,120 @@ TEST(Lexer, Statements) { ...@@ -211,90 +212,120 @@ TEST(Lexer, Statements) {
TEST(Lexer, DocComments) { TEST(Lexer, DocComments) {
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"docComment = 'blah blah\\n'" "docComment = 'blah blah\\n', "
"))" "endByte = 16"
")"
"])", "])",
doLex<LexedStatements>("foo; # blah blah").cStr()); doLex<LexedStatements>("foo; # blah blah").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"docComment = 'blah blah\\n'" "docComment = 'blah blah\\n', "
"))" "endByte = 15"
")"
"])", "])",
doLex<LexedStatements>("foo; #blah blah").cStr()); doLex<LexedStatements>("foo; #blah blah").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"docComment = ' blah blah\\n'" "docComment = ' blah blah\\n', "
"))" "endByte = 17"
")"
"])", "])",
doLex<LexedStatements>("foo; # blah blah").cStr()); doLex<LexedStatements>("foo; # blah blah").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"docComment = 'blah blah\\n'" "docComment = 'blah blah\\n', "
"))" "endByte = 16"
")"
"])", "])",
doLex<LexedStatements>("foo;\n# blah blah").cStr()); doLex<LexedStatements>("foo;\n# blah blah").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"]" "], "
"))" "endByte = 4"
")"
"])", "])",
doLex<LexedStatements>("foo;\n\n# blah blah").cStr()); doLex<LexedStatements>("foo;\n\n# blah blah").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"docComment = 'bar baz\\nqux corge\\n'" "docComment = 'bar baz\\nqux corge\\n', "
"))" "endByte = 30"
")"
"])", "])",
doLex<LexedStatements>("foo;\n # bar baz\n # qux corge\n\n# grault\n# garply").cStr()); doLex<LexedStatements>("foo;\n # bar baz\n # qux corge\n\n# grault\n# garply").cStr());
EXPECT_STREQ( EXPECT_STREQ(
"(statements = [" "(statements = ["
"(statement = (" "("
"tokens = [" "tokens = ["
"(token = (body = identifier('foo'), endByte = 3))" "(body = identifier('foo'), endByte = 3)"
"], " "], "
"block = statements([" "block = statements(["
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('bar'), startByte = 17, endByte = 20))" "(body = identifier('bar'), startByte = 17, endByte = 20)"
"], docComment = 'hi\\n')), " "], docComment = 'hi\\n', startByte = 17, endByte = 27), "
"(statement = (tokens = [" "(tokens = ["
"(token = (body = identifier('baz'), startByte = 28, endByte = 31))" "(body = identifier('baz'), startByte = 28, endByte = 31)"
"]))" "], startByte = 28, endByte = 32)"
"]), " "]), "
"docComment = 'blah blah\\n'" "docComment = 'blah blah\\n', "
")), " "endByte = 44"
"(statement = (tokens = [" "), "
"(token = (body = identifier('qux'), startByte = 44, endByte = 47))" "(tokens = ["
"]))" "(body = identifier('qux'), startByte = 44, endByte = 47)"
"], startByte = 44, endByte = 48)"
"])", "])",
doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr()); doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());
EXPECT_STREQ(
"(statements = ["
"("
"tokens = ["
"(body = identifier('foo'), endByte = 3)"
"], "
"block = statements(["
"(tokens = ["
"(body = identifier('bar'), startByte = 5, endByte = 8)"
"], startByte = 5, endByte = 9), "
"(tokens = ["
"(body = identifier('baz'), startByte = 10, endByte = 13)"
"], startByte = 10, endByte = 14)"
"]), "
"docComment = 'late comment\\n', "
"endByte = 31"
"), "
"(tokens = ["
"(body = identifier('qux'), startByte = 31, endByte = 34)"
"], startByte = 31, endByte = 35)"
"])",
doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
} }
} // namespace } // namespace
......
...@@ -42,7 +42,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) { ...@@ -42,7 +42,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
KJ_IF_MAYBE(output, parseOutput) { KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initStatements(output->size()); auto l = result.initStatements(output->size());
for (uint i = 0; i < output->size(); i++) { for (uint i = 0; i < output->size(); i++) {
l[i].adoptStatement(kj::mv((*output)[i])); l.adoptWithCaveats(i, kj::mv((*output)[i]));
} }
return true; return true;
} else { } else {
...@@ -64,7 +64,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) { ...@@ -64,7 +64,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
KJ_IF_MAYBE(output, parseOutput) { KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initTokens(output->size()); auto l = result.initTokens(output->size());
for (uint i = 0; i < output->size(); i++) { for (uint i = 0; i < output->size(); i++) {
l[i].adoptToken(kj::mv((*output)[i])); l.adoptWithCaveats(i, kj::mv((*output)[i]));
} }
return true; return true;
} else { } else {
...@@ -85,13 +85,13 @@ Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) { ...@@ -85,13 +85,13 @@ Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
return tb.getBody(); return tb.getBody();
} }
void buildTokenSequenceList(List<List<TokenPointer>>::Builder builder, void buildTokenSequenceList(List<List<Token>>::Builder builder,
kj::Array<kj::Array<Orphan<Token>>>&& items) { kj::Array<kj::Array<Orphan<Token>>>&& items) {
for (uint i = 0; i < items.size(); i++) { for (uint i = 0; i < items.size(); i++) {
auto& item = items[i]; auto& item = items[i];
auto itemBuilder = builder.init(i, item.size()); auto itemBuilder = builder.init(i, item.size());
for (uint j = 0; j < item.size(); j++) { for (uint j = 0; j < item.size(); j++) {
itemBuilder[j].adoptToken(kj::mv(item[j])); itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
} }
} }
} }
...@@ -101,7 +101,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm ...@@ -101,7 +101,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
for (auto& line: comment) { for (auto& line: comment) {
size += line.size() + 1; // include newline size += line.size() + 1; // include newline
} }
if (size > 0) {
Text::Builder builder = statement.initDocComment(size); Text::Builder builder = statement.initDocComment(size);
char* pos = builder.begin(); char* pos = builder.begin();
for (auto& line: comment) { for (auto& line: comment) {
...@@ -110,7 +109,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm ...@@ -110,7 +109,6 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
*pos++ = '\n'; *pos++ = '\n';
} }
KJ_ASSERT(pos == builder.end()); KJ_ASSERT(pos == builder.end());
}
} }
constexpr auto discardComment = constexpr auto discardComment =
...@@ -131,16 +129,16 @@ constexpr auto newline = p::oneOf( ...@@ -131,16 +129,16 @@ constexpr auto newline = p::oneOf(
p::exactChar<'\n'>(), p::exactChar<'\n'>(),
sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>())))); sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
constexpr auto docComment = sequence( constexpr auto docComment = p::optional(p::sequence(
discardLineWhitespace, discardLineWhitespace,
p::discard(p::optional(newline)), p::discard(p::optional(newline)),
p::many(p::sequence(discardLineWhitespace, saveComment))); p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank // Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines. // lines.
} // namespace } // namespace
Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) { Lexer::Lexer(Orphanage orphanageParam): orphanage(orphanageParam) {
// Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe // Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it. // for us to use parsers.tokenSequence even though we haven't yet constructed it.
...@@ -148,7 +146,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) { ...@@ -148,7 +146,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& commaDelimitedList = arena.copy(p::transform( auto& commaDelimitedList = arena.copy(p::transform(
p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))), p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
[&](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest) [this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
-> kj::Array<kj::Array<Orphan<Token>>> { -> kj::Array<kj::Array<Orphan<Token>>> {
if (first == nullptr && rest == nullptr) { if (first == nullptr && rest == nullptr) {
// Completely empty list. // Completely empty list.
...@@ -165,39 +163,39 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) { ...@@ -165,39 +163,39 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& token = arena.copy(p::oneOf( auto& token = arena.copy(p::oneOf(
p::transformWithLocation(p::identifier, p::transformWithLocation(p::identifier,
[&](Location loc, kj::String name) -> Orphan<Token> { [this](Location loc, kj::String name) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIdentifier(name); initTok(t, loc).setIdentifier(name);
return t; return t;
}), }),
p::transformWithLocation(p::doubleQuotedString, p::transformWithLocation(p::doubleQuotedString,
[&](Location loc, kj::String text) -> Orphan<Token> { [this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setStringLiteral(text); initTok(t, loc).setStringLiteral(text);
return t; return t;
}), }),
p::transformWithLocation(p::integer, p::transformWithLocation(p::integer,
[&](Location loc, uint64_t i) -> Orphan<Token> { [this](Location loc, uint64_t i) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIntegerLiteral(i); initTok(t, loc).setIntegerLiteral(i);
return t; return t;
}), }),
p::transformWithLocation(p::number, p::transformWithLocation(p::number,
[&](Location loc, double x) -> Orphan<Token> { [this](Location loc, double x) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setFloatLiteral(x); initTok(t, loc).setFloatLiteral(x);
return t; return t;
}), }),
p::transformWithLocation( p::transformWithLocation(
p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))), p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
[&](Location loc, kj::String text) -> Orphan<Token> { [this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setOperator(text); initTok(t, loc).setOperator(text);
return t; return t;
}), }),
p::transformWithLocation( p::transformWithLocation(
sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()), sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
[&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList( buildTokenSequenceList(
initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items)); initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
...@@ -205,7 +203,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) { ...@@ -205,7 +203,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
}), }),
p::transformWithLocation( p::transformWithLocation(
sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()), sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
[&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>(); auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList( buildTokenSequenceList(
initTok(t, loc).initBracketedList(items.size()), kj::mv(items)); initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
...@@ -219,34 +217,46 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) { ...@@ -219,34 +217,46 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& statementEnd = arena.copy(p::oneOf( auto& statementEnd = arena.copy(p::oneOf(
transform(p::sequence(p::exactChar<';'>(), docComment), transform(p::sequence(p::exactChar<';'>(), docComment),
[&](kj::Array<kj::String>&& comment) -> Orphan<Statement> { [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>(); auto result = orphanage.newOrphan<Statement>();
auto builder = result.get(); auto builder = result.get();
attachDocComment(builder, kj::mv(comment)); KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
}
builder.getBlock().setNone(); builder.getBlock().setNone();
return result; return result;
}), }),
transform( transform(
p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>()), p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
[&](kj::Array<kj::String>&& comment, kj::Array<Orphan<Statement>>&& statements) docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment,
kj::Array<Orphan<Statement>>&& statements,
kj::Maybe<kj::Array<kj::String>>&& lateComment)
-> Orphan<Statement> { -> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>(); auto result = orphanage.newOrphan<Statement>();
auto builder = result.get(); auto builder = result.get();
attachDocComment(builder, kj::mv(comment)); KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
} else KJ_IF_MAYBE(c, lateComment) {
attachDocComment(builder, kj::mv(*c));
}
auto list = builder.getBlock().initStatements(statements.size()); auto list = builder.getBlock().initStatements(statements.size());
for (uint i = 0; i < statements.size(); i++) { for (uint i = 0; i < statements.size(); i++) {
list[i].adoptStatement(kj::mv(statements[i])); list.adoptWithCaveats(i, kj::mv(statements[i]));
} }
return result; return result;
}) })
)); ));
auto& statement = arena.copy(p::transform(p::sequence(tokenSequence, statementEnd), auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
[&](kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) { [this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
auto tokensBuilder = statement.get().initTokens(tokens.size()); auto builder = statement.get();
auto tokensBuilder = builder.initTokens(tokens.size());
for (uint i = 0; i < tokens.size(); i++) { for (uint i = 0; i < tokens.size(); i++) {
tokensBuilder[i].adoptToken(kj::mv(tokens[i])); tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
} }
builder.setStartByte(loc.begin());
builder.setEndByte(loc.end());
return kj::mv(statement); return kj::mv(statement);
})); }));
......
# Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@0xa73956d2621fc3ee; @0xa73956d2621fc3ee;
using Cxx = import "/capnp/c++.capnp"; using Cxx = import "/capnp/c++.capnp";
...@@ -11,48 +34,35 @@ struct Token { ...@@ -11,48 +34,35 @@ struct Token {
integerLiteral @3 :UInt64; integerLiteral @3 :UInt64;
floatLiteral @4 :Float64; floatLiteral @4 :Float64;
operator @5 :Text; operator @5 :Text;
parenthesizedList @6 :List(List(TokenPointer)); parenthesizedList @6 :List(List(Token));
bracketedList @7 :List(List(TokenPointer)); bracketedList @7 :List(List(Token));
} }
startByte @8 :UInt32; startByte @8 :UInt32;
endByte @9 :UInt32; endByte @9 :UInt32;
} }
struct TokenPointer {
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
token @0 :Token;
}
struct Statement { struct Statement {
tokens @0 :List(TokenPointer); tokens @0 :List(Token);
block @1 union { block @1 union {
none @2 :Void; none @2 :Void;
statements @3 :List(StatementPointer); statements @3 :List(Statement);
} }
docComment @4 :Text; docComment @4 :Text;
}
struct StatementPointer {
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
statement @0 :Statement; startByte @5 :UInt32;
endByte @6 :UInt32;
} }
struct LexedTokens { struct LexedTokens {
# Lexer output when asked to parse tokens that don't form statements. # Lexer output when asked to parse tokens that don't form statements.
tokens @0 :List(TokenPointer); tokens @0 :List(Token);
} }
struct LexedStatements { struct LexedStatements {
# Lexer output when asked to parse statements. # Lexer output when asked to parse statements.
statements @0 :List(StatementPointer); statements @0 :List(Statement);
} }
...@@ -698,6 +698,10 @@ struct WireHelpers { ...@@ -698,6 +698,10 @@ struct WireHelpers {
// mistakenly thinks the source location still owns the object. transferPointer() doesn't do // mistakenly thinks the source location still owns the object. transferPointer() doesn't do
// this zeroing itself because many callers transfer several pointers in a loop then zero out // this zeroing itself because many callers transfer several pointers in a loop then zero out
// the whole section. // the whole section.
KJ_DASSERT(dst->isNull());
// We expect the caller to ensure the target is already null so won't leak.
if (src->isNull()) { if (src->isNull()) {
memset(dst, 0, sizeof(WirePointer)); memset(dst, 0, sizeof(WirePointer));
} else if (src->kind() == WirePointer::FAR) { } else if (src->kind() == WirePointer::FAR) {
...@@ -2034,6 +2038,45 @@ OrphanBuilder StructBuilder::disown(WirePointerCount ptrIndex) { ...@@ -2034,6 +2038,45 @@ OrphanBuilder StructBuilder::disown(WirePointerCount ptrIndex) {
return WireHelpers::disown(segment, pointers + ptrIndex); return WireHelpers::disown(segment, pointers + ptrIndex);
} }
void StructBuilder::transferContentFrom(StructBuilder other) {
// Determine the amount of data the builders have in common.
BitCount sharedDataSize = kj::min(dataSize, other.dataSize);
if (dataSize > sharedDataSize) {
// Since the target is larger than the source, make sure to zero out the extra bits that the
// source doesn't have.
if (dataSize == 1 * BITS) {
setDataField<bool>(0 * ELEMENTS, false);
} else {
byte* unshared = reinterpret_cast<byte*>(data) + sharedDataSize / BITS_PER_BYTE / BYTES;
memset(unshared, 0, (dataSize - sharedDataSize) / BITS_PER_BYTE / BYTES);
}
}
// Copy over the shared part.
if (sharedDataSize == 1 * BITS) {
setDataField<bool>(0 * ELEMENTS, other.getDataField<bool>(0 * ELEMENTS));
} else {
memcpy(data, other.data, sharedDataSize / BITS_PER_BYTE / BYTES);
}
// Zero out all pointers in the target.
for (uint i = 0; i < pointerCount / POINTERS; i++) {
WireHelpers::zeroObject(segment, pointers + i);
}
// Transfer the pointers.
WirePointerCount sharedPointerCount = kj::min(pointerCount, other.pointerCount);
for (uint i = 0; i < sharedPointerCount / POINTERS; i++) {
WireHelpers::transferPointer(segment, pointers + i, other.segment, other.pointers + i);
}
// Zero out the pointers that were transferred in the source because it no longer has ownership.
// If the source had any extra pointers that the destination didn't have space for, we
// intentionally leave them be, so that they'll be cleaned up later.
memset(other.pointers, 0, sharedPointerCount * BYTES_PER_POINTER / BYTES);
}
bool StructBuilder::isPointerFieldNull(WirePointerCount ptrIndex) { bool StructBuilder::isPointerFieldNull(WirePointerCount ptrIndex) {
return (pointers + ptrIndex)->isNull(); return (pointers + ptrIndex)->isNull();
} }
......
...@@ -364,6 +364,11 @@ public: ...@@ -364,6 +364,11 @@ public:
// Detach the given pointer field from this object. The pointer becomes null, and the child // Detach the given pointer field from this object. The pointer becomes null, and the child
// object is returned as an orphan. // object is returned as an orphan.
void transferContentFrom(StructBuilder other);
// Adopt all pointers from `other`, and also copy all data. If `other`'s sections are larger
// than this, the extra data is not transferred, meaning there is a risk of data loss when
// transferring from messages built with future versions of the protocol.
bool isPointerFieldNull(WirePointerCount ptrIndex); bool isPointerFieldNull(WirePointerCount ptrIndex);
StructReader asReader() const; StructReader asReader() const;
......
...@@ -249,6 +249,24 @@ struct List<T, Kind::STRUCT> { ...@@ -249,6 +249,24 @@ struct List<T, Kind::STRUCT> {
return typename T::Builder(builder.getStructElement(index * ELEMENTS)); return typename T::Builder(builder.getStructElement(index * ELEMENTS));
} }
inline void adoptWithCaveats(uint index, Orphan<T>&& orphan) {
  // Almost like adopt(), but with two caveats that stem from struct list elements being
  // allocated inline rather than behind pointers:
  // * Rather than truly adopting the orphan, this performs a shallow copy: each of the
  //   orphan's children is effectively adopted into the target element, while the orphan's
  //   own storage is simply abandoned, possibly wasting space in the message object.
  // * If the orphan is larger than the target element -- say, because it was built with a
  //   newer schema version containing extra fields -- the excess is truncated, losing data.

  // Pass a zero-valued StructSize to asStruct() so the orphan is never expanded: it is
  // about to be thrown away anyway, and transferContentFrom() already carefully compares
  // the two structs' sizes before transferring.
  auto element = builder.getStructElement(index * ELEMENTS);
  element.transferContentFrom(
      orphan.builder.asStruct(_::StructSize(0 * WORDS, 0 * POINTERS, _::FieldSize::VOID)));
}
// There are no init(), set(), adopt(), or disown() methods for lists of structs because the // There are no init(), set(), adopt(), or disown() methods for lists of structs because the
// elements of the list are inlined and are initialized when the list is initialized. This // elements of the list are inlined and are initialized when the list is initialized. This
// means that init() would be redundant, and set() would risk data loss if the input struct // means that init() would be redundant, and set() would risk data loss if the input struct
......
...@@ -344,6 +344,37 @@ TEST(CommonParsers, TransformParser) { ...@@ -344,6 +344,37 @@ TEST(CommonParsers, TransformParser) {
} }
} }
TEST(CommonParsers, TransformOrRejectParser) {
  // A parser that consumes any characters, then accepts (yielding 123) only if they
  // spelled "foo"; anything else is rejected via a null Maybe.
  auto fooOnly = transformOrReject(many(any),
      [](Array<char> chars) -> Maybe<int> {
        if (heapString(chars) == "foo") {
          return 123;
        } else {
          return nullptr;
        }
      });

  {
    // Accepting case: the functor returns a value, so parsing succeeds.
    StringPtr accepted = "foo";
    Input in(accepted.begin(), accepted.end());
    Maybe<int> outcome = fooOnly(in);
    KJ_IF_MAYBE(value, outcome) {
      EXPECT_EQ(123, *value);
    } else {
      ADD_FAILURE() << "Expected 123, got null.";
    }
    EXPECT_TRUE(in.atEnd());
  }

  {
    // Rejecting case: the sub-parser still consumes the input, but the functor's null
    // result causes the overall parse to fail.
    StringPtr rejected = "bar";
    Input in(rejected.begin(), rejected.end());
    Maybe<int> outcome = fooOnly(in);
    EXPECT_TRUE(outcome == nullptr);
    EXPECT_TRUE(in.atEnd());
  }
}
TEST(CommonParsers, References) { TEST(CommonParsers, References) {
struct TransformFunc { struct TransformFunc {
int value; int value;
......
...@@ -71,11 +71,11 @@ public: ...@@ -71,11 +71,11 @@ public:
} }
bool atEnd() { return pos == end; } bool atEnd() { return pos == end; }
const Element& current() { auto current() -> decltype(*instance<Iterator>()) {
KJ_IREQUIRE(!atEnd()); KJ_IREQUIRE(!atEnd());
return *pos; return *pos;
} }
const Element& consume() { auto consume() -> decltype(*instance<Iterator>()) {
KJ_IREQUIRE(!atEnd()); KJ_IREQUIRE(!atEnd());
return *pos++; return *pos++;
} }
...@@ -274,7 +274,6 @@ private: ...@@ -274,7 +274,6 @@ private:
template <typename SubParser, typename Result> template <typename SubParser, typename Result>
constexpr ConstResult_<SubParser, Result> constResult(SubParser&& subParser, Result&& result) { constexpr ConstResult_<SubParser, Result> constResult(SubParser&& subParser, Result&& result) {
// Constructs a parser which returns exactly `result` if `subParser` is successful. // Constructs a parser which returns exactly `result` if `subParser` is successful.
return ConstResult_<SubParser, Result>(kj::fwd<SubParser>(subParser), kj::fwd<Result>(result)); return ConstResult_<SubParser, Result>(kj::fwd<SubParser>(subParser), kj::fwd<Result>(result));
} }
...@@ -571,6 +570,27 @@ private: ...@@ -571,6 +570,27 @@ private:
TransformFunc transform; TransformFunc transform;
}; };
template <typename SubParser, typename TransformFunc>
class TransformOrReject_ {
public:
explicit constexpr TransformOrReject_(SubParser&& subParser, TransformFunc&& transform)
: subParser(kj::fwd<SubParser>(subParser)), transform(kj::fwd<TransformFunc>(transform)) {}
template <typename Input>
decltype(kj::apply(instance<TransformFunc&>(), instance<OutputType<SubParser, Input>&&>()))
operator()(Input& input) const {
KJ_IF_MAYBE(subResult, subParser(input)) {
return kj::apply(transform, kj::mv(*subResult));
} else {
return nullptr;
}
}
private:
SubParser subParser;
TransformFunc transform;
};
template <typename SubParser, typename TransformFunc> template <typename SubParser, typename TransformFunc>
class TransformWithLocation_ { class TransformWithLocation_ {
public: public:
...@@ -606,12 +626,21 @@ constexpr Transform_<SubParser, TransformFunc> transform( ...@@ -606,12 +626,21 @@ constexpr Transform_<SubParser, TransformFunc> transform(
kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor)); kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
} }
template <typename SubParser, typename TransformFunc>
constexpr TransformOrReject_<SubParser, TransformFunc> transformOrReject(
    SubParser&& subParser, TransformFunc&& transform) {
  // Like `transform()` except that `transform` returns a `Maybe`. A null return rejects
  // the parse; otherwise the parser's result is the content of the `Maybe`.
  return TransformOrReject_<SubParser, TransformFunc>(
      kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(transform));
}
template <typename SubParser, typename TransformFunc> template <typename SubParser, typename TransformFunc>
constexpr TransformWithLocation_<SubParser, TransformFunc> transformWithLocation( constexpr TransformWithLocation_<SubParser, TransformFunc> transformWithLocation(
SubParser&& subParser, TransformFunc&& functor) { SubParser&& subParser, TransformFunc&& functor) {
// Constructs a parser which executes some other parser and then transforms the result by invoking // Like `transform` except that `functor` also takes a `Span` as its first parameter specifying
// `functor` on it. Typically `functor` is a lambda. It is invoked using `kj::apply`, // the location of the parsed content. The span's position type is whatever the parser input's
// meaning tuples will be unpacked as arguments. // getPosition() returns.
return TransformWithLocation_<SubParser, TransformFunc>( return TransformWithLocation_<SubParser, TransformFunc>(
kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor)); kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
} }
...@@ -650,6 +679,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit ...@@ -650,6 +679,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit
// `condition` on the result to check if it is valid. Typically, `condition` is a lambda // `condition` on the result to check if it is valid. Typically, `condition` is a lambda
// returning true or false. Like with `transform()`, `condition` is invoked using `kj::apply` // returning true or false. Like with `transform()`, `condition` is invoked using `kj::apply`
// to unpack tuples. // to unpack tuples.
//
// TODO(soon): Remove in favor of transformOrReject()?
return AcceptIf_<SubParser, Condition>( return AcceptIf_<SubParser, Condition>(
kj::fwd<SubParser>(subParser), kj::fwd<Condition>(condition)); kj::fwd<SubParser>(subParser), kj::fwd<Condition>(condition));
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment