Commit ddb57c2d authored by Kenton Varda's avatar Kenton Varda

Implement lexer for new compiler.

parent 1d364ad9
......@@ -122,6 +122,7 @@ public:
// Result does not include NUL terminator.
inline char operator[](size_t index) const { return content[index]; }
inline char& operator[](size_t index) { return content[index]; }
inline char* begin() { return content.begin(); }
inline char* end() { return content.end() - 1; }
......
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include <kj/vector.h>
#include <kj/io.h>
#include <unistd.h>
#include <kj/debug.h>
#include "../message.h"
int main(int argc, char* argv[]) {
  // Eventually this will be capnpc. For now it's just a dummy program that tests parsing:
  // it slurps all of stdin, lexes it as a statement list, and dumps the result for debugging.
  kj::Vector<char> input;
  char buffer[4096];
  while (true) {
    ssize_t bytesRead;
    KJ_SYSCALL(bytesRead = read(STDIN_FILENO, buffer, sizeof(buffer)));
    if (bytesRead == 0) break;  // EOF
    input.addAll(buffer, buffer + bytesRead);
  }
  KJ_DBG(input);

  capnp::MallocMessageBuilder message;
  auto statements = message.initRoot<capnp::compiler::LexedStatements>();
  capnp::compiler::lex(input, statements);
  KJ_DBG(statements);
  return 0;
}
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include "../message.h"
#include <gtest/gtest.h>
namespace capnp {
namespace compiler {
namespace {
template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
  // Lex the given string into a message of type `LexResult`, then return its stringified
  // form.  To reduce escaping in the test literals, single quotes in the input are turned
  // into double quotes before lexing, and double quotes in the output are turned back into
  // single quotes afterwards.
  //
  // Comparing stringifications against golden strings is ugly and brittle. If we had a
  // text-format parser we could use that. Except that said parser would probably be built on
  // the very lexer being tested here, so... maybe this is the best we can reasonably do.

  kj::String inputText = heapString(constText);
  for (char& ch: inputText) {
    // Make it easier to write input strings below.
    if (ch == '\'') ch = '\"';
  }

  MallocMessageBuilder message;
  auto root = message.initRoot<LexResult>();
  EXPECT_TRUE(lex(inputText, root));

  kj::String stringified = kj::str(root);
  for (char& ch: stringified) {
    // Make it easier to write golden strings below.
    if (ch == '\"') ch = '\'';
  }
  return stringified;
}
TEST(Lexer, Tokens) {
  // Verifies token-level lexing (no statement structure) against golden stringifications.
  // Note: endByte is exclusive; startByte defaults to 0 and is omitted when zero.

  // Plain identifiers separated by whitespace.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = identifier('foo'), endByte = 3)), "
        "(token = (body = identifier('bar'), startByte = 4, endByte = 7))"
      "])",
      doLex<LexedTokens>("foo bar").cStr());

  // A '#' comment between tokens is skipped; byte offsets still count its bytes.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = identifier('foo'), endByte = 3)), "
        "(token = (body = identifier('bar'), startByte = 15, endByte = 18))"
      "])",
      doLex<LexedTokens>("foo # comment\n bar").cStr());

  // All literal kinds: string (with escape), integer, float (decimal and exponent),
  // and operator tokens (single char and multi-char runs).
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = stringLiteral('foo '), startByte = 2, endByte = 11)), "
        "(token = (body = integerLiteral(123), startByte = 12, endByte = 15)), "
        "(token = (body = floatLiteral(2.75), startByte = 16, endByte = 20)), "
        "(token = (body = floatLiteral(60000), startByte = 21, endByte = 24)), "
        "(token = (body = operator('+'), startByte = 25, endByte = 26)), "
        "(token = (body = operator('-='), startByte = 27, endByte = 29))"
      "])",
      doLex<LexedTokens>(" 'foo\\x20' 123 2.75 6e4 + -= ").cStr());

  // Parenthesized list: commas delimit sub-sequences, producing a list of token lists.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = parenthesizedList(["
          "["
            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
          "], ["
            "(token = (body = identifier('baz'), startByte = 10, endByte = 13)), "
            "(token = (body = identifier('qux'), startByte = 14, endByte = 17))"
          "], ["
            "(token = (body = identifier('corge'), startByte = 19, endByte = 24)), "
            "(token = (body = identifier('grault'), startByte = 25, endByte = 31))"
          "]"
        "]), endByte = 32))"
      "])",
      doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());

  // No commas: a single sub-sequence inside the parentheses.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = parenthesizedList(["
          "["
            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
          "]"
        "]), endByte = 9))"
      "])",
      doLex<LexedTokens>("(foo bar)").cStr());

  // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty
  // list.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = parenthesizedList([]), endByte = 4))"
      "])",
      doLex<LexedTokens>("( )").cStr());

  // Bracketed lists mirror parenthesized lists but use the bracketedList body.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = bracketedList(["
          "["
            "(token = (body = identifier('foo'), startByte = 1, endByte = 4)), "
            "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
          "], ["
            "(token = (body = identifier('baz'), startByte = 10, endByte = 13)), "
            "(token = (body = identifier('qux'), startByte = 14, endByte = 17))"
          "], ["
            "(token = (body = identifier('corge'), startByte = 19, endByte = 24)), "
            "(token = (body = identifier('grault'), startByte = 25, endByte = 31))"
          "]"
        "]), endByte = 32))"
      "])",
      doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());

  // Lists nest: a parenthesized list inside a bracketed list, followed by a plain token.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = bracketedList(["
          "["
            "(token = (body = identifier('foo'), startByte = 1, endByte = 4))"
          "], ["
            "(token = (body = parenthesizedList(["
              "["
                "(token = (body = identifier('bar'), startByte = 7, endByte = 10))"
              "], ["
                "(token = (body = identifier('baz'), startByte = 12, endByte = 15))"
              "]"
            "]), startByte = 6, endByte = 16))"
          "]"
        "]), endByte = 17)), "
        "(token = (body = identifier('qux'), startByte = 18, endByte = 21))"
      "])",
      doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());

  // All whitespace characters (\n \r \t \v) are skipped between tokens.
  EXPECT_STREQ(
      "(tokens = ["
        "(token = (body = identifier('foo'), endByte = 3)), "
        "(token = (body = identifier('bar'), startByte = 7, endByte = 10))"
      "])",
      doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
}
TEST(Lexer, Statements) {
  // Verifies statement-level lexing: tokens grouped by ';' terminators or '{...}' blocks.

  // A single semicolon-terminated statement.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = (tokens = ["
          "(token = (body = identifier('foo'), endByte = 3)), "
          "(token = (body = identifier('bar'), startByte = 4, endByte = 7))"
        "]))"
      "])",
      doLex<LexedStatements>("foo bar;").cStr());

  // Multiple statements; trailing whitespace after the last one is ignored.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = (tokens = ["
          "(token = (body = identifier('foo'), endByte = 3))"
        "])), "
        "(statement = (tokens = ["
          "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
        "])), "
        "(statement = (tokens = ["
          "(token = (body = identifier('baz'), startByte = 10, endByte = 13))"
        "]))"
      "])",
      doLex<LexedStatements>("foo; bar; baz; ").cStr());

  // A braced block attaches nested statements to the enclosing statement's `block` union.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "block = statements(["
            "(statement = (tokens = ["
              "(token = (body = identifier('bar'), startByte = 5, endByte = 8))"
            "])), "
            "(statement = (tokens = ["
              "(token = (body = identifier('baz'), startByte = 10, endByte = 13))"
            "]))"
          "]))"
        "), "
        "(statement = (tokens = ["
          "(token = (body = identifier('qux'), startByte = 16, endByte = 19))"
        "]))"
      "])",
      doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
}
TEST(Lexer, DocComments) {
  // Verifies doc-comment attachment: a '#' comment immediately following a statement
  // (same line or next line, no blank line between) becomes the statement's docComment.

  // Comment on the same line as the terminating ';'.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "docComment = 'blah blah\\n'"
        "))"
      "])",
      doLex<LexedStatements>("foo; # blah blah").cStr());

  // No space after '#': the comment text is taken as-is.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "docComment = 'blah blah\\n'"
        "))"
      "])",
      doLex<LexedStatements>("foo; #blah blah").cStr());

  // Only the first space after '#' is stripped; additional spaces are preserved.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "docComment = ' blah blah\\n'"
        "))"
      "])",
      doLex<LexedStatements>("foo; #  blah blah").cStr());

  // A comment on the immediately-following line still attaches to the statement.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "docComment = 'blah blah\\n'"
        "))"
      "])",
      doLex<LexedStatements>("foo;\n# blah blah").cStr());

  // A blank line breaks the association: no docComment is attached.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "]"
        "))"
      "])",
      doLex<LexedStatements>("foo;\n\n# blah blah").cStr());

  // Consecutive comment lines concatenate; lines after a blank line are dropped.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "docComment = 'bar baz\\nqux corge\\n'"
        "))"
      "])",
      doLex<LexedStatements>("foo;\n # bar baz\n # qux corge\n\n# grault\n# garply").cStr());

  // Doc comments work on block statements too: the comment after '{' documents the
  // outer statement, comments inside document the nested statements, and a comment
  // after '}' is ignored.
  EXPECT_STREQ(
      "(statements = ["
        "(statement = ("
          "tokens = ["
            "(token = (body = identifier('foo'), endByte = 3))"
          "], "
          "block = statements(["
            "(statement = (tokens = ["
              "(token = (body = identifier('bar'), startByte = 17, endByte = 20))"
            "], docComment = 'hi\\n')), "
            "(statement = (tokens = ["
              "(token = (body = identifier('baz'), startByte = 28, endByte = 31))"
            "]))"
          "]), "
          "docComment = 'blah blah\\n'"
        ")), "
        "(statement = (tokens = ["
          "(token = (body = identifier('qux'), startByte = 44, endByte = 47))"
        "]))"
      "])",
      doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());
}
} // namespace
} // namespace compiler
} // namespace capnp
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include <kj/parse/char.h>
#include <kj/debug.h>
namespace capnp {
namespace compiler {
namespace p = kj::parse;
namespace {
typedef p::IteratorInput<char, const char*> Input;
typedef p::Span<const char*> Location;
void buildTokenSequenceList(List<List<TokenPointer>>::Builder builder,
                            kj::Array<kj::Array<Orphan<Token>>>&& items) {
  // Move each orphaned token into the corresponding slot of the list-of-lists builder.
  // `builder` must already be sized to items.size().
  uint listIndex = 0;
  for (auto& sequence: items) {
    auto sequenceBuilder = builder.init(listIndex++, sequence.size());
    uint tokenIndex = 0;
    for (auto& tokenOrphan: sequence) {
      sequenceBuilder[tokenIndex++].adoptToken(kj::mv(tokenOrphan));
    }
  }
}
void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) {
  // Concatenate the comment lines, each followed by a newline, into the statement's
  // docComment field.  If there are no comment lines the field is left unset.
  size_t totalSize = 0;
  for (auto& line: comment) {
    totalSize += line.size() + 1;  // include newline
  }
  if (totalSize == 0) return;

  Text::Builder builder = statement.initDocComment(totalSize);
  char* out = builder.begin();
  for (auto& line: comment) {
    memcpy(out, line.begin(), line.size());
    out += line.size();
    *out++ = '\n';
  }
  KJ_ASSERT(out == builder.end());  // we must have filled the buffer exactly
}
constexpr auto discardComment =
    sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))),
             p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// Matches a '#' comment through its terminating newline (or end of input),
// producing no output.

constexpr auto saveComment =
    sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())),
             p::charsToString(p::many(p::anyOfChars("\n").invert())),
             p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// Like discardComment, but captures the comment text as a kj::String.
// At most one space immediately after the '#' is stripped.

constexpr auto commentsAndWhitespace =
    sequence(p::discardWhitespace,
             p::discard(p::many(sequence(discardComment, p::discardWhitespace))));
// Skips any run of whitespace and non-doc comments between tokens.

constexpr auto discardLineWhitespace =
    p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert())));
// Skips whitespace within a single line, stopping at newline characters.

constexpr auto newline = p::oneOf(
    p::exactChar<'\n'>(),
    sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
// Matches one line ending: "\n", "\r", or "\r\n".

constexpr auto docComment = sequence(
    discardLineWhitespace,
    p::discard(p::optional(newline)),
    p::many(p::sequence(discardLineWhitespace, saveComment)));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines.  Produces the captured comment lines for attachDocComment().
} // namespace
bool lex(kj::ArrayPtr<const char> input,
         LexedStatements::Builder* resultStatements,
         LexedTokens::Builder* resultTokens) {
  // Lex `input` into exactly one of the two builders; the other must be null.
  // Returns true only if the entire input was consumed successfully.
  //
  // This is a bit hacky. Since the transformations applied by our parser require access to an
  // Orphanage in order to build objects, we construct the parsers as local variables. This means
  // that all the parsers need to live in a single function scope. In order to handle both tokens
  // and statements, we have the function take `resultStatements` and `resultTokens` and parse
  // into whichever one is non-null.
  //
  // TODO(someday): Perhaps there should be a utility class called ParserPool which has a method
  //   that takes a parser, allocates a copy of it within some arena, then returns a ParserRef
  //   referencing that copy. Then there could be a Lexer class which contains a ParserPool and
  //   builds all its parsers in its constructor. This would allow the class to directly expose
  //   the parsers so that they can be used within other parser combinators.

  // The orphanage of whichever output message we're building; all Token/Statement
  // objects are allocated here and adopted into the result at the end.
  Orphanage orphanage = resultStatements == nullptr ?
      Orphanage::getForMessageContaining(*resultTokens) :
      Orphanage::getForMessageContaining(*resultStatements);

  // Record a token's byte range (offsets relative to `input`) and return the
  // body builder for the caller to fill in.
  auto initTok = [&](Orphan<Token>& t, const Location& loc) -> Token::Body::Builder {
    auto tb = t.get();
    tb.setStartByte(loc.begin() - input.begin());
    tb.setEndByte(loc.end() - input.begin());
    return tb.getBody();
  };

  // Forward reference so that parenthesized/bracketed lists can recursively
  // contain token sequences.  Assigned below after `token` is defined.
  p::ParserRef<Input, kj::Array<Orphan<Token>>> tokenSequence;

  // Comma-separated list of token sequences, e.g. the contents of "(a b, c d)".
  auto commaDelimitedList = transform(
      p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
      [&](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
          -> kj::Array<kj::Array<Orphan<Token>>> {
        if (first == nullptr && rest == nullptr) {
          // Completely empty list.
          return nullptr;
        } else {
          auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(rest.size() + 1);
          result.add(kj::mv(first));
          for (auto& item: rest) {
            result.add(kj::mv(item));
          }
          return result.finish();
        }
      });

  // A single token of any kind.  Order matters within oneOf: e.g. `integer` is
  // tried before `number` so "123" lexes as an integer literal.
  auto token = p::oneOf(
      p::transformWithLocation(p::identifier,
          [&](Location loc, kj::String name) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIdentifier(name);
            return t;
          }),
      p::transformWithLocation(p::doubleQuotedString,
          [&](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setStringLiteral(text);
            return t;
          }),
      p::transformWithLocation(p::integer,
          [&](Location loc, uint64_t i) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIntegerLiteral(i);
            return t;
          }),
      p::transformWithLocation(p::number,
          [&](Location loc, double x) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setFloatLiteral(x);
            return t;
          }),
      // A run of punctuation characters lexes as a single operator token.
      p::transformWithLocation(
          p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
          [&](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setOperator(text);
            return t;
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
            return t;
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
          [&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
            return t;
          })
      );

  // Tie the recursive knot: `tokenSequence_` must outlive every use of the
  // ParserRef, so it is a named local rather than a temporary.
  auto tokenSequence_ =
      sequence(commentsAndWhitespace, many(sequence(token, commentsAndWhitespace)));
  tokenSequence = tokenSequence_;

  if (resultStatements == nullptr) {
    // Only a token sequence is requested.
    Input parserInput(input.begin(), input.end());
    kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = tokenSequence(parserInput);

    // Any unconsumed input means a lex error.
    if (!parserInput.atEnd()) {
      return false;
    }

    KJ_IF_MAYBE(output, parseOutput) {
      auto l = resultTokens->initTokens(output->size());
      for (uint i = 0; i < output->size(); i++) {
        l[i].adoptToken(kj::mv((*output)[i]));
      }
      return true;
    } else {
      return false;
    }

  } else {
    // Statement parsing requested.  Statements recurse through blocks, so again
    // use a ParserRef assigned after the full parser is built.
    p::ParserRef<Input, kj::Array<Orphan<Statement>>> statementSequence;

    // A statement terminator: either ';' (possibly followed by a doc comment)
    // or a '{...}' block of nested statements.
    auto statementEnd = p::oneOf(
        transform(p::sequence(p::exactChar<';'>(), docComment),
            [&](kj::Array<kj::String>&& comment) -> Orphan<Statement> {
              auto result = orphanage.newOrphan<Statement>();
              auto builder = result.get();
              attachDocComment(builder, kj::mv(comment));
              builder.getBlock().setNone();
              return result;
            }),
        transform(
            p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>()),
            [&](kj::Array<kj::String>&& comment, kj::Array<Orphan<Statement>>&& statements)
                -> Orphan<Statement> {
              auto result = orphanage.newOrphan<Statement>();
              auto builder = result.get();
              attachDocComment(builder, kj::mv(comment));
              auto list = builder.getBlock().initStatements(statements.size());
              for (uint i = 0; i < statements.size(); i++) {
                list[i].adoptStatement(kj::mv(statements[i]));
              }
              return result;
            })
        );

    // A full statement: its leading tokens followed by a terminator; the tokens
    // are adopted into the Statement built by statementEnd.
    auto statement = p::transform(p::sequence(tokenSequence, statementEnd),
        [&](kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
          auto tokensBuilder = statement.get().initTokens(tokens.size());
          for (uint i = 0; i < tokens.size(); i++) {
            tokensBuilder[i].adoptToken(kj::mv(tokens[i]));
          }
          return kj::mv(statement);
        });

    auto statementSequence_ =
        sequence(commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace)));
    statementSequence = statementSequence_;

    Input parserInput(input.begin(), input.end());
    kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = statementSequence(parserInput);

    // Any unconsumed input means a lex error.
    if (!parserInput.atEnd()) {
      return false;
    }

    KJ_IF_MAYBE(output, parseOutput) {
      auto l = resultStatements->initStatements(output->size());
      for (uint i = 0; i < output->size(); i++) {
        l[i].adoptStatement(kj::mv((*output)[i]));
      }
      return true;
    } else {
      return false;
    }
  }
}
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
  // Convenience overload: lex `input` as a series of statements into `result`.
  // Returns true if the whole input lexed cleanly.
  // `input` is a trivially-copyable view, so there is nothing to move — pass it
  // through directly instead of wrapping it in kj::mv().
  return lex(input, &result, nullptr);
}
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
  // Convenience overload: lex `input` as a bare token sequence into `result`.
  // Returns true if the whole input lexed cleanly.
  // `input` is a trivially-copyable view, so there is nothing to move — pass it
  // through directly instead of wrapping it in kj::mv().
  return lex(input, nullptr, &result);
}
} // namespace compiler
} // namespace capnp
@0xa73956d2621fc3ee;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("capnp::compiler");
struct Token {
  # A single lexed token together with its source byte range.

  body @0 union {
    identifier @1 :Text;
    stringLiteral @2 :Text;
    integerLiteral @3 :UInt64;
    floatLiteral @4 :Float64;
    operator @5 :Text;
    # A run of punctuation characters lexed as one operator token.

    parenthesizedList @6 :List(List(TokenPointer));
    bracketedList @7 :List(List(TokenPointer));
    # Delimited lists: outer list elements are the comma-separated groups,
    # inner lists are the tokens within each group.
  }

  startByte @8 :UInt32;
  # Offset of the token's first byte within the input.

  endByte @9 :UInt32;
  # Offset just past the token's last byte (exclusive).
}
struct TokenPointer {
  # Wrapper holding a single Token.
  #
  # Hack to deal with the fact that struct lists cannot adopt elements: tokens
  # are built as orphans, then adopted into this wrapper's pointer field.
  #
  # TODO(cleanup):  Find a better approach.

  token @0 :Token;
}
struct Statement {
  # One statement: its leading tokens, an optional nested block, and an
  # optional doc comment.

  tokens @0 :List(TokenPointer);
  # The tokens preceding the statement's terminator (';' or '{...}').

  block @1 union {
    none @2 :Void;
    # Statement ended with ';' — no nested block.

    statements @3 :List(StatementPointer);
    # Statement ended with a '{...}' block containing these statements.
  }

  docComment @4 :Text;
  # Concatenated doc-comment lines attached to this statement, each
  # newline-terminated.  Unset when there is no doc comment.
}
struct StatementPointer {
  # Wrapper holding a single Statement.
  #
  # Hack to deal with the fact that struct lists cannot adopt elements:
  # statements are built as orphans, then adopted into this wrapper's pointer.
  #
  # TODO(cleanup):  Find a better approach.

  statement @0 :Statement;
}
struct LexedTokens {
  # Lexer output when asked to parse tokens that don't form statements
  # (i.e. input containing no semicolons or curly braces).

  tokens @0 :List(TokenPointer);
}
struct LexedStatements {
  # Lexer output when asked to parse statements (the normal mode for
  # whole source files).

  statements @0 :List(StatementPointer);
}
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CAPNP_COMPILER_LEXER_H_
#define CAPNP_COMPILER_LEXER_H_
#include "lexer.capnp.h"
namespace capnp {
namespace compiler {
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result);
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result);
// Lex the given source code, placing the results in `result`. Returns true if there
// were no errors, false if there were. Even when errors are present, the file may have partial
// content which can be fed into later stages of parsing in order to find more errors.
//
// There are two versions, one that parses a list of statements, and one which just parses tokens
// that might form a part of one statement. In other words, in the latter case, the input should
// not contain semicolons or curly braces, unless they are in string literals of course.
} // namespace compiler
} // namespace capnp
#endif // CAPNP_COMPILER_LEXER_H_
......@@ -59,13 +59,13 @@ TEST(Orphans, Lists) {
Orphan<List<uint32_t>> orphan = root.disownUInt32List();
EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasUInt32List());
root.adoptUInt32List(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasUInt32List());
checkList(root.asReader().getUInt32List(), {12, 34, 56});
checkList(root.asReader().getUInt32List(), {12u, 34u, 56u});
}
TEST(Orphans, Text) {
......@@ -202,7 +202,7 @@ TEST(Orphans, OrphanageListCopy) {
Orphan<List<uint32_t>> orphan = builder2.getOrphanage().newOrphanCopy(
root1.asReader().getUInt32List());
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
auto root2 = builder2.initRoot<TestAllTypes>();
root2.adoptUInt32List(kj::mv(orphan));
......@@ -272,13 +272,13 @@ TEST(Orphans, ListObject) {
Orphan<List<uint32_t>> orphan = root.disownObjectField<List<uint32_t>>();
EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasObjectField());
root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, DynamicStruct) {
......@@ -318,7 +318,7 @@ TEST(Orphans, DynamicList) {
root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, OrphanageDynamicStruct) {
......@@ -344,7 +344,7 @@ TEST(Orphans, OrphanageDynamicList) {
auto root = builder.initRoot<test::TestObject>();
root.adoptObjectField(kj::mv(orphan));
checkList(root.getObjectField<List<uint32_t>>(), {123, 456});
checkList(root.getObjectField<List<uint32_t>>(), {123u, 456u});
}
TEST(Orphans, OrphanageDynamicStructCopy) {
......@@ -376,7 +376,7 @@ TEST(Orphans, OrphanageDynamicListCopy) {
auto root2 = builder2.initRoot<test::TestObject>();
root2.adoptObjectField(kj::mv(orphan));
checkList(root2.getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root2.getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, OrphanageFromBuilder) {
......
......@@ -267,6 +267,10 @@ template <typename T> struct RemoveConstOrBogus_ { struct Type; };
template <typename T> struct RemoveConstOrBogus_<const T> { typedef T Type; };
template <typename T> using RemoveConstOrBogus = typename RemoveConstOrBogus_<T>::Type;
template <typename T> struct IsReference_ { static constexpr bool value = false; };
template <typename T> struct IsReference_<T&> { static constexpr bool value = true; };
template <typename T> constexpr bool isReference() { return IsReference_<T>::value; }
// =======================================================================================
// Equivalents to std::move() and std::forward(), since these are very commonly needed and the
// std header <utility> pulls in lots of other stuff.
......
......@@ -351,11 +351,11 @@ TEST(CharParsers, DoubleQuotedString) {
}
{
StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\x01\2\34\156\"";
StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\\x01\\x20\\2\\34\\156\"";
Input input(text.begin(), text.end());
Maybe<String> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\2\34\156", *value);
EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\x20\2\34\156", *value);
} else {
ADD_FAILURE() << "Expected string, got null.";
}
......
......@@ -183,6 +183,9 @@ constexpr auto nameStart = alpha.orChar('_');
constexpr auto nameChar = alphaNumeric.orChar('_');
constexpr auto hexDigit = charRange('0', '9').orRange('a', 'f').orRange('A', 'F');
constexpr auto octDigit = charRange('0', '7');
constexpr auto whitespaceChar = anyOfChars(" \f\n\r\t\v");
constexpr auto controlChar = charRange(0, 0x1f).invert().orGroup(whitespaceChar).invert();
constexpr auto whitespace = many(anyOfChars(" \f\n\r\t\v"));
constexpr auto discardWhitespace = discard(many(discard(anyOfChars(" \f\n\r\t\v"))));
......@@ -257,7 +260,7 @@ struct ParseFloat {
constexpr auto number = transform(
sequence(
many(digit),
oneOrMore(digit),
optional(sequence(exactChar<'.'>(), many(digit))),
optional(sequence(discard(anyOfChars("eE")), optional(anyOfChars("+-")), many(digit))),
notLookingAt(alpha.orAny("_."))),
......@@ -285,7 +288,7 @@ struct InterpretEscape {
struct ParseHexEscape {
inline char operator()(char first, char second) const {
return (parseDigit(first) << 4) | second;
return (parseDigit(first) << 4) | parseDigit(second);
}
};
......
......@@ -111,14 +111,30 @@ class ParserRef {
// from becoming ridiculous. Using too many of them can hurt performance, though.
public:
ParserRef(): parser(nullptr), wrapper(nullptr) {}
ParserRef(const ParserRef&) = default;
ParserRef(ParserRef&&) = default;
ParserRef& operator=(const ParserRef& other) = default;
ParserRef& operator=(ParserRef&& other) = default;
template <typename Other>
constexpr ParserRef(Other& other)
: parser(&other), wrapper(WrapperImplInstance<Other>::instance) {}
constexpr ParserRef(Other&& other)
: parser(&other), wrapper(&WrapperImplInstance<Decay<Other>>::instance) {
static_assert(kj::isReference<Other>(), "ParseRef should not be assigned to a temporary.");
}
template <typename Other>
inline ParserRef& operator=(Other&& other) {
static_assert(kj::isReference<Other>(), "ParseRef should not be assigned to a temporary.");
parser = &other;
wrapper = &WrapperImplInstance<Decay<Other>>::instance;
return *this;
}
KJ_ALWAYS_INLINE(Maybe<Output> operator()(Input& input) const) {
// Always inline in the hopes that this allows branch prediction to kick in so the virtual call
// doesn't hurt so much.
return wrapper.parse(parser, input);
return wrapper->parse(parser, input);
}
private:
......@@ -137,7 +153,7 @@ private:
};
const void* parser;
const Wrapper& wrapper;
const Wrapper* wrapper;
};
template <typename Input, typename Output>
......@@ -517,8 +533,8 @@ constexpr OneOf_<SubParsers...> oneOf(SubParsers&&... parsers) {
template <typename Position>
struct Span {
public:
inline const Position& begin() { return begin_; }
inline const Position& end() { return end_; }
inline const Position& begin() const { return begin_; }
inline const Position& end() const { return end_; }
Span() = default;
inline constexpr Span(Position&& begin, Position&& end): begin_(mv(begin)), end_(mv(end)) {}
......
......@@ -180,6 +180,10 @@ class Tuple<> {
// Tuple<>() is constexpr.
};
template <typename T>
class Tuple<T>;
// Single-element tuple should never be used. The public API should ensure this.
template <size_t index, typename... T>
inline TypeByIndex<index, T...>& getImpl(Tuple<T...>& tuple) {
// Get member of a Tuple by index, e.g. `get<2>(myTuple)`.
......@@ -210,6 +214,10 @@ inline T&& getImpl(T&& value) {
template <typename Func, typename SoFar, typename... T>
struct ExpandAndApplyResult_;
// Template which computes the return type of applying Func to T... after flattening tuples.
// SoFar starts as Tuple<> and accumulates the flattened parameter types -- so after this template
// is recursively expanded, T... is empty and SoFar is a Tuple containing all the parameters.
template <typename Func, typename First, typename... Rest, typename... T>
struct ExpandAndApplyResult_<Func, Tuple<T...>, First, Rest...>
: public ExpandAndApplyResult_<Func, Tuple<T..., First>, Rest...> {};
......
......@@ -75,11 +75,18 @@ public:
builder.add(kj::fwd<Params>(params)...);
}
template <typename Iterator>
inline void addAll(Iterator begin, Iterator end) {
size_t needed = builder.size() + (end - begin);
if (needed > builder.capacity()) grow(needed);
builder.addAll(begin, end);
}
private:
ArrayBuilder<T> builder;
void grow() {
setCapacity(capacity() == 0 ? 4 : capacity() * 2);
void grow(size_t minCapacity = 0) {
setCapacity(kj::max(minCapacity, capacity() == 0 ? 4 : capacity() * 2));
}
void setCapacity(size_t newSize) {
ArrayBuilder<T> newBuilder = heapArrayBuilder<T>(newSize);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment