Commit 359ea45a authored by Kenton Varda's avatar Kenton Varda

Continuing work on capnp C++ parser.

parent cc8d8fc7
......@@ -22,33 +22,54 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include "parser.h"
#include <kj/vector.h>
#include <kj/io.h>
#include <unistd.h>
#include <kj/debug.h>
#include "../message.h"
#include <iostream>
class CoutErrorReporter: public capnp::compiler::ErrorReporter {
  // ErrorReporter that writes every reported error to standard output, one per line, in the form
  // "input:<startByte>-<endByte>: <message>".
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::String message) override {
    std::ostream& out = std::cout;

    // Location prefix first, then the message text.
    out << "input:" << startByte << "-" << endByte;
    out << ": " << message.cStr();

    // std::endl terminates the line and flushes the stream.
    out << std::endl;
  }
};
int main(int argc, char* argv[]) {
// Eventually this will be capnpc. For now it's just a dummy program that tests parsing.
//
// Flow: lex `input` into a LexedStatements message, parse those statements into a ParsedFile
// message, then copy the ParsedFile into a second message. The return values of lex() are
// ignored; reported errors go to `errorReporter`.
//
// NOTE(review): `input` is declared twice below (as kj::Vector<char> and as kj::StringPtr) and
// lex() is called both with and without an ErrorReporter. This looks like two revisions of the
// body merged together -- confirm which lines are current.

// Read all of standard input into `input`, 4096 bytes at a time, until read() returns 0.
kj::Vector<char> input;
char buffer[4096];
for (;;) {
ssize_t n;
KJ_SYSCALL(n = read(STDIN_FILENO, buffer, sizeof(buffer)));
if (n == 0) {
break;
}
input.addAll(buffer, buffer + n);
}
// Commented-out copy of the stdin-reading loop (the hard-coded input below is used instead).
// kj::Vector<char> input;
// char buffer[4096];
// for (;;) {
// ssize_t n;
// KJ_SYSCALL(n = read(STDIN_FILENO, buffer, sizeof(buffer)));
// if (n == 0) {
// break;
// }
// input.addAll(buffer, buffer + n);
// }
//
// KJ_DBG(input);
// This input triggers a data corruption bug. Fix it before doing anything else!
kj::StringPtr input = "@0xfa974d18d718428e; const x :Int32 = 1;";
CoutErrorReporter errorReporter;
KJ_DBG(input);
// Stage 1: lex the text into a LexedStatements message, reporting errors via errorReporter.
capnp::MallocMessageBuilder lexerArena;
auto lexedFile = lexerArena.initRoot<capnp::compiler::LexedStatements>();
capnp::compiler::lex(input, lexedFile, errorReporter);
KJ_DBG(lexedFile);
// NOTE(review): this second lex() call uses the two-argument form (no ErrorReporter).
capnp::MallocMessageBuilder message;
auto file = message.initRoot<capnp::compiler::LexedStatements>();
capnp::compiler::lex(input, file);
// Stage 2: parse the lexed statements into a ParsedFile message.
capnp::MallocMessageBuilder parserArena;
auto parsedFile = parserArena.initRoot<capnp::compiler::ParsedFile>();
capnp::compiler::parseFile(lexedFile.getStatements(), parsedFile, errorReporter);
KJ_DBG(file);
// Stage 3: copy the parsed tree into a fresh message through its Reader -- presumably to
// reproduce the data corruption bug mentioned above; confirm.
capnp::MallocMessageBuilder parserArena2;
parserArena2.setRoot(parsedFile.asReader());
//KJ_DBG(parsedFile);
return 0;
}
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "error-reporter.h"
#include <unistd.h>
namespace capnp {
namespace compiler {
// Out-of-line definition of ErrorReporter's destructor. The body is empty and the exception
// specification is noexcept(false).
ErrorReporter::~ErrorReporter() noexcept(false) {}
} // namespace compiler
} // namespace capnp
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ERROR_REPORTER_H_
#define ERROR_REPORTER_H_
#include "../common.h"
#include <kj/string.h>
namespace capnp {
namespace compiler {
class ErrorReporter {
// Abstract callback interface for receiving errors found in the input text. Implementations
// override addError() to decide what happens to each error (print it, record it, etc.).
public:
virtual ~ErrorReporter() noexcept(false);
// Virtual destructor, declared noexcept(false).
virtual void addError(uint32_t startByte, uint32_t endByte, kj::String message) = 0;
// Report an error at the given location in the input text. `startByte` and `endByte` indicate
// the span of text that is erroneous. They may be equal, in which case the parser was only
// able to identify where the error begins, not where it ends.
};
} // namespace compiler
} // namespace capnp
#endif // ERROR_REPORTER_H_
......@@ -27,6 +27,8 @@ using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("capnp::compiler");
# TODO(someday): Here's a case where parameterized types might be nice, but note that it would
# need to support primitive parameters...
struct LocatedText {
value @0 :Text;
startByte @1 :UInt32;
......@@ -169,6 +171,8 @@ struct Declaration {
struct Union {}
struct Group {}
struct Interface {}
struct Method {
......
......@@ -29,6 +29,13 @@ namespace capnp {
namespace compiler {
namespace {
class TestFailingErrorReporter: public ErrorReporter {
  // ErrorReporter for tests: every reported error is turned into a test failure whose message
  // includes the byte range and the error text.
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::String message) override {
    const char* text = message.cStr();
    ADD_FAILURE() << "Parse failed: (" << startByte << "-" << endByte << ") " << text;
  }
};
template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
// Parse the given string into the given Cap'n Proto struct type using lex(), then stringify the
......@@ -47,7 +54,8 @@ kj::String doLex(kj::StringPtr constText) {
}
MallocMessageBuilder message;
auto file = message.initRoot<LexResult>();
EXPECT_TRUE(lex(text, file));
TestFailingErrorReporter errorReporter;
EXPECT_TRUE(lex(text, file, errorReporter));
kj::String result = kj::str(file);
for (char& c: result) {
// Make it easier to write golden strings below.
......
......@@ -28,16 +28,16 @@
namespace capnp {
namespace compiler {
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
Lexer lexer(Orphanage::getForMessageContaining(result));
namespace p = kj::parse;
Lexer::ParserInput parserInput(input.begin(), input.end());
kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput =
lexer.getParsers().statementSequence(parserInput);
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
ErrorReporter& errorReporter) {
Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);
if (!parserInput.atEnd()) {
return false;
}
auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput);
Lexer::ParserInput parserInput(input.begin(), input.end());
kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput);
KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initStatements(output->size());
......@@ -46,20 +46,20 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
}
return true;
} else {
uint32_t best = parserInput.getBest();
errorReporter.addError(best, best, kj::str("Parse error."));
return false;
}
}
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
Lexer lexer(Orphanage::getForMessageContaining(result));
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
ErrorReporter& errorReporter) {
Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);
Lexer::ParserInput parserInput(input.begin(), input.end());
kj::Maybe<kj::Array<Orphan<Token>>> parseOutput =
lexer.getParsers().tokenSequence(parserInput);
auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput);
if (!parserInput.atEnd()) {
return false;
}
Lexer::ParserInput parserInput(input.begin(), input.end());
kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput);
KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initTokens(output->size());
......@@ -68,12 +68,12 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
}
return true;
} else {
uint32_t best = parserInput.getBest();
errorReporter.addError(best, best, kj::str("Parse error."));
return false;
}
}
namespace p = kj::parse;
namespace {
typedef p::Span<uint32_t> Location;
......@@ -138,7 +138,8 @@ constexpr auto docComment = p::optional(p::sequence(
} // namespace
Lexer::Lexer(Orphanage orphanageParam): orphanage(orphanageParam) {
Lexer::Lexer(Orphanage orphanageParam, ErrorReporter& errorReporterParam)
: orphanage(orphanageParam), errorReporter(errorReporterParam) {
// Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it.
......
......@@ -24,15 +24,18 @@
#ifndef CAPNP_COMPILER_LEXER_H_
#define CAPNP_COMPILER_LEXER_H_
#include "lexer.capnp.h"
#include <capnp/compiler/lexer.capnp.h>
#include <kj/parse/common.h>
#include <kj/arena.h>
#include "error-reporter.h"
namespace capnp {
namespace compiler {
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result);
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result);
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
ErrorReporter& errorReporter);
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
ErrorReporter& errorReporter);
// Lex the given source code, placing the results in `result`. Returns true if there
// were no errors, false if there were. Even when errors are present, the file may have partial
// content which can be fed into later stages of parsing in order to find more errors.
......@@ -46,7 +49,7 @@ class Lexer {
// into your own parsers.
public:
Lexer(Orphanage orphanage);
Lexer(Orphanage orphanage, ErrorReporter& errorReporter);
// `orphanage` is used to allocate Cap'n Proto message objects in the result. `inputStart` is
// a pointer to the beginning of the input, used to compute byte offsets.
......@@ -62,6 +65,9 @@ public:
explicit ParserInput(ParserInput& parent)
: IteratorInput<char, const char*>(parent), begin(parent.begin) {}
inline uint32_t getBest() {
// Wraps the base class's getBest(), converting its result into an offset from `begin` so that
// callers get a uint32_t position instead of a pointer.
// NOTE(review): `begin` is presumably the start of the input text, making this a byte offset --
// confirm against the constructor.
return IteratorInput<char, const char*>::getBest() - begin;
}
inline uint32_t getPosition() {
// Wraps the base class's getPosition(), converting its result into an offset from `begin` so
// that callers get a uint32_t position instead of a pointer.
// NOTE(review): `begin` is presumably the start of the input text, making this a byte offset --
// confirm against the constructor.
return IteratorInput<char, const char*>::getPosition() - begin;
}
......@@ -85,6 +91,7 @@ public:
private:
Orphanage orphanage;
ErrorReporter& errorReporter;
kj::Arena arena;
Parsers parsers;
};
......
......@@ -22,14 +22,88 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "parser.h"
#include <kj/debug.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
namespace capnp {
namespace compiler {
namespace {
uint64_t randomId() {
  // Returns 64 bits read from /dev/urandom.
  //
  // Failure to open the device, a failed read, or a short read is reported through
  // KJ_SYSCALL / KJ_ASSERT.
  uint64_t result;

  int fd;
  KJ_SYSCALL(fd = open("/dev/urandom", O_RDONLY));

  // Close the descriptor on every exit path, including when one of the checks below fails.
  // Without this, each call would leak one file descriptor.
  struct FdCloser {
    int fd;
    ~FdCloser() { close(fd); }
  } closer = { fd };

  ssize_t n;
  KJ_SYSCALL(n = read(fd, &result, sizeof(result)), "/dev/urandom");
  KJ_ASSERT(n == sizeof(result), "Incomplete read from /dev/urandom.", n);

  return result;
}
} // namespace
void parseFile(List<Statement>::Reader statements, ParsedFile::Builder result,
               ErrorReporter& errorReporter) {
  // Parses every top-level statement as a file-level declaration and fills in `result`:
  //  - a naked ID sets the file ID (a second one is reported as an error),
  //  - naked annotations are collected into the file's annotation list,
  //  - everything else becomes a top-level declaration.
  // Statements that fail to parse are dropped (parseStatement() reports the error itself).
  CapnpParser parser(Orphanage::getForMessageContaining(result), errorReporter);

  kj::Vector<Orphan<Declaration>> topLevelDecls(statements.size());
  kj::Vector<Orphan<Declaration::AnnotationApplication>> fileAnnotations;
  bool haveId = false;

  for (auto statement: statements) {
    KJ_IF_MAYBE(parsed, parser.parseStatement(statement, parser.getParsers().fileLevelDecl)) {
      Declaration::Builder declBuilder = parsed->get();
      auto declBody = declBuilder.getBody();
      auto kind = declBody.which();

      if (kind == Declaration::Body::NAKED_ID) {
        if (!haveId) {
          haveId = true;
          result.setId(declBody.getNakedId());
        } else {
          errorReporter.addError(declBuilder.getStartByte(), declBuilder.getEndByte(),
              kj::str("File can only have one ID."));
        }
      } else if (kind == Declaration::Body::NAKED_ANNOTATION) {
        fileAnnotations.add(declBody.disownNakedAnnotation());
      } else {
        topLevelDecls.add(kj::mv(*parsed));
      }
    }
  }

  if (!haveId) {
    // No ID in the file: suggest a freshly generated one with the top bit set.
    errorReporter.addError(0, 0,
        kj::str("File does not declare an ID. I've generated one for you. Add this line to your "
                "file: @0x", kj::hex(randomId() | (1ull << 63)), ";"));
  }

  // Move the collected orphans into the result message.
  auto topDeclsList = result.initTopDecls(topLevelDecls.size());
  for (size_t index = 0; index < topLevelDecls.size(); ++index) {
    topDeclsList.adoptWithCaveats(index, kj::mv(topLevelDecls[index]));
  }

  auto annotationsList = result.initAnnotations(fileAnnotations.size());
  for (size_t index = 0; index < fileAnnotations.size(); ++index) {
    annotationsList.adoptWithCaveats(index, kj::mv(fileAnnotations[index]));
  }
}
namespace p = kj::parse;
namespace {
// =======================================================================================
template <typename T>
struct Located {
T value;
......@@ -48,7 +122,7 @@ struct Located {
}
template <typename Result>
Orphan<Result> asProto(Orphanage orphanage) {
  // Allocates a new orphan of type `Result` from `orphanage`, copies this located value into it
  // with copyTo(), and returns the orphan.
  //
  // The orphan is allocated as `Result` (not `T`) so that it matches the declared return type.
  auto result = orphanage.newOrphan<Result>();
  copyTo(result.get());
  return result;
}
......@@ -59,6 +133,8 @@ struct Located {
: value(kj::mv(value)), startByte(startByte), endByte(endByte) {}
};
// =======================================================================================
template <typename T, Token::Body::Which type, T (Token::Body::Reader::*get)() const>
struct MatchTokenType {
kj::Maybe<Located<T>> operator()(Token::Reader token) const {
......@@ -85,6 +161,8 @@ constexpr auto rawParenthesizedList =
constexpr auto rawBracketedList =
TOKEN_TYPE_PARSER(List<List<Token>>::Reader, BRACKETED_LIST, getBracketedList);
// =======================================================================================
class ExactString {
public:
constexpr ExactString(const char* expected): expected(expected) {}
......@@ -111,6 +189,8 @@ constexpr auto op(const char* expected)
return p::transformOrReject(operatorToken, ExactString(expected));
}
// =======================================================================================
template <typename ItemParser>
class ParseListItems {
// Transformer that parses all items in the input token sequence list using the given parser.
......@@ -176,8 +256,22 @@ constexpr auto bracketedList(ItemParser&& itemParser, ErrorReporter& errorReport
kj::fwd<ItemParser>(itemParser), errorReporter));
}
// =======================================================================================
template <typename T>
Orphan<List<T>> arrayToList(Orphanage& orphanage, kj::Array<Orphan<T>>&& elements) {
  // Allocates an orphaned List<T> with one slot per entry of `elements` and moves each orphan
  // into the slot with the same index.
  const size_t count = elements.size();

  Orphan<List<T>> list = orphanage.newOrphan<List<T>>(count);
  auto listBuilder = list.get();

  size_t index = 0;
  while (index < count) {
    listBuilder.adoptWithCaveats(index, kj::mv(elements[index]));
    ++index;
  }

  return list;
}
} // namespace
// =======================================================================================
CapnpParser::CapnpParser(Orphanage orphanageParam, ErrorReporter& errorReporterParam)
: orphanage(orphanageParam), errorReporter(errorReporterParam) {
parsers.declName = arena.copy(p::transform(
......@@ -249,6 +343,50 @@ CapnpParser::CapnpParser(Orphanage orphanageParam, ErrorReporter& errorReporterP
return result;
}));
parsers.parenthesizedValueExpression = arena.copy(p::transform(
parenthesizedList(fieldAssignment, errorReporter),
[this](Located<kj::Array<kj::Maybe<Orphan<ValueExpression::FieldAssignment>>>>&& value)
-> Orphan<ValueExpression> {
if (value.value.size() == 1) {
KJ_IF_MAYBE(firstVal, value.value[0]) {
if (!firstVal->get().hasFieldName()) {
// There is only one value and it isn't an assignment, therefore the value is
// not a struct.
return firstVal->get().disownValue();
}
} else {
// There is only one value and it failed to parse.
auto result = orphanage.newOrphan<ValueExpression>();
auto builder = result.get();
builder.getBody().setUnknown();
value.copyLocationTo(builder);
return result;
}
}
// If we get here, the parentheses appear to contain a list of field assignments, meaning
// the value is a struct.
auto result = orphanage.newOrphan<ValueExpression>();
auto builder = result.get();
value.copyLocationTo(builder);
auto structBuilder = builder.getBody().initStructValue(value.value.size());
for (uint i = 0; i < value.value.size(); i++) {
KJ_IF_MAYBE(field, value.value[i]) {
if (field->get().hasFieldName()) {
structBuilder.adoptWithCaveats(i, kj::mv(*field));
} else {
auto fieldValue = field->get().getValue();
errorReporter.addError(fieldValue.getStartByte(), fieldValue.getEndByte(),
kj::str("Missing field name."));
}
}
}
return result;
}));
parsers.valueExpression = arena.copy(p::oneOf(
p::transform(integerLiteral,
[this](Located<uint64_t>&& value) -> Orphan<ValueExpression> {
......@@ -290,49 +428,18 @@ CapnpParser::CapnpParser(Orphanage orphanageParam, ErrorReporter& errorReporterP
value.copyLocationTo(builder);
return result;
}),
p::transform(p::sequence(identifier, parenthesizedList(fieldAssignment, errorReporter)),
[this](Located<Text::Reader>&& fieldName,
Located<kj::Array<kj::Maybe<Orphan<ValueExpression::FieldAssignment>>>>&& value)
p::transform(p::sequence(identifier, parsers.parenthesizedValueExpression),
[this](Located<Text::Reader>&& fieldName, Orphan<ValueExpression>&& value)
-> Orphan<ValueExpression> {
auto result = orphanage.newOrphan<ValueExpression>();
auto builder = result.get();
builder.setStartByte(fieldName.startByte);
builder.setEndByte(value.endByte);
builder.setEndByte(value.get().getEndByte());
auto uAssign = builder.getBody().initUnionValue();
fieldName.copyTo(uAssign.initFieldName());
if (value.value.size() == 1) {
KJ_IF_MAYBE(firstVal, value.value[0]) {
if (!firstVal->get().hasFieldName()) {
// There is only one value and it isn't an assignment, therefore the union is
// not a struct.
uAssign.adoptValue(firstVal->get().disownValue());
return result;
}
} else {
// There is only one value and it failed to parse.
uAssign.initValue().getBody().setUnknown();
return result;
}
}
// If we get here, the union value's parentheses appear to contain a list of field
// assignments, meaning the value is a struct.
auto uValue = uAssign.initValue();
value.copyLocationTo(uValue);
auto structBuilder = uValue.getBody().initStructValue(value.value.size());
for (uint i = 0; i < value.value.size(); i++) {
KJ_IF_MAYBE(field, value.value[i]) {
if (field->get().hasFieldName()) {
structBuilder.adoptWithCaveats(i, kj::mv(*field));
} else {
auto fieldValue = field->get().getValue();
errorReporter.addError(fieldValue.getStartByte(), fieldValue.getEndByte(),
kj::str("Missing field name."));
}
}
}
auto unionBuilder = builder.getBody().initUnionValue();
fieldName.copyTo(unionBuilder.initFieldName());
unionBuilder.adoptValue(kj::mv(value));
return result;
}),
......@@ -380,6 +487,37 @@ CapnpParser::CapnpParser(Orphanage orphanageParam, ErrorReporter& errorReporterP
})
));
parsers.annotation = arena.copy(p::transform(
p::sequence(op("$"), parsers.declName, parsers.parenthesizedValueExpression),
[this](Orphan<DeclName>&& name, Orphan<ValueExpression>&& value)
-> Orphan<Declaration::AnnotationApplication> {
auto result = orphanage.newOrphan<Declaration::AnnotationApplication>();
auto builder = result.get();
builder.adoptName(kj::mv(name));
builder.adoptValue(kj::mv(value));
return result;
}));
parsers.uid = arena.copy(p::transform(
p::sequence(op("@"), integerLiteral),
[this](Located<uint64_t>&& value) {
if (value.value < (1ull << 63)) {
errorReporter.addError(value.startByte, value.endByte,
kj::str("Invalid ID. Please generate a new one with 'capnpc -i'."));
}
return value.asProto<LocatedInteger>(orphanage);
}));
parsers.ordinal = arena.copy(p::transform(
p::sequence(op("@"), integerLiteral),
[this](Located<uint64_t>&& value) {
if (value.value >= 65536) {
errorReporter.addError(value.startByte, value.endByte,
kj::str("Ordinals cannot be greater than 65535."));
}
return value.asProto<LocatedInteger>(orphanage);
}));
// -----------------------------------------------------------------
parsers.usingDecl = arena.copy(p::transform(
......@@ -392,6 +530,115 @@ CapnpParser::CapnpParser(Orphanage orphanageParam, ErrorReporter& errorReporterP
builder.getBody().initUsingDecl().adoptTarget(kj::mv(type));
return DeclParserResult(kj::mv(decl));
}));
parsers.constDecl = arena.copy(p::transform(
p::sequence(keyword("const"), identifier, p::optional(parsers.uid),
op(":"), parsers.typeExpression,
op("="), parsers.valueExpression,
p::many(parsers.annotation)),
[this](Located<Text::Reader>&& name, kj::Maybe<Orphan<LocatedInteger>>&& id,
Orphan<TypeExpression>&& type, Orphan<ValueExpression>&& value,
kj::Array<Orphan<Declaration::AnnotationApplication>>&& annotations)
-> DeclParserResult {
auto decl = orphanage.newOrphan<Declaration>();
auto builder = decl.get();
name.copyTo(builder.initName());
KJ_IF_MAYBE(i, id) {
builder.getId().adoptUid(kj::mv(*i));
}
builder.adoptAnnotations(arrayToList(orphanage, kj::mv(annotations)));
auto constBuilder = builder.getBody().initConstDecl();
constBuilder.adoptType(kj::mv(type));
constBuilder.adoptValue(kj::mv(value));
return DeclParserResult(kj::mv(decl));
}));
// -----------------------------------------------------------------
auto& nakedId = arena.copy(p::transform(parsers.uid,
[this](Orphan<LocatedInteger>&& value) -> DeclParserResult {
auto decl = orphanage.newOrphan<Declaration>();
decl.get().getBody().setNakedId(value.get().getValue());
return DeclParserResult(kj::mv(decl));
}));
auto& nakedAnnotation = arena.copy(p::transform(parsers.annotation,
[this](Orphan<Declaration::AnnotationApplication>&& value) -> DeclParserResult {
auto decl = orphanage.newOrphan<Declaration>();
decl.get().getBody().adoptNakedAnnotation(kj::mv(value));
return DeclParserResult(kj::mv(decl));
}));
// -----------------------------------------------------------------
parsers.fileLevelDecl = arena.copy(p::oneOf(
parsers.usingDecl, parsers.constDecl, nakedId, nakedAnnotation));
}
// Out-of-line destructor with an empty body.
CapnpParser::~CapnpParser() {}
kj::Maybe<Orphan<Declaration>> CapnpParser::parseStatement(
    Statement::Reader statement, const DeclParser& parser) {
  // Parses the statement's token sequence with `parser`, which must consume every token.
  //
  // On success, returns the declaration with the statement's doc comment (if any) and byte range
  // copied in. If the statement has a block and the declaration supplies a member parser, the
  // block's statements are parsed recursively and adopted as nested declarations. A mismatch
  // between the declaration kind and the presence of a block is reported as an error, but the
  // declaration is still returned.
  //
  // On failure, reports "Parse error." at the furthest position the parser reached and returns
  // nullptr.
  auto fullParser = p::sequence(parser, p::endOfInput);

  auto tokens = statement.getTokens();
  ParserInput parserInput(tokens.begin(), tokens.end());

  KJ_IF_MAYBE(output, fullParser(parserInput)) {
    auto builder = output->decl.get();

    if (statement.hasDocComment()) {
      builder.setDocComment(statement.getDocComment());
    }

    builder.setStartByte(statement.getStartByte());
    builder.setEndByte(statement.getEndByte());

    switch (statement.getBlock().which()) {
      case Statement::Block::NONE:
        // The statement has no block, but this kind of declaration has members and therefore
        // needs one.
        if (output->memberParser != nullptr) {
          errorReporter.addError(statement.getStartByte(), statement.getEndByte(),
              kj::str("This statement should end with a block, not a semicolon."));
        }
        break;

      case Statement::Block::STATEMENTS:
        KJ_IF_MAYBE(memberParser, output->memberParser) {
          auto memberStatements = statement.getBlock().getStatements();
          kj::Vector<Orphan<Declaration>> members(memberStatements.size());
          for (auto memberStatement: memberStatements) {
            // Members that fail to parse have already been reported; just leave them out.
            KJ_IF_MAYBE(member, parseStatement(memberStatement, *memberParser)) {
              members.add(kj::mv(*member));
            }
          }
          builder.adoptNestedDecls(arrayToList(orphanage, members.releaseAsArray()));
        } else {
          // The statement has a block, but this kind of declaration cannot contain members.
          errorReporter.addError(statement.getStartByte(), statement.getEndByte(),
              kj::str("This statement should end with a semicolon, not a block."));
        }
        break;
    }

    return kj::mv(output->decl);

  } else {
    // Parse error. Figure out where to report it.
    auto best = parserInput.getBest();
    uint32_t bestByte;

    if (best != tokens.end()) {
      // The parser stalled on a specific token: point at its start.
      bestByte = best->getStartByte();
    } else if (tokens.end() != tokens.begin()) {
      // The parser ran out of tokens: point just past the last one.
      bestByte = (tokens.end() - 1)->getEndByte();
    } else {
      // No tokens at all.
      bestByte = 0;
    }

    errorReporter.addError(bestByte, bestByte, kj::str("Parse error."));
    return nullptr;
  }
}
} // namespace compiler
......
......@@ -24,21 +24,21 @@
#ifndef CAPNP_COMPILER_PARSER_H_
#define CAPNP_COMPILER_PARSER_H_
#include "grammar.capnp.h"
#include "lexer.capnp.h"
#include <capnp/compiler/grammar.capnp.h>
#include <capnp/compiler/lexer.capnp.h>
#include <kj/parse/common.h>
#include <kj/arena.h>
#include "error-reporter.h"
namespace capnp {
namespace compiler {
bool parseFile(List<Statement>::Reader statements, ParsedFile::Builder result);
void parseFile(List<Statement>::Reader statements, ParsedFile::Builder result,
ErrorReporter& errorReporter);
// Parse a list of statements to build a ParsedFile.
class ErrorReporter {
public:
virtual void addError(uint32_t startByte, uint32_t endByte, kj::String message) = 0;
};
//
// If any errors are reported, then the output is not usable. However, it may be passed on through
// later stages of compilation in order to detect additional errors.
class CapnpParser {
// Advanced parser interface. This interface exposes the inner parsers so that you can embed
......@@ -51,13 +51,16 @@ public:
~CapnpParser();
KJ_DISALLOW_COPY(CapnpParser);
using ParserInput = kj::parse::IteratorInput<Token::Reader, List<Token>::Reader::Iterator>;
struct DeclParserResult;
template <typename Output>
using Parser = kj::parse::ParserRef<ParserInput, Output>;
using DeclParser = Parser<DeclParserResult>;
Orphan<Declaration> parseStatement(Statement::Reader statement, const DeclParser& parser);
kj::Maybe<Orphan<Declaration>> parseStatement(
Statement::Reader statement, const DeclParser& parser);
// Parse a statement using the given parser. In addition to parsing the token sequence itself,
// this takes care of parsing the block (if any) and copying over the doc comment (if any).
......@@ -99,6 +102,10 @@ public:
Parser<Orphan<DeclName>> declName;
Parser<Orphan<TypeExpression>> typeExpression;
Parser<Orphan<ValueExpression>> valueExpression;
Parser<Orphan<ValueExpression>> parenthesizedValueExpression;
Parser<Orphan<Declaration::AnnotationApplication>> annotation;
Parser<Orphan<LocatedInteger>> uid;
Parser<Orphan<LocatedInteger>> ordinal;
DeclParser usingDecl;
DeclParser constDecl;
......
......@@ -138,8 +138,8 @@ struct OrphanGetImpl<T, Kind::STRUCT> {
}
};
template <typename T>
struct OrphanGetImpl<List<T>, Kind::LIST> {
template <typename T, Kind k>
struct OrphanGetImpl<List<T, k>, Kind::LIST> {
static inline typename List<T>::Builder apply(_::OrphanBuilder& builder) {
return typename List<T>::Builder(builder.asList(_::ElementSizeForType<T>::value));
}
......@@ -147,7 +147,7 @@ struct OrphanGetImpl<List<T>, Kind::LIST> {
template <typename T>
struct OrphanGetImpl<List<T, Kind::STRUCT>, Kind::LIST> {
static inline typename T::Builder apply(_::OrphanBuilder& builder) {
static inline typename List<T>::Builder apply(_::OrphanBuilder& builder) {
return typename List<T>::Builder(builder.asStructList(_::structSize<T>()));
}
};
......@@ -207,7 +207,7 @@ struct Orphanage::NewOrphanListImpl<List<T, k>> {
template <typename T>
struct Orphanage::NewOrphanListImpl<List<T, Kind::STRUCT>> {
static inline _::OrphanBuilder apply(_::BuilderArena* arena, uint size) {
return _::OrphanBuilder::initList(arena, size * ELEMENTS, _::structSize<T>());
return _::OrphanBuilder::initStructList(arena, size * ELEMENTS, _::structSize<T>());
}
};
......
......@@ -164,6 +164,17 @@ TEST(Stringify, Unions) {
EXPECT_EQ("u3f0s64(123456789012345678)", kj::str(root.getUnion3()));
}
TEST(Stringify, StructUnions) {
  // A union member that holds a struct should stringify as `tag(field = value, ...)`, with a
  // single pair of parentheses around the struct's fields.
  MallocMessageBuilder message;
  auto testStruct = message.initRoot<test::TestStructUnion>();

  auto fields = testStruct.getUn().initAllTypes();
  fields.setUInt32Field(12345);
  fields.setTextField("foo");

  auto actual = kj::str(testStruct);
  EXPECT_EQ("(un = allTypes(uInt32Field = 12345, textField = \"foo\"))", actual);
}
TEST(Stringify, MoreValues) {
EXPECT_EQ("123", kj::str(DynamicValue::Reader(123)));
EXPECT_EQ("1.23e47", kj::str(DynamicValue::Reader(123e45)));
......
......@@ -34,7 +34,7 @@ namespace {
static const char HEXDIGITS[] = "0123456789abcdef";
static void print(std::ostream& os, const DynamicValue::Reader& value,
schema::Type::Body::Which which) {
schema::Type::Body::Which which, bool alreadyParenthesized = false) {
// Print an arbitrary message via the dynamic API by
// iterating over the schema. Look at the handling
// of STRUCT in particular.
......@@ -127,7 +127,7 @@ static void print(std::ostream& os, const DynamicValue::Reader& value,
break;
}
case DynamicValue::STRUCT: {
os << "(";
if (!alreadyParenthesized) os << "(";
auto structValue = value.as<DynamicStruct>();
bool first = true;
for (auto member: structValue.getSchema().getMembers()) {
......@@ -151,7 +151,7 @@ static void print(std::ostream& os, const DynamicValue::Reader& value,
}
}
}
os << ")";
if (!alreadyParenthesized) os << ")";
break;
}
case DynamicValue::UNION: {
......@@ -159,7 +159,8 @@ static void print(std::ostream& os, const DynamicValue::Reader& value,
KJ_IF_MAYBE(tag, unionValue.which()) {
os << tag->getProto().getName().cStr() << "(";
print(os, unionValue.get(),
tag->getProto().getBody().getFieldMember().getType().getBody().which());
tag->getProto().getBody().getFieldMember().getType().getBody().which(),
true /* alreadyParenthesized */);
os << ")";
} else {
// Unknown union member; must have come from newer
......
......@@ -376,3 +376,10 @@ struct TestNewVersion {
new1 @3 :Int64 = 987;
new2 @4 :Text = "baz";
}
struct TestStructUnion {
# Test struct containing a single union, `un`, whose members are `allTypes` (a TestAllTypes)
# and `object` (a TestObject).
un @0 union {
allTypes @1 :TestAllTypes;
object @2 :TestObject;
}
}
......@@ -227,6 +227,77 @@ TEST(CommonParsers, ManyParserCountOnly) {
}
}
TEST(CommonParsers, TimesParser) {
  // times(any, 4) after a leading 'f' must collect exactly four characters: it fails when fewer
  // are available and leaves any extra input unconsumed.
  StringPtr source = "foobar";

  auto fThenFour = sequence(exactly('f'), times(any, 4));

  {
    // "foob": only three characters follow the 'f', so the parse fails at end of input.
    Input truncated(source.begin(), source.begin() + 4);
    Maybe<Array<char>> parsed = fThenFour(truncated);
    EXPECT_TRUE(parsed == nullptr);
    EXPECT_TRUE(truncated.atEnd());
  }

  {
    // "fooba": exactly four characters follow the 'f'; all input is consumed.
    Input exact(source.begin(), source.begin() + 5);
    Maybe<Array<char>> parsed = fThenFour(exact);
    KJ_IF_MAYBE(chars, parsed) {
      EXPECT_EQ("ooba", heapString(*chars));
    } else {
      ADD_FAILURE() << "Expected string, got null.";
    }
    EXPECT_TRUE(exact.atEnd());
  }

  {
    // "foobar": the parse succeeds with the first four characters and leaves 'r' unread.
    Input whole(source.begin(), source.end());
    Maybe<Array<char>> parsed = fThenFour(whole);
    KJ_IF_MAYBE(chars, parsed) {
      EXPECT_EQ("ooba", heapString(*chars));
    } else {
      ADD_FAILURE() << "Expected string, got null.";
    }
    EXPECT_FALSE(whole.atEnd());
  }
}
TEST(CommonParsers, TimesParserCountOnly) {
  // When the repeated sub-parser produces no output (exactly('o')), times() yields Tuple<> and
  // only success or failure can be observed.
  StringPtr source = "foooob";

  auto fThenFourOs = sequence(exactly('f'), times(exactly('o'), 4));

  {
    // "fooo": only three 'o's are available, so the parse fails at end of input.
    Input truncated(source.begin(), source.begin() + 4);
    Maybe<Tuple<>> parsed = fThenFourOs(truncated);
    EXPECT_TRUE(parsed == nullptr);
    EXPECT_TRUE(truncated.atEnd());
  }

  {
    // "foooo": exactly four 'o's; succeeds and consumes everything.
    Input exact(source.begin(), source.begin() + 5);
    Maybe<Tuple<>> parsed = fThenFourOs(exact);
    EXPECT_TRUE(parsed != nullptr);
    EXPECT_TRUE(exact.atEnd());
  }

  {
    // "foooob": succeeds and leaves the trailing 'b' unread.
    Input whole(source.begin(), source.end());
    Maybe<Tuple<>> parsed = fThenFourOs(whole);
    EXPECT_TRUE(parsed != nullptr);
    EXPECT_FALSE(whole.atEnd());
  }

  source = "fooob";

  {
    // "fooob": the fourth repetition hits 'b' instead of 'o', so the parse fails before the end
    // of the input.
    Input tooFew(source.begin(), source.end());
    Maybe<Tuple<>> parsed = fThenFourOs(tooFew);
    EXPECT_TRUE(parsed == nullptr);
    EXPECT_FALSE(tooFew.atEnd());
  }
}
TEST(CommonParsers, ManyParserSubResult) {
StringPtr text = "foooob";
......
......@@ -356,7 +356,7 @@ class Many_ {
struct Impl;
public:
explicit constexpr Many_(SubParser&& subParser)
: subParser(kj::mv(subParser)) {}
: subParser(kj::fwd<SubParser>(subParser)) {}
template <typename Input>
auto operator()(Input& input) const
......@@ -395,6 +395,8 @@ struct Many_<SubParser, atLeastOne>::Impl {
template <typename SubParser, bool atLeastOne>
template <typename Input>
struct Many_<SubParser, atLeastOne>::Impl<Input, Tuple<>> {
// If the sub-parser output is Tuple<>, just return a count.
static Maybe<uint> apply(const SubParser& subParser, Input& input) {
uint count = 0;
......@@ -437,6 +439,82 @@ constexpr Many_<SubParser, true> oneOrMore(SubParser&& subParser) {
return Many_<SubParser, true>(kj::fwd<SubParser>(subParser));
}
// -------------------------------------------------------------------
// times()
// Output = Array of output of sub-parser, or Tuple<> if sub-parser returns Tuple<>.
template <typename SubParser>
class Times_ {
// Parser object built by times(): applies `subParser` a fixed `count` number of times.
//
// Impl carries the actual parsing loop.  Its second parameter defaults to
// OutputType<SubParser, Input>, which lets a partial specialization take over when that
// type is Tuple<>.
template <typename Input, typename Output = OutputType<SubParser, Input>>
struct Impl;
public:
// Stores the (forwarded) sub-parser together with the required repetition count.
explicit constexpr Times_(SubParser&& subParser, uint count)
: subParser(kj::fwd<SubParser>(subParser)), count(count) {}
// Runs the parser against `input`.  The return type is whatever Impl<Input>::apply()
// returns for this Input; this declaration must match the out-of-line definition.
template <typename Input>
auto operator()(Input& input) const
-> decltype(Impl<Input>::apply(instance<const SubParser&>(), instance<uint>(), input));
private:
SubParser subParser;
// Number of times `subParser` must match.
uint count;
};
template <typename SubParser>
template <typename Input, typename Output>
struct Times_<SubParser>::Impl {
  // General case: gather the output of each of the `count` sub-parser matches into an
  // array.  Yields nullptr as soon as the input is exhausted or the sub-parser fails.
  static Maybe<Array<Output>> apply(const SubParser& subParser, uint count, Input& input) {
    auto collected = heapArrayBuilder<OutputType<SubParser, Input>>(count);

    // Every pass either appends exactly one element or bails out, so counting passes is
    // the same as watching the builder's size.
    for (uint matched = 0; matched < count; ++matched) {
      if (input.atEnd()) {
        return nullptr;
      }

      KJ_IF_MAYBE(item, subParser(input)) {
        collected.add(kj::mv(*item));
      } else {
        return nullptr;
      }
    }

    return collected.finish();
  }
};
template <typename SubParser>
template <typename Input>
struct Times_<SubParser>::Impl<Input, Tuple<>> {
  // Specialization for sub-parsers whose output is Tuple<>: there is nothing to collect,
  // so the result is just Tuple<> once the sub-parser has matched `count` times.  Yields
  // nullptr as soon as the input is exhausted or the sub-parser fails.
  static Maybe<Tuple<>> apply(const SubParser& subParser, uint count, Input& input) {
    for (uint matched = 0; matched < count; ++matched) {
      // Short-circuit keeps the sub-parser from being invoked on exhausted input.
      if (input.atEnd() || subParser(input) == nullptr) {
        return nullptr;
      }
    }

    return tuple();
  }
};
template <typename SubParser>
template <typename Input>
auto Times_<SubParser>::operator()(Input& input) const
    -> decltype(Impl<Input>::apply(instance<const SubParser&>(), instance<uint>(), input)) {
  // Pick the Impl variant that corresponds to the sub-parser's output type for this
  // Input, then let it run the repetition loop.
  typedef Impl<Input, OutputType<SubParser, Input>> SelectedImpl;
  return SelectedImpl::apply(subParser, count, input);
}
template <typename SubParser>
constexpr Times_<SubParser> times(SubParser&& subParser, uint count) {
// Constructs a parser that repeats the subParser exactly `count` times.
//
// The resulting parser fails (yields nullptr) if the input ends or the sub-parser fails
// before `count` matches have been made.  On success it yields an Array of the
// sub-parser's outputs, or Tuple<> when the sub-parser's output is Tuple<>.
return Times_<SubParser>(kj::fwd<SubParser>(subParser), count);
}
// -------------------------------------------------------------------
// optional()
// Output = Maybe<output of sub-parser>
......@@ -445,7 +523,7 @@ template <typename SubParser>
class Optional_ {
public:
explicit constexpr Optional_(SubParser&& subParser)
: subParser(kj::mv(subParser)) {}
: subParser(kj::fwd<SubParser>(subParser)) {}
template <typename Input>
Maybe<Maybe<OutputType<SubParser, Input>>> operator()(Input& input) const {
......@@ -482,9 +560,8 @@ class OneOf_;
template <typename FirstSubParser, typename... SubParsers>
class OneOf_<FirstSubParser, SubParsers...> {
public:
template <typename T, typename... U>
explicit constexpr OneOf_(T&& firstSubParser, U&&... rest)
: first(kj::fwd<T>(firstSubParser)), rest(kj::fwd<U>(rest)...) {}
explicit constexpr OneOf_(FirstSubParser&& firstSubParser, SubParsers&&... rest)
: first(kj::fwd<FirstSubParser>(firstSubParser)), rest(kj::fwd<SubParsers>(rest)...) {}
template <typename Input>
Maybe<OutputType<FirstSubParser, Input>> operator()(Input& input) const {
......@@ -653,7 +730,7 @@ template <typename SubParser, typename Condition>
class AcceptIf_ {
public:
explicit constexpr AcceptIf_(SubParser&& subParser, Condition&& condition)
: subParser(kj::mv(subParser)), condition(kj::mv(condition)) {}
: subParser(kj::fwd<SubParser>(subParser)), condition(kj::fwd<Condition>(condition)) {}
template <typename Input>
Maybe<OutputType<SubParser, Input>> operator()(Input& input) const {
......@@ -692,7 +769,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit
template <typename SubParser>
class NotLookingAt_ {
public:
explicit constexpr NotLookingAt_(SubParser&& subParser): subParser(kj::mv(subParser)) {}
explicit constexpr NotLookingAt_(SubParser&& subParser)
: subParser(kj::fwd<SubParser>(subParser)) {}
template <typename Input>
Maybe<Tuple<>> operator()(Input& input) const {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment