Commit ddb57c2d authored by Kenton Varda's avatar Kenton Varda

Implement lexer for new compiler.

parent 1d364ad9
......@@ -122,6 +122,7 @@ public:
// Result does not include NUL terminator.
inline char operator[](size_t index) const { return content[index]; }
inline char& operator[](size_t index) { return content[index]; }
inline char* begin() { return content.begin(); }
inline char* end() { return content.end() - 1; }
......
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include <kj/vector.h>
#include <kj/io.h>
#include <unistd.h>
#include <kj/debug.h>
#include "../message.h"
int main(int argc, char* argv[]) {
  // Eventually this will be capnpc.  For now it's just a dummy program that tests parsing: it
  // reads all of standard input, lexes it as a list of statements, and dumps both the raw input
  // and the lexed result with KJ_DBG.
  //
  // Exit status is 0 if lex() reported no errors and 1 otherwise.

  // Slurp standard input into memory, one fixed-size chunk at a time.
  kj::Vector<char> input;
  char buffer[4096];
  for (;;) {
    ssize_t n;
    KJ_SYSCALL(n = read(STDIN_FILENO, buffer, sizeof(buffer)));
    if (n == 0) {
      // read() returns 0 at end-of-file.
      break;
    }
    input.addAll(buffer, buffer + n);
  }

  KJ_DBG(input);

  capnp::MallocMessageBuilder message;
  auto file = message.initRoot<capnp::compiler::LexedStatements>();

  // lex() returns false when the input contained errors.  The (possibly partial) result is still
  // dumped below, but the failure must not be silently dropped, so it is reflected in the exit
  // status.
  bool ok = capnp::compiler::lex(input, file);
  KJ_DBG(file);

  return ok ? 0 : 1;
}
This diff is collapsed.
This diff is collapsed.
@0xa73956d2621fc3ee;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("capnp::compiler");
struct Token {
# A single lexed token: its content (exactly one member of `body`) plus a byte range.
body @0 union {
# Which member is set identifies the kind of token.
identifier @1 :Text;
stringLiteral @2 :Text;
integerLiteral @3 :UInt64;
floatLiteral @4 :Float64;
operator @5 :Text;
parenthesizedList @6 :List(List(TokenPointer));
bracketedList @7 :List(List(TokenPointer));
# NOTE(review): the two list variants hold a list of token lists -- presumably one inner list per
# delimiter-separated element between the parentheses/brackets.  Confirm against the lexer.
}
startByte @8 :UInt32;
endByte @9 :UInt32;
# NOTE(review): presumably the byte offsets of this token within the lexed input.  Confirm
# whether `endByte` is inclusive or exclusive.
}
struct TokenPointer {
# Wrapper struct whose only field is a Token, used wherever a list of tokens is needed
# (`List(TokenPointer)`).
#
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
token @0 :Token;
}
struct Statement {
# A lexed statement: a list of tokens plus an optional nested block of statements.
tokens @0 :List(TokenPointer);
block @1 union {
# Either no block at all, or a list of nested statements.
none @2 :Void;
statements @3 :List(StatementPointer);
}
docComment @4 :Text;
# NOTE(review): presumably the text of the doc comment attached to this statement -- confirm
# against the lexer implementation.
}
struct StatementPointer {
# Wrapper struct whose only field is a Statement, used wherever a list of statements is needed
# (`List(StatementPointer)`).
#
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
statement @0 :Statement;
}
struct LexedTokens {
# Lexer output when asked to parse tokens that don't form statements, i.e. a flat token list
# with no statement structure.
tokens @0 :List(TokenPointer);
}
struct LexedStatements {
# Lexer output when asked to parse statements: the list of top-level statements found in the
# input.
statements @0 :List(StatementPointer);
}
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CAPNP_COMPILER_LEXER_H_
#define CAPNP_COMPILER_LEXER_H_
#include "lexer.capnp.h"
namespace capnp {
namespace compiler {
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result);
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result);
// Lex the given source code, placing the results in `result`.  Returns true if there were no
// errors, false if there were.  Even when errors are present, `result` may contain partial
// content which can be fed into later stages of parsing in order to find more errors.
//
// There are two versions: one that parses a list of statements, and one that just parses tokens
// that might form a part of one statement.  In other words, in the latter case, the input should
// not contain semicolons or curly braces, unless they are in string literals of course.
} // namespace compiler
} // namespace capnp
#endif // CAPNP_COMPILER_LEXER_H_
......@@ -59,13 +59,13 @@ TEST(Orphans, Lists) {
Orphan<List<uint32_t>> orphan = root.disownUInt32List();
EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasUInt32List());
root.adoptUInt32List(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasUInt32List());
checkList(root.asReader().getUInt32List(), {12, 34, 56});
checkList(root.asReader().getUInt32List(), {12u, 34u, 56u});
}
TEST(Orphans, Text) {
......@@ -202,7 +202,7 @@ TEST(Orphans, OrphanageListCopy) {
Orphan<List<uint32_t>> orphan = builder2.getOrphanage().newOrphanCopy(
root1.asReader().getUInt32List());
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
auto root2 = builder2.initRoot<TestAllTypes>();
root2.adoptUInt32List(kj::mv(orphan));
......@@ -272,13 +272,13 @@ TEST(Orphans, ListObject) {
Orphan<List<uint32_t>> orphan = root.disownObjectField<List<uint32_t>>();
EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56});
checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasObjectField());
root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, DynamicStruct) {
......@@ -318,7 +318,7 @@ TEST(Orphans, DynamicList) {
root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, OrphanageDynamicStruct) {
......@@ -344,7 +344,7 @@ TEST(Orphans, OrphanageDynamicList) {
auto root = builder.initRoot<test::TestObject>();
root.adoptObjectField(kj::mv(orphan));
checkList(root.getObjectField<List<uint32_t>>(), {123, 456});
checkList(root.getObjectField<List<uint32_t>>(), {123u, 456u});
}
TEST(Orphans, OrphanageDynamicStructCopy) {
......@@ -376,7 +376,7 @@ TEST(Orphans, OrphanageDynamicListCopy) {
auto root2 = builder2.initRoot<test::TestObject>();
root2.adoptObjectField(kj::mv(orphan));
checkList(root2.getObjectField<List<uint32_t>>(), {12, 34, 56});
checkList(root2.getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
}
TEST(Orphans, OrphanageFromBuilder) {
......
......@@ -267,6 +267,10 @@ template <typename T> struct RemoveConstOrBogus_ { struct Type; };
template <typename T> struct RemoveConstOrBogus_<const T> { typedef T Type; };
template <typename T> using RemoveConstOrBogus = typename RemoveConstOrBogus_<T>::Type;
// Type trait backing isReference<T>().  The primary template covers every type that is not
// matched by the `T&` specialization below.
template <typename T>
struct IsReference_ {
  static constexpr bool value = false;
};

// Specialization selected when the type is an lvalue reference (`T&`).
template <typename T>
struct IsReference_<T&> {
  static constexpr bool value = true;
};

// Returns true iff T is an lvalue reference type.  Note that only `T&` is matched; rvalue
// reference types (`T&&`) yield false.
template <typename T>
constexpr bool isReference() {
  return IsReference_<T>::value;
}
// =======================================================================================
// Equivalents to std::move() and std::forward(), since these are very commonly needed and the
// std header <utility> pulls in lots of other stuff.
......
......@@ -351,11 +351,11 @@ TEST(CharParsers, DoubleQuotedString) {
}
{
StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\x01\2\34\156\"";
StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\\x01\\x20\\2\\34\\156\"";
Input input(text.begin(), text.end());
Maybe<String> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\2\34\156", *value);
EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\x20\2\34\156", *value);
} else {
ADD_FAILURE() << "Expected string, got null.";
}
......
......@@ -183,6 +183,9 @@ constexpr auto nameStart = alpha.orChar('_');
constexpr auto nameChar = alphaNumeric.orChar('_');
// Alphanumeric characters plus underscore.
constexpr auto hexDigit = charRange('0', '9').orRange('a', 'f').orRange('A', 'F');
// Hexadecimal digits, in either letter case.
constexpr auto octDigit = charRange('0', '7');
// Octal digits.
constexpr auto whitespaceChar = anyOfChars(" \f\n\r\t\v");
// A single whitespace character: space, form feed, newline, carriage return, tab, vertical tab.
constexpr auto controlChar = charRange(0, 0x1f).invert().orGroup(whitespaceChar).invert();
// Characters in the range 0x00-0x1f that are NOT whitespace.  Expressed via De Morgan as
// invert(invert(0x00-0x1f) OR whitespace), using only the invert/orGroup operations.
constexpr auto whitespace = many(anyOfChars(" \f\n\r\t\v"));
// A run of whitespace characters (same character set as whitespaceChar).
// NOTE(review): `many` is presumably "zero or more" (cf. `oneOrMore`) -- confirm.
constexpr auto discardWhitespace = discard(many(discard(anyOfChars(" \f\n\r\t\v"))));
// Like `whitespace`, but wrapped in discard() at both levels.
// NOTE(review): presumably so the matched characters produce no output -- confirm.
......@@ -257,7 +260,7 @@ struct ParseFloat {
constexpr auto number = transform(
sequence(
many(digit),
oneOrMore(digit),
optional(sequence(exactChar<'.'>(), many(digit))),
optional(sequence(discard(anyOfChars("eE")), optional(anyOfChars("+-")), many(digit))),
notLookingAt(alpha.orAny("_."))),
......@@ -285,7 +288,7 @@ struct InterpretEscape {
struct ParseHexEscape {
  // Functor that combines two hex digit characters into a single char: `first` supplies the high
  // four bits and `second` the low four bits.
  //
  // Both characters must be run through parseDigit() before being combined.  OR-ing in the raw
  // character `second` would mix its character code into the result rather than the digit it
  // represents.
  inline char operator()(char first, char second) const {
    return (parseDigit(first) << 4) | parseDigit(second);
  }
};
......
......@@ -111,14 +111,30 @@ class ParserRef {
// from becoming ridiculous. Using too many of them can hurt performance, though.
public:
ParserRef(): parser(nullptr), wrapper(nullptr) {}
ParserRef(const ParserRef&) = default;
ParserRef(ParserRef&&) = default;
ParserRef& operator=(const ParserRef& other) = default;
ParserRef& operator=(ParserRef&& other) = default;
template <typename Other>
constexpr ParserRef(Other& other)
: parser(&other), wrapper(WrapperImplInstance<Other>::instance) {}
constexpr ParserRef(Other&& other)
: parser(&other), wrapper(&WrapperImplInstance<Decay<Other>>::instance) {
static_assert(kj::isReference<Other>(), "ParseRef should not be assigned to a temporary.");
}
template <typename Other>
inline ParserRef& operator=(Other&& other) {
  // Re-points this ParserRef at `other`, storing its address rather than a copy.
  //
  // `Other` is a forwarding reference, so it is deduced as a reference type only when the
  // argument is an lvalue.  The static_assert therefore rejects temporaries, whose address would
  // dangle as soon as the full expression ends.
  static_assert(kj::isReference<Other>(), "ParserRef should not be assigned to a temporary.");
  parser = &other;
  wrapper = &WrapperImplInstance<Decay<Other>>::instance;
  return *this;
}
KJ_ALWAYS_INLINE(Maybe<Output> operator()(Input& input) const) {
  // Runs the wrapped parser on `input` by dispatching through the type-erased wrapper.
  //
  // Always inline in the hopes that this allows branch prediction to kick in so the virtual call
  // doesn't hurt so much.
  //
  // `wrapper` is a pointer (it is null in a default-constructed ParserRef), so a ParserRef must
  // be pointed at a parser before it is invoked.
  return wrapper->parse(parser, input);
}
private:
......@@ -137,7 +153,7 @@ private:
};
const void* parser;
const Wrapper& wrapper;
const Wrapper* wrapper;
};
template <typename Input, typename Output>
......@@ -517,8 +533,8 @@ constexpr OneOf_<SubParsers...> oneOf(SubParsers&&... parsers) {
template <typename Position>
struct Span {
public:
inline const Position& begin() { return begin_; }
inline const Position& end() { return end_; }
inline const Position& begin() const { return begin_; }
inline const Position& end() const { return end_; }
Span() = default;
inline constexpr Span(Position&& begin, Position&& end): begin_(mv(begin)), end_(mv(end)) {}
......
......@@ -180,6 +180,10 @@ class Tuple<> {
// Tuple<>() is constexpr.
};
template <typename T>
class Tuple<T>;
// Single-element tuple should never be used.  The public API should ensure this.
//
// NOTE(review): this specialization is only declared here, presumably so that any accidental
// use of a one-element Tuple fails to compile -- confirm no definition exists elsewhere.
template <size_t index, typename... T>
inline TypeByIndex<index, T...>& getImpl(Tuple<T...>& tuple) {
// Get member of a Tuple by index, e.g. `get<2>(myTuple)`.
......@@ -210,6 +214,10 @@ inline T&& getImpl(T&& value) {
template <typename Func, typename SoFar, typename... T>
struct ExpandAndApplyResult_;
// Template which computes the return type of applying Func to T... after flattening tuples.
// SoFar starts as Tuple<> and accumulates the flattened parameter types -- so after this template
// is recursively expanded, T... is empty and SoFar is a Tuple containing all the parameters.
template <typename Func, typename First, typename... Rest, typename... T>
struct ExpandAndApplyResult_<Func, Tuple<T...>, First, Rest...>
: public ExpandAndApplyResult_<Func, Tuple<T..., First>, Rest...> {};
// Recursive step: moves `First` from the front of the remaining parameters onto the end of the
// accumulated Tuple<T...>, then continues with `Rest...`.
// NOTE(review): the case where `First` is itself a Tuple is presumably handled by another
// specialization not shown here -- confirm.
......
......@@ -75,11 +75,18 @@ public:
builder.add(kj::fwd<Params>(params)...);
}
template <typename Iterator>
inline void addAll(Iterator begin, Iterator end) {
  // Appends every element of [begin, end).  Capacity is checked once for the whole range, and
  // the backing storage is grown in a single step if the range does not fit.
  const size_t required = builder.size() + (end - begin);
  if (builder.capacity() < required) {
    grow(required);
  }
  builder.addAll(begin, end);
}
private:
ArrayBuilder<T> builder;
void grow() {
setCapacity(capacity() == 0 ? 4 : capacity() * 2);
void grow(size_t minCapacity = 0) {
setCapacity(kj::max(minCapacity, capacity() == 0 ? 4 : capacity() * 2));
}
void setCapacity(size_t newSize) {
ArrayBuilder<T> newBuilder = heapArrayBuilder<T>(newSize);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment