Commit ddb57c2d authored by Kenton Varda's avatar Kenton Varda

Implement lexer for new compiler.

parent 1d364ad9
...@@ -122,6 +122,7 @@ public: ...@@ -122,6 +122,7 @@ public:
// Result does not include NUL terminator. // Result does not include NUL terminator.
inline char operator[](size_t index) const { return content[index]; } inline char operator[](size_t index) const { return content[index]; }
inline char& operator[](size_t index) { return content[index]; }
inline char* begin() { return content.begin(); } inline char* begin() { return content.begin(); }
inline char* end() { return content.end() - 1; } inline char* end() { return content.end() - 1; }
......
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include <kj/vector.h>
#include <kj/io.h>
#include <unistd.h>
#include <kj/debug.h>
#include "../message.h"
// Dummy driver for the lexer: reads all of standard input, lexes it as a list of statements,
// and dumps both the raw input and the lexed result with KJ_DBG.
//
// Returns 0 when lex() reports no errors and 1 otherwise, so that callers (scripts, test
// harnesses) can detect a failed lex.  `argc` / `argv` are currently unused.
int main(int argc, char* argv[]) {
  // Eventually this will be capnpc.  For now it's just a dummy program that tests parsing.

  // Slurp standard input in fixed-size chunks until read() reports end-of-file (0 bytes).
  kj::Vector<char> input;
  char buffer[4096];
  for (;;) {
    ssize_t n;
    KJ_SYSCALL(n = read(STDIN_FILENO, buffer, sizeof(buffer)));
    if (n == 0) {
      break;
    }
    input.addAll(buffer, buffer + n);
  }

  KJ_DBG(input);

  capnp::MallocMessageBuilder message;
  auto file = message.initRoot<capnp::compiler::LexedStatements>();

  // lex() returns false if there were errors; the result may still hold partial content, so it
  // is dumped either way before the status is reported.
  bool lexedCleanly = capnp::compiler::lex(input, file);
  KJ_DBG(file);

  return lexedCleanly ? 0 : 1;
}
This diff is collapsed.
This diff is collapsed.
@0xa73956d2621fc3ee;
using Cxx = import "/capnp/c++.capnp";
$Cxx.namespace("capnp::compiler");
struct Token {
# A single token produced by the lexer.  `body` says which kind of token this is and carries
# its value; `startByte` / `endByte` record the token's extent.
body @0 union {
identifier @1 :Text;
stringLiteral @2 :Text;
integerLiteral @3 :UInt64;
floatLiteral @4 :Float64;
operator @5 :Text;
parenthesizedList @6 :List(List(TokenPointer));
bracketedList @7 :List(List(TokenPointer));
# The two list variants hold nested tokens as a list of token lists.
# NOTE(review): presumably the outer list has one entry per delimiter-separated element and the
# inner list holds that element's tokens -- confirm against the lexer implementation.
}
startByte @8 :UInt32;
endByte @9 :UInt32;
# NOTE(review): presumably byte offsets into the lexer's input; whether `endByte` is inclusive
# or exclusive is not visible here -- confirm against the lexer implementation.
}
struct TokenPointer {
# Single-field wrapper around Token.  Token lists in this schema are declared as
# List(TokenPointer) rather than List(Token).
#
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
token @0 :Token;
}
struct Statement {
# A single lexed statement: a list of tokens, a `block` union that is either `none` or a list
# of nested statements, and a `docComment` text.
tokens @0 :List(TokenPointer);
block @1 union {
none @2 :Void;
# NOTE(review): presumably a statement with no attached block -- confirm against the lexer.
statements @3 :List(StatementPointer);
# Nested statements belonging to this statement's block.
}
docComment @4 :Text;
}
struct StatementPointer {
# Single-field wrapper around Statement.  Statement lists in this schema are declared as
# List(StatementPointer) rather than List(Statement).
#
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
statement @0 :Statement;
}
struct LexedTokens {
# Lexer output when asked to parse tokens that don't form statements.
#
# This is the `result` type of the lex() overload that takes a LexedTokens::Builder.
tokens @0 :List(TokenPointer);
}
struct LexedStatements {
# Lexer output when asked to parse statements.
#
# This is the `result` type of the lex() overload that takes a LexedStatements::Builder.
statements @0 :List(StatementPointer);
}
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CAPNP_COMPILER_LEXER_H_
#define CAPNP_COMPILER_LEXER_H_
#include "lexer.capnp.h"
namespace capnp {
namespace compiler {
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result);
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result);
// Lex the given source code, placing the results in `result`. Returns true if there
// were no errors, false if there were. Even when errors are present, the file may have partial
// content which can be fed into later stages of parsing in order to find more errors.
//
// There are two versions, one that parses a list of statements, and one which just parses tokens
// that might form a part of one statement. In other words, in the latter case, the input should
// not contain semicolons or curly braces, unless they are in string literals of course.
} // namespace compiler
} // namespace capnp
#endif // CAPNP_COMPILER_LEXER_H_
...@@ -59,13 +59,13 @@ TEST(Orphans, Lists) { ...@@ -59,13 +59,13 @@ TEST(Orphans, Lists) {
Orphan<List<uint32_t>> orphan = root.disownUInt32List(); Orphan<List<uint32_t>> orphan = root.disownUInt32List();
EXPECT_FALSE(orphan == nullptr); EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56}); checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasUInt32List()); EXPECT_FALSE(root.hasUInt32List());
root.adoptUInt32List(kj::mv(orphan)); root.adoptUInt32List(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr); EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasUInt32List()); EXPECT_TRUE(root.hasUInt32List());
checkList(root.asReader().getUInt32List(), {12, 34, 56}); checkList(root.asReader().getUInt32List(), {12u, 34u, 56u});
} }
TEST(Orphans, Text) { TEST(Orphans, Text) {
...@@ -202,7 +202,7 @@ TEST(Orphans, OrphanageListCopy) { ...@@ -202,7 +202,7 @@ TEST(Orphans, OrphanageListCopy) {
Orphan<List<uint32_t>> orphan = builder2.getOrphanage().newOrphanCopy( Orphan<List<uint32_t>> orphan = builder2.getOrphanage().newOrphanCopy(
root1.asReader().getUInt32List()); root1.asReader().getUInt32List());
checkList(orphan.get().asReader(), {12, 34, 56}); checkList(orphan.get().asReader(), {12u, 34u, 56u});
auto root2 = builder2.initRoot<TestAllTypes>(); auto root2 = builder2.initRoot<TestAllTypes>();
root2.adoptUInt32List(kj::mv(orphan)); root2.adoptUInt32List(kj::mv(orphan));
...@@ -272,13 +272,13 @@ TEST(Orphans, ListObject) { ...@@ -272,13 +272,13 @@ TEST(Orphans, ListObject) {
Orphan<List<uint32_t>> orphan = root.disownObjectField<List<uint32_t>>(); Orphan<List<uint32_t>> orphan = root.disownObjectField<List<uint32_t>>();
EXPECT_FALSE(orphan == nullptr); EXPECT_FALSE(orphan == nullptr);
checkList(orphan.get().asReader(), {12, 34, 56}); checkList(orphan.get().asReader(), {12u, 34u, 56u});
EXPECT_FALSE(root.hasObjectField()); EXPECT_FALSE(root.hasObjectField());
root.adoptObjectField(kj::mv(orphan)); root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr); EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField()); EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56}); checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
} }
TEST(Orphans, DynamicStruct) { TEST(Orphans, DynamicStruct) {
...@@ -318,7 +318,7 @@ TEST(Orphans, DynamicList) { ...@@ -318,7 +318,7 @@ TEST(Orphans, DynamicList) {
root.adoptObjectField(kj::mv(orphan)); root.adoptObjectField(kj::mv(orphan));
EXPECT_TRUE(orphan == nullptr); EXPECT_TRUE(orphan == nullptr);
EXPECT_TRUE(root.hasObjectField()); EXPECT_TRUE(root.hasObjectField());
checkList(root.asReader().getObjectField<List<uint32_t>>(), {12, 34, 56}); checkList(root.asReader().getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
} }
TEST(Orphans, OrphanageDynamicStruct) { TEST(Orphans, OrphanageDynamicStruct) {
...@@ -344,7 +344,7 @@ TEST(Orphans, OrphanageDynamicList) { ...@@ -344,7 +344,7 @@ TEST(Orphans, OrphanageDynamicList) {
auto root = builder.initRoot<test::TestObject>(); auto root = builder.initRoot<test::TestObject>();
root.adoptObjectField(kj::mv(orphan)); root.adoptObjectField(kj::mv(orphan));
checkList(root.getObjectField<List<uint32_t>>(), {123, 456}); checkList(root.getObjectField<List<uint32_t>>(), {123u, 456u});
} }
TEST(Orphans, OrphanageDynamicStructCopy) { TEST(Orphans, OrphanageDynamicStructCopy) {
...@@ -376,7 +376,7 @@ TEST(Orphans, OrphanageDynamicListCopy) { ...@@ -376,7 +376,7 @@ TEST(Orphans, OrphanageDynamicListCopy) {
auto root2 = builder2.initRoot<test::TestObject>(); auto root2 = builder2.initRoot<test::TestObject>();
root2.adoptObjectField(kj::mv(orphan)); root2.adoptObjectField(kj::mv(orphan));
checkList(root2.getObjectField<List<uint32_t>>(), {12, 34, 56}); checkList(root2.getObjectField<List<uint32_t>>(), {12u, 34u, 56u});
} }
TEST(Orphans, OrphanageFromBuilder) { TEST(Orphans, OrphanageFromBuilder) {
......
...@@ -267,6 +267,10 @@ template <typename T> struct RemoveConstOrBogus_ { struct Type; }; ...@@ -267,6 +267,10 @@ template <typename T> struct RemoveConstOrBogus_ { struct Type; };
template <typename T> struct RemoveConstOrBogus_<const T> { typedef T Type; }; template <typename T> struct RemoveConstOrBogus_<const T> { typedef T Type; };
template <typename T> using RemoveConstOrBogus = typename RemoveConstOrBogus_<T>::Type; template <typename T> using RemoveConstOrBogus = typename RemoveConstOrBogus_<T>::Type;
// Compile-time check for lvalue-reference types.  isReference<T>() is true when T has the form
// `U&` and false for everything else -- including rvalue references (`U&&`), which the
// specialization below does not match.
template <typename Candidate>
struct IsReference_ {
  static constexpr bool value = false;
};
template <typename Referent>
struct IsReference_<Referent&> {
  static constexpr bool value = true;
};
template <typename T>
constexpr bool isReference() {
  return IsReference_<T>::value;
}
// ======================================================================================= // =======================================================================================
// Equivalents to std::move() and std::forward(), since these are very commonly needed and the // Equivalents to std::move() and std::forward(), since these are very commonly needed and the
// std header <utility> pulls in lots of other stuff. // std header <utility> pulls in lots of other stuff.
......
...@@ -351,11 +351,11 @@ TEST(CharParsers, DoubleQuotedString) { ...@@ -351,11 +351,11 @@ TEST(CharParsers, DoubleQuotedString) {
} }
{ {
StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\x01\2\34\156\""; StringPtr text = "\"test\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\?\\x01\\x20\\2\\34\\156\"";
Input input(text.begin(), text.end()); Input input(text.begin(), text.end());
Maybe<String> result = parser(input); Maybe<String> result = parser(input);
KJ_IF_MAYBE(value, result) { KJ_IF_MAYBE(value, result) {
EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\2\34\156", *value); EXPECT_EQ("test\a\b\f\n\r\t\v\'\"\?\x01\x20\2\34\156", *value);
} else { } else {
ADD_FAILURE() << "Expected string, got null."; ADD_FAILURE() << "Expected string, got null.";
} }
......
...@@ -183,6 +183,9 @@ constexpr auto nameStart = alpha.orChar('_'); ...@@ -183,6 +183,9 @@ constexpr auto nameStart = alpha.orChar('_');
constexpr auto nameChar = alphaNumeric.orChar('_'); constexpr auto nameChar = alphaNumeric.orChar('_');
constexpr auto hexDigit = charRange('0', '9').orRange('a', 'f').orRange('A', 'F'); constexpr auto hexDigit = charRange('0', '9').orRange('a', 'f').orRange('A', 'F');
constexpr auto octDigit = charRange('0', '7'); constexpr auto octDigit = charRange('0', '7');
constexpr auto whitespaceChar = anyOfChars(" \f\n\r\t\v");
// A single whitespace character: space, form feed, newline, carriage return, horizontal tab or
// vertical tab.
constexpr auto controlChar = charRange(0, 0x1f).invert().orGroup(whitespaceChar).invert();
// Built by double inversion: NOT (NOT [0x00, 0x1f] OR whitespaceChar).  Assuming invert() is set
// complement and orGroup() is set union, this is the characters in 0x00-0x1f that are not
// whitespace.
constexpr auto whitespace = many(anyOfChars(" \f\n\r\t\v")); constexpr auto whitespace = many(anyOfChars(" \f\n\r\t\v"));
constexpr auto discardWhitespace = discard(many(discard(anyOfChars(" \f\n\r\t\v")))); constexpr auto discardWhitespace = discard(many(discard(anyOfChars(" \f\n\r\t\v"))));
...@@ -257,7 +260,7 @@ struct ParseFloat { ...@@ -257,7 +260,7 @@ struct ParseFloat {
constexpr auto number = transform( constexpr auto number = transform(
sequence( sequence(
many(digit), oneOrMore(digit),
optional(sequence(exactChar<'.'>(), many(digit))), optional(sequence(exactChar<'.'>(), many(digit))),
optional(sequence(discard(anyOfChars("eE")), optional(anyOfChars("+-")), many(digit))), optional(sequence(discard(anyOfChars("eE")), optional(anyOfChars("+-")), many(digit))),
notLookingAt(alpha.orAny("_."))), notLookingAt(alpha.orAny("_."))),
...@@ -285,7 +288,7 @@ struct InterpretEscape { ...@@ -285,7 +288,7 @@ struct InterpretEscape {
struct ParseHexEscape { struct ParseHexEscape {
inline char operator()(char first, char second) const { inline char operator()(char first, char second) const {
return (parseDigit(first) << 4) | second; return (parseDigit(first) << 4) | parseDigit(second);
} }
}; };
......
...@@ -111,14 +111,30 @@ class ParserRef { ...@@ -111,14 +111,30 @@ class ParserRef {
// from becoming ridiculous. Using too many of them can hurt performance, though. // from becoming ridiculous. Using too many of them can hurt performance, though.
public: public:
ParserRef(): parser(nullptr), wrapper(nullptr) {}
// Constructs an unbound ParserRef: both the parser pointer and the wrapper pointer are null.
// It must be bound (by assignment) before being invoked, since operator() dereferences
// `wrapper`.
ParserRef(const ParserRef&) = default;
ParserRef(ParserRef&&) = default;
ParserRef& operator=(const ParserRef& other) = default;
ParserRef& operator=(ParserRef&& other) = default;
// Copying or moving a ParserRef copies its two pointers; the resulting object refers to the same
// underlying parser as the source.
template <typename Other> template <typename Other>
constexpr ParserRef(Other& other) constexpr ParserRef(Other&& other)
: parser(&other), wrapper(WrapperImplInstance<Other>::instance) {} : parser(&other), wrapper(&WrapperImplInstance<Decay<Other>>::instance) {
static_assert(kj::isReference<Other>(), "ParseRef should not be assigned to a temporary.");
}
// Rebinds this ParserRef to `other`: stores the address of `other` and a pointer to the wrapper
// instance for its decayed type, then returns *this.
//
// `Other&&` is a forwarding reference, so `Other` is deduced as a reference type only when the
// argument is an lvalue.  The static_assert therefore rejects temporaries at compile time; only
// the address of `other` is stored, which would dangle once a temporary is destroyed.
template <typename Other>
inline ParserRef& operator=(Other&& other) {
static_assert(kj::isReference<Other>(), "ParseRef should not be assigned to a temporary.");
parser = &other;
wrapper = &WrapperImplInstance<Decay<Other>>::instance;
return *this;
}
KJ_ALWAYS_INLINE(Maybe<Output> operator()(Input& input) const) { KJ_ALWAYS_INLINE(Maybe<Output> operator()(Input& input) const) {
// Always inline in the hopes that this allows branch prediction to kick in so the virtual call // Always inline in the hopes that this allows branch prediction to kick in so the virtual call
// doesn't hurt so much. // doesn't hurt so much.
return wrapper.parse(parser, input); return wrapper->parse(parser, input);
} }
private: private:
...@@ -137,7 +153,7 @@ private: ...@@ -137,7 +153,7 @@ private:
}; };
const void* parser; const void* parser;
const Wrapper& wrapper; const Wrapper* wrapper;
}; };
template <typename Input, typename Output> template <typename Input, typename Output>
...@@ -517,8 +533,8 @@ constexpr OneOf_<SubParsers...> oneOf(SubParsers&&... parsers) { ...@@ -517,8 +533,8 @@ constexpr OneOf_<SubParsers...> oneOf(SubParsers&&... parsers) {
template <typename Position> template <typename Position>
struct Span { struct Span {
public: public:
inline const Position& begin() { return begin_; } inline const Position& begin() const { return begin_; }
inline const Position& end() { return end_; } inline const Position& end() const { return end_; }
Span() = default; Span() = default;
inline constexpr Span(Position&& begin, Position&& end): begin_(mv(begin)), end_(mv(end)) {} inline constexpr Span(Position&& begin, Position&& end): begin_(mv(begin)), end_(mv(end)) {}
......
...@@ -180,6 +180,10 @@ class Tuple<> { ...@@ -180,6 +180,10 @@ class Tuple<> {
// Tuple<>() is constexpr. // Tuple<>() is constexpr.
}; };
template <typename T>
class Tuple<T>;
// Single-element tuple should never be used. The public API should ensure this.
//
// NOTE(review): this specialization is only declared here; no definition is visible in this
// hunk.  Presumably it is left undefined on purpose so that instantiating Tuple<T> with one
// element fails to compile -- confirm against the rest of the header.
template <size_t index, typename... T> template <size_t index, typename... T>
inline TypeByIndex<index, T...>& getImpl(Tuple<T...>& tuple) { inline TypeByIndex<index, T...>& getImpl(Tuple<T...>& tuple) {
// Get member of a Tuple by index, e.g. `get<2>(myTuple)`. // Get member of a Tuple by index, e.g. `get<2>(myTuple)`.
...@@ -210,6 +214,10 @@ inline T&& getImpl(T&& value) { ...@@ -210,6 +214,10 @@ inline T&& getImpl(T&& value) {
template <typename Func, typename SoFar, typename... T> template <typename Func, typename SoFar, typename... T>
struct ExpandAndApplyResult_; struct ExpandAndApplyResult_;
// Template which computes the return type of applying Func to T... after flattening tuples.
// SoFar starts as Tuple<> and accumulates the flattened parameter types -- so after this template
// is recursively expanded, T... is empty and SoFar is a Tuple containing all the parameters.
template <typename Func, typename First, typename... Rest, typename... T> template <typename Func, typename First, typename... Rest, typename... T>
struct ExpandAndApplyResult_<Func, Tuple<T...>, First, Rest...> struct ExpandAndApplyResult_<Func, Tuple<T...>, First, Rest...>
: public ExpandAndApplyResult_<Func, Tuple<T..., First>, Rest...> {}; : public ExpandAndApplyResult_<Func, Tuple<T..., First>, Rest...> {};
......
...@@ -75,11 +75,18 @@ public: ...@@ -75,11 +75,18 @@ public:
builder.add(kj::fwd<Params>(params)...); builder.add(kj::fwd<Params>(params)...);
} }
template <typename Iterator>
inline void addAll(Iterator begin, Iterator end) {
  // Appends every element of [begin, end).  The capacity check is made once for the whole
  // range, before any element is added.
  const size_t total = builder.size() + (end - begin);
  if (builder.capacity() < total) {
    grow(total);
  }
  builder.addAll(begin, end);
}
private: private:
ArrayBuilder<T> builder; ArrayBuilder<T> builder;
void grow() { void grow(size_t minCapacity = 0) {
setCapacity(capacity() == 0 ? 4 : capacity() * 2); setCapacity(kj::max(minCapacity, capacity() == 0 ? 4 : capacity() * 2));
} }
void setCapacity(size_t newSize) { void setCapacity(size_t newSize) {
ArrayBuilder<T> newBuilder = heapArrayBuilder<T>(newSize); ArrayBuilder<T> newBuilder = heapArrayBuilder<T>(newSize);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment