Commit 5cc86aa6 authored by Kenton Varda

Split char parser into separate module.

parent 2e71ad16
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "char.h"
#include "../string.h"
#include <gtest/gtest.h>
namespace kj {
namespace parse {
namespace {
typedef IteratorInput<char, const char*> Input;
typedef Span<const char*> TestLocation;
TEST(CharParsers, CharRange) {
  // charRange() must accept both endpoints and interior characters of the range, and reject
  // characters outside of it.
  constexpr auto parser = charRange('a', 'z');

  // Characters inside ['a', 'z']: the parser yields the character and consumes it.
  const char* const accepted[] = {"a", "n", "z"};
  for (const char* sample: accepted) {
    StringPtr text = sample;
    Input input(text.begin(), text.end());
    Maybe<char> result = parser(input);
    KJ_IF_MAYBE(value, result) {
      EXPECT_EQ(sample[0], *value);
    } else {
      ADD_FAILURE() << "Expected parse result, got null.";
    }
    EXPECT_TRUE(input.atEnd());
  }

  // Characters outside the range: '`' and '{' are the ASCII neighbours of 'a' and 'z', and 'A'
  // checks that case matters.  The parser must fail without consuming anything.
  const char* const rejected[] = {"`", "{", "A"};
  for (const char* sample: rejected) {
    StringPtr text = sample;
    Input input(text.begin(), text.end());
    Maybe<char> result = parser(input);
    EXPECT_TRUE(result == nullptr);
    EXPECT_FALSE(input.atEnd());
  }
}
TEST(CharParsers, AnyChar) {
  // anyChar() must accept exactly the characters listed in its argument.
  constexpr auto parser = anyChar("axn2B");

  // Listed characters: the parser yields the character and consumes it.
  const char* const accepted[] = {"a", "n", "B"};
  for (const char* sample: accepted) {
    StringPtr text = sample;
    Input input(text.begin(), text.end());
    Maybe<char> result = parser(input);
    KJ_IF_MAYBE(value, result) {
      EXPECT_EQ(sample[0], *value);
    } else {
      ADD_FAILURE() << "Expected parse result, got null.";
    }
    EXPECT_TRUE(input.atEnd());
  }

  // Unlisted characters, including the opposite-case forms of listed ones ("b" vs. 'B', "A"
  // vs. 'a'): the parser must fail without consuming anything.
  const char* const rejected[] = {"b", "j", "A"};
  for (const char* sample: rejected) {
    StringPtr text = sample;
    Input input(text.begin(), text.end());
    Maybe<char> result = parser(input);
    EXPECT_TRUE(result == nullptr);
    EXPECT_FALSE(input.atEnd());
  }
}
TEST(CharParsers, CharGroupCombo) {
  // A group built by chaining orRange()/orAny() must match the union of all its parts when
  // repeated with many().
  constexpr auto parser =
      many(charRange('0', '9').orRange('a', 'z').orRange('A', 'Z').orAny("-_"));

  {
    // '@' is not in the group, so the match is expected to end just before it.
    StringPtr source = "foo1-bar2_baz3@qux";
    Input input(source.begin(), source.end());
    Maybe<Array<char>> parsed = parser(input);

    KJ_IF_MAYBE(chars, parsed) {
      EXPECT_EQ("foo1-bar2_baz3", str(*chars));
    } else {
      ADD_FAILURE() << "Expected parse result, got null.";
    }

    // "@qux" must be left unconsumed.
    EXPECT_FALSE(input.atEnd());
  }
}
TEST(CharParsers, ExactChar) {
  // exactChar<c>() must match only the character `c`.
  //
  // This test lives in the char parser test file, so it belongs to the CharParsers suite like
  // its siblings rather than to CommonParsers.
  constexpr auto parser = exactChar<'a'>();

  {
    // Matching character: parse succeeds and the character is consumed.
    StringPtr text = "a";
    Input input(text.begin(), text.end());
    EXPECT_TRUE(parser(input) != nullptr);
    EXPECT_TRUE(input.atEnd());
  }

  {
    // Any other character: parse fails and nothing is consumed.
    StringPtr text = "b";
    Input input(text.begin(), text.end());
    EXPECT_TRUE(parser(input) == nullptr);
    EXPECT_FALSE(input.atEnd());
  }
}
} // namespace
} // namespace parse
} // namespace kj
// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef KJ_PARSE_CHAR_H_
#define KJ_PARSE_CHAR_H_
#include "common.h"
namespace kj {
namespace parse {
// =======================================================================================
class CharGroup_ {
  // A parser which matches a single character belonging to a set of characters.
  //
  // The set is stored as a 256-bit bitmap split across four 64-bit words:  the bit for
  // character `c` (taken as an `unsigned char`) is bit `c % 64` of `bits[c / 64]`.  All of the
  // set-building methods are constexpr and return a new group, leaving `*this` unchanged.

public:
  constexpr CharGroup_(): bits{0, 0, 0, 0} {}
  // Constructs an empty group, which matches nothing.

  constexpr CharGroup_ orRange(unsigned char first, unsigned char last) const {
    // Returns this group plus every character in the inclusive range [first, last].
    //
    // For each word, oneBits(last + 1 - base) selects everything up to and including `last`,
    // and ~oneBits(first - base) strips everything below `first`, where `base` is the value of
    // the first character covered by that word (0, 64, 128, 192).
    return CharGroup_(bits[0] | (oneBits(last +   1) & ~oneBits(first      )),
                      bits[1] | (oneBits(last -  63) & ~oneBits(first -  64)),
                      bits[2] | (oneBits(last - 127) & ~oneBits(first - 128)),
                      bits[3] | (oneBits(last - 191) & ~oneBits(first - 192)));
  }

  constexpr CharGroup_ orAny(const char* chars) const {
    // Returns this group plus every character of the NUL-terminated string `chars`.  Written
    // recursively so that it is a single return statement.
    return *chars == 0 ? *this : orChar(*chars).orAny(chars + 1);
  }

  constexpr CharGroup_ orChar(unsigned char c) const {
    // Returns this group plus the single character `c`.
    //
    // bit() yields zero for an out-of-range index, so exactly one of the four words gains a
    // bit.  The last word covers characters 192..255 and therefore must be offset by 192.
    return CharGroup_(bits[0] | bit(c),
                      bits[1] | bit(c - 64),
                      bits[2] | bit(c - 128),
                      bits[3] | bit(c - 192));
  }

  constexpr CharGroup_ invert() const {
    // Returns the complement of this group:  matches exactly the characters this one does not.
    return CharGroup_(~bits[0], ~bits[1], ~bits[2], ~bits[3]);
  }

  template <typename Input>
  Maybe<char> operator()(Input& input) const {
    // Parses one character.  If the current input character is in the group, consumes it and
    // returns it; otherwise returns null and leaves the input position unchanged.

    // Nothing can match at end of input; checking first avoids asking for a current character
    // when none remains.
    if (input.atEnd()) return nullptr;

    unsigned char c = input.current();
    if ((bits[c / 64] & (1ull << (c % 64))) != 0) {
      input.next();
      return c;
    } else {
      return nullptr;
    }
  }

private:
  typedef unsigned long long Bits64;

  constexpr CharGroup_(Bits64 a, Bits64 b, Bits64 c, Bits64 d): bits{a, b, c, d} {}

  Bits64 bits[4];

  static constexpr Bits64 oneBits(int count) {
    // Returns a word whose low `count` bits are set, clamping `count` to [0, 64].
    //
    // The shift is performed on an unsigned value:  with a signed `1ll`, count == 63 would
    // compute (1ll << 63) - 1, which overflows.
    return count <= 0 ? 0ull : count >= 64 ? ~0ull : ((1ull << count) - 1);
  }

  static constexpr Bits64 bit(int index) {
    // Returns a word with only bit `index` set, or zero if `index` is outside [0, 63].
    return index < 0 ? 0ull : index >= 64 ? 0ull : (1ull << index);
  }
};
constexpr CharGroup_ charRange(char first, char last) {
  // Builds a parser matching a single character between `first` and `last`, both ends
  // included.  `charRange('a', 'z')`, for instance, matches any lower-case letter.  The matched
  // character is the parser's result.
  //
  // Further characters can be added by chaining methods on the returned object.  This example
  // accepts any letter plus '_', '+', '-', and '.':
  //
  //     charRange('a', 'z').orRange('A', 'Z').orChar('_').orAny("+-.")
  //
  // Calling `.invert()` on the result matches the complementary set of characters instead.

  return CharGroup_().orRange(static_cast<unsigned char>(first),
                              static_cast<unsigned char>(last));
}
constexpr CharGroup_ anyChar(const char* chars) {
  // Builds a parser matching a single character that appears anywhere in the NUL-terminated
  // string `chars` (normally a string literal).  The result has the same type as the one
  // returned by `charRange()`, so the same chaining methods are available on it.

  return CharGroup_{}.orAny(chars);
}
template <char c>
constexpr ExactlyConst_<char, c> exactChar() {
  // Builds a parser which matches only the character supplied as the template argument.  The
  // parser produces no result value.

  using Parser = ExactlyConst_<char, c>;
  return Parser();
}
} // namespace parse
} // namespace kj
#endif // KJ_PARSE_CHAR_H_
...@@ -78,24 +78,6 @@ TEST(CommonParsers, ExactlyConstParser) { ...@@ -78,24 +78,6 @@ TEST(CommonParsers, ExactlyConstParser) {
EXPECT_TRUE(input.atEnd()); EXPECT_TRUE(input.atEnd());
} }
TEST(CommonParsers, ExactChar) {
constexpr auto parser = exactChar<'a'>();
{
StringPtr text = "a";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) != nullptr);
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "b";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) == nullptr);
EXPECT_FALSE(input.atEnd());
}
}
TEST(CommonParsers, ConstResultParser) { TEST(CommonParsers, ConstResultParser) {
auto parser = constResult(exactly('o'), 123); auto parser = constResult(exactly('o'), 123);
...@@ -383,151 +365,6 @@ TEST(CommonParsers, AcceptIfParser) { ...@@ -383,151 +365,6 @@ TEST(CommonParsers, AcceptIfParser) {
} }
} }
TEST(CommonParsers, CharRange) {
constexpr auto parser = charRange('a', 'z');
{
StringPtr text = "a";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('a', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "n";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('n', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "z";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('z', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "`";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
{
StringPtr text = "{";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
{
StringPtr text = "A";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
}
TEST(CommonParsers, AnyChar) {
constexpr auto parser = anyChar("axn2B");
{
StringPtr text = "a";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('a', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "n";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('n', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "B";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ('B', *value);
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "b";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
{
StringPtr text = "j";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
{
StringPtr text = "A";
Input input(text.begin(), text.end());
Maybe<char> result = parser(input);
EXPECT_TRUE(result == nullptr);
EXPECT_FALSE(input.atEnd());
}
}
TEST(CommonParsers, CharGroupCombo) {
constexpr auto parser =
many(charRange('0', '9').orRange('a', 'z').orRange('A', 'Z').orAny("-_"));
{
StringPtr text = "foo1-bar2_baz3@qux";
Input input(text.begin(), text.end());
Maybe<Array<char>> result = parser(input);
KJ_IF_MAYBE(value, result) {
EXPECT_EQ("foo1-bar2_baz3", str(*value));
} else {
ADD_FAILURE() << "Expected parse result, got null.";
}
EXPECT_FALSE(input.atEnd());
}
}
} // namespace } // namespace
} // namespace parse } // namespace parse
} // namespace kj } // namespace kj
...@@ -52,8 +52,6 @@ class IteratorInput { ...@@ -52,8 +52,6 @@ class IteratorInput {
// A parser input implementation based on an iterator range. // A parser input implementation based on an iterator range.
public: public:
typedef Element ElementType;
IteratorInput(Iterator begin, Iterator end) IteratorInput(Iterator begin, Iterator end)
: parent(nullptr), pos(begin), end(end), best(begin) {} : parent(nullptr), pos(begin), end(end), best(begin) {}
IteratorInput(IteratorInput& parent) IteratorInput(IteratorInput& parent)
...@@ -63,6 +61,7 @@ public: ...@@ -63,6 +61,7 @@ public:
parent->best = kj::max(kj::max(pos, best), parent->best); parent->best = kj::max(kj::max(pos, best), parent->best);
} }
} }
KJ_DISALLOW_COPY(IteratorInput);
void advanceParent() { void advanceParent() {
parent->pos = pos; parent->pos = pos;
...@@ -74,7 +73,7 @@ public: ...@@ -74,7 +73,7 @@ public:
return *pos; return *pos;
} }
const Element& consume() { const Element& consume() {
assert(!atEnd()); KJ_IREQUIRE(!atEnd());
return *pos++; return *pos++;
} }
void next() { void next() {
...@@ -91,10 +90,6 @@ private: ...@@ -91,10 +90,6 @@ private:
Iterator pos; Iterator pos;
Iterator end; Iterator end;
Iterator best; // furthest we got with any sub-input Iterator best; // furthest we got with any sub-input
IteratorInput(IteratorInput&&) = delete;
IteratorInput& operator=(const IteratorInput&) = delete;
IteratorInput& operator=(IteratorInput&&) = delete;
}; };
template <typename T> struct OutputType_; template <typename T> struct OutputType_;
...@@ -215,11 +210,6 @@ constexpr ExactlyConst_<T, expected> exactlyConst() { ...@@ -215,11 +210,6 @@ constexpr ExactlyConst_<T, expected> exactlyConst() {
return ExactlyConst_<T, expected>(); return ExactlyConst_<T, expected>();
} }
template <char c>
constexpr ExactlyConst_<char, c> exactChar() {
return ExactlyConst_<char, c>();
}
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// constResult() // constResult()
...@@ -568,67 +558,6 @@ constexpr EndOfInput_ endOfInput() { ...@@ -568,67 +558,6 @@ constexpr EndOfInput_ endOfInput() {
return EndOfInput_(); return EndOfInput_();
} }
// -------------------------------------------------------------------
// CharGroup
class CharGroup_ {
public:
constexpr CharGroup_(): bits{0, 0, 0, 0} {}
constexpr CharGroup_ orRange(unsigned char first, unsigned char last) const {
return CharGroup_(bits[0] | (oneBits(last + 1) & ~oneBits(first )),
bits[1] | (oneBits(last - 63) & ~oneBits(first - 64)),
bits[2] | (oneBits(last - 127) & ~oneBits(first - 128)),
bits[3] | (oneBits(last - 191) & ~oneBits(first - 192)));
}
constexpr CharGroup_ orAny(const char* chars) const {
return *chars == 0 ? *this : orChar(*chars).orAny(chars + 1);
}
constexpr CharGroup_ orChar(unsigned char c) const {
return CharGroup_(bits[0] | bit(c),
bits[1] | bit(c - 64),
bits[2] | bit(c - 128),
bits[3] | bit(c - 256));
}
constexpr CharGroup_ invert() const {
return CharGroup_(~bits[0], ~bits[1], ~bits[2], ~bits[3]);
}
template <typename Input>
Maybe<char> operator()(Input& input) const {
unsigned char c = input.current();
if ((bits[c / 64] & (1ll << (c % 64))) != 0) {
input.next();
return c;
} else {
return nullptr;
}
}
private:
typedef unsigned long long Bits64;
constexpr CharGroup_(Bits64 a, Bits64 b, Bits64 c, Bits64 d): bits{a, b, c, d} {}
Bits64 bits[4];
static constexpr Bits64 oneBits(int count) {
return count <= 0 ? 0ll : count >= 64 ? -1ll : ((1ll << count) - 1);
}
static constexpr Bits64 bit(int index) {
return index < 0 ? 0 : index >= 64 ? 0 : (1ll << index);
}
};
constexpr CharGroup_ charRange(char first, char last) {
return CharGroup_().orRange(first, last);
}
constexpr CharGroup_ anyChar(const char* chars) {
return CharGroup_().orAny(chars);
}
} // namespace parse } // namespace parse
} // namespace kj } // namespace kj
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment