Commit b9777edc authored by Kenton Varda's avatar Kenton Varda

exactString parser.

parent 364e3c4e
...@@ -32,6 +32,44 @@ namespace { ...@@ -32,6 +32,44 @@ namespace {
typedef IteratorInput<char, const char*> Input; typedef IteratorInput<char, const char*> Input;
typedef Span<const char*> TestLocation; typedef Span<const char*> TestLocation;
TEST(CharParsers, ExactChar) {
constexpr auto parser = exactChar<'a'>();
{
StringPtr text = "a";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) != nullptr);
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "b";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) == nullptr);
EXPECT_FALSE(input.atEnd());
}
}
TEST(CharParsers, ExactString) {
constexpr auto parser = exactString("foo");
{
StringPtr text = "foobar";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) != nullptr);
ASSERT_FALSE(input.atEnd());
EXPECT_EQ('b', input.current());
}
{
StringPtr text = "bar";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) == nullptr);
EXPECT_FALSE(input.atEnd());
EXPECT_EQ('b', input.current());
}
}
TEST(CharParsers, CharRange) { TEST(CharParsers, CharRange) {
constexpr auto parser = charRange('a', 'z'); constexpr auto parser = charRange('a', 'z');
...@@ -177,24 +215,6 @@ TEST(CharParsers, CharGroupCombo) { ...@@ -177,24 +215,6 @@ TEST(CharParsers, CharGroupCombo) {
} }
} }
TEST(CharParsers, ExactChar) {
constexpr auto parser = exactChar<'a'>();
{
StringPtr text = "a";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) != nullptr);
EXPECT_TRUE(input.atEnd());
}
{
StringPtr text = "b";
Input input(text.begin(), text.end());
EXPECT_TRUE(parser(input) == nullptr);
EXPECT_FALSE(input.atEnd());
}
}
TEST(CharParsers, Identifier) { TEST(CharParsers, Identifier) {
constexpr auto parser = identifier; constexpr auto parser = identifier;
......
...@@ -21,6 +21,9 @@ ...@@ -21,6 +21,9 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file contains parsers useful for character stream inputs, including parsers to parse
// common kinds of tokens like identifiers, numbers, and quoted strings.
#ifndef KJ_PARSE_CHAR_H_ #ifndef KJ_PARSE_CHAR_H_
#define KJ_PARSE_CHAR_H_ #define KJ_PARSE_CHAR_H_
...@@ -32,37 +35,73 @@ namespace kj { ...@@ -32,37 +35,73 @@ namespace kj {
namespace parse { namespace parse {
// ======================================================================================= // =======================================================================================
// Exact char/string.
class ExactString_ {
public:
constexpr inline ExactString_(const char* str): str(str) {}
template <typename Input>
Maybe<Tuple<>> operator()(Input& input) const {
const char* ptr = str;
while (*ptr != '\0') {
if (input.atEnd() || input.current() != *ptr) return nullptr;
input.next();
++ptr;
}
return Tuple<>();
}
private:
const char* str;
};
constexpr inline ExactString_ exactString(const char* str) {
return ExactString_(str);
}
template <char c>
constexpr ExactlyConst_<char, c> exactChar() {
// Returns a parser that matches exactly the character given by the template argument (returning
// no result).
return ExactlyConst_<char, c>();
}
// =======================================================================================
// Char ranges / sets
class CharGroup_ { class CharGroup_ {
public: public:
constexpr CharGroup_(): bits{0, 0, 0, 0} {} constexpr inline CharGroup_(): bits{0, 0, 0, 0} {}
constexpr CharGroup_ orRange(unsigned char first, unsigned char last) const { constexpr inline CharGroup_ orRange(unsigned char first, unsigned char last) const {
return CharGroup_(bits[0] | (oneBits(last + 1) & ~oneBits(first )), return CharGroup_(bits[0] | (oneBits(last + 1) & ~oneBits(first )),
bits[1] | (oneBits(last - 63) & ~oneBits(first - 64)), bits[1] | (oneBits(last - 63) & ~oneBits(first - 64)),
bits[2] | (oneBits(last - 127) & ~oneBits(first - 128)), bits[2] | (oneBits(last - 127) & ~oneBits(first - 128)),
bits[3] | (oneBits(last - 191) & ~oneBits(first - 192))); bits[3] | (oneBits(last - 191) & ~oneBits(first - 192)));
} }
constexpr CharGroup_ orAny(const char* chars) const { constexpr inline CharGroup_ orAny(const char* chars) const {
return *chars == 0 ? *this : orChar(*chars).orAny(chars + 1); return *chars == 0 ? *this : orChar(*chars).orAny(chars + 1);
} }
constexpr CharGroup_ orChar(unsigned char c) const { constexpr inline CharGroup_ orChar(unsigned char c) const {
return CharGroup_(bits[0] | bit(c), return CharGroup_(bits[0] | bit(c),
bits[1] | bit(c - 64), bits[1] | bit(c - 64),
bits[2] | bit(c - 128), bits[2] | bit(c - 128),
bits[3] | bit(c - 256)); bits[3] | bit(c - 256));
} }
constexpr CharGroup_ orGroup(CharGroup_ other) const { constexpr inline CharGroup_ orGroup(CharGroup_ other) const {
return CharGroup_(bits[0] | other.bits[0], return CharGroup_(bits[0] | other.bits[0],
bits[1] | other.bits[1], bits[1] | other.bits[1],
bits[2] | other.bits[2], bits[2] | other.bits[2],
bits[3] | other.bits[3]); bits[3] | other.bits[3]);
} }
constexpr CharGroup_ invert() const { constexpr inline CharGroup_ invert() const {
return CharGroup_(~bits[0], ~bits[1], ~bits[2], ~bits[3]); return CharGroup_(~bits[0], ~bits[1], ~bits[2], ~bits[3]);
} }
...@@ -81,18 +120,18 @@ public: ...@@ -81,18 +120,18 @@ public:
private: private:
typedef unsigned long long Bits64; typedef unsigned long long Bits64;
constexpr CharGroup_(Bits64 a, Bits64 b, Bits64 c, Bits64 d): bits{a, b, c, d} {} constexpr inline CharGroup_(Bits64 a, Bits64 b, Bits64 c, Bits64 d): bits{a, b, c, d} {}
Bits64 bits[4]; Bits64 bits[4];
static constexpr Bits64 oneBits(int count) { static constexpr inline Bits64 oneBits(int count) {
return count <= 0 ? 0ll : count >= 64 ? -1ll : ((1ll << count) - 1); return count <= 0 ? 0ll : count >= 64 ? -1ll : ((1ll << count) - 1);
} }
static constexpr Bits64 bit(int index) { static constexpr inline Bits64 bit(int index) {
return index < 0 ? 0 : index >= 64 ? 0 : (1ll << index); return index < 0 ? 0 : index >= 64 ? 0 : (1ll << index);
} }
}; };
constexpr CharGroup_ charRange(char first, char last) { constexpr inline CharGroup_ charRange(char first, char last) {
// Create a parser which accepts any character in the range from `first` to `last`, inclusive. // Create a parser which accepts any character in the range from `first` to `last`, inclusive.
// For example: `charRange('a', 'z')` matches all lower-case letters. The parser's result is the // For example: `charRange('a', 'z')` matches all lower-case letters. The parser's result is the
// character matched. // character matched.
...@@ -107,7 +146,7 @@ constexpr CharGroup_ charRange(char first, char last) { ...@@ -107,7 +146,7 @@ constexpr CharGroup_ charRange(char first, char last) {
return CharGroup_().orRange(first, last); return CharGroup_().orRange(first, last);
} }
constexpr CharGroup_ anyOfChars(const char* chars) { constexpr inline CharGroup_ anyOfChars(const char* chars) {
// Returns a parser that accepts any of the characters in the given string (which should usually // Returns a parser that accepts any of the characters in the given string (which should usually
// be a literal). The returned parser is of the same type as returned by `charRange()` -- see // be a literal). The returned parser is of the same type as returned by `charRange()` -- see
// that function for more info. // that function for more info.
...@@ -115,13 +154,6 @@ constexpr CharGroup_ anyOfChars(const char* chars) { ...@@ -115,13 +154,6 @@ constexpr CharGroup_ anyOfChars(const char* chars) {
return CharGroup_().orAny(chars); return CharGroup_().orAny(chars);
} }
template <char c>
constexpr ExactlyConst_<char, c> exactChar() {
// Returns a parser that matches exactly the character given by the template argument (returning
// no result).
return ExactlyConst_<char, c>();
}
// ======================================================================================= // =======================================================================================
namespace _ { // private namespace _ { // private
...@@ -135,7 +167,7 @@ struct ArrayToString { ...@@ -135,7 +167,7 @@ struct ArrayToString {
} // namespace _ (private) } // namespace _ (private)
template <typename SubParser> template <typename SubParser>
constexpr auto charsToString(SubParser&& subParser) constexpr inline auto charsToString(SubParser&& subParser)
-> decltype(transform(kj::fwd<SubParser>(subParser), _::ArrayToString())) { -> decltype(transform(kj::fwd<SubParser>(subParser), _::ArrayToString())) {
// Wraps a parser that returns Array<char> such that it returns String instead. // Wraps a parser that returns Array<char> such that it returns String instead.
return parse::transform(kj::fwd<SubParser>(subParser), _::ArrayToString()); return parse::transform(kj::fwd<SubParser>(subParser), _::ArrayToString());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment