Commit 05c79891 authored by miloyip's avatar miloyip

Add parenthesis support in regex

parent 0bef29a5
......@@ -90,6 +90,12 @@ public:
}
private:
enum Operator {
kConcatenation,
kAlternation,
kLeftParenthesis,
};
struct State {
SizeType out; //!< Equals to kInvalid for match
SizeType out1; //!< Equals to non-kInvalid for split
......@@ -155,52 +161,96 @@ private:
void Parse(InputStream& is) {
Allocator allocator;
Stack<Allocator> operandStack(&allocator, 256); // Frag
Stack<Allocator> operatorStack(&allocator, 256); // char
Stack<Allocator> operatorStack(&allocator, 256); // Operator
Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
*atomCountStack.template Push<unsigned>() = 0;
unsigned codepoint;
bool previousOperand = false;
while (Encoding::Decode(is, &codepoint) && codepoint != 0) {
switch (codepoint) {
case '|':
*operatorStack.template Push<char>() = '|';
previousOperand = false;
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
if (!Eval(operandStack, operatorStack))
return;
*operatorStack.template Push<Operator>() = kAlternation;
*atomCountStack.template Top<unsigned>() = 0;
break;
case '(':
*operatorStack.template Push<Operator>() = kLeftParenthesis;
*atomCountStack.template Push<unsigned>() = 0;
break;
case ')':
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
if (!Eval(operandStack, operatorStack))
return;
if (operatorStack.Empty())
return;
operatorStack.template Pop<Operator>(1);
atomCountStack.template Pop<unsigned>(1);
ImplicitConcatenation(atomCountStack, operatorStack);
break;
default:
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
// concatenation with previous operand
if (previousOperand) {
Frag* e = operandStack.template Top<Frag>();
Patch(e->out, s);
e->out = s;
}
else
*operandStack.template Push<Frag>() = Frag(s, s);
previousOperand = true;
*operandStack.template Push<Frag>() = Frag(s, s);
ImplicitConcatenation(atomCountStack, operatorStack);
}
}
while (!operatorStack.Empty()) {
switch (*operatorStack.template Pop<char>(1)) {
case '|':
{
Frag e2 = *operandStack.template Pop<Frag>(1);
Frag e1 = *operandStack.template Pop<Frag>(1);
SizeType s = NewState(e1.start, e2.start, 0);
*operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out));
}
break;
}
}
while (!operatorStack.Empty())
if (!Eval(operandStack, operatorStack))
return;
// Link the operand to matching state.
if (operandStack.GetSize() == sizeof(Frag)) {
Frag* e = operandStack.template Pop<Frag>(1);
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
root_ = e->start;
// printf("root: %d\n", root_);
// for (SizeType i = 0; i < stateCount_ ; i++) {
// State& s = GetState(i);
// printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
// }
// printf("\n");
}
}
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
switch (*operatorStack.template Pop<Operator>(1)) {
case kConcatenation:
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
Frag e2 = *operandStack.template Pop<Frag>(1);
Frag e1 = *operandStack.template Pop<Frag>(1);
Patch(e1.out, e2.start);
*operandStack.template Push<Frag>() = Frag(e1.start, e2.out);
return true;
}
return false;
case kAlternation:
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
Frag e2 = *operandStack.template Pop<Frag>(1);
Frag e1 = *operandStack.template Pop<Frag>(1);
SizeType s = NewState(e1.start, e2.start, 0);
*operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out));
return true;
}
return false;
default:
return false;
}
}
void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
if (*atomCountStack.template Top<unsigned>())
*operatorStack.template Push<Operator>() = kConcatenation;
(*atomCountStack.template Top<unsigned>())++;
}
Stack<Allocator> states_;
SizeType root_;
SizeType stateCount_;
......
......@@ -19,6 +19,7 @@ using namespace rapidjson::internal;
TEST(Regex, concatenation) {
Regex re("abc");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("a"));
......@@ -27,24 +28,59 @@ TEST(Regex, concatenation) {
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, split) {
{
Regex re("abab|abbb");
EXPECT_TRUE(re.Match("abab"));
EXPECT_TRUE(re.Match("abbb"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("ababa"));
EXPECT_FALSE(re.Match("abb"));
EXPECT_FALSE(re.Match("abbbb"));
}
{
Regex re("a|b|c");
EXPECT_TRUE(re.Match("a"));
EXPECT_TRUE(re.Match("b"));
EXPECT_TRUE(re.Match("c"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("aa"));
EXPECT_FALSE(re.Match("ab"));
}
TEST(Regex, split1) {
Regex re("abab|abbb");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abab"));
EXPECT_TRUE(re.Match("abbb"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("ababa"));
EXPECT_FALSE(re.Match("abb"));
EXPECT_FALSE(re.Match("abbbb"));
}
TEST(Regex, split2) {
Regex re("a|b|c");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("a"));
EXPECT_TRUE(re.Match("b"));
EXPECT_TRUE(re.Match("c"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("aa"));
EXPECT_FALSE(re.Match("ab"));
}
TEST(Regex, parenthesis1) {
Regex re("(ab)c");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("a"));
EXPECT_FALSE(re.Match("b"));
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, parenthesis2) {
Regex re("a(bc)");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("a"));
EXPECT_FALSE(re.Match("b"));
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, parenthesis3) {
Regex re("(a|b)(c|d)");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("ac"));
EXPECT_TRUE(re.Match("ad"));
EXPECT_TRUE(re.Match("bc"));
EXPECT_TRUE(re.Match("bd"));
EXPECT_FALSE(re.Match(""));
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("cd"));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment