Commit a3869342 authored by miloyip

Add ?*+ to regex

parent 05c79891
......@@ -54,11 +54,12 @@ public:
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
std::memset(stateSet, 0, stateSetSize);
AddState(stateSet, *current, root_);
unsigned codepoint;
while (!current->Empty() && Encoding::Decode(is, &codepoint) && codepoint != 0) {
std::memset(stateSet, 0, stateSetSize);
next->Clear();
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
const State& sr = GetState(*s);
// if (sr.out != kRegexInvalidState)
......@@ -70,8 +71,6 @@ public:
Stack<Allocator>* temp = current;
current = next;
next = temp;
std::memset(stateSet, 0, stateSetSize);
next->Clear();
// printf("\n");
}
......@@ -91,9 +90,12 @@ public:
private:
// Operators held on the parser's operator stack while a pattern is compiled.
//
// The enumerator order is significant: the (commented-out) trace in Eval()
// indexes the string "?*+.|(" by enumerator value, so the i-th enumerator
// below must correspond to the i-th character of that string.
//
// kLeftParenthesis is declared exactly once, last, and without a trailing
// comma; a second declaration of the same enumerator is a redefinition error.
enum Operator {
    kZeroOrOne,       // '?'  - pushed when the parser reads '?'
    kZeroOrMore,      // '*'  - pushed when the parser reads '*'
    kOneOrMore,       // '+'  - pushed when the parser reads '+'
    kConcatenation,   // '.'  in the trace string; handled by Eval()
    kAlternation,     // '|'  in the trace string
    kLeftParenthesis  // '('  in the trace string
};
struct State {
......@@ -193,6 +195,24 @@ private:
ImplicitConcatenation(atomCountStack, operatorStack);
break;
case '?':
*operatorStack.template Push<Operator>() = kZeroOrOne;
if (!Eval(operandStack, operatorStack))
return;
break;
case '*':
*operatorStack.template Push<Operator>() = kZeroOrMore;
if (!Eval(operandStack, operatorStack))
return;
break;
case '+':
*operatorStack.template Push<Operator>() = kOneOrMore;
if (!Eval(operandStack, operatorStack))
return;
break;
default:
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
*operandStack.template Push<Frag>() = Frag(s, s);
......@@ -209,16 +229,19 @@ private:
Frag* e = operandStack.template Pop<Frag>(1);
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
root_ = e->start;
// printf("root: %d\n", root_);
// for (SizeType i = 0; i < stateCount_ ; i++) {
// State& s = GetState(i);
// printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
// }
// printf("\n");
#if 0
printf("root: %d\n", root_);
for (SizeType i = 0; i < stateCount_ ; i++) {
State& s = GetState(i);
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
}
printf("\n");
#endif
}
}
bool Eval(Stack<Allocator>& operandStack, Stack<Allocator>& operatorStack) {
// printf("Eval %c\n", "?*+.|("[*operatorStack.template Top<Operator>()]);
switch (*operatorStack.template Pop<Operator>(1)) {
case kConcatenation:
if (operandStack.GetSize() >= sizeof(Frag) * 2) {
......@@ -240,6 +263,35 @@ private:
}
return false;
case kZeroOrOne:
if (operandStack.GetSize() >= sizeof(Frag)) {
Frag e = *operandStack.template Pop<Frag>(1);
SizeType s = NewState(kRegexInvalidState, e.start, 0);
*operandStack.template Push<Frag>() = Frag(s, Append(e.out, s));
return true;
}
return false;
case kZeroOrMore:
if (operandStack.GetSize() >= sizeof(Frag)) {
Frag e = *operandStack.template Pop<Frag>(1);
SizeType s = NewState(kRegexInvalidState, e.start, 0);
Patch(e.out, s);
*operandStack.template Push<Frag>() = Frag(s, s);
return true;
}
return false;
case kOneOrMore:
if (operandStack.GetSize() >= sizeof(Frag)) {
Frag e = *operandStack.template Pop<Frag>(1);
SizeType s = NewState(kRegexInvalidState, e.start, 0);
Patch(e.out, s);
*operandStack.template Push<Frag>() = Frag(e.start, s);
return true;
}
return false;
default:
return false;
}
......
......@@ -17,7 +17,7 @@
using namespace rapidjson::internal;
TEST(Regex, concatenation) {
TEST(Regex, Concatenation) {
Regex re("abc");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
......@@ -28,7 +28,7 @@ TEST(Regex, concatenation) {
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, split1) {
TEST(Regex, Alternation1) {
Regex re("abab|abbb");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abab"));
......@@ -40,7 +40,7 @@ TEST(Regex, split1) {
EXPECT_FALSE(re.Match("abbbb"));
}
TEST(Regex, split2) {
TEST(Regex, Alternation2) {
Regex re("a|b|c");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("a"));
......@@ -51,7 +51,7 @@ TEST(Regex, split2) {
EXPECT_FALSE(re.Match("ab"));
}
TEST(Regex, parenthesis1) {
TEST(Regex, Parenthesis1) {
Regex re("(ab)c");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
......@@ -62,7 +62,7 @@ TEST(Regex, parenthesis1) {
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, parenthesis2) {
TEST(Regex, Parenthesis2) {
Regex re("a(bc)");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("abc"));
......@@ -73,7 +73,7 @@ TEST(Regex, parenthesis2) {
EXPECT_FALSE(re.Match("abcd"));
}
TEST(Regex, parenthesis3) {
TEST(Regex, Parenthesis3) {
Regex re("(a|b)(c|d)");
ASSERT_TRUE(re.IsValid());
EXPECT_TRUE(re.Match("ac"));
......@@ -84,3 +84,138 @@ TEST(Regex, parenthesis3) {
EXPECT_FALSE(re.Match("ab"));
EXPECT_FALSE(re.Match("cd"));
}
// "a?" : an optional single 'a'.
TEST(Regex, ZeroOrOne1) {
    Regex regex("a?");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: zero or one occurrence.
    EXPECT_TRUE(regex.Match(""));
    EXPECT_TRUE(regex.Match("a"));

    // Rejected: more than one occurrence.
    EXPECT_FALSE(regex.Match("aa"));
}
// "a?b" : an optional 'a' followed by a mandatory 'b'.
TEST(Regex, ZeroOrOne2) {
    Regex regex("a?b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: 'b' with or without the leading 'a'.
    EXPECT_TRUE(regex.Match("b"));
    EXPECT_TRUE(regex.Match("ab"));

    // Rejected: missing 'b', repeated characters, or wrong order.
    EXPECT_FALSE(regex.Match("a"));
    EXPECT_FALSE(regex.Match("aa"));
    EXPECT_FALSE(regex.Match("bb"));
    EXPECT_FALSE(regex.Match("ba"));
}
// "ab?" : a mandatory 'a' followed by an optional 'b'.
TEST(Regex, ZeroOrOne3) {
    Regex regex("ab?");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: 'a' with or without the trailing 'b'.
    EXPECT_TRUE(regex.Match("a"));
    EXPECT_TRUE(regex.Match("ab"));

    // Rejected: missing 'a', repeated characters, or wrong order.
    EXPECT_FALSE(regex.Match("b"));
    EXPECT_FALSE(regex.Match("aa"));
    EXPECT_FALSE(regex.Match("bb"));
    EXPECT_FALSE(regex.Match("ba"));
}
// "a?b?" : two consecutive optional atoms.
TEST(Regex, ZeroOrOne4) {
    Regex regex("a?b?");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: every combination of present/absent 'a' and 'b', in order.
    EXPECT_TRUE(regex.Match(""));
    EXPECT_TRUE(regex.Match("a"));
    EXPECT_TRUE(regex.Match("b"));
    EXPECT_TRUE(regex.Match("ab"));

    // Rejected: repeats, reversed order, or trailing extra input.
    EXPECT_FALSE(regex.Match("aa"));
    EXPECT_FALSE(regex.Match("bb"));
    EXPECT_FALSE(regex.Match("ba"));
    EXPECT_FALSE(regex.Match("abc"));
}
// "a(ab)?b" : '?' applied to a parenthesized group.
TEST(Regex, ZeroOrOne5) {
    Regex regex("a(ab)?b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: the group "ab" is absent or present exactly once.
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aabb"));

    // Rejected: only part of the group is present.
    EXPECT_FALSE(regex.Match("aab"));
    EXPECT_FALSE(regex.Match("abb"));
}
// "a*" : any number of 'a', including none.
TEST(Regex, ZeroOrMore1) {
    Regex regex("a*");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: zero, one and two repetitions.
    EXPECT_TRUE(regex.Match(""));
    EXPECT_TRUE(regex.Match("a"));
    EXPECT_TRUE(regex.Match("aa"));

    // Rejected: any input containing a character other than 'a'.
    EXPECT_FALSE(regex.Match("b"));
    EXPECT_FALSE(regex.Match("ab"));
}
// "a*b" : any number of 'a' followed by exactly one 'b'.
TEST(Regex, ZeroOrMore2) {
    Regex regex("a*b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: the 'b' preceded by zero, one or two 'a'.
    EXPECT_TRUE(regex.Match("b"));
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aab"));

    // Rejected: no 'b' at all, or more than one 'b'.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("bb"));
}
// "a*b*" : a run of 'a' followed by a run of 'b', either of which may be empty.
TEST(Regex, ZeroOrMore3) {
    Regex regex("a*b*");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: any lengths of the two runs, as long as all 'a' precede all 'b'.
    EXPECT_TRUE(regex.Match(""));
    EXPECT_TRUE(regex.Match("a"));
    EXPECT_TRUE(regex.Match("aa"));
    EXPECT_TRUE(regex.Match("b"));
    EXPECT_TRUE(regex.Match("bb"));
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aabb"));

    // Rejected: a 'b' appearing before an 'a'.
    EXPECT_FALSE(regex.Match("ba"));
}
// "a(ab)*b" : '*' applied to a parenthesized group.
TEST(Regex, ZeroOrMore4) {
    Regex regex("a(ab)*b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: the group "ab" repeated zero, one and two times.
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aabb"));
    EXPECT_TRUE(regex.Match("aababb"));

    // Rejected: empty input, or input lacking the closing 'b'.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("aa"));
}
// "a+" : at least one 'a'.
TEST(Regex, OneOrMore1) {
    Regex regex("a+");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: one and two repetitions.
    EXPECT_TRUE(regex.Match("a"));
    EXPECT_TRUE(regex.Match("aa"));

    // Rejected: empty input, or input containing a character other than 'a'.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("b"));
    EXPECT_FALSE(regex.Match("ab"));
}
// "a+b" : at least one 'a' followed by exactly one 'b'.
TEST(Regex, OneOrMore2) {
    Regex regex("a+b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: one or two 'a' before the 'b'.
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aab"));

    // Rejected: empty input, or a 'b' with no preceding 'a'.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("b"));
}
// "a+b+" : a non-empty run of 'a' followed by a non-empty run of 'b'.
TEST(Regex, OneOrMore3) {
    Regex regex("a+b+");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: both runs present, with one or two characters each.
    EXPECT_TRUE(regex.Match("ab"));
    EXPECT_TRUE(regex.Match("aab"));
    EXPECT_TRUE(regex.Match("abb"));
    EXPECT_TRUE(regex.Match("aabb"));

    // Rejected: empty input, a missing 'a' run, or reversed order.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("b"));
    EXPECT_FALSE(regex.Match("ba"));
}
// "a(ab)+b" : '+' applied to a parenthesized group.
TEST(Regex, OneOrMore4) {
    Regex regex("a(ab)+b");
    ASSERT_TRUE(regex.IsValid());

    // Accepted: the group "ab" repeated once and twice.
    EXPECT_TRUE(regex.Match("aabb"));
    EXPECT_TRUE(regex.Match("aababb"));

    // Rejected: empty input, or the group omitted entirely.
    EXPECT_FALSE(regex.Match(""));
    EXPECT_FALSE(regex.Match("ab"));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment