Commit 3cb5733e authored by Milo Yip

Merge pull request #539 from miloyip/issue538_regexzeromin

Support {0, } and {0, m} in Regex
parents 3c2c1628 60116cf1
......@@ -256,13 +256,13 @@ private:
case '{':
{
unsigned n, m;
if (!ParseUnsigned(ds, &n) || n == 0)
if (!ParseUnsigned(ds, &n))
return;
if (ds.Peek() == ',') {
ds.Take();
if (ds.Peek() == '}')
m = 0;
m = kInfinityQuantifier;
else if (!ParseUnsigned(ds, &m) || m < n)
return;
}
......@@ -424,15 +424,29 @@ private:
}
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
RAPIDJSON_ASSERT(n > 0);
RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
RAPIDJSON_ASSERT(n <= m);
if (operandStack.GetSize() < sizeof(Frag))
return false;
if (n == 0) {
if (m == 0) // a{0} not support
return false;
else if (m == kInfinityQuantifier)
Eval(operandStack, kZeroOrMore); // a{0,} -> a*
else {
Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
for (unsigned i = 0; i < m - 1; i++)
CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
for (unsigned i = 0; i < m - 1; i++)
Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
}
return true;
}
for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
CloneTopOperand(operandStack);
if (m == 0)
if (m == kInfinityQuantifier)
Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
else if (m > n) {
CloneTopOperand(operandStack); // a{3,5} -> a a a a
......@@ -469,6 +483,8 @@ private:
template <typename InputStream>
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
unsigned r = 0;
if (ds.Peek() < '0' || ds.Peek() > '9')
return false;
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
return false; // overflow
......@@ -658,6 +674,8 @@ private:
SizeType stateCount_;
SizeType rangeCount_;
static const unsigned kInfinityQuantifier = ~0u;
// For SearchWithAnchoring()
uint32_t* stateSet_; // allocated by states_.GetAllocator()
mutable Stack<Allocator> state0_;
......
......@@ -325,6 +325,43 @@ TEST(Regex, QuantifierMinMax3) {
EXPECT_FALSE(re.Match("abbbbbbd"));
}
// Issue538: bounded quantifier whose minimum is zero, i.e. {0,m}.
// The pattern is 'a', then (b|c) repeated between zero and three times, then 'd'.
TEST(Regex, QuantifierMinMax4) {
    Regex re("a(b|c){0,3}d");
    ASSERT_TRUE(re.IsValid());

    // Inputs carrying zero, one, two or three repetitions of (b|c).
    const char* const accepted[] = {
        "ad",
        "abd", "acd",
        "abbd", "accd", "abcd",
        "abbbd", "acccd"
    };
    for (size_t i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++) {
        EXPECT_TRUE(re.Match(accepted[i])) << "input: " << accepted[i];
    }

    // Inputs with four repetitions, or with an extra trailing 'd'.
    const char* const rejected[] = {
        "abbbbd", "add", "accccd", "abcbcd"
    };
    for (size_t i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
        EXPECT_FALSE(re.Match(rejected[i])) << "input: " << rejected[i];
    }
}
// Issue538: open-ended quantifier whose minimum is zero, i.e. {0,}.
// The pattern is 'a', then (b|c) repeated any number of times (including none), then 'd'.
TEST(Regex, QuantifierMinMax5) {
    Regex re("a(b|c){0,}d");
    ASSERT_TRUE(re.IsValid());

    // Inputs carrying zero up to four repetitions of (b|c).
    const char* const accepted[] = {
        "ad",
        "abd", "acd",
        "abbd", "accd", "abcd",
        "abbbd", "acccd",
        "abbbbd", "accccd", "abcbcd"
    };
    for (size_t i = 0; i < sizeof(accepted) / sizeof(accepted[0]); i++) {
        EXPECT_TRUE(re.Match(accepted[i])) << "input: " << accepted[i];
    }

    // Inputs with an extra 'd' or an extra 'a', which (b|c) cannot absorb.
    const char* const rejected[] = {
        "add", "aad"
    };
    for (size_t i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
        EXPECT_FALSE(re.Match(rejected[i])) << "input: " << rejected[i];
    }
}
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
TEST(Regex, Unicode) {
......@@ -501,6 +538,7 @@ TEST(Regex, Invalid) {
EXPECT_FALSE(re.IsValid());\
}
TEST_INVALID("");
TEST_INVALID("a|");
TEST_INVALID("()");
TEST_INVALID(")");
......@@ -517,7 +555,7 @@ TEST(Regex, Invalid) {
TEST_INVALID("a{0}");
TEST_INVALID("a{-1}");
TEST_INVALID("a{}");
TEST_INVALID("a{0,}");
// TEST_INVALID("a{0,}"); // Support now
TEST_INVALID("a{,0}");
TEST_INVALID("a{1,0}");
TEST_INVALID("a{-1,0}");
......@@ -530,4 +568,9 @@ TEST(Regex, Invalid) {
#undef TEST_INVALID
}
// Regression test for issue 538: a pattern using a {0,m} quantifier on a group
// must be accepted by the regex parser. Only validity is checked here; no
// input is matched against the pattern.
TEST(Regex, Issue538) {
    Regex pattern("^[0-9]+(\\\\.[0-9]+){0,2}");
    EXPECT_TRUE(pattern.IsValid());
}
#undef EURO
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment