Commit 60116cf1 authored by Milo Yip

Support {0, } and {0, m} in Regex

parent 7e383864
...@@ -256,13 +256,13 @@ private: ...@@ -256,13 +256,13 @@ private:
case '{': case '{':
{ {
unsigned n, m; unsigned n, m;
if (!ParseUnsigned(ds, &n) || n == 0) if (!ParseUnsigned(ds, &n))
return; return;
if (ds.Peek() == ',') { if (ds.Peek() == ',') {
ds.Take(); ds.Take();
if (ds.Peek() == '}') if (ds.Peek() == '}')
m = 0; m = kInfinityQuantifier;
else if (!ParseUnsigned(ds, &m) || m < n) else if (!ParseUnsigned(ds, &m) || m < n)
return; return;
} }
...@@ -424,15 +424,29 @@ private: ...@@ -424,15 +424,29 @@ private:
} }
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) { bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
RAPIDJSON_ASSERT(n > 0); RAPIDJSON_ASSERT(n <= m);
RAPIDJSON_ASSERT(m == 0 || n <= m); // m == 0 means infinity
if (operandStack.GetSize() < sizeof(Frag)) if (operandStack.GetSize() < sizeof(Frag))
return false; return false;
if (n == 0) {
if (m == 0) // a{0} not support
return false;
else if (m == kInfinityQuantifier)
Eval(operandStack, kZeroOrMore); // a{0,} -> a*
else {
Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
for (unsigned i = 0; i < m - 1; i++)
CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
for (unsigned i = 0; i < m - 1; i++)
Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
}
return true;
}
for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
CloneTopOperand(operandStack); CloneTopOperand(operandStack);
if (m == 0) if (m == kInfinityQuantifier)
Eval(operandStack, kOneOrMore); // a{3,} -> a a a+ Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
else if (m > n) { else if (m > n) {
CloneTopOperand(operandStack); // a{3,5} -> a a a a CloneTopOperand(operandStack); // a{3,5} -> a a a a
...@@ -469,6 +483,8 @@ private: ...@@ -469,6 +483,8 @@ private:
template <typename InputStream> template <typename InputStream>
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) { bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
unsigned r = 0; unsigned r = 0;
if (ds.Peek() < '0' || ds.Peek() > '9')
return false;
while (ds.Peek() >= '0' && ds.Peek() <= '9') { while (ds.Peek() >= '0' && ds.Peek() <= '9') {
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295 if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
return false; // overflow return false; // overflow
...@@ -658,6 +674,8 @@ private: ...@@ -658,6 +674,8 @@ private:
SizeType stateCount_; SizeType stateCount_;
SizeType rangeCount_; SizeType rangeCount_;
static const unsigned kInfinityQuantifier = ~0u;
// For SearchWithAnchoring() // For SearchWithAnchoring()
uint32_t* stateSet_; // allocated by states_.GetAllocator() uint32_t* stateSet_; // allocated by states_.GetAllocator()
mutable Stack<Allocator> state0_; mutable Stack<Allocator> state0_;
......
...@@ -325,6 +325,43 @@ TEST(Regex, QuantifierMinMax3) { ...@@ -325,6 +325,43 @@ TEST(Regex, QuantifierMinMax3) {
EXPECT_FALSE(re.Match("abbbbbbd")); EXPECT_FALSE(re.Match("abbbbbbd"));
} }
// Issue538: bounded quantifier with a zero minimum, {0,m}.
// The group (b|c) between 'a' and 'd' may appear zero up to three times.
TEST(Regex, QuantifierMinMax4) {
Regex re("a(b|c){0,3}d");
// Abort the test early if the pattern fails to compile; the matches below would be meaningless.
ASSERT_TRUE(re.IsValid());
// Zero, one, two and three repetitions of (b|c) are all accepted.
EXPECT_TRUE(re.Match("ad"));
EXPECT_TRUE(re.Match("abd"));
EXPECT_TRUE(re.Match("acd"));
EXPECT_TRUE(re.Match("abbd"));
EXPECT_TRUE(re.Match("accd"));
EXPECT_TRUE(re.Match("abcd"));
EXPECT_TRUE(re.Match("abbbd"));
EXPECT_TRUE(re.Match("acccd"));
// More than three repetitions, or an extra 'd', must be rejected.
EXPECT_FALSE(re.Match("abbbbd"));
EXPECT_FALSE(re.Match("add"));
EXPECT_FALSE(re.Match("accccd"));
EXPECT_FALSE(re.Match("abcbcd"));
}
// Issue538: open-ended quantifier with a zero minimum, {0,}.
// The group (b|c) between 'a' and 'd' may appear any number of times, including none.
TEST(Regex, QuantifierMinMax5) {
Regex re("a(b|c){0,}d");
// Abort the test early if the pattern fails to compile; the matches below would be meaningless.
ASSERT_TRUE(re.IsValid());
// Zero through four repetitions of (b|c), in any mix of 'b' and 'c', are accepted.
EXPECT_TRUE(re.Match("ad"));
EXPECT_TRUE(re.Match("abd"));
EXPECT_TRUE(re.Match("acd"));
EXPECT_TRUE(re.Match("abbd"));
EXPECT_TRUE(re.Match("accd"));
EXPECT_TRUE(re.Match("abcd"));
EXPECT_TRUE(re.Match("abbbd"));
EXPECT_TRUE(re.Match("acccd"));
EXPECT_TRUE(re.Match("abbbbd"));
EXPECT_TRUE(re.Match("accccd"));
EXPECT_TRUE(re.Match("abcbcd"));
// Characters outside the (b|c) group are not absorbed by the quantifier:
// an extra 'd' or an extra 'a' must be rejected.
EXPECT_FALSE(re.Match("add"));
EXPECT_FALSE(re.Match("aad"));
}
#define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC #define EURO "\xE2\x82\xAC" // "\xE2\x82\xAC" is UTF-8 sequence of Euro sign U+20AC
TEST(Regex, Unicode) { TEST(Regex, Unicode) {
...@@ -501,6 +538,7 @@ TEST(Regex, Invalid) { ...@@ -501,6 +538,7 @@ TEST(Regex, Invalid) {
EXPECT_FALSE(re.IsValid());\ EXPECT_FALSE(re.IsValid());\
} }
TEST_INVALID("");
TEST_INVALID("a|"); TEST_INVALID("a|");
TEST_INVALID("()"); TEST_INVALID("()");
TEST_INVALID(")"); TEST_INVALID(")");
...@@ -517,7 +555,7 @@ TEST(Regex, Invalid) { ...@@ -517,7 +555,7 @@ TEST(Regex, Invalid) {
TEST_INVALID("a{0}"); TEST_INVALID("a{0}");
TEST_INVALID("a{-1}"); TEST_INVALID("a{-1}");
TEST_INVALID("a{}"); TEST_INVALID("a{}");
TEST_INVALID("a{0,}"); // TEST_INVALID("a{0,}"); // Support now
TEST_INVALID("a{,0}"); TEST_INVALID("a{,0}");
TEST_INVALID("a{1,0}"); TEST_INVALID("a{1,0}");
TEST_INVALID("a{-1,0}"); TEST_INVALID("a{-1,0}");
...@@ -530,4 +568,9 @@ TEST(Regex, Invalid) { ...@@ -530,4 +568,9 @@ TEST(Regex, Invalid) {
#undef TEST_INVALID #undef TEST_INVALID
} }
// Regression check for Issue538: a pattern that applies a {0,2} quantifier to a group
// must be reported as valid. Only IsValid() is checked; no input is matched here.
TEST(Regex, Issue538) {
// NOTE(review): after C++ string unescaping the pattern contains two backslashes followed
// by '.', which reads as an escaped backslash plus an unescaped dot rather than an escaped
// dot. Confirm this is the intended pattern.
Regex re("^[0-9]+(\\\\.[0-9]+){0,2}");
EXPECT_TRUE(re.IsValid());
}
#undef EURO #undef EURO
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment