Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
R
rapidjson
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
rapidjson
Commits
0bef29a5
Commit
0bef29a5
authored
May 24, 2015
by
miloyip
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial regex implementation with only concatenation and alternation
parent
c8c8ad47
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
280 additions
and
0 deletions
+280
-0
regex.h
include/rapidjson/internal/regex.h
+214
-0
stack.h
include/rapidjson/internal/stack.h
+15
-0
CMakeLists.txt
test/unittest/CMakeLists.txt
+1
-0
regextest.cpp
test/unittest/regextest.cpp
+50
-0
No files found.
include/rapidjson/internal/regex.h
0 → 100644
View file @
0bef29a5
// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifndef RAPIDJSON_INTERNAL_REGEX_H_
#define RAPIDJSON_INTERNAL_REGEX_H_
#include "../rapidjson.h"
#include "stack.h"
RAPIDJSON_NAMESPACE_BEGIN
namespace
internal
{
///////////////////////////////////////////////////////////////////////////////
// GenericRegex
//! Sentinel (all bits set) that marks an invalid state index in GenericRegex::State::out and out1.
static const SizeType kRegexInvalidState = ~static_cast<SizeType>(0);
template
<
typename
Encoding
,
typename
Allocator
=
CrtAllocator
>
class
GenericRegex
{
public
:
typedef
typename
Encoding
::
Ch
Ch
;
GenericRegex
(
const
Ch
*
source
,
Allocator
*
allocator
=
0
)
:
states_
(
allocator
,
256
),
root_
(
kRegexInvalidState
),
stateCount_
()
{
StringStream
is
(
source
);
Parse
(
is
);
}
~
GenericRegex
()
{
}
bool
IsValid
()
const
{
return
root_
!=
kRegexInvalidState
;
}
template
<
typename
InputStream
>
bool
Match
(
InputStream
&
is
)
const
{
RAPIDJSON_ASSERT
(
IsValid
());
Allocator
allocator
;
Stack
<
Allocator
>
state0
(
&
allocator
,
stateCount_
*
sizeof
(
SizeType
));
Stack
<
Allocator
>
state1
(
&
allocator
,
stateCount_
*
sizeof
(
SizeType
));
Stack
<
Allocator
>
*
current
=
&
state0
,
*
next
=
&
state1
;
const
size_t
stateSetSize
=
(
stateCount_
+
31
)
/
32
*
4
;
unsigned
*
stateSet
=
static_cast
<
unsigned
*>
(
allocator
.
Malloc
(
stateSetSize
));
std
::
memset
(
stateSet
,
0
,
stateSetSize
);
AddState
(
stateSet
,
*
current
,
root_
);
unsigned
codepoint
;
while
(
!
current
->
Empty
()
&&
Encoding
::
Decode
(
is
,
&
codepoint
)
&&
codepoint
!=
0
)
{
for
(
const
SizeType
*
s
=
current
->
template
Bottom
<
SizeType
>
();
s
!=
current
->
template
End
<
SizeType
>
();
++
s
)
{
const
State
&
sr
=
GetState
(
*
s
);
// if (sr.out != kRegexInvalidState)
// printf("%c matches %c\n", (char)sr.codepoint, (char)codepoint);
if
(
sr
.
out
!=
kRegexInvalidState
&&
sr
.
codepoint
==
codepoint
)
AddState
(
stateSet
,
*
next
,
sr
.
out
);
}
Stack
<
Allocator
>*
temp
=
current
;
current
=
next
;
next
=
temp
;
std
::
memset
(
stateSet
,
0
,
stateSetSize
);
next
->
Clear
();
// printf("\n");
}
Allocator
::
Free
(
stateSet
);
for
(
const
SizeType
*
s
=
current
->
template
Bottom
<
SizeType
>
();
s
!=
current
->
template
End
<
SizeType
>
();
++
s
)
if
(
GetState
(
*
s
).
out
==
kRegexInvalidState
)
return
true
;
return
false
;
}
bool
Match
(
const
Ch
*
s
)
{
StringStream
is
(
s
);
return
Match
(
is
);
}
private
:
struct
State
{
SizeType
out
;
//!< Equals to kInvalid for match
SizeType
out1
;
//!< Equals to non-kInvalid for split
unsigned
codepoint
;
};
struct
Frag
{
Frag
(
SizeType
s
,
SizeType
o
)
:
start
(
s
),
out
(
o
)
{}
SizeType
start
;
SizeType
out
;
//!< link-list of all output states
};
State
&
GetState
(
SizeType
index
)
{
RAPIDJSON_ASSERT
(
index
<
stateCount_
);
return
states_
.
template
Bottom
<
State
>
()[
index
];
}
const
State
&
GetState
(
SizeType
index
)
const
{
RAPIDJSON_ASSERT
(
index
<
stateCount_
);
return
states_
.
template
Bottom
<
State
>
()[
index
];
}
void
AddState
(
unsigned
*
stateSet
,
Stack
<
Allocator
>&
l
,
SizeType
index
)
const
{
if
(
index
==
kRegexInvalidState
)
return
;
const
State
&
s
=
GetState
(
index
);
if
(
s
.
out1
!=
kRegexInvalidState
)
{
// Split
AddState
(
stateSet
,
l
,
s
.
out
);
AddState
(
stateSet
,
l
,
s
.
out1
);
}
else
if
(
!
(
stateSet
[
index
>>
5
]
&
(
1
<<
(
index
&
31
))))
{
stateSet
[
index
>>
5
]
|=
(
1
<<
(
index
&
31
));
*
l
.
template
Push
<
SizeType
>
()
=
index
;
}
}
SizeType
NewState
(
SizeType
out
,
SizeType
out1
,
unsigned
codepoint
)
{
State
*
s
=
states_
.
template
Push
<
State
>
();
s
->
out
=
out
;
s
->
out1
=
out1
;
s
->
codepoint
=
codepoint
;
return
stateCount_
++
;
}
SizeType
Append
(
SizeType
l1
,
SizeType
l2
)
{
SizeType
old
=
l1
;
while
(
GetState
(
l1
).
out
!=
kRegexInvalidState
)
l1
=
GetState
(
l1
).
out
;
GetState
(
l1
).
out
=
l2
;
return
old
;
}
void
Patch
(
SizeType
l
,
SizeType
s
)
{
SizeType
next
;
for
(;
l
!=
kRegexInvalidState
;
l
=
next
)
{
next
=
GetState
(
l
).
out
;
GetState
(
l
).
out
=
s
;
}
}
template
<
typename
InputStream
>
void
Parse
(
InputStream
&
is
)
{
Allocator
allocator
;
Stack
<
Allocator
>
operandStack
(
&
allocator
,
256
);
// Frag
Stack
<
Allocator
>
operatorStack
(
&
allocator
,
256
);
// char
unsigned
codepoint
;
bool
previousOperand
=
false
;
while
(
Encoding
::
Decode
(
is
,
&
codepoint
)
&&
codepoint
!=
0
)
{
switch
(
codepoint
)
{
case
'|'
:
*
operatorStack
.
template
Push
<
char
>
()
=
'|'
;
previousOperand
=
false
;
break
;
default
:
SizeType
s
=
NewState
(
kRegexInvalidState
,
kRegexInvalidState
,
codepoint
);
// concatenation with previous operand
if
(
previousOperand
)
{
Frag
*
e
=
operandStack
.
template
Top
<
Frag
>
();
Patch
(
e
->
out
,
s
);
e
->
out
=
s
;
}
else
*
operandStack
.
template
Push
<
Frag
>
()
=
Frag
(
s
,
s
);
previousOperand
=
true
;
}
}
while
(
!
operatorStack
.
Empty
())
{
switch
(
*
operatorStack
.
template
Pop
<
char
>
(
1
))
{
case
'|'
:
{
Frag
e2
=
*
operandStack
.
template
Pop
<
Frag
>
(
1
);
Frag
e1
=
*
operandStack
.
template
Pop
<
Frag
>
(
1
);
SizeType
s
=
NewState
(
e1
.
start
,
e2
.
start
,
0
);
*
operandStack
.
template
Push
<
Frag
>
()
=
Frag
(
s
,
Append
(
e1
.
out
,
e2
.
out
));
}
break
;
}
}
// Link the operand to matching state.
if
(
operandStack
.
GetSize
()
==
sizeof
(
Frag
))
{
Frag
*
e
=
operandStack
.
template
Pop
<
Frag
>
(
1
);
Patch
(
e
->
out
,
NewState
(
kRegexInvalidState
,
kRegexInvalidState
,
0
));
root_
=
e
->
start
;
}
}
Stack
<
Allocator
>
states_
;
SizeType
root_
;
SizeType
stateCount_
;
};
//! GenericRegex specialised for UTF-8, using the default allocator.
typedef GenericRegex<UTF8<> > Regex;
}
// namespace internal
RAPIDJSON_NAMESPACE_END
#endif // RAPIDJSON_INTERNAL_REGEX_H_
include/rapidjson/internal/stack.h
View file @
0bef29a5
...
...
@@ -121,9 +121,24 @@ public:
return
reinterpret_cast
<
T
*>
(
stackTop_
-
sizeof
(
T
));
}
//! Read-only access to the topmost element of type T.
/*! Asserts that at least sizeof(T) bytes are currently stored. */
template<typename T>
const T* Top() const {
    RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
    return reinterpret_cast<const T*>(stackTop_ - sizeof(T));
}
//! Pointer to the current top of the stack, viewed as T (one past the last stored element).
template<typename T>
T* End() {
    T* const end = reinterpret_cast<T*>(stackTop_);
    return end;
}
//! Read-only pointer to the current top of the stack, viewed as T (one past the last stored element).
template<typename T>
const T* End() const {
    return reinterpret_cast<const T*>(stackTop_);
}
//! Pointer to the start of the stack storage, viewed as T.
template<typename T>
T* Bottom() {
    // Named cast instead of a C-style cast, matching Top() and End().
    return reinterpret_cast<T*>(stack_);
}
//! Read-only pointer to the start of the stack storage, viewed as T.
template<typename T>
const T* Bottom() const {
    // Named cast instead of a C-style cast, matching Top() and End().
    return reinterpret_cast<const T*>(stack_);
}
//! Returns the allocator referenced by allocator_.
// NOTE(review): allocator_ is dereferenced unconditionally; confirm it cannot be null when this is called.
Allocator& GetAllocator() {
    Allocator& allocator = *allocator_;
    return allocator;
}
//! True when nothing is stored, i.e. the top pointer coincides with the bottom.
bool Empty() const {
    return stack_ == stackTop_;
}
//! Distance between the bottom and the top of the stack, as computed by pointer difference.
size_t GetSize() const {
    const size_t used = static_cast<size_t>(stackTop_ - stack_);
    return used;
}
...
...
test/unittest/CMakeLists.txt
View file @
0bef29a5
...
...
@@ -11,6 +11,7 @@ set(UNITTEST_SOURCES
pointertest.cpp
prettywritertest.cpp
readertest.cpp
regextest.cpp
simdtest.cpp
stringbuffertest.cpp
strtodtest.cpp
...
...
test/unittest/regextest.cpp
0 → 100644
View file @
0bef29a5
// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "unittest.h"
#include "rapidjson/internal/regex.h"
using
namespace
rapidjson
::
internal
;
// A plain literal sequence must match exactly that sequence and nothing else.
TEST(Regex, concatenation) {
    Regex pattern("abc");

    // Exact input.
    EXPECT_TRUE(pattern.Match("abc"));

    // Empty input, proper prefixes, a non-prefix and an over-long input are all rejected.
    EXPECT_FALSE(pattern.Match(""));
    EXPECT_FALSE(pattern.Match("a"));
    EXPECT_FALSE(pattern.Match("b"));
    EXPECT_FALSE(pattern.Match("ab"));
    EXPECT_FALSE(pattern.Match("abcd"));
}
// Alternation: the input must match one of the alternatives in full.
TEST(Regex, split) {
    // Two alternatives sharing a common prefix.
    {
        Regex pattern("abab|abbb");
        EXPECT_TRUE(pattern.Match("abab"));
        EXPECT_TRUE(pattern.Match("abbb"));

        EXPECT_FALSE(pattern.Match(""));
        EXPECT_FALSE(pattern.Match("ab"));
        EXPECT_FALSE(pattern.Match("ababa"));
        EXPECT_FALSE(pattern.Match("abb"));
        EXPECT_FALSE(pattern.Match("abbbb"));
    }

    // Three single-character alternatives (chained '|').
    {
        Regex pattern("a|b|c");
        EXPECT_TRUE(pattern.Match("a"));
        EXPECT_TRUE(pattern.Match("b"));
        EXPECT_TRUE(pattern.Match("c"));

        EXPECT_FALSE(pattern.Match(""));
        EXPECT_FALSE(pattern.Match("aa"));
        EXPECT_FALSE(pattern.Match("ab"));
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment