Commit a33af83e authored by Milo Yip

Optimization for Regex and Schema

parent a0066483
......@@ -71,13 +71,17 @@ class GenericRegex {
public:
typedef typename Encoding::Ch Ch;
// Constructs the regex from the null-terminated pattern `source`.
// The pattern is wrapped in a GenericStringStream, decoded through
// DecodedStream, and handed to Parse(). `allocator` (default 0) is forwarded
// to the internal stacks.
GenericRegex(const Ch* source, Allocator* allocator = 0) : states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), anchorBegin_(), anchorEnd_() {
GenericRegex(const Ch* source, Allocator* allocator = 0) :
states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
// stateSet_ starts out null and state0_/state1_ start with zero capacity;
// they are sized by the preallocation step that runs when stateCount_ > 0.
stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_()
{
GenericStringStream<Encoding> ss(source);
DecodedStream<GenericStringStream<Encoding> > ds(ss);
Parse(ds);
}
// Releases the state-set buffer preallocated for SearchWithAnchoring().
// stateSet_ is value-initialized by the constructor and only allocated when
// stateCount_ > 0, so Free() may be handed a null pointer here.
// NOTE(review): relies on Allocator::Free() accepting null — confirm.
~GenericRegex() {
Allocator::Free(stateSet_);
}
bool IsValid() const {
......@@ -308,6 +312,14 @@ private:
printf("\n");
#endif
}
// Preallocate buffer for SearchWithAnchoring()
RAPIDJSON_ASSERT(stateSet_ == 0);
if (stateCount_ > 0) {
stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize()));
state0_.Reserve<SizeType>(stateCount_);
state1_.Reserve<SizeType>(stateCount_);
}
}
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
......@@ -568,21 +580,15 @@ private:
RAPIDJSON_ASSERT(IsValid());
DecodedStream<InputStream> ds(is);
Allocator allocator;
Stack<Allocator> state0(&allocator, stateCount_ * sizeof(SizeType));
Stack<Allocator> state1(&allocator, stateCount_ * sizeof(SizeType));
Stack<Allocator> *current = &state0, *next = &state1;
const size_t stateSetSize = (stateCount_ + 31) / 32 * 4;
unsigned* stateSet = static_cast<unsigned*>(allocator.Malloc(stateSetSize));
std::memset(stateSet, 0, stateSetSize);
bool matched = false;
matched = AddState(stateSet, *current, root_);
state0_.Clear();
Stack<Allocator> *current = &state0_, *next = &state1_;
const size_t stateSetSize = GetStateSetSize();
std::memset(stateSet_, 0, stateSetSize);
bool matched = AddState(*current, root_);
unsigned codepoint;
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
std::memset(stateSet, 0, stateSetSize);
std::memset(stateSet_, 0, stateSetSize);
next->Clear();
matched = false;
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
......@@ -591,39 +597,38 @@ private:
sr.codepoint == kAnyCharacterClass ||
(sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
{
matched = AddState(stateSet, *next, sr.out) || matched;
matched = AddState(*next, sr.out) || matched;
if (!anchorEnd && matched)
goto exit;
return true;
}
if (!anchorBegin)
AddState(stateSet, *next, root_);
AddState(*next, root_);
}
Stack<Allocator>* temp = current;
current = next;
next = temp;
internal::Swap(current, next);
}
exit:
Allocator::Free(stateSet);
return matched;
}
// Returns the size in bytes of the state-set bitmap: one bit per state,
// rounded up to whole 32-bit words (4 bytes each). AddState() addresses the
// bitmap as word `index >> 5`, bit `index & 31`.
size_t GetStateSetSize() const {
return (stateCount_ + 31) / 32 * 4;
}
// Adds state `index` to the list `l`, recursing through Split states.
// Returns whether an added state is a match state, i.e. `index` itself is
// kRegexInvalidState or the state's `out` is kRegexInvalidState.
bool AddState(unsigned* stateSet, Stack<Allocator>& l, SizeType index) const {
bool AddState(Stack<Allocator>& l, SizeType index) const {
if (index == kRegexInvalidState)
return true;
const State& s = GetState(index);
if (s.out1 != kRegexInvalidState) { // Split
bool matched = AddState(stateSet, l, s.out);
matched = AddState(stateSet, l, s.out1) || matched;
return matched;
bool matched = AddState(l, s.out);
return AddState(l, s.out1) || matched;
}
// The bitmap records which states are already in the list, so each state
// is pushed at most once.
else if (!(stateSet[index >> 5] & (1 << (index & 31)))) {
stateSet[index >> 5] |= (1 << (index & 31));
*l.template Push<SizeType>() = index;
else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
stateSet_[index >> 5] |= (1 << (index & 31));
*l.template PushUnsafe<SizeType>() = index;
}
return GetState(index).out == kRegexInvalidState;
return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not invalidated due to reallocation.
}
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
......@@ -642,6 +647,11 @@ private:
SizeType root_;
SizeType stateCount_;
SizeType rangeCount_;
// For SearchWithAnchoring()
uint32_t* stateSet_; // allocated by states_.GetAllocator()
mutable Stack<Allocator> state0_;
mutable Stack<Allocator> state1_;
bool anchorBegin_;
bool anchorEnd_;
};
......
......@@ -38,7 +38,6 @@ public:
// Optimization note: Do not allocate memory for stack_ in constructor.
// Do it lazily when first Push() -> Expand() -> Resize().
Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
RAPIDJSON_ASSERT(stackCapacity > 0);
}
#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
......
......@@ -300,15 +300,17 @@ struct SchemaValidationContext {
factory.DestroySchemaValidator(patternPropertiesValidators[i]);
factory.FreeState(patternPropertiesValidators);
}
factory.FreeState(patternPropertiesSchemas);
factory.FreeState(objectDependencies);
if (patternPropertiesSchemas)
factory.FreeState(patternPropertiesSchemas);
if (objectDependencies)
factory.FreeState(objectDependencies);
}
SchemaValidatorFactoryType& factory;
const SchemaType* schema;
const SchemaType* valueSchema;
const Ch* invalidKeyword;
void* hasher; // Only calidator access
void* hasher; // Only validator access
void* arrayElementHashCodes; // Only validator access this
ISchemaValidator** validators;
SizeType validatorCount;
......@@ -613,7 +615,7 @@ public:
return true;
}
bool EndValue(Context& context) const {
RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const {
if (context.patternPropertiesValidatorCount > 0) {
bool otherValid = false;
SizeType count = context.patternPropertiesValidatorCount;
......@@ -1080,8 +1082,12 @@ private:
// O(n)
template <typename ValueType>
bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
SizeType len = name.GetStringLength();
const Ch* str = name.GetString();
for (SizeType index = 0; index < propertyCount_; index++)
if (properties_[index].name == name) {
if (properties_[index].name.GetStringLength() == len &&
(std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
{
*outIndex = index;
return true;
}
......@@ -1703,7 +1709,7 @@ private:
PushSchema(root_);
else {
if (CurrentContext().inArray)
AppendToken(CurrentContext().arrayElementIndex);
AppendToken<Ch>(CurrentContext().arrayElementIndex);
if (!CurrentSchema().BeginValue(CurrentContext()))
return false;
......@@ -1767,21 +1773,23 @@ private:
}
// Appends "/<token>" to documentStack_, where the token is the `len`
// characters at `str` with '~' escaped as "~0" and '/' escaped as "~1"
// (presumably JSON Pointer token escaping — confirm against the caller).
void AppendToken(const Ch* str, SizeType len) {
*documentStack_.template Push<Ch>() = '/';
documentStack_.template Reserve<Ch>(1 + len * 2); // worst case: the '/' plus every character escaped as two characters
// Capacity is reserved above, so the unchecked PushUnsafe() is used below.
*documentStack_.template PushUnsafe<Ch>() = '/';
for (SizeType i = 0; i < len; i++) {
if (str[i] == '~') {
*documentStack_.template Push<Ch>() = '~';
*documentStack_.template Push<Ch>() = '0';
*documentStack_.template PushUnsafe<Ch>() = '~';
*documentStack_.template PushUnsafe<Ch>() = '0';
}
else if (str[i] == '/') {
*documentStack_.template Push<Ch>() = '~';
*documentStack_.template Push<Ch>() = '1';
*documentStack_.template PushUnsafe<Ch>() = '~';
*documentStack_.template PushUnsafe<Ch>() = '1';
}
else
*documentStack_.template Push<Ch>() = str[i];
*documentStack_.template PushUnsafe<Ch>() = str[i];
}
}
template<typename Ch>
void AppendToken(SizeType index) {
*documentStack_.template Push<Ch>() = '/';
char buffer[21];
......@@ -1790,9 +1798,27 @@ private:
*documentStack_.template Push<Ch>() = buffer[i];
}
void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); }
// Specialized version for char to prevent buffer copying.
// Pushes room for '/' plus the maximum digit count (10 for a 4-byte
// SizeType, 20 otherwise), formats the index directly into that space, then
// pops the reserved characters that were not used.
// Presumably u32toa()/u64toa() return a pointer just past the last digit
// written — confirm.
// NOTE(review): this is an explicit specialization written inside the class
// body (the hunk sits under `private:`); some compilers reject explicit
// specializations at class scope — confirm it builds on all toolchains.
template <>
void AppendToken<char>(SizeType index) {
if (sizeof(SizeType) == 4) {
char *buffer = documentStack_.template Push<Ch>(1 + 10); // '/' + uint
*buffer++ = '/';
const char* end = internal::u32toa(index, buffer);
documentStack_.template Pop<Ch>(static_cast<size_t>(10 - (end - buffer)));
}
else {
char *buffer = documentStack_.template Push<Ch>(1 + 20); // '/' + uint64
*buffer++ = '/';
const char* end = internal::u64toa(index, buffer);
documentStack_.template Pop<Ch>(static_cast<size_t>(20 - (end - buffer)));
}
}
RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, &schema); }
void PopSchema() {
RAPIDJSON_FORCEINLINE void PopSchema() {
Context* c = schemaStack_.template Pop<Context>(1);
if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) {
a->~HashCodeArray();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment