Commit 3006fa7d authored by thebusytypist

Try to resolve issue #35: implement iterative parsing.

parent c12286a0
......@@ -46,7 +46,8 @@ namespace rapidjson {
//! Flags selecting parser behavior. The non-zero values are distinct bits, so they can be tested with bitwise AND.
enum ParseFlag {
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
kParseValidateEncodingFlag = 2 //!< Validate encoding of JSON strings.
kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
kParseNonRecursiveFlag = 4 //!< Non-recursive parsing: nesting is tracked on an explicit stack, so function call depth stays constant however deeply the JSON is nested.
};
//! Error code of parsing.
......@@ -69,7 +70,7 @@ enum ParseErrorCode {
kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid.
kParseErrorStringEscapeInvalid, //!< Invalid escape character in string.
kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string.
kParseErrorStringInvalidEncoding, //!< Invalid encoidng in string.
kParseErrorStringInvalidEncoding, //!< Invalid encoding in string.
kParseErrorNumberTooBig, //!< Number too big to be stored in double.
kParseErrorNumberMissFraction, //!< Miss fraction part in number.
......@@ -134,7 +135,7 @@ namespace internal {
template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
class StreamLocalCopy;
//! Do copy optimziation.
//! Do copy optimization.
template<typename Stream>
class StreamLocalCopy<Stream, 1> {
public:
......@@ -297,6 +298,9 @@ public:
parseErrorCode_ = kParseErrorNone;
errorOffset_ = 0;
if (parseFlags & kParseNonRecursiveFlag)
return NonRecursiveParse<parseFlags>(is, handler);
SkipWhitespace(is);
if (is.Peek() == '\0')
......@@ -748,6 +752,220 @@ private:
}
}
// Non-recursive parsing
//
// States of the iterative parser. Each open object/array owns a frame on the
// reader's stack holding the state to resume afterwards and a running count.
enum NonRecursiveParsingState {
    NonRecursiveParsingStartState,          //!< Nothing consumed yet; the root value is expected.
    NonRecursiveParsingFinishState,         //!< The root object/array has been closed.
    NonRecursiveParsingErrorState,          //!< A transition failed; parsing stops.

    // Object states
    NonRecursiveParsingObjectInitialState,  //!< Just after '{': no member seen yet.
    NonRecursiveParsingObjectContentState,  //!< Inside an object after a member or a ','.

    // Array states
    NonRecursiveParsingArrayInitialState,   //!< Just after '[': no element seen yet.
    NonRecursiveParsingArrayContentState    //!< Inside an array after an element or a ','.
};
// Consumes one character and, if it opens an object ('{') or an array ('['),
// notifies the handler and starts a new stack frame for that container.
//
// state   - the state to resume once the new container is closed; it is saved on the stack.
// is      - input stream; exactly one character is taken, whatever it is.
// handler - receives StartObject() or StartArray().
//
// Returns the container's Initial state, or NonRecursiveParsingErrorState when
// the consumed character opens neither container.
// NOTE(review): the error path does not record a parse error code here.
template <typename InputStream, typename Handler>
NonRecursiveParsingState TransitToCompoundValueTypeState(NonRecursiveParsingState state, InputStream& is, Handler& handler) {
    NonRecursiveParsingState opened;
    switch (is.Take()) {
    case '{':
        handler.StartObject();
        opened = NonRecursiveParsingObjectInitialState;
        break;
    case '[':
        handler.StartArray();
        opened = NonRecursiveParsingArrayInitialState;
        break;
    default:
        // Not a compound value: nothing is pushed.
        return NonRecursiveParsingErrorState;
    }

    // New frame: the state to return to, then the member/element counter starting at zero.
    *stack_.template Push<NonRecursiveParsingState>(1) = state;
    *stack_.template Push<int>(1) = 0;
    return opened;
}
// Parses one value located inside an object or array and returns the state that follows it
// (ObjectInitial->ObjectContent, ArrayInitial->ArrayContent, Content states stay as they are).
//
// state   - must be one of the four object/array states (asserted).
// is      - input stream positioned at the first character of the value.
// handler - receives the events for the parsed value.
//
// A scalar leaves the parser in the Content state of the current container. A '{' or '['
// opens a nested container instead; the *incoming* state is what gets saved for later.
// Returns NonRecursiveParsingErrorState whenever a parse error has been recorded.
template <unsigned parseFlags, typename InputStream, typename Handler>
NonRecursiveParsingState TransitByValue(NonRecursiveParsingState state, InputStream& is, Handler& handler) {
    RAPIDJSON_ASSERT(
        state == NonRecursiveParsingObjectInitialState ||
        state == NonRecursiveParsingObjectContentState ||
        state == NonRecursiveParsingArrayInitialState ||
        state == NonRecursiveParsingArrayContentState);

    // State to report after a scalar value: the Content flavour of the current container.
    NonRecursiveParsingState next = state;
    switch (state) {
    case NonRecursiveParsingObjectInitialState:
        next = NonRecursiveParsingObjectContentState;
        break;
    case NonRecursiveParsingArrayInitialState:
        next = NonRecursiveParsingArrayContentState;
        break;
    default:
        break;
    }

    switch (is.Peek()) {
    case '{':
    case '[':
        // Nested container: push a frame and continue in its Initial state.
        next = TransitToCompoundValueTypeState(state, is, handler);
        break;
    case '"':
        ParseString<parseFlags>(is, handler);
        break;
    case 'n':
        ParseNull<parseFlags>(is, handler);
        break;
    case 't':
        ParseTrue<parseFlags>(is, handler);
        break;
    case 'f':
        ParseFalse<parseFlags>(is, handler);
        break;
    default:
        // Anything else is handed to the number parser.
        ParseNumber<parseFlags>(is, handler);
        break;
    }

    return HasParseError() ? NonRecursiveParsingErrorState : next;
}
// Transit from object related states(ObjectInitial, ObjectContent).
//
// Dispatches on the next character of the input:
//   '}'  closes the object: pops its frame, reports EndObject(memberCount) and resumes the parent.
//   ','  separates members: bumps the member counter (only valid after at least one member).
//   '"'  starts a member: parses the name, expects ':' and then parses the value.
//   anything else is reported as kParseErrorObjectMissCommaOrCurlyBracket.
//
// state   - NonRecursiveParsingObjectInitialState or NonRecursiveParsingObjectContentState.
// is      - input stream positioned at the character to dispatch on.
// handler - receives the name, the value events and EndObject().
//
// Returns the next state, or NonRecursiveParsingErrorState on failure.
template <unsigned parseFlags, typename InputStream, typename Handler>
NonRecursiveParsingState TransitFromObjectStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) {
    NonRecursiveParsingState r = NonRecursiveParsingErrorState;

    switch (is.Peek()) {
    case '}': {
        is.Take();
        // The counter only counts commas, so a non-empty object has one member more.
        int memberCount = *stack_.template Pop<int>(1);
        if (state == NonRecursiveParsingObjectContentState)
            ++memberCount;
        // Restore the parent stack frame.
        r = *stack_.template Pop<NonRecursiveParsingState>(1);
        // The parent has just received a complete value, so an Initial state becomes Content.
        if (r == NonRecursiveParsingArrayInitialState)
            r = NonRecursiveParsingArrayContentState;
        else if (r == NonRecursiveParsingObjectInitialState)
            r = NonRecursiveParsingObjectContentState;
        // Returning to the topmost frame means the root value is complete.
        if (r == NonRecursiveParsingStartState)
            r = NonRecursiveParsingFinishState;
        handler.EndObject(memberCount);
        break;
    }
    case ',':
        if (state == NonRecursiveParsingObjectInitialState) {
            // A comma directly after '{' has no member in front of it: a name is missing.
            // Accepting it would let "{,}" through with a bogus member count.
            RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
            break;
        }
        is.Take();
        r = NonRecursiveParsingObjectContentState;
        // Update member count.
        *stack_.template Top<int>() = *stack_.template Top<int>() + 1;
        break;
    case '"':
        // Should be a key-value pair.
        ParseString<parseFlags>(is, handler);
        if (HasParseError())
            break;  // Keep the error recorded for the string; r is already the error state.
        SkipWhitespace(is);
        if (is.Take() != ':') {
            RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell());
            break;
        }
        SkipWhitespace(is);
        r = TransitByValue<parseFlags>(state, is, handler);
        break;
    default:
        RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
        break;
    }

    return r;
}
// Transit from array related states(ArrayInitial, ArrayContent).
//
// Dispatches on the next character of the input:
//   ','  separates elements: bumps the element counter and stays in ArrayContent.
//   ']'  closes the array: pops its frame, reports EndArray(elementCount) and resumes the parent.
//   anything else is parsed as an element value.
//
// state   - NonRecursiveParsingArrayInitialState or NonRecursiveParsingArrayContentState.
// is      - input stream positioned at the character to dispatch on.
// handler - receives the element events and EndArray().
//
// Returns the next state, or NonRecursiveParsingErrorState if value parsing failed.
// NOTE(review): a ',' is accepted in either state, so a leading comma ("[,") is not rejected here.
template <unsigned parseFlags, typename InputStream, typename Handler>
NonRecursiveParsingState TransitFromArrayStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) {
    switch (is.Peek()) {
    case ',':
        is.Take();
        ++*stack_.template Top<int>();
        return NonRecursiveParsingArrayContentState;
    case ']':
        break;  // Closing bracket: handled below.
    default:
        // Should be a single value.
        return TransitByValue<parseFlags>(state, is, handler);
    }

    is.Take();

    // The counter only counts commas, so a non-empty array has one element more.
    int elementCount = *stack_.template Pop<int>(1);
    if (state == NonRecursiveParsingArrayContentState)
        ++elementCount;

    // Resume the parent frame. It has just received a complete value, so an Initial
    // state becomes Content; reaching the topmost frame means the document is done.
    NonRecursiveParsingState parent = *stack_.template Pop<NonRecursiveParsingState>(1);
    switch (parent) {
    case NonRecursiveParsingArrayInitialState:
        parent = NonRecursiveParsingArrayContentState;
        break;
    case NonRecursiveParsingObjectInitialState:
        parent = NonRecursiveParsingObjectContentState;
        break;
    case NonRecursiveParsingStartState:
        parent = NonRecursiveParsingFinishState;
        break;
    default:
        break;
    }

    handler.EndArray(elementCount);
    return parent;
}
// Performs a single step of the iterative parser from the given state.
//
// state   - current parser state.
// is      - input stream positioned at the next character to consume.
// handler - receives the events produced by this step.
//
// Returns the state after the step. Finish and Error have no outgoing
// transition, so stepping from them yields NonRecursiveParsingErrorState.
template <unsigned parseFlags, typename InputStream, typename Handler>
NonRecursiveParsingState Transit(NonRecursiveParsingState state, InputStream& is, Handler& handler) {
    if (state == NonRecursiveParsingStartState)
        return TransitToCompoundValueTypeState(state, is, handler);

    if (state == NonRecursiveParsingObjectInitialState ||
        state == NonRecursiveParsingObjectContentState)
        return TransitFromObjectStates<parseFlags>(state, is, handler);

    if (state == NonRecursiveParsingArrayInitialState ||
        state == NonRecursiveParsingArrayContentState)
        return TransitFromArrayStates<parseFlags>(state, is, handler);

    return NonRecursiveParsingErrorState;
}
template <unsigned parseFlags, typename InputStream, typename Handler>
bool NonRecursiveParse(InputStream& is, Handler& handler) {
NonRecursiveParsingState state = NonRecursiveParsingStartState;
SkipWhitespace(is);
while (is.Peek() != '\0' && state != NonRecursiveParsingErrorState) {
state = Transit<parseFlags>(state, is, handler);
SkipWhitespace(is);
}
stack_.Clear();
return state == NonRecursiveParsingFinishState && !HasParseError();
}
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
internal::Stack<Allocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
ParseErrorCode parseErrorCode_;
......
......@@ -650,7 +650,7 @@ struct StreamTraits<CustomStringStream<Encoding> > {
enum { copyOptimization = 1 };
};
} // namespace rapdijson
} // namespace rapidjson
#endif
TEST(Reader, CustomStringStream) {
......@@ -706,6 +706,200 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
EXPECT_FALSE(reader.HasParseError());
}
// Steps the state machine one transition at a time over a mixed document and
// checks, after every step, the resulting state and the counter on top of the
// reader's stack (commas seen so far in the innermost open container).
TEST(Reader, NonRecursiveParsing) {
    StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]");
    Reader reader;
    BaseReaderHandler<> handler;

    // Marks steps after which the original test does not inspect the counter.
    const int kNoCountCheck = -1;

    struct Step {
        const char* token;                            // input consumed by this step (for failure messages)
        Reader::NonRecursiveParsingState expected;    // state after the step
        int count;                                    // expected top-of-stack counter, or kNoCountCheck
    };

    const Step steps[] = {
        { "[",          Reader::NonRecursiveParsingArrayInitialState,  kNoCountCheck },
        { "1",          Reader::NonRecursiveParsingArrayContentState,  0 },
        { ",",          Reader::NonRecursiveParsingArrayContentState,  1 },
        { "true",       Reader::NonRecursiveParsingArrayContentState,  1 },
        { ",",          Reader::NonRecursiveParsingArrayContentState,  2 },
        { "false",      Reader::NonRecursiveParsingArrayContentState,  2 },
        { ",",          Reader::NonRecursiveParsingArrayContentState,  3 },
        { "null",       Reader::NonRecursiveParsingArrayContentState,  3 },
        { ",",          Reader::NonRecursiveParsingArrayContentState,  4 },
        { "\"string\"", Reader::NonRecursiveParsingArrayContentState,  4 },
        { ",",          Reader::NonRecursiveParsingArrayContentState,  5 },
        { "{",          Reader::NonRecursiveParsingObjectInitialState, 0 },  // member count
        { "\"array\":[", Reader::NonRecursiveParsingArrayInitialState, 0 },  // element count
        { "1",          Reader::NonRecursiveParsingArrayContentState,  0 },
        { "]",          Reader::NonRecursiveParsingObjectContentState, 0 },  // member count
        { "}",          Reader::NonRecursiveParsingArrayContentState,  5 },  // element count
        { "]",          Reader::NonRecursiveParsingFinishState,        kNoCountCheck }
    };

    Reader::NonRecursiveParsingState current = Reader::NonRecursiveParsingStartState;
    for (size_t i = 0; i < sizeof(steps) / sizeof(steps[0]); ++i) {
        current = reader.Transit<kParseNonRecursiveFlag>(current, json, handler);

        EXPECT_FALSE(reader.HasParseError()) << "step " << i << ": " << steps[i].token;
        EXPECT_EQ(steps[i].expected, current) << "step " << i << ": " << steps[i].token;
        if (steps[i].count != kNoCountCheck)
            EXPECT_EQ(steps[i].count, *reader.stack_.template Top<int>()) << "step " << i << ": " << steps[i].token;
    }
}
// Handler that records the counts reported by the most recent EndObject() and
// EndArray() events; every other event uses the BaseReaderHandler<> defaults.
struct CountHandler : BaseReaderHandler<> {
    // Start from zero so a test that never receives an End event reads a
    // well-defined value instead of an uninitialized member.
    CountHandler() : MemberCount(0), ElementCount(0) {}

    void EndObject(SizeType memberCount) {
        MemberCount = memberCount;
    }
    void EndArray(SizeType elementCount) {
        ElementCount = elementCount;
    }

    SizeType MemberCount;   // count passed to the last EndObject()
    SizeType ElementCount;  // count passed to the last EndArray()
};
// An object whose only member holds an (empty) array must report exactly one member:
// closing the nested array must not disturb the enclosing object's count.
TEST(Reader, NonRecursiveParsing_MemberCounting) {
    StringStream json("{\"array\": []}");
    Reader reader;
    CountHandler handler;

    // Check the result too: a failed parse does not always record an error code.
    EXPECT_TRUE(reader.NonRecursiveParse<kParseNonRecursiveFlag>(json, handler));
    EXPECT_FALSE(reader.HasParseError());
    // Unsigned literal, assuming SizeType is unsigned (as in RapidJSON), to avoid a signed/unsigned comparison.
    EXPECT_EQ(1u, handler.MemberCount);
}
// An array whose only element is an (empty) object must report exactly one element:
// closing the nested object must not disturb the enclosing array's count.
TEST(Reader, NonRecursiveParsing_ElementCounting) {
    StringStream json("[{}]");
    Reader reader;
    CountHandler handler;

    // Check the result too: a failed parse does not always record an error code.
    EXPECT_TRUE(reader.NonRecursiveParse<kParseNonRecursiveFlag>(json, handler));
    EXPECT_FALSE(reader.HasParseError());
    // Unsigned literal, assuming SizeType is unsigned (as in RapidJSON), to avoid a signed/unsigned comparison.
    EXPECT_EQ(1u, handler.ElementCount);
}
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment