Commit 19a2279a authored by Milo Yip

Merge pull request #76 from thebusytypist/TransitionTable

Iterative Parsing (for issue #35)
parents 9eda05c2 140dc066
...@@ -1221,12 +1221,13 @@ public: ...@@ -1221,12 +1221,13 @@ public:
\tparam SourceEncoding Encoding of input stream \tparam SourceEncoding Encoding of input stream
\tparam InputStream Type of input stream, implementing Stream concept \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename SourceEncoding, typename InputStream> template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
ValueType::SetNull(); // Remove existing root if exist ValueType::SetNull(); // Remove existing root if exist
GenericReader<SourceEncoding, Encoding, Allocator> reader(&GetAllocator()); GenericReader<SourceEncoding, Encoding, Allocator> reader(limit, &GetAllocator());
ClearStackOnExit scope(*this); ClearStackOnExit scope(*this);
parseResult_ = reader.template Parse<parseFlags>(is, *this); parseResult_ = reader.template Parse<parseFlags>(is, *this);
if (parseResult_) { if (parseResult_) {
...@@ -1240,21 +1241,23 @@ public: ...@@ -1240,21 +1241,23 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam InputStream Type of input stream, implementing Stream concept \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename InputStream> template <unsigned parseFlags, typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<parseFlags,Encoding,InputStream>(is); return ParseStream<parseFlags,Encoding,InputStream>(is, limit);
} }
//! Parse JSON text from an input stream (with \ref kParseDefaultFlags) //! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
/*! \tparam InputStream Type of input stream, implementing Stream concept /*! \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <typename InputStream> template <typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is); return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is, limit);
} }
//!@} //!@}
...@@ -1265,30 +1268,33 @@ public: ...@@ -1265,30 +1268,33 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam SourceEncoding Transcoding from input Encoding \tparam SourceEncoding Transcoding from input Encoding
\param str Mutable zero-terminated string to be parsed. \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename SourceEncoding> template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
GenericInsituStringStream<Encoding> s(str); GenericInsituStringStream<Encoding> s(str);
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s); return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s, limit);
} }
//! Parse JSON text from a mutable string //! Parse JSON text from a mutable string
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\param str Mutable zero-terminated string to be parsed. \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags> template <unsigned parseFlags>
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<parseFlags, Encoding>(str); return ParseInsitu<parseFlags, Encoding>(str, limit);
} }
//! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
/*! \param str Mutable zero-terminated string to be parsed. /*! \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<kParseDefaultFlags, Encoding>(str); return ParseInsitu<kParseDefaultFlags, Encoding>(str, limit);
} }
//!@} //!@}
...@@ -1299,28 +1305,31 @@ public: ...@@ -1299,28 +1305,31 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\tparam SourceEncoding Transcoding from input Encoding \tparam SourceEncoding Transcoding from input Encoding
\param str Read-only zero-terminated string to be parsed. \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/ */
template <unsigned parseFlags, typename SourceEncoding> template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
GenericStringStream<SourceEncoding> s(str); GenericStringStream<SourceEncoding> s(str);
return ParseStream<parseFlags, SourceEncoding>(s); return ParseStream<parseFlags, SourceEncoding>(s, limit);
} }
//! Parse JSON text from a read-only string //! Parse JSON text from a read-only string
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\param str Read-only zero-terminated string to be parsed. \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/ */
template <unsigned parseFlags> template <unsigned parseFlags>
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<parseFlags, Encoding>(str); return Parse<parseFlags, Encoding>(str, limit);
} }
//! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
/*! \param str Read-only zero-terminated string to be parsed. /*! \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/ */
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<kParseDefaultFlags>(str); return Parse<kParseDefaultFlags>(str, limit);
} }
//!@} //!@}
......
...@@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro ...@@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded.");
default: default:
return RAPIDJSON_ERROR_STRING("Unknown error."); return RAPIDJSON_ERROR_STRING("Unknown error.");
......
...@@ -58,7 +58,9 @@ enum ParseErrorCode { ...@@ -58,7 +58,9 @@ enum ParseErrorCode {
kParseErrorNumberMissFraction, //!< Miss fraction part in number. kParseErrorNumberMissFraction, //!< Miss fraction part in number.
kParseErrorNumberMissExponent, //!< Miss exponent in number. kParseErrorNumberMissExponent, //!< Miss exponent in number.
kParseErrorTermination //!< Parsing was terminated. kParseErrorTermination, //!< Parsing was terminated.
kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error.
kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded.
}; };
//! Result of parsing (wraps ParseErrorCode) //! Result of parsing (wraps ParseErrorCode)
......
...@@ -64,7 +64,8 @@ namespace rapidjson { ...@@ -64,7 +64,8 @@ namespace rapidjson {
enum ParseFlag { enum ParseFlag {
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
kParseInsituFlag = 1, //!< In-situ(destructive) parsing. kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
kParseValidateEncodingFlag = 2 //!< Validate encoding of JSON strings. kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
kParseIterativeFlag = 4 //!< Iterative(constant complexity in terms of function call stack size) parsing.
}; };
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
...@@ -127,7 +128,7 @@ namespace internal { ...@@ -127,7 +128,7 @@ namespace internal {
template<typename Stream, int = StreamTraits<Stream>::copyOptimization> template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
class StreamLocalCopy; class StreamLocalCopy;
//! Do copy optimziation. //! Do copy optimization.
template<typename Stream> template<typename Stream>
class StreamLocalCopy<Stream, 1> { class StreamLocalCopy<Stream, 1> {
public: public:
...@@ -272,10 +273,11 @@ public: ...@@ -272,10 +273,11 @@ public:
typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
//! Constructor. //! Constructor.
/*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) /*! \param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
\param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
*/ */
GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseResult_() {} GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit_(limit), parseResult_() {}
//! Parse JSON text. //! Parse JSON text.
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
...@@ -287,9 +289,13 @@ public: ...@@ -287,9 +289,13 @@ public:
*/ */
template <unsigned parseFlags, typename InputStream, typename Handler> template <unsigned parseFlags, typename InputStream, typename Handler>
ParseResult Parse(InputStream& is, Handler& handler) { ParseResult Parse(InputStream& is, Handler& handler) {
if (parseFlags & kParseIterativeFlag)
return IterativeParse<parseFlags>(is, handler);
parseResult_.Clear(); parseResult_.Clear();
ClearStackOnExit scope(*this); ClearStackOnExit scope(*this);
SkipWhitespace(is); SkipWhitespace(is);
if (is.Peek() == '\0') { if (is.Peek() == '\0') {
...@@ -565,8 +571,14 @@ private: ...@@ -565,8 +571,14 @@ private:
if (c == '\\') { // Escape if (c == '\\') { // Escape
is.Take(); is.Take();
Ch e = is.Take(); Ch e = is.Take();
if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
if (!(parseFlags & kParseInsituFlag)) {
if (!CheckStackSpaceQuota(sizeof(Ch))) {
RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1);
}
}
os.Put(escape[(unsigned char)e]); os.Put(escape[(unsigned char)e]);
}
else if (e == 'u') { // Unicode else if (e == 'u') { // Unicode
unsigned codepoint = ParseHex4(is); unsigned codepoint = ParseHex4(is);
if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
...@@ -585,6 +597,11 @@ private: ...@@ -585,6 +597,11 @@ private:
} }
else if (c == '"') { // Closing double quote else if (c == '"') { // Closing double quote
is.Take(); is.Take();
if (!(parseFlags & kParseInsituFlag)) {
if (!CheckStackSpaceQuota(sizeof(Ch))) {
RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1);
}
}
os.Put('\0'); // null-terminate the string os.Put('\0'); // null-terminate the string
return; return;
} }
...@@ -786,8 +803,434 @@ private: ...@@ -786,8 +803,434 @@ private:
} }
} }
// Iterative Parsing
// States
// States of the iterative parser's state machine.
// The enumerator order is significant: Predict() uses the numeric value of a
// state as the row index of its transition table, and the rows there are
// listed in exactly this order.
enum IterativeParsingState {
// Initial state used by IterativeParse() before any token is consumed.
IterativeParsingStartState = 0,
// Reached when the topmost object/array has been closed (see Transit()).
IterativeParsingFinishState,
// Returned by Predict()/Transit() when no valid transition exists or parsing failed.
IterativeParsingErrorState,
// Object states
IterativeParsingObjectInitialState,
IterativeParsingMemberKeyState,
IterativeParsingKeyValueDelimiterState,
IterativeParsingMemberValueState,
IterativeParsingMemberDelimiterState,
IterativeParsingObjectFinishState,
// Array states
IterativeParsingArrayInitialState,
IterativeParsingElementState,
IterativeParsingElementDelimiterState,
IterativeParsingArrayFinishState,
// Number of states; used as the first dimension of the table in Predict().
cIterativeParsingStateCount
};
// Tokens
// Lookahead token kinds produced by Tokenize().
// The enumerator order is significant: Predict() uses the numeric value of a
// token as the column index of its transition table.
enum IterativeParsingToken {
IterativeParsingLeftBracketToken = 0,
IterativeParsingRightBracketToken,
IterativeParsingLeftCurlyBracketToken,
IterativeParsingRightCurlyBracketToken,
IterativeParsingCommaToken,
IterativeParsingColonToken,
IterativeParsingStringToken,
IterativeParsingFalseToken,
IterativeParsingTrueToken,
IterativeParsingNullToken,
// Also the fallback: Tokenize() returns this for every character it does not recognize.
IterativeParsingNumberToken,
// Number of token kinds; used as the second dimension of the table in Predict().
cIterativeParsingTokenCount
};
// Classifies a single lookahead character into a token kind.
// Only the character passed in is inspected. Any character that is not one of
// the structural characters, a double quote, 'f', 't' or 'n' is reported as a
// number token.
IterativeParsingToken Tokenize(Ch c) {
    // Structural characters.
    if (c == '[') return IterativeParsingLeftBracketToken;
    if (c == ']') return IterativeParsingRightBracketToken;
    if (c == '{') return IterativeParsingLeftCurlyBracketToken;
    if (c == '}') return IterativeParsingRightCurlyBracketToken;
    if (c == ',') return IterativeParsingCommaToken;
    if (c == ':') return IterativeParsingColonToken;

    // First character of a scalar value.
    if (c == '"') return IterativeParsingStringToken;
    if (c == 'f') return IterativeParsingFalseToken;
    if (c == 't') return IterativeParsingTrueToken;
    if (c == 'n') return IterativeParsingNullToken;

    // Everything else is treated as the start of a number.
    return IterativeParsingNumberToken;
}
// Looks up the candidate destination state for (state, token) in a static
// transition table.
// \param state Current state; selects the table row.
// \param token Lookahead token from Tokenize(); selects the table column.
// \return The table entry cast back to IterativeParsingState.
//         IterativeParsingErrorState means no valid transition exists.
// This function only reads the table. Consuming input, calling the handler and
// pushing/popping nested scopes (the "push ..." notes below) happen in Transit().
IterativeParsingState Predict(IterativeParsingState state, IterativeParsingToken token) {
// current state x one lookahead token -> new state
// Entries are stored as char; rows follow the IterativeParsingState order and
// columns follow the IterativeParsingToken order.
static const char G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = {
// Start
{
IterativeParsingArrayInitialState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingObjectInitialState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingErrorState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// Finish(sink state)
{
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState
},
// Error(sink state)
{
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState
},
// ObjectInitial
{
IterativeParsingErrorState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingObjectFinishState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingMemberKeyState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// MemberKey
{
IterativeParsingErrorState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingKeyValueDelimiterState, // Colon
IterativeParsingErrorState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// KeyValueDelimiter
{
IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
IterativeParsingErrorState, // Right bracket
IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingMemberValueState, // String
IterativeParsingMemberValueState, // False
IterativeParsingMemberValueState, // True
IterativeParsingMemberValueState, // Null
IterativeParsingMemberValueState // Number
},
// MemberValue
{
IterativeParsingErrorState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingObjectFinishState, // Right curly bracket
IterativeParsingMemberDelimiterState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingErrorState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// MemberDelimiter
{
IterativeParsingErrorState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingMemberKeyState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// ObjectFinish(sink state)
{
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState
},
// ArrayInitial
{
IterativeParsingArrayInitialState, // Left bracket(push Element state)
IterativeParsingArrayFinishState, // Right bracket
IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingElementState, // String
IterativeParsingElementState, // False
IterativeParsingElementState, // True
IterativeParsingElementState, // Null
IterativeParsingElementState // Number
},
// Element
{
IterativeParsingErrorState, // Left bracket
IterativeParsingArrayFinishState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingElementDelimiterState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingErrorState, // String
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// ElementDelimiter
{
IterativeParsingArrayInitialState, // Left bracket(push Element state)
IterativeParsingErrorState, // Right bracket
IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingErrorState, // Colon
IterativeParsingElementState, // String
IterativeParsingElementState, // False
IterativeParsingElementState, // True
IterativeParsingElementState, // Null
IterativeParsingElementState // Number
},
// ArrayFinish(sink state)
{
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState
}
}; // End of G
// The table stores plain chars, so convert the entry back to the enum type.
return (IterativeParsingState)G[state][token];
}
// Make an advance in the token stream and state based on the candidate destination state which was returned by Predict().
// May return a new state on state pop.
// \param src     State the machine is currently in.
// \param token   Lookahead token that was used to compute dst.
// \param dst     Candidate destination state.
// \param is      Input stream; the current token is consumed from it on success.
// \param handler Receives StartObject/EndObject/StartArray/EndArray calls here; scalar values and
//                keys are reported through ParseValue()/ParseString().
// \return The state to continue from, or IterativeParsingErrorState on failure. Some failure
//         paths set the parse error themselves (stack limit, handler termination); the others
//         leave it to the caller.
template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParsingState Transit(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) {
// c: member/element count popped from the stack, n: state to push or the state popped,
// hr: result returned by the handler.
int c = 0;
IterativeParsingState n;
bool hr;
switch (dst) {
case IterativeParsingStartState:
// Start is never a destination in the transition table.
RAPIDJSON_ASSERT(false);
return IterativeParsingErrorState;
case IterativeParsingFinishState:
return dst;
case IterativeParsingErrorState:
return dst;
case IterativeParsingObjectInitialState:
case IterativeParsingArrayInitialState:
// Push the state(Element or MemberValue) if we are nested in another array or value of member.
// In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
// For any other src (e.g. Start) the src state itself is pushed.
n = src;
if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
n = IterativeParsingElementState;
else if (src == IterativeParsingKeyValueDelimiterState)
n = IterativeParsingMemberValueState;
// Check stack space limit.
// One frame is a saved state plus an int counter.
if (!CheckStackSpaceQuota(sizeof(IterativeParsingState) + sizeof(int))) {
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell());
return IterativeParsingErrorState;
}
// Push current state.
*stack_.template Push<IterativeParsingState>(1) = n;
// Initialize and push the member/element count.
*stack_.template Push<int>(1) = 0;
// Call handler
if (dst == IterativeParsingObjectInitialState)
hr = handler.StartObject();
else
hr = handler.StartArray();
// On handler short circuits the parsing.
if (!hr) {
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
return IterativeParsingErrorState;
}
else {
// Consume the opening bracket only after the handler accepted it.
is.Take();
return dst;
}
case IterativeParsingMemberKeyState:
// The key is parsed by ParseString(); success is judged by HasParseError() afterwards.
ParseString<parseFlags>(is, handler);
if (HasParseError())
return IterativeParsingErrorState;
else
return dst;
case IterativeParsingKeyValueDelimiterState:
if (token == IterativeParsingColonToken) {
is.Take();
return dst;
}
else
return IterativeParsingErrorState;
case IterativeParsingMemberValueState:
// Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
ParseValue<parseFlags>(is, handler);
if (HasParseError()) {
return IterativeParsingErrorState;
}
return dst;
case IterativeParsingElementState:
// Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
ParseValue<parseFlags>(is, handler);
if (HasParseError()) {
return IterativeParsingErrorState;
}
return dst;
case IterativeParsingMemberDelimiterState:
case IterativeParsingElementDelimiterState:
// Consume the comma.
is.Take();
// Update member/element count.
*stack_.template Top<int>() = *stack_.template Top<int>() + 1;
return dst;
case IterativeParsingObjectFinishState:
// Get member count.
c = *stack_.template Pop<int>(1);
// If the object is not empty, count the last member.
// (The counter on the stack is only incremented when a comma is consumed.)
if (src == IterativeParsingMemberValueState)
++c;
// Restore the state.
n = *stack_.template Pop<IterativeParsingState>(1);
// Transit to Finish state if this is the topmost scope.
if (n == IterativeParsingStartState)
n = IterativeParsingFinishState;
// Call handler
hr = handler.EndObject(c);
// On handler short circuits the parsing.
if (!hr) {
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
return IterativeParsingErrorState;
}
else {
// Consume the closing curly bracket and continue from the restored state.
is.Take();
return n;
}
case IterativeParsingArrayFinishState:
// Get element count.
c = *stack_.template Pop<int>(1);
// If the array is not empty, count the last element.
// (The counter on the stack is only incremented when a comma is consumed.)
if (src == IterativeParsingElementState)
++c;
// Restore the state.
n = *stack_.template Pop<IterativeParsingState>(1);
// Transit to Finish state if this is the topmost scope.
if (n == IterativeParsingStartState)
n = IterativeParsingFinishState;
// Call handler
hr = handler.EndArray(c);
// On handler short circuits the parsing.
if (!hr) {
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
return IterativeParsingErrorState;
}
else {
// Consume the closing square bracket and continue from the restored state.
is.Take();
return n;
}
default:
RAPIDJSON_ASSERT(false);
return IterativeParsingErrorState;
}
}
// Records the parse error that corresponds to the state the machine was in
// when it could not make progress. Does nothing if an error is already set.
// \param src State the machine was in when the failure was detected.
// \param is  Input stream; its current position is reported as the error offset.
template <typename InputStream>
void HandleError(IterativeParsingState src, InputStream& is) {
    // Keep the error that has already been recorded.
    if (HasParseError())
        return;

    // Pick the error code from the source state; anything without a dedicated
    // code is reported as an unspecific syntax error.
    ParseErrorCode code = kParseErrorUnspecificSyntaxError;
    switch (src) {
    case IterativeParsingStartState:
        // Nothing parsed yet: either the input is empty or the root is not a compound value.
        code = (is.Peek() == '\0') ? kParseErrorDocumentEmpty
                                   : kParseErrorDocumentRootNotObjectOrArray;
        break;
    case IterativeParsingFinishState:
        code = kParseErrorDocumentRootNotSingular;
        break;
    case IterativeParsingObjectInitialState:
    case IterativeParsingMemberDelimiterState:
        code = kParseErrorObjectMissName;
        break;
    case IterativeParsingMemberKeyState:
        code = kParseErrorObjectMissColon;
        break;
    case IterativeParsingMemberValueState:
        code = kParseErrorObjectMissCommaOrCurlyBracket;
        break;
    case IterativeParsingElementState:
        code = kParseErrorArrayMissCommaOrSquareBracket;
        break;
    default:
        break;
    }

    RAPIDJSON_PARSE_ERROR(code, is.Tell());
}
// Parses the input with the table-driven state machine instead of recursion.
// Each round peeks one character, turns it into a token (Tokenize), looks up a
// candidate state (Predict) and performs the transition (Transit).
// \tparam parseFlags Parse flags forwarded to Transit().
// \param is      Input stream to be parsed.
// \param handler Handler that receives the parse events.
// \return The parse result; it carries an error if the machine failed or the
//         input ended before the Finish state was reached.
template <unsigned parseFlags, typename InputStream, typename Handler>
ParseResult IterativeParse(InputStream& is, Handler& handler) {
    parseResult_.Clear();
    ClearStackOnExit scope(*this);

    IterativeParsingState state = IterativeParsingStartState;

    // Whitespace is skipped before the first token and after every successful transition.
    for (SkipWhitespace(is); is.Peek() != '\0'; SkipWhitespace(is)) {
        const IterativeParsingToken lookahead = Tokenize(is.Peek());
        const IterativeParsingState candidate = Predict(state, lookahead);
        const IterativeParsingState next = Transit<parseFlags>(state, lookahead, candidate, is, handler);

        if (next == IterativeParsingErrorState) {
            // Report the error relative to the state we failed to leave.
            HandleError(state, is);
            break;
        }

        state = next;
    }

    // Handle the end of file: anything but Finish means the document is incomplete.
    // HandleError() is a no-op when an error was already recorded above.
    if (state != IterativeParsingFinishState)
        HandleError(state, is);

    return parseResult_;
}
// Tells whether `size` more bytes may be placed on stack_ without exceeding kStackSizeLimit_.
// \param size Number of bytes about to be pushed.
// \return true if no limit is configured (limit is 0) or the current stack size plus `size`
//         stays within the limit.
bool CheckStackSpaceQuota(size_t size) const {
    // A limit of 0 disables the check.
    if (kStackSizeLimit_ == 0)
        return true;

    return stack_.GetSize() + size <= kStackSizeLimit_;
}
static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
internal::Stack<Allocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. internal::Stack<Allocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
const size_t kStackSizeLimit_; //!< Stack size limit(in bytes). A value of 0 means no limit.
ParseResult parseResult_; ParseResult parseResult_;
}; // class GenericReader }; // class GenericReader
......
...@@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) { ...@@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
} }
} }
// Parses json_ kTrialCount times with kParseIterativeFlag, using a fresh
// stream, BaseReaderHandler and Reader for every trial, and expects each
// parse to succeed.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterative_DummyHandler)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        StringStream stream(json_);
        BaseReaderHandler<> handler;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseIterativeFlag>(stream, handler));
    }
}
// Parses json_ kTrialCount times with kParseIterativeFlag|kParseInsituFlag and
// expects each parse to succeed.
// NOTE(review): kParseInsituFlag is documented as in-situ (destructive) parsing, yet the
// stream here is a StringStream built directly over json_. In-situ parsing presumably needs
// a writable stream over a mutable copy of the text - confirm whether this should parse a
// per-trial mutable copy through an in-situ stream instead.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) {
for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
EXPECT_TRUE(reader.Parse<kParseIterativeFlag|kParseInsituFlag>(s, h));
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) { TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
for (size_t i = 0; i < kTrialCount; i++) { for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_); StringStream s(json_);
......
...@@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > { ...@@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > {
enum { copyOptimization = 1 }; enum { copyOptimization = 1 };
}; };
} // namespace rapdijson } // namespace rapidjson
#endif #endif
TEST(Reader, CustomStringStream) { TEST(Reader, CustomStringStream) {
...@@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { ...@@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
EXPECT_FALSE(reader.HasParseError()); EXPECT_FALSE(reader.HasParseError());
} }
// Test iterative parsing.
// Runs Reader::IterativeParse<kParseDefaultFlags> on `text` with a fresh Reader and
// BaseReaderHandler, then expects the reader to report a parse error whose code is
// `errorCode` and whose offset is `offset`.
// The expansion is a bare { ... } block, so use it as a complete statement.
#define TESTERRORHANDLING(text, errorCode, offset)\
{\
StringStream json(text); \
BaseReaderHandler<> handler; \
Reader reader; \
reader.IterativeParse<kParseDefaultFlags>(json, handler); \
EXPECT_TRUE(reader.HasParseError()); \
EXPECT_EQ(errorCode, reader.GetParseErrorCode()); \
EXPECT_EQ(offset, reader.GetErrorOffset()); \
}
// Checks the error code and error offset reported by the iterative parser for a
// set of malformed inputs. Offsets are character positions within the input text.
TEST(Reader, IterativeParsing_ErrorHandling) {
// Invalid member value: the bare 'a' at offset 6.
TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6u);
// Empty input.
TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0u);
// Root is a scalar rather than an object or array.
TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0u);
// A second root follows the first one.
TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2u);
// Object member does not start with a name.
TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1u);
// A key is followed by ',' or '}' instead of ':'.
TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4u);
TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4u);
// Input ends after a member value without ',' or '}'.
TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u);
// Array elements are not separated by ','.
TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u);
}
// Handler used by the iterative-parsing tests. It records every event it receives as an
// integer code in Logs so a test can compare the event sequence with an expected array.
// EndObject/EndArray record two entries: the event code followed by the reported count.
template<typename Encoding = UTF8<> >
struct IterativeParsingReaderHandler {
    typedef typename Encoding::Ch Ch;

    // Event codes. They are negative, unlike the member/element counts that
    // EndObject/EndArray store next to them in Logs.
    const static int LOG_NULL = -1;
    const static int LOG_BOOL = -2;
    const static int LOG_INT = -3;
    const static int LOG_UINT = -4;
    const static int LOG_INT64 = -5;
    const static int LOG_UINT64 = -6;
    const static int LOG_DOUBLE = -7;
    const static int LOG_STRING = -8;
    const static int LOG_STARTOBJECT = -9;
    const static int LOG_ENDOBJECT = -10;
    const static int LOG_STARTARRAY = -11;
    const static int LOG_ENDARRAY = -12;

    const static size_t LogCapacity = 256;  // Maximum number of entries in Logs.
    int Logs[LogCapacity];                  // Recorded entries; only [0, LogCount) are valid.
    size_t LogCount;                        // Number of entries recorded so far.

    IterativeParsingReaderHandler() : LogCount(0) {
    }

    bool Null() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_NULL; return true; }

    bool Bool(bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_BOOL; return true; }

    bool Int(int) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }

    // Recorded as LOG_INT (not LOG_UINT); the expectations in IterativeParsing_General rely on
    // unsigned values being logged with the same code as signed ones.
    bool Uint(unsigned) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }

    bool Int64(int64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT64; return true; }

    bool Uint64(uint64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_UINT64; return true; }

    bool Double(double) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_DOUBLE; return true; }

    bool String(const Ch*, SizeType, bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STRING; return true; }

    bool StartObject() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTOBJECT; return true; }

    // Records LOG_ENDOBJECT followed by the member count c.
    bool EndObject(SizeType c) {
        // Two entries are written, so both slots must be available.
        RAPIDJSON_ASSERT(LogCount + 1 < LogCapacity);
        Logs[LogCount++] = LOG_ENDOBJECT;
        Logs[LogCount++] = (int)c;
        return true;
    }

    bool StartArray() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTARRAY; return true; }

    // Records LOG_ENDARRAY followed by the element count c.
    bool EndArray(SizeType c) {
        // Two entries are written, so both slots must be available.
        RAPIDJSON_ASSERT(LogCount + 1 < LogCapacity);
        Logs[LogCount++] = LOG_ENDARRAY;
        Logs[LogCount++] = (int)c;
        return true;
    }
};
// Parses a document containing every kind of value and checks that the
// handler received the expected sequence of events.
TEST(Reader, IterativeParsing_General) {
    {
        StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]");
        Reader reader;
        IterativeParsingReaderHandler<> handler;

        ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

        EXPECT_FALSE(r.IsError());
        EXPECT_FALSE(reader.HasParseError());

        // Expected trace: one LOG_* tag per event; LOG_ENDARRAY and
        // LOG_ENDOBJECT are each followed by the logged count.
        int e[] = {
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_STARTOBJECT,
            handler.LOG_STRING,
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_INT,
            handler.LOG_ENDARRAY, 2,
            handler.LOG_ENDOBJECT, 1,
            handler.LOG_NULL,
            handler.LOG_BOOL,
            handler.LOG_BOOL,
            handler.LOG_STRING,
            handler.LOG_DOUBLE,
            handler.LOG_ENDARRAY, 7
        };
        const size_t expectedCount = sizeof(e) / sizeof(e[0]);

        EXPECT_EQ(expectedCount, handler.LogCount);

        // EXPECT_EQ above is non-fatal, so on a count mismatch execution
        // continues. Compare only the common prefix so that a longer-than-
        // expected log cannot index past the end of e[].
        for (size_t i = 0; i < handler.LogCount && i < expectedCount; ++i) {
            EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
        }
    }
}
// Checks the counts passed to EndObject()/EndArray() for empty and non-empty
// containers, including containers nested inside an array.
TEST(Reader, IterativeParsing_Count) {
    {
        StringStream is("[{}, {\"k\": 1}, [1], []]");
        Reader reader;
        IterativeParsingReaderHandler<> handler;

        ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

        EXPECT_FALSE(r.IsError());
        EXPECT_FALSE(reader.HasParseError());

        // Expected trace: each LOG_ENDOBJECT / LOG_ENDARRAY tag is followed
        // by the logged count.
        int e[] = {
            handler.LOG_STARTARRAY,
            handler.LOG_STARTOBJECT,
            handler.LOG_ENDOBJECT, 0,
            handler.LOG_STARTOBJECT,
            handler.LOG_STRING,
            handler.LOG_INT,
            handler.LOG_ENDOBJECT, 1,
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_ENDARRAY, 1,
            handler.LOG_STARTARRAY,
            handler.LOG_ENDARRAY, 0,
            handler.LOG_ENDARRAY, 4
        };
        const size_t expectedCount = sizeof(e) / sizeof(e[0]);

        EXPECT_EQ(expectedCount, handler.LogCount);

        // EXPECT_EQ above is non-fatal, so on a count mismatch execution
        // continues. Compare only the common prefix so that a longer-than-
        // expected log cannot index past the end of e[].
        for (size_t i = 0; i < handler.LogCount && i < expectedCount; ++i) {
            EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
        }
    }
}
// Test iterative parsing on kParseErrorTermination.
// Handler whose StartObject() returns false. Every other event falls through
// to IterativeParsingReaderHandler<>.
struct HandlerTerminateAtStartObject : IterativeParsingReaderHandler<> {
    // Hides the base-class StartObject(); nothing is logged for this event.
    bool StartObject() {
        return false;
    }
};
// Handler whose StartArray() returns false. Every other event falls through
// to IterativeParsingReaderHandler<>.
struct HandlerTerminateAtStartArray : IterativeParsingReaderHandler<> {
    // Hides the base-class StartArray(); nothing is logged for this event.
    bool StartArray() {
        return false;
    }
};
// Handler whose EndObject() returns false. Every other event falls through
// to IterativeParsingReaderHandler<>.
struct HandlerTerminateAtEndObject : IterativeParsingReaderHandler<> {
    // Hides the base-class EndObject(); the count is ignored and nothing is
    // logged for this event.
    bool EndObject(SizeType) {
        return false;
    }
};
// Handler whose EndArray() returns false. Every other event falls through
// to IterativeParsingReaderHandler<>.
struct HandlerTerminateAtEndArray : IterativeParsingReaderHandler<> {
    // Hides the base-class EndArray(); the count is ignored and nothing is
    // logged for this event.
    bool EndArray(SizeType) {
        return false;
    }
};
// For each handler that returns false from one event, expects the parse to
// report kParseErrorTermination at the offset of the token for that event.
TEST(Reader, IterativeParsing_ShortCircuit) {
    // StartObject() returns false: expected offset 4 is the `{`.
    {
        StringStream input("[1, {}]");
        Reader reader;
        HandlerTerminateAtStartObject handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(input, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(4u, result.Offset());
    }

    // StartArray() returns false: expected offset 6 is the `[`.
    {
        StringStream input("{\"a\": []}");
        Reader reader;
        HandlerTerminateAtStartArray handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(input, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(6u, result.Offset());
    }

    // EndObject() returns false: expected offset 5 is the `}`.
    {
        StringStream input("[1, {}]");
        Reader reader;
        HandlerTerminateAtEndObject handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(input, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(5u, result.Offset());
    }

    // EndArray() returns false: expected offset 7 is the `]`.
    {
        StringStream input("{\"a\": []}");
        Reader reader;
        HandlerTerminateAtEndArray handler;

        ParseResult result = reader.Parse<kParseIterativeFlag>(input, handler);

        EXPECT_TRUE(reader.HasParseError());
        EXPECT_EQ(kParseErrorTermination, result.Code());
        EXPECT_EQ(7u, result.Offset());
    }
}
// Constructs the reader with the argument 20 and expects three nested arrays
// to fail with kParseErrorStackSizeLimitExceeded at offset 2 (the third `[`).
// NOTE(review): 20 is presumably the parsing stack size limit in bytes --
// confirm against the Reader constructor.
TEST(Reader, IterativeParsing_LimitStackSize) {
    StringStream input("[[[]]]");
    Reader reader(20);
    BaseReaderHandler<> handler;

    ParseResult result = reader.Parse<kParseIterativeFlag>(input, handler);

    EXPECT_TRUE(reader.HasParseError());
    EXPECT_EQ(kParseErrorStackSizeLimitExceeded, result.Code());
    EXPECT_EQ(2u, result.Offset());
}
#ifdef __GNUC__ #ifdef __GNUC__
RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_POP
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment