Commit 19a2279a authored by Milo Yip's avatar Milo Yip

Merge pull request #76 from thebusytypist/TransitionTable

Iterative Parsing (for issue #35)
parents 9eda05c2 140dc066
...@@ -1221,12 +1221,13 @@ public: ...@@ -1221,12 +1221,13 @@ public:
\tparam SourceEncoding Encoding of input stream \tparam SourceEncoding Encoding of input stream
\tparam InputStream Type of input stream, implementing Stream concept \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename SourceEncoding, typename InputStream> template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
ValueType::SetNull(); // Remove existing root if exist ValueType::SetNull(); // Remove existing root if exist
GenericReader<SourceEncoding, Encoding, Allocator> reader(&GetAllocator()); GenericReader<SourceEncoding, Encoding, Allocator> reader(limit, &GetAllocator());
ClearStackOnExit scope(*this); ClearStackOnExit scope(*this);
parseResult_ = reader.template Parse<parseFlags>(is, *this); parseResult_ = reader.template Parse<parseFlags>(is, *this);
if (parseResult_) { if (parseResult_) {
...@@ -1240,21 +1241,23 @@ public: ...@@ -1240,21 +1241,23 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam InputStream Type of input stream, implementing Stream concept \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename InputStream> template <unsigned parseFlags, typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<parseFlags,Encoding,InputStream>(is); return ParseStream<parseFlags,Encoding,InputStream>(is, limit);
} }
//! Parse JSON text from an input stream (with \ref kParseDefaultFlags) //! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
/*! \tparam InputStream Type of input stream, implementing Stream concept /*! \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed. \param is Input stream to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <typename InputStream> template <typename InputStream>
GenericDocument& ParseStream(InputStream& is) { GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is); return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is, limit);
} }
//!@} //!@}
...@@ -1265,30 +1268,33 @@ public: ...@@ -1265,30 +1268,33 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam SourceEncoding Transcoding from input Encoding \tparam SourceEncoding Transcoding from input Encoding
\param str Mutable zero-terminated string to be parsed. \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags, typename SourceEncoding> template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
GenericInsituStringStream<Encoding> s(str); GenericInsituStringStream<Encoding> s(str);
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s); return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s, limit);
} }
//! Parse JSON text from a mutable string //! Parse JSON text from a mutable string
/*! \tparam parseFlags Combination of \ref ParseFlag. /*! \tparam parseFlags Combination of \ref ParseFlag.
\param str Mutable zero-terminated string to be parsed. \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
template <unsigned parseFlags> template <unsigned parseFlags>
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<parseFlags, Encoding>(str); return ParseInsitu<parseFlags, Encoding>(str, limit);
} }
//! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
/*! \param str Mutable zero-terminated string to be parsed. /*! \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
\return The document itself for fluent API. \return The document itself for fluent API.
*/ */
GenericDocument& ParseInsitu(Ch* str) { GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<kParseDefaultFlags, Encoding>(str); return ParseInsitu<kParseDefaultFlags, Encoding>(str, limit);
} }
//!@} //!@}
...@@ -1299,28 +1305,31 @@ public: ...@@ -1299,28 +1305,31 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\tparam SourceEncoding Transcoding from input Encoding \tparam SourceEncoding Transcoding from input Encoding
\param str Read-only zero-terminated string to be parsed. \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
*/ */
template <unsigned parseFlags, typename SourceEncoding> template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
GenericStringStream<SourceEncoding> s(str); GenericStringStream<SourceEncoding> s(str);
return ParseStream<parseFlags, SourceEncoding>(s); return ParseStream<parseFlags, SourceEncoding>(s, limit);
} }
//! Parse JSON text from a read-only string //! Parse JSON text from a read-only string
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\param str Read-only zero-terminated string to be parsed. \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
*/ */
template <unsigned parseFlags> template <unsigned parseFlags>
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<parseFlags, Encoding>(str); return Parse<parseFlags, Encoding>(str, limit);
} }
//! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
/*! \param str Read-only zero-terminated string to be parsed. /*! \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit (in bytes). Pass 0 for no limit.
*/ */
GenericDocument& Parse(const Ch* str) { GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<kParseDefaultFlags>(str); return Parse<kParseDefaultFlags>(str, limit);
} }
//!@} //!@}
......
...@@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro ...@@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded.");
default: default:
return RAPIDJSON_ERROR_STRING("Unknown error."); return RAPIDJSON_ERROR_STRING("Unknown error.");
......
...@@ -58,7 +58,9 @@ enum ParseErrorCode { ...@@ -58,7 +58,9 @@ enum ParseErrorCode {
kParseErrorNumberMissFraction, //!< Miss fraction part in number. kParseErrorNumberMissFraction, //!< Miss fraction part in number.
kParseErrorNumberMissExponent, //!< Miss exponent in number. kParseErrorNumberMissExponent, //!< Miss exponent in number.
kParseErrorTermination //!< Parsing was terminated. kParseErrorTermination, //!< Parsing was terminated.
kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error.
kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded.
}; };
//! Result of parsing (wraps ParseErrorCode) //! Result of parsing (wraps ParseErrorCode)
......
This diff is collapsed.
...@@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) { ...@@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
} }
} }
// Perf test: runs Reader::Parse with kParseIterativeFlag over json_
// kTrialCount times, feeding events to a default-constructed
// BaseReaderHandler<> (the "dummy" handler of the test name).
// json_ and kTrialCount are not defined in this chunk; presumably they come
// from the RapidJson fixture / perf-test harness.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterative_DummyHandler)) {
for (size_t i = 0; i < kTrialCount; i++) {
// Stream, handler and reader are rebuilt on every trial, so each parse
// starts from the beginning of json_ with a fresh reader.
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
EXPECT_TRUE(reader.Parse<kParseIterativeFlag>(s, h));
}
}
// Perf test: same loop as the iterative dummy-handler test, but the parse
// flags additionally include kParseInsituFlag.
// NOTE(review): the stream is a StringStream built directly over json_, not
// a mutable copy. In-situ parsing is documented elsewhere in this change as
// taking a "mutable zero-terminated string" (see ParseInsitu), so this test
// presumably should copy json_ into a scratch buffer and parse that through
// an in-situ stream each trial -- confirm against the fixture.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) {
for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
EXPECT_TRUE(reader.Parse<kParseIterativeFlag|kParseInsituFlag>(s, h));
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) { TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
for (size_t i = 0; i < kTrialCount; i++) { for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_); StringStream s(json_);
......
...@@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > { ...@@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > {
enum { copyOptimization = 1 }; enum { copyOptimization = 1 };
}; };
} // namespace rapdijson } // namespace rapidjson
#endif #endif
TEST(Reader, CustomStringStream) { TEST(Reader, CustomStringStream) {
...@@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { ...@@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
EXPECT_FALSE(reader.HasParseError()); EXPECT_FALSE(reader.HasParseError());
} }
// Test iterative parsing.
// TESTERRORHANDLING(text, errorCode, offset):
//   Parses the string `text` with Reader::IterativeParse<kParseDefaultFlags>
//   and a default BaseReaderHandler<>, then expects that
//     - the reader reports a parse error,
//     - the error code equals `errorCode`,
//     - the error offset equals `offset`.
//   The expansion is a brace-enclosed scope, so the locals `json`, `handler`
//   and `reader` do not clash when the macro is used several times in one
//   function.
#define TESTERRORHANDLING(text, errorCode, offset)\
{\
StringStream json(text); \
BaseReaderHandler<> handler; \
Reader reader; \
reader.IterativeParse<kParseDefaultFlags>(json, handler); \
EXPECT_TRUE(reader.HasParseError()); \
EXPECT_EQ(errorCode, reader.GetParseErrorCode()); \
EXPECT_EQ(offset, reader.GetErrorOffset()); \
}
// Feeds a series of malformed JSON texts through TESTERRORHANDLING and checks
// the error code and the 0-based error offset reported for each one.
TEST(Reader, IterativeParsing_ErrorHandling) {
// Invalid value token: the bare `a` sits at index 6.
TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6u);
// Document-level errors: empty text, non-container root, trailing content.
TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0u);
TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0u);
TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2u);
// Object syntax errors: missing member name, missing colon, missing
// comma / closing brace.
TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1u);
TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4u);
TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4u);
TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u);
// Array syntax error: elements not separated by commas.
TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u);
}
//! SAX handler that records every event it receives into a fixed-size log.
/*! Each event appends one negative tag (LOG_*) to Logs. EndObject() and
    EndArray() append two entries: the tag followed by the member/element
    count passed by the caller. Tags are negative so they cannot be confused
    with the (non-negative) counts stored next to them.

    All handlers return true; derived test handlers hide individual methods
    to return false instead.

    \tparam Encoding Encoding whose Ch type is used for String().
*/
template<typename Encoding = UTF8<> >
struct IterativeParsingReaderHandler {
    typedef typename Encoding::Ch Ch;

    // Event tags written to Logs.
    const static int LOG_NULL = -1;
    const static int LOG_BOOL = -2;
    const static int LOG_INT = -3;
    const static int LOG_UINT = -4;     // Not used by any visible code; see Uint().
    const static int LOG_INT64 = -5;
    const static int LOG_UINT64 = -6;
    const static int LOG_DOUBLE = -7;
    const static int LOG_STRING = -8;
    const static int LOG_STARTOBJECT = -9;
    const static int LOG_ENDOBJECT = -10;
    const static int LOG_STARTARRAY = -11;
    const static int LOG_ENDARRAY = -12;

    const static size_t LogCapacity = 256;  //!< Maximum number of entries in Logs.
    int Logs[LogCapacity];                  //!< Recorded tags and counts, in event order.
    size_t LogCount;                        //!< Number of valid entries in Logs.

    IterativeParsingReaderHandler() : LogCount(0) {
    }

    bool Null() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_NULL; return true; }

    bool Bool(bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_BOOL; return true; }

    bool Int(int) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }

    // Records LOG_INT, not LOG_UINT. The expectations in the tests of this
    // file list LOG_INT for integer literals, so this is kept as is.
    // NOTE(review): confirm whether the LOG_INT/LOG_UINT merge is intentional.
    bool Uint(unsigned) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }

    bool Int64(int64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT64; return true; }

    bool Uint64(uint64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_UINT64; return true; }

    bool Double(double) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_DOUBLE; return true; }

    bool String(const Ch*, SizeType, bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STRING; return true; }

    bool StartObject() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTOBJECT; return true; }

    //! Records LOG_ENDOBJECT followed by the member count \c c.
    bool EndObject(SizeType c) {
        // Two slots are written below, so both must fit. Checking only
        // LogCount < LogCapacity would let the count land one past the array.
        RAPIDJSON_ASSERT(LogCount + 1 < LogCapacity);
        Logs[LogCount++] = LOG_ENDOBJECT;
        Logs[LogCount++] = static_cast<int>(c);
        return true;
    }

    bool StartArray() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTARRAY; return true; }

    //! Records LOG_ENDARRAY followed by the element count \c c.
    bool EndArray(SizeType c) {
        // Two slots are written below, so both must fit (see EndObject()).
        RAPIDJSON_ASSERT(LogCount + 1 < LogCapacity);
        Logs[LogCount++] = LOG_ENDARRAY;
        Logs[LogCount++] = static_cast<int>(c);
        return true;
    }
};
// Parses a document containing every kind of JSON value with the iterative
// parser and checks the exact sequence of events recorded by
// IterativeParsingReaderHandler (tags, plus the counts that follow each
// LOG_ENDOBJECT / LOG_ENDARRAY).
TEST(Reader, IterativeParsing_General) {
    {
        StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]");
        Reader reader;
        IterativeParsingReaderHandler<> handler;

        ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

        EXPECT_FALSE(r.IsError());
        EXPECT_FALSE(reader.HasParseError());

        // Integer literals are expected as LOG_INT: the handler records
        // LOG_INT from both Int() and Uint().
        int e[] = {
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_STARTOBJECT,
            handler.LOG_STRING,
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_INT,
            handler.LOG_ENDARRAY, 2,
            handler.LOG_ENDOBJECT, 1,
            handler.LOG_NULL,
            handler.LOG_BOOL,
            handler.LOG_BOOL,
            handler.LOG_STRING,
            handler.LOG_DOUBLE,
            handler.LOG_ENDARRAY, 7
        };
        const size_t expectedCount = sizeof(e) / sizeof(e[0]);

        EXPECT_EQ(expectedCount, handler.LogCount);

        // EXPECT_EQ above is non-fatal, so execution continues even when the
        // sizes differ. Compare only the overlapping prefix so a log that is
        // longer than expected cannot index past the end of e[].
        for (size_t i = 0; i < expectedCount && i < handler.LogCount; ++i) {
            EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
        }
    }
}
// Checks the member/element counts passed to EndObject()/EndArray() by the
// iterative parser for empty and non-empty containers nested in an array.
TEST(Reader, IterativeParsing_Count) {
    {
        StringStream is("[{}, {\"k\": 1}, [1], []]");
        Reader reader;
        IterativeParsingReaderHandler<> handler;

        ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);

        EXPECT_FALSE(r.IsError());
        EXPECT_FALSE(reader.HasParseError());

        // The number after each LOG_ENDOBJECT / LOG_ENDARRAY is the count the
        // handler stored right behind the tag.
        int e[] = {
            handler.LOG_STARTARRAY,
            handler.LOG_STARTOBJECT,
            handler.LOG_ENDOBJECT, 0,
            handler.LOG_STARTOBJECT,
            handler.LOG_STRING,
            handler.LOG_INT,
            handler.LOG_ENDOBJECT, 1,
            handler.LOG_STARTARRAY,
            handler.LOG_INT,
            handler.LOG_ENDARRAY, 1,
            handler.LOG_STARTARRAY,
            handler.LOG_ENDARRAY, 0,
            handler.LOG_ENDARRAY, 4
        };
        const size_t expectedCount = sizeof(e) / sizeof(e[0]);

        EXPECT_EQ(expectedCount, handler.LogCount);

        // EXPECT_EQ above is non-fatal, so execution continues even when the
        // sizes differ. Compare only the overlapping prefix so a log that is
        // longer than expected cannot index past the end of e[].
        for (size_t i = 0; i < expectedCount && i < handler.LogCount; ++i) {
            EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
        }
    }
}
// Test iterative parsing on kParseErrorTermination.
// Handler whose StartObject() returns false. The base method is non-virtual,
// so this hides it rather than overriding it; it takes effect because the
// test passes the derived type to Reader::Parse directly.
struct HandlerTerminateAtStartObject : public IterativeParsingReaderHandler<> {
bool StartObject() { return false; }
};
// Handler whose StartArray() returns false; hides the non-virtual base
// method of the same name.
struct HandlerTerminateAtStartArray : public IterativeParsingReaderHandler<> {
bool StartArray() { return false; }
};
// Handler whose EndObject() returns false (the member count is ignored);
// hides the non-virtual base method of the same name.
struct HandlerTerminateAtEndObject : public IterativeParsingReaderHandler<> {
bool EndObject(SizeType) { return false; }
};
// Handler whose EndArray() returns false (the element count is ignored);
// hides the non-virtual base method of the same name.
struct HandlerTerminateAtEndArray : public IterativeParsingReaderHandler<> {
bool EndArray(SizeType) { return false; }
};
// Verifies that when a handler callback returns false, the iterative parser
// reports kParseErrorTermination and an offset equal to the index of the
// bracket that triggered the callback.
TEST(Reader, IterativeParsing_ShortCircuit) {
// StartObject() fails: '{' is at index 4 of "[1, {}]".
{
HandlerTerminateAtStartObject handler;
Reader reader;
StringStream is("[1, {}]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(4u, r.Offset());
}
// StartArray() fails: '[' is at index 6 of "{\"a\": []}".
{
HandlerTerminateAtStartArray handler;
Reader reader;
StringStream is("{\"a\": []}");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(6u, r.Offset());
}
// EndObject() fails: '}' is at index 5 of "[1, {}]".
{
HandlerTerminateAtEndObject handler;
Reader reader;
StringStream is("[1, {}]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(5u, r.Offset());
}
// EndArray() fails: ']' is at index 7 of "{\"a\": []}".
{
HandlerTerminateAtEndArray handler;
Reader reader;
StringStream is("{\"a\": []}");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(7u, r.Offset());
}
}
// Verifies that the iterative parser stops with
// kParseErrorStackSizeLimitExceeded when nesting exceeds the reader's limit.
TEST(Reader, IterativeParsing_LimitStackSize) {
BaseReaderHandler<> handler;
// 20 is passed as the reader's first constructor argument; presumably this is
// the parsing stack size limit in bytes described for ParseStream()'s `limit`
// parameter -- confirm against the Reader constructor.
Reader reader(20);
StringStream is("[[[]]]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorStackSizeLimitExceeded, r.Code());
// Index 2 is the third '[' of the input.
EXPECT_EQ(2u, r.Offset());
}
#ifdef __GNUC__ #ifdef __GNUC__
RAPIDJSON_DIAG_POP RAPIDJSON_DIAG_POP
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment