Commit 19a2279a authored by Milo Yip's avatar Milo Yip

Merge pull request #76 from thebusytypist/TransitionTable

Iterative Parsing (for issue #35)
parents 9eda05c2 140dc066
......@@ -1221,12 +1221,13 @@ public:
\tparam SourceEncoding Encoding of input stream
\tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
GenericDocument& ParseStream(InputStream& is) {
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
ValueType::SetNull(); // Remove existing root if exist
GenericReader<SourceEncoding, Encoding, Allocator> reader(&GetAllocator());
GenericReader<SourceEncoding, Encoding, Allocator> reader(limit, &GetAllocator());
ClearStackOnExit scope(*this);
parseResult_ = reader.template Parse<parseFlags>(is, *this);
if (parseResult_) {
......@@ -1240,21 +1241,23 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
template <unsigned parseFlags, typename InputStream>
GenericDocument& ParseStream(InputStream& is) {
return ParseStream<parseFlags,Encoding,InputStream>(is);
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<parseFlags,Encoding,InputStream>(is, limit);
}
//! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
/*! \tparam InputStream Type of input stream, implementing Stream concept
\param is Input stream to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
template <typename InputStream>
GenericDocument& ParseStream(InputStream& is) {
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is);
GenericDocument& ParseStream(InputStream& is, size_t limit = 0) {
return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is, limit);
}
//!@}
......@@ -1265,30 +1268,33 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag.
\tparam SourceEncoding Transcoding from input Encoding
\param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& ParseInsitu(Ch* str) {
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
GenericInsituStringStream<Encoding> s(str);
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s);
return ParseStream<parseFlags | kParseInsituFlag, SourceEncoding>(s, limit);
}
//! Parse JSON text from a mutable string
/*! \tparam parseFlags Combination of \ref ParseFlag.
\param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
template <unsigned parseFlags>
GenericDocument& ParseInsitu(Ch* str) {
return ParseInsitu<parseFlags, Encoding>(str);
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<parseFlags, Encoding>(str, limit);
}
//! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
/*! \param str Mutable zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
\return The document itself for fluent API.
*/
GenericDocument& ParseInsitu(Ch* str) {
return ParseInsitu<kParseDefaultFlags, Encoding>(str);
GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) {
return ParseInsitu<kParseDefaultFlags, Encoding>(str, limit);
}
//!@}
......@@ -1299,28 +1305,31 @@ public:
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\tparam SourceEncoding Transcoding from input Encoding
\param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const Ch* str) {
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
GenericStringStream<SourceEncoding> s(str);
return ParseStream<parseFlags, SourceEncoding>(s);
return ParseStream<parseFlags, SourceEncoding>(s, limit);
}
//! Parse JSON text from a read-only string
/*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
\param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/
template <unsigned parseFlags>
GenericDocument& Parse(const Ch* str) {
return Parse<parseFlags, Encoding>(str);
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<parseFlags, Encoding>(str, limit);
}
//! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
/*! \param str Read-only zero-terminated string to be parsed.
\param limit Parsing stack size limit(in bytes). Pass 0 means no limit.
*/
GenericDocument& Parse(const Ch* str) {
return Parse<kParseDefaultFlags>(str);
GenericDocument& Parse(const Ch* str, size_t limit = 0) {
return Parse<kParseDefaultFlags>(str, limit);
}
//!@}
......
......@@ -39,6 +39,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro
case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded.");
default:
return RAPIDJSON_ERROR_STRING("Unknown error.");
......
......@@ -58,7 +58,9 @@ enum ParseErrorCode {
kParseErrorNumberMissFraction, //!< Miss fraction part in number.
kParseErrorNumberMissExponent, //!< Miss exponent in number.
kParseErrorTermination //!< Parsing was terminated.
kParseErrorTermination, //!< Parsing was terminated.
kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error.
kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded.
};
//! Result of parsing (wraps ParseErrorCode)
......
This diff is collapsed.
......@@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterative_DummyHandler)) {
for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
EXPECT_TRUE(reader.Parse<kParseIterativeFlag>(s, h));
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) {
for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
EXPECT_TRUE(reader.Parse<kParseIterativeFlag|kParseInsituFlag>(s, h));
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
for (size_t i = 0; i < kTrialCount; i++) {
StringStream s(json_);
......
......@@ -651,7 +651,7 @@ struct StreamTraits<CustomStringStream<Encoding> > {
enum { copyOptimization = 1 };
};
} // namespace rapdijson
} // namespace rapidjson
#endif
TEST(Reader, CustomStringStream) {
......@@ -707,6 +707,243 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
EXPECT_FALSE(reader.HasParseError());
}
// Test iterative parsing.
#define TESTERRORHANDLING(text, errorCode, offset)\
{\
StringStream json(text); \
BaseReaderHandler<> handler; \
Reader reader; \
reader.IterativeParse<kParseDefaultFlags>(json, handler); \
EXPECT_TRUE(reader.HasParseError()); \
EXPECT_EQ(errorCode, reader.GetParseErrorCode()); \
EXPECT_EQ(offset, reader.GetErrorOffset()); \
}
TEST(Reader, IterativeParsing_ErrorHandling) {
TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6u);
TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0u);
TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0u);
TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2u);
TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1u);
TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4u);
TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4u);
TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u);
TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u);
}
template<typename Encoding = UTF8<> >
struct IterativeParsingReaderHandler {
typedef typename Encoding::Ch Ch;
const static int LOG_NULL = -1;
const static int LOG_BOOL = -2;
const static int LOG_INT = -3;
const static int LOG_UINT = -4;
const static int LOG_INT64 = -5;
const static int LOG_UINT64 = -6;
const static int LOG_DOUBLE = -7;
const static int LOG_STRING = -8;
const static int LOG_STARTOBJECT = -9;
const static int LOG_ENDOBJECT = -10;
const static int LOG_STARTARRAY = -11;
const static int LOG_ENDARRAY = -12;
const static size_t LogCapacity = 256;
int Logs[LogCapacity];
size_t LogCount;
IterativeParsingReaderHandler() : LogCount(0) {
}
bool Null() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_NULL; return true; }
bool Bool(bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_BOOL; return true; }
bool Int(int) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }
bool Uint(unsigned) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; }
bool Int64(int64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT64; return true; }
bool Uint64(uint64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_UINT64; return true; }
bool Double(double) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_DOUBLE; return true; }
bool String(const Ch*, SizeType, bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STRING; return true; }
bool StartObject() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTOBJECT; return true; }
bool EndObject(SizeType c) {
RAPIDJSON_ASSERT(LogCount < LogCapacity);
Logs[LogCount++] = LOG_ENDOBJECT;
Logs[LogCount++] = (int)c;
return true;
}
bool StartArray() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTARRAY; return true; }
bool EndArray(SizeType c) {
RAPIDJSON_ASSERT(LogCount < LogCapacity);
Logs[LogCount++] = LOG_ENDARRAY;
Logs[LogCount++] = (int)c;
return true;
}
};
TEST(Reader, IterativeParsing_General) {
{
StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]");
Reader reader;
IterativeParsingReaderHandler<> handler;
ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);
EXPECT_FALSE(r.IsError());
EXPECT_FALSE(reader.HasParseError());
int e[] = {
handler.LOG_STARTARRAY,
handler.LOG_INT,
handler.LOG_STARTOBJECT,
handler.LOG_STRING,
handler.LOG_STARTARRAY,
handler.LOG_INT,
handler.LOG_INT,
handler.LOG_ENDARRAY, 2,
handler.LOG_ENDOBJECT, 1,
handler.LOG_NULL,
handler.LOG_BOOL,
handler.LOG_BOOL,
handler.LOG_STRING,
handler.LOG_DOUBLE,
handler.LOG_ENDARRAY, 7
};
EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount);
for (size_t i = 0; i < handler.LogCount; ++i) {
EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
}
}
}
TEST(Reader, IterativeParsing_Count) {
{
StringStream is("[{}, {\"k\": 1}, [1], []]");
Reader reader;
IterativeParsingReaderHandler<> handler;
ParseResult r = reader.IterativeParse<kParseIterativeFlag>(is, handler);
EXPECT_FALSE(r.IsError());
EXPECT_FALSE(reader.HasParseError());
int e[] = {
handler.LOG_STARTARRAY,
handler.LOG_STARTOBJECT,
handler.LOG_ENDOBJECT, 0,
handler.LOG_STARTOBJECT,
handler.LOG_STRING,
handler.LOG_INT,
handler.LOG_ENDOBJECT, 1,
handler.LOG_STARTARRAY,
handler.LOG_INT,
handler.LOG_ENDARRAY, 1,
handler.LOG_STARTARRAY,
handler.LOG_ENDARRAY, 0,
handler.LOG_ENDARRAY, 4
};
EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount);
for (size_t i = 0; i < handler.LogCount; ++i) {
EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i;
}
}
}
// Test iterative parsing on kParseErrorTermination.
struct HandlerTerminateAtStartObject : public IterativeParsingReaderHandler<> {
bool StartObject() { return false; }
};
struct HandlerTerminateAtStartArray : public IterativeParsingReaderHandler<> {
bool StartArray() { return false; }
};
struct HandlerTerminateAtEndObject : public IterativeParsingReaderHandler<> {
bool EndObject(SizeType) { return false; }
};
struct HandlerTerminateAtEndArray : public IterativeParsingReaderHandler<> {
bool EndArray(SizeType) { return false; }
};
TEST(Reader, IterativeParsing_ShortCircuit) {
{
HandlerTerminateAtStartObject handler;
Reader reader;
StringStream is("[1, {}]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(4u, r.Offset());
}
{
HandlerTerminateAtStartArray handler;
Reader reader;
StringStream is("{\"a\": []}");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(6u, r.Offset());
}
{
HandlerTerminateAtEndObject handler;
Reader reader;
StringStream is("[1, {}]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(5u, r.Offset());
}
{
HandlerTerminateAtEndArray handler;
Reader reader;
StringStream is("{\"a\": []}");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorTermination, r.Code());
EXPECT_EQ(7u, r.Offset());
}
}
TEST(Reader, IterativeParsing_LimitStackSize) {
BaseReaderHandler<> handler;
Reader reader(20);
StringStream is("[[[]]]");
ParseResult r = reader.Parse<kParseIterativeFlag>(is, handler);
EXPECT_TRUE(reader.HasParseError());
EXPECT_EQ(kParseErrorStackSizeLimitExceeded, r.Code());
EXPECT_EQ(2u, r.Offset());
}
#ifdef __GNUC__
RAPIDJSON_DIAG_POP
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment