Commit f198c486 authored by miloyip@gmail.com

Added encoding validation feature

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@30 c5894555-1306-4e8d-425f-1f6f381ee07c
parent 78492f99
......@@ -16,8 +16,10 @@
#ifdef _MSC_VER
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#define RAPIDJSON_FORCEINLINE __forceinline
#else
#include <inttypes.h>
#define RAPIDJSON_FORCEINLINE
#endif
#endif // RAPIDJSON_NO_INT64TYPEDEF
......@@ -343,6 +345,47 @@ struct UTF8 {
}
return buffer;
}
//! Copy one UTF-8 encoded code point from a stream into a buffer, validating it.
/*!
    Each byte taken from the stream is stored in the buffer before it is
    checked, so when validation fails the offending byte has already been
    consumed from the stream.

    Besides the lead/continuation byte structure, the first continuation byte
    is range-checked so that overlong forms (E0 80..9F, F0 80..8F), encoded
    surrogates (ED A0..BF) and code points above U+10FFFF (F4 90..BF) are
    rejected.

    \param buffer Destination for the copied code units; up to 4 are written.
    \param s Input stream; only Take() is called on it.
    \return Pointer one past the last code unit written, or NULL if the
        sequence is not well-formed UTF-8.
*/
template <typename Stream>
RAPIDJSON_FORCEINLINE static Ch* Validate(Ch *buffer, Stream& s) {
    // Sequence length implied by the first byte. 0 marks bytes that can never
    // start a sequence; 5 marks continuation bytes, which cannot start one either.
#define X1 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
#define X5 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
    static const char utf8[256] = {
        X1,X1,X1,X1,X1,X1,X1,X1,            // 00-7F 1 byte
        X5,X5,X5,X5,                        // 80-BF Continuation
        0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    // C0-C1: invalid, C2-CF: 2 bytes
        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    // D0-DF: 2 bytes
        3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,    // E0-EF: 3 bytes
        4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,    // F0-F4: 4 bytes, F5-FF: invalid
    };
#undef X1
#undef X5

    Ch c = *buffer++ = s.Take();
    const unsigned char lead = (unsigned char)c;
    if (lead < 0x80u)   // ASCII fast path
        return buffer;

    const int length = utf8[lead];
    if (length < 2 || length > 4)   // Stray continuation byte or invalid lead byte
        return NULL;

    // Permitted range of the first continuation byte. It is narrower than
    // 80..BF for the lead bytes that could otherwise encode an overlong form,
    // a surrogate, or a value beyond U+10FFFF.
    unsigned char lo = 0x80u, hi = 0xBFu;
    switch (lead) {
    case 0xE0u: lo = 0xA0u; break;  // below A0 would be an overlong 3-byte form
    case 0xEDu: hi = 0x9Fu; break;  // above 9F would encode U+D800..U+DFFF
    case 0xF0u: lo = 0x90u; break;  // below 90 would be an overlong 4-byte form
    case 0xF4u: hi = 0x8Fu; break;  // above 8F would exceed U+10FFFF
    default: break;
    }

    c = *buffer++ = s.Take();
    const unsigned char first = (unsigned char)c;
    if (first < lo || first > hi)
        return NULL;

    // Remaining continuation bytes only need the 10xxxxxx bit pattern.
    for (int i = 2; i < length; ++i) {
        c = *buffer++ = s.Take();
        if ((c & 0xC0) != 0x80)
            return NULL;
    }
    return buffer;
}
};
///////////////////////////////////////////////////////////////////////////////
......@@ -370,6 +413,21 @@ struct UTF16 {
}
return buffer;
}
//! Copy one UTF-16 encoded code point from a stream into a buffer, validating it.
/*!
    Each code unit taken from the stream is stored in the buffer before it is
    checked, so when validation fails the offending unit has already been
    consumed from the stream.

    \param buffer Destination for the copied code units; up to 2 are written.
    \param s Input stream; only Take() is called on it.
    \return Pointer one past the last code unit written, or NULL if a
        surrogate is unpaired or out of order.
*/
template <typename Stream>
static Ch* Validate(Ch *buffer, Stream& s) {
    Ch c = *buffer++ = s.Take();
    if (c < 0xD800 || c > 0xDFFF)
        return buffer;      // Not a surrogate: a complete code point on its own

    // High surrogates span D800..DBFF inclusive; anything above that here is
    // a low surrogate with no preceding high surrogate.
    if (c > 0xDBFF)
        return NULL;

    // A high surrogate must be followed by a low surrogate (DC00..DFFF).
    c = *buffer++ = s.Take();
    if (c < 0xDC00 || c > 0xDFFF)
        return NULL;
    return buffer;
}
};
///////////////////////////////////////////////////////////////////////////////
......@@ -389,6 +447,12 @@ struct UTF32 {
*buffer++ = codepoint;
return buffer;
}
//! Copy one UTF-32 code unit from a stream into a buffer, validating it.
/*!
    The code unit is stored in the buffer before it is checked, so when
    validation fails it has already been consumed from the stream.

    \param buffer Destination for the copied code unit; exactly 1 is written.
    \param s Input stream; only Take() is called on it.
    \return Pointer one past the code unit written, or 0 if the value is not a
        Unicode scalar value (above U+10FFFF or in the surrogate range).
*/
template <typename Stream>
static Ch* Validate(Ch *buffer, Stream& s) {
    Ch c = *buffer++ = s.Take();

    // Compare as unsigned so that a negative value of a signed Ch cannot slip
    // under the upper bound.
    const unsigned long u = static_cast<unsigned long>(c);
    if (u > 0x10FFFFul)
        return 0;
    // Surrogate code points are not valid UTF-32 code units.
    if (u >= 0xD800ul && u <= 0xDFFFul)
        return 0;
    return buffer;
}
};
///////////////////////////////////////////////////////////////////////////////
......
......@@ -27,7 +27,8 @@ namespace rapidjson {
//! Combination of parseFlags
enum ParseFlag {
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
kParseInsituFlag = 1 //!< In-situ(destructive) parsing.
kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
};
///////////////////////////////////////////////////////////////////////////////
......@@ -401,8 +402,9 @@ private:
} while(false)
for (;;) {
Ch c = s.Take();
Ch c = s.Peek();
if (c == '\\') { // Escape
s.Take();
Ch e = s.Take();
if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e])
RAPIDJSON_PUT(escape[(unsigned char)e]);
......@@ -438,6 +440,7 @@ private:
}
}
else if (c == '"') { // Closing double quote
s.Take();
if (parseFlags & kParseInsituFlag) {
size_t length = s.PutEnd(head);
RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
......@@ -459,8 +462,29 @@ private:
RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1);
return;
}
else
RAPIDJSON_PUT(c); // Normal character, just copy
else if (parseFlags & kParseValidateEncodingFlag) {
Ch buffer[4];
Ch* end = Encoding::Validate(&buffer[0], s);
if (end == NULL) {
RAPIDJSON_PARSE_ERROR("Invalid encoding", s.Tell());
return;
}
if (parseFlags & kParseInsituFlag)
for (Ch* p = &buffer[0]; p != end; ++p)
s.Put(*p);
else {
SizeType l = SizeType(end - &buffer[0]);
Ch* q = stack_.template Push<Ch>(l);
for (Ch* p = &buffer[0]; p != end; ++p)
*q++ = *p;
len += l;
}
}
else {
RAPIDJSON_PUT(s.Take()); // Normal character, just copy
}
}
#undef RAPIDJSON_PUT
}
......
......@@ -2,14 +2,14 @@
#define PERFTEST_H_
#define TEST_RAPIDJSON 1
#define TEST_JSONCPP 1
#define TEST_YAJL 1
#define TEST_ULTRAJSON 1
#define TEST_PLATFORM 1
#define TEST_JSONCPP 0
#define TEST_YAJL 0
#define TEST_ULTRAJSON 0
#define TEST_PLATFORM 0
#if TEST_RAPIDJSON
//#define RAPIDJSON_SSE2
//#define RAPIDJSON_SSE42
#define RAPIDJSON_SSE42
#endif
#if TEST_YAJL
......
......@@ -41,22 +41,41 @@ protected:
Document doc_;
};
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_NullHandler)) {
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler)) {
for (int i = 0; i < kTrialCount; i++) {
memcpy(temp_, json_, length_ + 1);
InsituStringStream s(temp_);
BaseReaderHandler<> h;
Reader reader;
reader.Parse<kParseInsituFlag>(s, h);
EXPECT_TRUE(reader.Parse<kParseInsituFlag>(s, h));
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler)) {
// In-situ parse of the fixture's JSON text with encoding validation enabled,
// repeated kTrialCount times using a default BaseReaderHandler<>.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_ValidateEncoding)) {
    int trial = 0;
    while (trial < kTrialCount) {
        // In-situ parsing is destructive, so every trial starts from a fresh
        // copy of the text (including its terminator).
        memcpy(temp_, json_, length_ + 1);

        InsituStringStream s(temp_);
        BaseReaderHandler<> h;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseInsituFlag | kParseValidateEncodingFlag>(s, h));

        ++trial;
    }
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
for (int i = 0; i < kTrialCount; i++) {
StringStream s(json_);
BaseReaderHandler<> h;
Reader reader;
reader.Parse<0>(s, h);
EXPECT_TRUE(reader.Parse<0>(s, h));
}
}
// Non-destructive parse of the fixture's JSON text with encoding validation
// enabled, repeated kTrialCount times using a default BaseReaderHandler<>.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
    int trial = 0;
    while (trial < kTrialCount) {
        // The stream reads json_ directly; no per-trial copy is made here.
        StringStream s(json_);
        BaseReaderHandler<> h;
        Reader reader;
        EXPECT_TRUE(reader.Parse<kParseValidateEncodingFlag>(s, h));

        ++trial;
    }
}
......@@ -235,7 +254,7 @@ TEST_F(RapidJson, FileReadStream) {
}
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler_FileReadStream)) {
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
for (int i = 0; i < kTrialCount; i++) {
FILE *fp = fopen(filename_, "rb");
char buffer[65536];
......
......@@ -204,7 +204,7 @@ TEST(Reader, ParseString) {
GenericInsituStringStream<Encoding> is(buffer); \
ParseStringHandler<Encoding> h; \
GenericReader<Encoding> reader; \
reader.ParseString<kParseInsituFlag>(is, h); \
reader.ParseString<kParseInsituFlag | kParseValidateEncodingFlag>(is, h); \
EXPECT_EQ(0, StrCmp<Encoding::Ch>(e, h.str_)); \
EXPECT_EQ(StrLen(e), h.length_); \
free(buffer); \
......@@ -286,7 +286,6 @@ TEST(Reader, ParseString_NonDestructive) {
EXPECT_EQ(11, h.length_);
}
#ifdef RAPIDJSON_USE_EXCEPTION
TEST(Reader, ParseString_Error) {
#define TEST_STRING_ERROR(str) \
{ \
......@@ -295,18 +294,28 @@ TEST(Reader, ParseString_Error) {
InsituStringStream s(buffer); \
BaseReaderHandler<> h; \
Reader reader; \
EXPECT_ERROR(reader.ParseString<0>(s, h), ParseException); \
EXPECT_FALSE(reader.Parse<kParseValidateEncodingFlag>(s, h)); \
}
TEST_STRING_ERROR("\"\\a\""); // Unknown escape character
TEST_STRING_ERROR("\"\\uABCG\""); // Incorrect hex digit after \\u escape
TEST_STRING_ERROR("\"\\uD800X\""); // Missing the second \\u in surrogate pair
TEST_STRING_ERROR("\"\\uD800\\uFFFF\""); // The second \\u in surrogate pair is invalid
TEST_STRING_ERROR("\"Test"); // lacks ending quotation before the end of string
#define ARRAY(...) { __VA_ARGS__ }
#define TEST_STRINGARRAY_ERROR(Encoding, array) \
{ \
static const Encoding::Ch e[] = array; \
TEST_STRING_ERROR(e); \
}
TEST_STRING_ERROR("[\"\\a\"]"); // Unknown escape character
TEST_STRING_ERROR("[\"\\uABCG\"]"); // Incorrect hex digit after \\u escape
TEST_STRING_ERROR("[\"\\uD800X\"]"); // Missing the second \\u in surrogate pair
TEST_STRING_ERROR("[\"\\uD800\\uFFFF\"]"); // The second \\u in surrogate pair is invalid
TEST_STRING_ERROR("[\"Test]"); // lacks ending quotation before the end of string
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0x80u, ']')); // Incorrect UTF8 sequence
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0xC0u, 0x40, ']')); // Incorrect UTF8 sequence
#undef ARRAY
#undef TEST_STRINGARRAY_ERROR
#undef TEST_STRING_ERROR
}
#endif // RAPIDJSON_USE_EXCEPTION
template <unsigned count>
struct ParseArrayHandler : BaseReaderHandler<> {
......@@ -340,7 +349,6 @@ TEST(Reader, ParseArray) {
free(json);
}
#ifdef RAPIDJSON_USE_EXCEPTION
TEST(Reader, ParseArray_Error) {
#define TEST_ARRAY_ERROR(str) \
{ \
......@@ -348,8 +356,8 @@ TEST(Reader, ParseArray_Error) {
strncpy(buffer, str, 1000); \
InsituStringStream s(buffer); \
BaseReaderHandler<> h; \
Reader<UTF8<>, CrtAllocator> reader; \
EXPECT_ERROR(reader.ParseArray<0>(s, h), ParseException); \
GenericReader<UTF8<>, CrtAllocator> reader; \
EXPECT_FALSE(reader.Parse<0>(s, h)); \
}
// Must be a comma or ']' after an array element.
......@@ -359,7 +367,6 @@ TEST(Reader, ParseArray_Error) {
#undef TEST_ARRAY_ERROR
}
#endif // RAPIDJSON_USE_EXCEPTION
struct ParseObjectHandler : BaseReaderHandler<> {
ParseObjectHandler() : step_(0) {}
......@@ -446,7 +453,6 @@ TEST(Reader, Parse_EmptyObject) {
EXPECT_EQ(2, h.step_);
}
#ifdef RAPIDJSON_USE_EXCEPTION
TEST(Reader, ParseObject_Error) {
#define TEST_OBJECT_ERROR(str) \
{ \
......@@ -454,8 +460,8 @@ TEST(Reader, ParseObject_Error) {
strncpy(buffer, str, 1000); \
InsituStringStream s(buffer); \
BaseReaderHandler<> h; \
Reader<UTF8<>, CrtAllocator> reader; \
EXPECT_ERROR(reader.ParseObject<0>(s, h), ParseException); \
GenericReader<UTF8<>, CrtAllocator> reader; \
EXPECT_FALSE(reader.Parse<0>(s, h)); \
}
// Name of an object member must be a string
......@@ -477,9 +483,7 @@ TEST(Reader, ParseObject_Error) {
#undef TEST_OBJECT_ERROR
}
#endif // RAPIDJSON_USE_EXCEPTION
#ifdef RAPIDJSON_USE_EXCEPTION
TEST(Reader, Parse_Error) {
#define TEST_ERROR(str) \
{ \
......@@ -488,7 +492,7 @@ TEST(Reader, Parse_Error) {
InsituStringStream s(buffer); \
BaseReaderHandler<> h; \
Reader reader; \
EXPECT_ERROR(reader.Parse<0>(s, h), ParseException); \
EXPECT_FALSE(reader.Parse<0>(s, h)); \
}
// Text only contains white space(s)
......@@ -514,4 +518,3 @@ TEST(Reader, Parse_Error) {
#undef TEST_ERROR
}
#endif // RAPIDJSON_USE_EXCEPTION
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment