Commit 8bdcd742 authored by miloyip@gmail.com's avatar miloyip@gmail.com

Fixed one decoding/validating bug in UTF8

Fixed one decoding/validating bug in UTF16
Fixed incorrect return type in StringBuffer::GetString()
Added unit tests for encoding/decoding/validating of different encodings

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@46 c5894555-1306-4e8d-425f-1f6f381ee07c
parent c51c90b2
...@@ -70,7 +70,7 @@ struct UTF8 { ...@@ -70,7 +70,7 @@ struct UTF8 {
template <typename InputStream> template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu) #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
#define TRANS(mask) result &= ((GetType((unsigned char)c) & mask) != 0) #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70) #define TAIL() COPY(); TRANS(0x70)
Ch c = is.Take(); Ch c = is.Take();
if (!(c & 0x80)) { if (!(c & 0x80)) {
...@@ -78,17 +78,17 @@ struct UTF8 { ...@@ -78,17 +78,17 @@ struct UTF8 {
return true; return true;
} }
unsigned char type = GetType((unsigned char)c); unsigned char type = GetRange((unsigned char)c);
*codepoint = (0xFF >> type) & (unsigned char)c; *codepoint = (0xFF >> type) & (unsigned char)c;
bool result = true; bool result = true;
switch (type) { switch (type) {
case 2: TAIL(); return result; case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result; case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result; case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result; case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result; case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result; case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
default: return false; default: return false;
} }
#undef COPY #undef COPY
...@@ -99,7 +99,7 @@ struct UTF8 { ...@@ -99,7 +99,7 @@ struct UTF8 {
template <typename InputStream, typename OutputStream> template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
#define COPY() os.Put(c = is.Take()) #define COPY() os.Put(c = is.Take())
#define TRANS(mask) result &= ((GetType((unsigned char)c) & mask) != 0) #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70) #define TAIL() COPY(); TRANS(0x70)
Ch c; Ch c;
COPY(); COPY();
...@@ -107,14 +107,14 @@ struct UTF8 { ...@@ -107,14 +107,14 @@ struct UTF8 {
return true; return true;
bool result = true; bool result = true;
switch (GetType((unsigned char)c)) { switch (GetRange((unsigned char)c)) {
case 2: TAIL(); return result; case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result; case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result; case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result; case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result; case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result; case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
default: return false; default: return false;
} }
#undef COPY #undef COPY
...@@ -122,7 +122,7 @@ struct UTF8 { ...@@ -122,7 +122,7 @@ struct UTF8 {
#undef TAIL #undef TAIL
} }
RAPIDJSON_FORCEINLINE static unsigned char GetType(unsigned char c) { RAPIDJSON_FORCEINLINE static unsigned char GetRange(unsigned char c) {
// Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
// With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
static const unsigned char type[] = { static const unsigned char type[] = {
...@@ -202,7 +202,7 @@ struct UTF16 { ...@@ -202,7 +202,7 @@ struct UTF16 {
*codepoint = c; *codepoint = c;
return true; return true;
} }
else if (c < 0xDBFF) { else if (c <= 0xDBFF) {
*codepoint = (c & 0x3FF) << 10; *codepoint = (c & 0x3FF) << 10;
c = is.Take(); c = is.Take();
*codepoint |= (c & 0x3FF); *codepoint |= (c & 0x3FF);
...@@ -218,7 +218,7 @@ struct UTF16 { ...@@ -218,7 +218,7 @@ struct UTF16 {
os.Put(c = is.Take()); os.Put(c = is.Take());
if (c < 0xD800 || c > 0xDFFF) if (c < 0xD800 || c > 0xDFFF)
return true; return true;
else if (c < 0xDBFF) { else if (c <= 0xDBFF) {
os.Put(c = is.Take()); os.Put(c = is.Take());
return c >= 0xDC00 && c <= 0xDFFF; return c >= 0xDC00 && c <= 0xDFFF;
} }
......
...@@ -23,7 +23,7 @@ struct GenericStringBuffer { ...@@ -23,7 +23,7 @@ struct GenericStringBuffer {
void Clear() { stack_.Clear(); } void Clear() { stack_.Clear(); }
const char* GetString() const { const Ch* GetString() const {
// Push and pop a null terminator. This is safe. // Push and pop a null terminator. This is safe.
*stack_.template Push<Ch>() = '\0'; *stack_.template Push<Ch>() = '\0';
stack_.template Pop<Ch>(1); stack_.template Pop<Ch>(1);
......
#include "unittest.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/encodedstream.h"
#include "rapidjson/stringbuffer.h"
using namespace rapidjson;
// Test fixture shared by the encoding tests below.
//
// SetUp() loads "utf8.json" into json_ as the reference text. The helper
// methods either decode an encoded file and compare it code point by code
// point with that reference, or transcode the reference into a temporary file
// and compare the bytes with an expected file.
class EncodingsTest : public ::testing::Test {
public:
    // Loads the UTF-8 reference document used by every helper.
    virtual void SetUp() {
        json_ = ReadFile("utf8.json", true, &length_);
        // Every helper wraps json_ in a StringStream; stop here with a clear
        // failure instead of handing it a null pointer.
        ASSERT_TRUE(json_ != 0);
    }

    // Releases the reference document (free(0) is a no-op).
    virtual void TearDown() {
        free(json_);
        json_ = 0;
    }

protected:
    // Opens a test data file for binary reading, trying "encodings/<filename>"
    // first and "../../bin/encodings/<filename>" second.
    // Returns 0 when neither location can be opened.
    static FILE* Open(const char* filename) {
        // NOTE(review): sprintf is unbounded; callers in this file only pass
        // short literal names, which fit comfortably in 1024 bytes.
        char buffer[1024];
        sprintf(buffer, "encodings/%s", filename);
        FILE *fp = fopen(buffer, "rb");
        if (!fp) {
            sprintf(buffer, "../../bin/encodings/%s", filename);
            fp = fopen(buffer, "rb");
        }
        return fp;
    }

    // Reads a whole file into a malloc()ed, null-terminated buffer.
    //
    // filename   - file to read.
    // appendPath - true: locate the file through Open(); false: use filename as is.
    // outLength  - receives the number of bytes read (0 on failure).
    //
    // Returns the buffer, which the caller must free(), or 0 if the file cannot
    // be opened, sized, allocated or read completely.
    static char *ReadFile(const char* filename, bool appendPath, size_t* outLength) {
        *outLength = 0;

        FILE *fp = appendPath ? Open(filename) : fopen(filename, "rb");
        if (!fp)
            return 0;

        // ftell() reports failure as -1; keep it signed until it is validated
        // so that it never turns into a huge allocation size.
        long size = -1;
        if (fseek(fp, 0, SEEK_END) == 0)
            size = ftell(fp);

        char* buffer = 0;
        if (size >= 0 && fseek(fp, 0, SEEK_SET) == 0)
            buffer = (char*)malloc((size_t)size + 1);

        if (buffer) {
            size_t readLength = fread(buffer, 1, (size_t)size, fp);
            if (readLength == (size_t)size) {
                buffer[readLength] = '\0';
                *outLength = readLength;
            }
            else {
                // A short read would leave uninitialized bytes in the buffer.
                free(buffer);
                buffer = 0;
            }
        }

        fclose(fp);
        return buffer;
    }

    // Reads `filename` through EncodedInputStream<FileEncoding, FileReadStream>,
    // decodes it with MemoryEncoding::Decode and expects the same sequence of
    // code points as decoding json_ with UTF8<>::Decode.
    template <typename FileEncoding, typename MemoryEncoding>
    void TestEncodedInputStream(const char* filename) {
        char buffer[16];    // presumably kept tiny so the stream refills often
        FILE *fp = Open(filename);
        ASSERT_TRUE(fp != 0);
        FileReadStream fs(fp, buffer, sizeof(buffer));
        EncodedInputStream<FileEncoding, FileReadStream> eis(fs);
        StringStream s(json_);
        // Also stop at the end of the reference text so that a file with extra
        // code points cannot drive `s` past its terminator.
        while (eis.Peek() != '\0' && s.Peek() != '\0') {
            unsigned expected, actual;
            EXPECT_TRUE(UTF8<>::Decode(s, &expected));
            EXPECT_TRUE(MemoryEncoding::Decode(eis, &actual));
            EXPECT_EQ(expected, actual);
        }
        // Both streams must be exhausted together.
        EXPECT_TRUE(eis.Peek() == '\0');
        EXPECT_EQ('\0', s.Peek());
        fclose(fp);
    }

    // Same comparison as TestEncodedInputStream(), but reads the file through
    // AutoUTFInputStream<unsigned, FileReadStream> and decodes it with
    // AutoUTF<unsigned>::Decode.
    void TestAutoUTFInputStream(const char *filename) {
        char buffer[16];
        FILE *fp = Open(filename);
        ASSERT_TRUE(fp != 0);
        FileReadStream fs(fp, buffer, sizeof(buffer));
        AutoUTFInputStream<unsigned, FileReadStream> eis(fs);
        StringStream s(json_);
        // Bounded by the reference text as well; see TestEncodedInputStream().
        while (eis.Peek() != '\0' && s.Peek() != '\0') {
            unsigned expected, actual;
            EXPECT_TRUE(UTF8<>::Decode(s, &expected));
            EXPECT_TRUE(AutoUTF<unsigned>::Decode(eis, &actual));
            EXPECT_EQ(expected, actual);
        }
        EXPECT_TRUE(eis.Peek() == '\0');
        EXPECT_EQ('\0', s.Peek());
        fclose(fp);
    }

    // Transcodes json_ from UTF8<> to MemoryEncoding, writes it through
    // EncodedOutputStream<FileEncoding, FileWriteStream> into a temporary file
    // and expects that file to be byte-identical to `expectedFilename`.
    // putBOM is forwarded to the EncodedOutputStream constructor.
    template <typename FileEncoding, typename MemoryEncoding>
    void TestEncodedOutputStream(const char* expectedFilename, bool putBOM) {
        char filename[L_tmpnam];
        // NOTE(review): tmpnam() is race-prone; acceptable for a local unit test.
        ASSERT_TRUE(tmpnam(filename) != 0);
        FILE *fp = fopen(filename, "wb");
        // Writing to or closing a null FILE* is undefined behaviour.
        ASSERT_TRUE(fp != 0);
        char buffer[16];
        FileWriteStream os(fp, buffer, sizeof(buffer));
        EncodedOutputStream<FileEncoding, FileWriteStream> eos(os, putBOM);
        StringStream s(json_);
        while (s.Peek() != '\0') {
            bool success = Transcoder<UTF8<>, MemoryEncoding>::Transcode(s, eos);
            EXPECT_TRUE(success);
        }
        eos.Flush();
        fclose(fp);
        EXPECT_TRUE(CompareFile(filename, expectedFilename));
        remove(filename);
    }

    // Returns true when the file at `filename` (used as given) and the test
    // data file `expectedFilename` (located through Open()) were both read
    // successfully and hold identical bytes.
    bool CompareFile(char * filename, const char* expectedFilename) {
        size_t actualLength, expectedLength;
        char* actualBuffer = ReadFile(filename, false, &actualLength);
        char* expectedBuffer = ReadFile(expectedFilename, true, &expectedLength);
        // Two unreadable files both report length 0; they must not count as a
        // match, and memcmp() must never receive a null pointer.
        bool ret = actualBuffer != 0
            && expectedBuffer != 0
            && expectedLength == actualLength
            && memcmp(expectedBuffer, actualBuffer, actualLength) == 0;
        free(actualBuffer);
        free(expectedBuffer);
        return ret;
    }

    // Transcodes json_ from UTF8<> to AutoUTF<unsigned>, writes it through
    // AutoUTFOutputStream<unsigned, FileWriteStream> constructed with `type`
    // and `putBOM`, and expects the temporary output file to be byte-identical
    // to `expectedFilename`.
    void TestAutoUTFOutputStream(UTFType type, bool putBOM, const char *expectedFilename) {
        char filename[L_tmpnam];
        // NOTE(review): tmpnam() is race-prone; acceptable for a local unit test.
        ASSERT_TRUE(tmpnam(filename) != 0);
        FILE *fp = fopen(filename, "wb");
        ASSERT_TRUE(fp != 0);
        char buffer[16];
        FileWriteStream os(fp, buffer, sizeof(buffer));
        AutoUTFOutputStream<unsigned, FileWriteStream> eos(os, type, putBOM);
        StringStream s(json_);
        while (s.Peek() != '\0') {
            bool success = Transcoder<UTF8<>, AutoUTF<unsigned> >::Transcode(s, eos);
            EXPECT_TRUE(success);
        }
        eos.Flush();
        fclose(fp);
        EXPECT_TRUE(CompareFile(filename, expectedFilename));
        remove(filename);
    }

    const char* filename_;  // never assigned or read in this fixture
    char *json_;            // reference text read from utf8.json; owned, released in TearDown()
    size_t length_;         // byte length of json_ as reported by ReadFile()
};
// Decodes every sample file through EncodedInputStream, with and without a
// BOM, and checks the result against the UTF-8 reference text.
TEST_F(EncodingsTest, EncodedInputStream) {
    // In-memory encodings used for decoding.
    typedef UTF8<> Mem8;
    typedef UTF16<> Mem16;
    typedef UTF32<> Mem32;
    // Byte-order-specific encodings of the files on disk.
    typedef UTF16LE<> File16LE;
    typedef UTF16BE<> File16BE;
    typedef UTF32LE<> File32LE;
    typedef UTF32BE<> File32BE;

    // UTF-8
    TestEncodedInputStream<Mem8, Mem8>("utf8.json");
    TestEncodedInputStream<Mem8, Mem8>("utf8bom.json");

    // UTF-16
    TestEncodedInputStream<File16LE, Mem16>("utf16le.json");
    TestEncodedInputStream<File16LE, Mem16>("utf16lebom.json");
    TestEncodedInputStream<File16BE, Mem16>("utf16be.json");
    TestEncodedInputStream<File16BE, Mem16>("utf16bebom.json");

    // UTF-32
    TestEncodedInputStream<File32LE, Mem32>("utf32le.json");
    TestEncodedInputStream<File32LE, Mem32>("utf32lebom.json");
    TestEncodedInputStream<File32BE, Mem32>("utf32be.json");
    TestEncodedInputStream<File32BE, Mem32>("utf32bebom.json");
}
// Runs the AutoUTFInputStream check over every sample file, in the same order
// as the explicit-encoding test: UTF-8, UTF-16 LE/BE, UTF-32 LE/BE, each
// without and then with a BOM.
TEST_F(EncodingsTest, AutoUTFInputStream) {
    static const char* const kSampleFiles[] = {
        "utf8.json",
        "utf8bom.json",
        "utf16le.json",
        "utf16lebom.json",
        "utf16be.json",
        "utf16bebom.json",
        "utf32le.json",
        "utf32lebom.json",
        "utf32be.json",
        "utf32bebom.json"
    };
    const size_t sampleCount = sizeof(kSampleFiles) / sizeof(kSampleFiles[0]);

    for (size_t i = 0; i < sampleCount; ++i)
        TestAutoUTFInputStream(kSampleFiles[i]);
}
// Writes the reference text through EncodedOutputStream in every encoding,
// without and with a BOM, and compares each result with the matching sample file.
TEST_F(EncodingsTest, EncodedOutputStream) {
    // In-memory encodings that are the transcoding targets.
    typedef UTF8<> Mem8;
    typedef UTF16<> Mem16;
    typedef UTF32<> Mem32;
    // Byte-order-specific encodings of the files on disk.
    typedef UTF16LE<> File16LE;
    typedef UTF16BE<> File16BE;
    typedef UTF32LE<> File32LE;
    typedef UTF32BE<> File32BE;

    const bool kNoBOM = false;
    const bool kWithBOM = true;

    // UTF-8
    TestEncodedOutputStream<Mem8, Mem8>("utf8.json", kNoBOM);
    TestEncodedOutputStream<Mem8, Mem8>("utf8bom.json", kWithBOM);

    // UTF-16
    TestEncodedOutputStream<File16LE, Mem16>("utf16le.json", kNoBOM);
    TestEncodedOutputStream<File16LE, Mem16>("utf16lebom.json", kWithBOM);
    TestEncodedOutputStream<File16BE, Mem16>("utf16be.json", kNoBOM);
    TestEncodedOutputStream<File16BE, Mem16>("utf16bebom.json", kWithBOM);

    // UTF-32
    TestEncodedOutputStream<File32LE, Mem32>("utf32le.json", kNoBOM);
    TestEncodedOutputStream<File32LE, Mem32>("utf32lebom.json", kWithBOM);
    TestEncodedOutputStream<File32BE, Mem32>("utf32be.json", kNoBOM);
    TestEncodedOutputStream<File32BE, Mem32>("utf32bebom.json", kWithBOM);
}
// Writes the reference text through AutoUTFOutputStream for every UTFType,
// without and with a BOM, and compares each result with the matching sample file.
TEST_F(EncodingsTest, AutoUTFOutputStream) {
    struct OutputCase {
        UTFType type;                   // encoding selected at run time
        bool putBOM;                    // whether a BOM is requested
        const char* expectedFilename;   // sample file holding the expected bytes
    };

    static const OutputCase kCases[] = {
        { kUTF8,    false, "utf8.json" },
        { kUTF8,    true,  "utf8bom.json" },
        { kUTF16LE, false, "utf16le.json" },
        { kUTF16LE, true,  "utf16lebom.json" },
        { kUTF16BE, false, "utf16be.json" },
        { kUTF16BE, true,  "utf16bebom.json" },
        { kUTF32LE, false, "utf32le.json" },
        { kUTF32LE, true,  "utf32lebom.json" },
        { kUTF32BE, false, "utf32be.json" },
        { kUTF32BE, true,  "utf32bebom.json" }
    };
    const size_t caseCount = sizeof(kCases) / sizeof(kCases[0]);

    for (size_t i = 0; i < caseCount; ++i) {
        const OutputCase& current = kCases[i];
        TestAutoUTFOutputStream(current.type, current.putBOM, current.expectedFilename);
    }
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment