Commit 3b1a995d authored by miloyip@gmail.com's avatar miloyip@gmail.com

Added EncodedOutputStream and AutoUTFOutputStream with unit tests

git-svn-id: https://rapidjson.googlecode.com/svn/trunk@42 c5894555-1306-4e8d-425f-1f6f381ee07c
parent 77255c6a
......@@ -12,12 +12,11 @@ public:
typedef typename Encoding::Ch Ch;
EncodedInputStream(InputStream& is) : is_(is) {
Encoding::TakeBOM(is_);
Read();
current_ = Encoding::TakeBOM(is_);
}
Ch Peek() const { return current_; }
Ch Take() { Ch c = current_; Read(); return c; }
Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
//! Reports the position of the wrapped input stream by forwarding to is_.Tell().
size_t Tell() const { return is_.Tell(); }
// Not implemented
......@@ -27,13 +26,37 @@ public:
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
private:
void Read() {
current_ = Encoding::Take(is_);
}
InputStream& is_;
Ch current_;
};
//! Adapts an output byte stream with a specified encoding.
/*!
    Every character given to Put() is handed to Encoding::Put() together with
    the wrapped stream, so the encoding decides which bytes are emitted.

    \tparam Encoding Type providing Ch, PutBOM(OutputStream&) and Put(OutputStream&, Ch).
    \tparam OutputStream Type of the wrapped stream; must provide Flush().
*/
template <typename Encoding, typename OutputStream>
class EncodedOutputStream {
public:
    typedef typename Encoding::Ch Ch;

    //! Wraps \c os, which is held by reference.
    /*!
        \param os Stream that receives the encoded output.
        \param putBOM When true, Encoding::PutBOM() is called on \c os first.
    */
    EncodedOutputStream(OutputStream& os, bool putBOM = true) : os_(os) {
        if (putBOM)
            Encoding::PutBOM(os_);
    }

    //! Writes one character through Encoding::Put().
    void Put(Ch c) { Encoding::Put(os_, c); }

    //! Flushes the wrapped stream.
    void Flush() { os_.Flush(); }

    // Not implemented: these only assert. Each one still returns a value so
    // that control never falls off the end of a non-void function when
    // RAPIDJSON_ASSERT compiles to nothing.
    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; }
    Ch Take() { RAPIDJSON_ASSERT(false); return 0; }
    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }

private:
    OutputStream& os_;
};
#define ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
template <typename CharType, typename InputStream>
class AutoUTFInputStream {
public:
......@@ -58,33 +81,34 @@ private:
friend struct AutoUTF<Ch>;
void TakeBOM(InputStream& is) {
#define TAKE() is.Take()
#define PEEK(x) if ((unsigned char)is.Peek() != x) break
#define ASSUME(x) if ((unsigned char)is.Peek() != x) break; is.Take()
switch ((unsigned char)is.Peek()) {
case 0x00: TAKE(); PEEK(0x00); TAKE(); PEEK(0xFE); TAKE(); PEEK(0xFF); type_ = kUTF32BE; return;
case 0xEF: TAKE(); PEEK(0xBB); TAKE(); PEEK(0xBF); TAKE(); type_ = kUTF8; return;
case 0xFE: TAKE(); PEEK(0xFF); TAKE(); type_ = kUTF16BE; return;
case 0xFF: TAKE(); PEEK(0xFE); TAKE();
case 0x00: is.Take(); ASSUME(0x00); ASSUME(0xFE); ASSUME(0xFF); type_ = kUTF32BE; break;
case 0xEF: is.Take(); ASSUME(0xBB); ASSUME(0xBF); type_ = kUTF8; break;
case 0xFE: is.Take(); ASSUME(0xFF); type_ = kUTF16BE; break;
case 0xFF: is.Take(); ASSUME(0xFE);
if (is.Peek() == 0x00) {
TAKE(); PEEK(0x00); TAKE(); type_ = kUTF32LE; return;
is.Take(); ASSUME(0x00); type_ = kUTF32LE; break;
}
type_ = kUTF16LE;
return;
}
#undef TAKE
#undef PEEK
#undef ASSUME
// Runtime check whether the size of the character type is sufficient. It only performs checks with assertions.
switch (type_) {
case kUTF16LE:
case kUTF16BE:
RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
break;
case kUTF32LE:
case kUTF32BE:
RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
break;
}
}
void Read() {
typedef Ch (*TakeFunc)(InputStream& is);
static const TakeFunc f[] = {
UTF8<Ch>::Take,
UTF16LE<Ch>::Take,
UTF16BE<Ch>::Take,
UTF32LE<Ch>::Take,
UTF32BE<Ch>::Take,
};
static const TakeFunc f[] = { ENCODINGS_FUNC(Take) };
current_ = f[type_](is_);
}
......@@ -93,6 +117,59 @@ private:
Ch current_;
};
//! Output byte stream wrapper whose UTF encoding is chosen at run time.
/*!
    The UTFType passed to the constructor selects, through a table of function
    pointers, which encoding's Put()/PutBOM() is applied to the wrapped stream.

    \tparam CharType Character type accepted by Put().
    \tparam OutputStream Type of the wrapped stream; must provide Flush().
*/
template <typename CharType, typename OutputStream>
class AutoUTFOutputStream {
public:
    typedef CharType Ch;

    //! Wraps \c os, which is held by reference.
    /*!
        \param os Stream that receives the encoded output.
        \param type Encoding used for all subsequent output.
        \param putBOM When true, the byte order mark of \c type is written first.
    */
    AutoUTFOutputStream(OutputStream& os, UTFType type, bool putBOM) : os_(os), type_(type) {
        // Runtime check whether the size of the character type is sufficient.
        // It only performs checks with assertions.
        switch (type_) {
        case kUTF16LE:
        case kUTF16BE:
            RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
            break;
        case kUTF32LE:
        case kUTF32BE:
            RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
            break;
        default:
            // No size requirement is checked for the remaining type(s).
            break;
        }

        if (putBOM)
            PutBOM();
    }

    //! Writes one character using the Put() of the selected encoding.
    void Put(Ch c) {
        typedef void (*PutFunc)(OutputStream&, Ch);
        // NOTE(review): indexing by type_ assumes the UTFType enumerators are
        // numbered in the order listed by ENCODINGS_FUNC; the enum is declared
        // outside this view -- confirm.
        static const PutFunc f[] = { ENCODINGS_FUNC(Put) };
        f[type_](os_, c);
    }

    //! Flushes the wrapped stream.
    void Flush() { os_.Flush(); }

    // Not implemented: these only assert. Each one still returns a value so
    // that control never falls off the end of a non-void function when
    // RAPIDJSON_ASSERT compiles to nothing.
    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0; }
    Ch Take() { RAPIDJSON_ASSERT(false); return 0; }
    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }

private:
    friend struct AutoUTF<Ch>;

    //! Writes the byte order mark of the selected encoding to the wrapped stream.
    void PutBOM() {
        typedef void (*PutBOMFunc)(OutputStream&);
        static const PutBOMFunc f[] = { ENCODINGS_FUNC(PutBOM) };
        f[type_](os_);
    }

    OutputStream& os_;
    UTFType type_;
};
#undef ENCODINGS_FUNC
} // namespace rapidjson
#endif // RAPIDJSON_FILESTREAM_H_
......@@ -141,19 +141,31 @@ struct UTF8 {
}
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xEF) return;
is.Take();
if ((unsigned char)is.Peek() != 0xBB) return;
is.Take();
if ((unsigned char)is.Peek() != 0xBF) return;
is.Take();
static CharType TakeBOM(InputStream& is) {
Ch c = Take(is);
if ((unsigned char)c != 0xEFu) return c;
c = is.Take();
if ((unsigned char)c != 0xBBu) return c;
c = is.Take();
if ((unsigned char)c != 0xBFu) return c;
c = is.Take();
return c;
}
//! Reads one code unit by forwarding to is.Take() and returns it as Ch.
template <typename InputStream>
RAPIDJSON_FORCEINLINE static Ch Take(InputStream& is) {
return is.Take();
}
//! Writes the three-byte UTF-8 byte order mark (EF BB BF) to \c os.
template <typename OutputStream>
static void PutBOM(OutputStream& os) {
    const unsigned bom[] = { 0xEFu, 0xBBu, 0xBFu };
    for (unsigned i = 0; i < sizeof(bom) / sizeof(bom[0]); ++i)
        os.Put(bom[i]);
}
//! Writes the code unit \c c to \c os unchanged, with a single os.Put() call.
template <typename OutputStream>
static void Put(OutputStream& os, Ch c) {
os.Put(c);
}
};
///////////////////////////////////////////////////////////////////////////////
......@@ -217,11 +229,9 @@ struct UTF16 {
template<typename CharType = wchar_t>
struct UTF16LE : UTF16<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFF) return;
is.Take();
if ((unsigned char)is.Peek() != 0xFE) return;
is.Take();
static CharType TakeBOM(InputStream& is) {
CharType c = Take(is);
return (unsigned short)c == 0xFEFFu ? Take(is) : c;
}
template <typename InputStream>
......@@ -230,16 +240,25 @@ struct UTF16LE : UTF16<CharType> {
c |= (unsigned char)is.Take() << 8;
return c;
}
//! Writes the two-byte little-endian UTF-16 byte order mark (FF FE) to \c os.
template <typename OutputStream>
static void PutBOM(OutputStream& os) {
    const unsigned bom[] = { 0xFFu, 0xFEu };
    for (unsigned i = 0; i < sizeof(bom) / sizeof(bom[0]); ++i)
        os.Put(bom[i]);
}
//! Writes the low 16 bits of \c c to \c os as two bytes, least significant byte first.
// NOTE(review): Ch is not declared in the visible part of this struct and the
// base class depends on CharType, so strictly conforming compilers may need
// CharType (or a qualified name) here -- confirm.
template <typename OutputStream>
static void Put(OutputStream& os, Ch c) {
    for (unsigned shift = 0; shift < 16; shift += 8)
        os.Put((c >> shift) & 0xFFu);
}
};
template<typename CharType = wchar_t>
struct UTF16BE : UTF16<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFE) return;
is.Take();
if ((unsigned char)is.Peek() != 0xFF) return;
is.Take();
static CharType TakeBOM(InputStream& is) {
CharType c = Take(is);
return (unsigned short)c == 0xFEFFu ? Take(is) : c;
}
template <typename InputStream>
......@@ -248,6 +267,17 @@ struct UTF16BE : UTF16<CharType> {
c |= (unsigned char)is.Take();
return c;
}
//! Writes the two-byte big-endian UTF-16 byte order mark (FE FF) to \c os.
template <typename OutputStream>
static void PutBOM(OutputStream& os) {
    const unsigned bom[] = { 0xFEu, 0xFFu };
    for (unsigned i = 0; i < sizeof(bom) / sizeof(bom[0]); ++i)
        os.Put(bom[i]);
}
//! Writes the low 16 bits of \c c to \c os as two bytes, most significant byte first.
// NOTE(review): Ch is not declared in the visible part of this struct and the
// base class depends on CharType, so strictly conforming compilers may need
// CharType (or a qualified name) here -- confirm.
template <typename OutputStream>
static void Put(OutputStream& os, Ch c) {
    for (int shift = 8; shift >= 0; shift -= 8)
        os.Put((c >> shift) & 0xFFu);
}
};
///////////////////////////////////////////////////////////////////////////////
......@@ -286,15 +316,9 @@ struct UTF32 {
template<typename CharType = unsigned>
struct UTF32LE : UTF32<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFF) return;
is.Take();
if ((unsigned char)is.Peek() != 0xFE) return;
is.Take();
if ((unsigned char)is.Peek() != 0x00) return;
is.Take();
if ((unsigned char)is.Peek() != 0x00) return;
is.Take();
static CharType TakeBOM(InputStream& is) {
CharType c = Take(is);
return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
}
template <typename InputStream>
......@@ -305,20 +329,27 @@ struct UTF32LE : UTF32<CharType> {
c |= (unsigned char)is.Take() << 24;
return c;
}
//! Writes the four-byte little-endian UTF-32 byte order mark (FF FE 00 00) to \c os.
template <typename OutputStream>
static void PutBOM(OutputStream& os) {
    const unsigned bom[] = { 0xFFu, 0xFEu, 0x00u, 0x00u };
    for (unsigned i = 0; i < sizeof(bom) / sizeof(bom[0]); ++i)
        os.Put(bom[i]);
}
//! Writes \c c to \c os as four bytes, least significant byte first.
// NOTE(review): Ch is not declared in the visible part of this struct and the
// base class depends on CharType, so strictly conforming compilers may need
// CharType (or a qualified name) here -- confirm.
template <typename OutputStream>
static void Put(OutputStream& os, Ch c) {
    for (unsigned shift = 0; shift < 32; shift += 8)
        os.Put((c >> shift) & 0xFFu);
}
};
template<typename CharType = unsigned>
struct UTF32BE : UTF32<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0x00) return;
is.Take();
if ((unsigned char)is.Peek() != 0x00) return;
is.Take();
if ((unsigned char)is.Peek() != 0xFE) return;
is.Take();
if ((unsigned char)is.Peek() != 0xFF) return;
is.Take();
static CharType TakeBOM(InputStream& is) {
CharType c = Take(is);
return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
}
template <typename InputStream>
......@@ -329,6 +360,19 @@ struct UTF32BE : UTF32<CharType> {
c |= (unsigned char)is.Take();
return c;
}
//! Writes the four-byte big-endian UTF-32 byte order mark (00 00 FE FF) to \c os.
template <typename OutputStream>
static void PutBOM(OutputStream& os) {
    const unsigned bom[] = { 0x00u, 0x00u, 0xFEu, 0xFFu };
    for (unsigned i = 0; i < sizeof(bom) / sizeof(bom[0]); ++i)
        os.Put(bom[i]);
}
//! Writes \c c to \c os as four bytes, most significant byte first.
// NOTE(review): Ch is not declared in the visible part of this struct and the
// base class depends on CharType, so strictly conforming compilers may need
// CharType (or a qualified name) here -- confirm.
template <typename OutputStream>
static void Put(OutputStream& os, Ch c) {
    for (int shift = 24; shift >= 0; shift -= 8)
        os.Put((c >> shift) & 0xFFu);
}
};
///////////////////////////////////////////////////////////////////////////////
......@@ -347,44 +391,30 @@ template<typename CharType>
struct AutoUTF {
typedef CharType Ch;
#define ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
template<typename OutputStream>
RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
typedef void (*EncodeFunc)(OutputStream&, unsigned);
static const EncodeFunc f[] = {
UTF8<Ch>::Encode,
UTF16<Ch>::Encode,
UTF16<Ch>::Encode,
UTF32<Ch>::Encode,
UTF32<Ch>::Encode,
};
static const EncodeFunc f[] = { ENCODINGS_FUNC(Encode) };
(*f[os.type_])(os, codepoint);
}
template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
typedef bool (*DecodeFunc)(InputStream&, unsigned*);
static const DecodeFunc f[] = {
UTF8<Ch>::Decode,
UTF16<Ch>::Decode,
UTF16<Ch>::Decode,
UTF32<Ch>::Decode,
UTF32<Ch>::Decode,
};
static const DecodeFunc f[] = { ENCODINGS_FUNC(Decode) };
return (*f[is.type_])(is, codepoint);
}
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
typedef bool (*ValidateFunc)(InputStream&, unsigned*);
static const ValidateFunc f[] = {
UTF8<Ch>::Decode,
UTF16<Ch>::Decode,
UTF16<Ch>::Decode,
UTF32<Ch>::Decode,
UTF32<Ch>::Decode,
};
static const ValidateFunc f[] = { ENCODINGS_FUNC(Validate) };
return (*f[is.type_])(is, os);
}
#undef ENCODINGS_FUNC
};
///////////////////////////////////////////////////////////////////////////////
......
......@@ -7,7 +7,16 @@ using namespace rapidjson;
class EncodingsTest : public ::testing::Test {
public:
FILE* Open(const char* filename) {
virtual void SetUp() {
json_ = ReadFile("utf8.json", true, &length_);
}
// Releases the reference document buffer held in json_.
virtual void TearDown() {
free(json_);
}
protected:
static FILE* Open(const char* filename) {
char buffer[1024];
sprintf(buffer, "encodings/%s", filename);
FILE *fp = fopen(buffer, "rb");
......@@ -18,105 +27,158 @@ public:
return fp;
}
virtual void SetUp() {
FILE *fp = Open("utf8.json");
ASSERT_TRUE(fp != 0);
static char *ReadFile(const char* filename, bool appendPath, size_t* outLength) {
FILE *fp = appendPath ? Open(filename) : fopen(filename, "rb");
if (!fp) {
*outLength = 0;
return 0;
}
fseek(fp, 0, SEEK_END);
length_ = (size_t)ftell(fp);
*outLength = (size_t)ftell(fp);
fseek(fp, 0, SEEK_SET);
json_ = (char*)malloc(length_ + 1);
fread(json_, 1, length_, fp);
json_[length_] = '\0';
char* buffer = (char*)malloc(*outLength + 1);
fread(buffer, 1, *outLength, fp);
buffer[*outLength] = '\0';
fclose(fp);
return buffer;
}
virtual void TearDown() {
free(json_);
}
protected:
const char* filename_;
char *json_;
size_t length_;
};
TEST_F(EncodingsTest, EncodedInputStream_UTF8BOM) {
template <typename FileEncoding, typename MemoryEncoding>
void TestEncodedInputStream(const char* filename) {
char buffer[16];
FILE *fp = Open("utf8bom.json");
FILE *fp = Open(filename);
ASSERT_TRUE(fp != 0);
FileReadStream fs(fp, buffer, sizeof(buffer));
EncodedInputStream<UTF8<>, FileReadStream> eis(fs);
EncodedInputStream<FileEncoding, FileReadStream> eis(fs);
StringStream s(json_);
while (eis.Peek() != '\0') {
unsigned expected, actual;
UTF8<>::Decode(s, &expected);
UTF8<>::Decode(eis, &actual);
EXPECT_TRUE(UTF8<>::Decode(s, &expected));
EXPECT_TRUE(MemoryEncoding::Decode(eis, &actual));
EXPECT_EQ(expected, actual);
}
EXPECT_EQ('\0', s.Peek());
fclose(fp);
}
}
TEST_F(EncodingsTest, EncodedInputStream_UTF16LEBOM) {
void TestAutoUTFInputStream(const char *filename) {
char buffer[16];
FILE *fp = Open("utf16lebom.json");
FILE *fp = Open(filename);
ASSERT_TRUE(fp != 0);
FileReadStream fs(fp, buffer, sizeof(buffer));
EncodedInputStream<UTF16LE<>, FileReadStream> eis(fs);
AutoUTFInputStream<unsigned, FileReadStream> eis(fs);
StringStream s(json_);
while (eis.Peek() != '\0') {
unsigned expected, actual;
UTF8<>::Decode(s, &expected);
UTF16<>::Decode(eis, &actual);
EXPECT_TRUE(UTF8<>::Decode(s, &expected));
EXPECT_TRUE(AutoUTF<unsigned>::Decode(eis, &actual));
EXPECT_EQ(expected, actual);
}
EXPECT_EQ('\0', s.Peek());
fclose(fp);
}
}
template <typename FileEncoding, typename MemoryEncoding>
void TestEncodedOutputStream(const char* expectedFilename, bool putBOM) {
char filename[L_tmpnam];
tmpnam(filename);
TEST_F(EncodingsTest, EncodedInputStream_UTF16BEBOM) {
FILE *fp = fopen(filename, "wb");
char buffer[16];
FILE *fp = Open("utf16bebom.json");
ASSERT_TRUE(fp != 0);
FileReadStream fs(fp, buffer, sizeof(buffer));
EncodedInputStream<UTF16BE<>, FileReadStream> eis(fs);
FileWriteStream os(fp, buffer, sizeof(buffer));
EncodedOutputStream<FileEncoding, FileWriteStream> eos(os, putBOM);
StringStream s(json_);
while (s.Peek() != '\0') {
bool success = Transcoder<UTF8<>, MemoryEncoding>::Transcode(s, eos);
EXPECT_TRUE(success);
}
eos.Flush();
fclose(fp);
EXPECT_TRUE(CompareFile(filename, expectedFilename));
remove(filename);
}
while (eis.Peek() != '\0') {
unsigned expected, actual;
UTF8<>::Decode(s, &expected);
UTF16<>::Decode(eis, &actual);
EXPECT_EQ(expected, actual);
// Returns true when both files could be read and their contents are
// byte-for-byte identical.
//   filename          path of the produced file, opened as given
//   expectedFilename  name of the reference file, resolved through Open()
bool CompareFile(const char* filename, const char* expectedFilename) {
    size_t actualLength = 0, expectedLength = 0;
    char* actualBuffer = ReadFile(filename, false, &actualLength);
    char* expectedBuffer = ReadFile(expectedFilename, true, &expectedLength);

    // ReadFile() yields a null buffer when a file cannot be opened. Treat that
    // as a mismatch, so two unreadable files never compare equal and memcmp()
    // is never handed a null pointer.
    bool ret = actualBuffer != 0 && expectedBuffer != 0
        && expectedLength == actualLength
        && memcmp(expectedBuffer, actualBuffer, actualLength) == 0;

    free(actualBuffer);
    free(expectedBuffer);
    return ret;
}
EXPECT_EQ('\0', s.Peek());
// Transcodes the UTF-8 reference document into a temporary file through
// AutoUTFOutputStream and compares that file with expectedFilename.
//   type              encoding passed to the output stream
//   putBOM            whether the stream writes a byte order mark first
//   expectedFilename  reference file the output must match
void TestAutoUTFOutputStream(UTFType type, bool putBOM, const char *expectedFilename) {
    char filename[L_tmpnam];
    // tmpnam() returns a null pointer when no name could be generated.
    ASSERT_TRUE(tmpnam(filename) != 0);

    FILE *fp = fopen(filename, "wb");
    // Stop here instead of handing a null FILE* to FileWriteStream and fclose().
    ASSERT_TRUE(fp != 0);
    char buffer[16];
    FileWriteStream os(fp, buffer, sizeof(buffer));
    AutoUTFOutputStream<unsigned, FileWriteStream> eos(os, type, putBOM);
    StringStream s(json_);
    while (s.Peek() != '\0') {
        // "> >" keeps the nested template closers valid for pre-C++11 compilers.
        bool success = Transcoder<UTF8<>, AutoUTF<unsigned> >::Transcode(s, eos);
        EXPECT_TRUE(success);
    }
    eos.Flush();
    fclose(fp);
    EXPECT_TRUE(CompareFile(filename, expectedFilename));
    remove(filename);
}
const char* filename_;
char *json_;
size_t length_;
};
// Runs TestEncodedInputStream over every reference file, with and without a
// byte order mark. Template closers are written "> >" because ">>" is lexed
// as a shift operator by pre-C++11 compilers.
TEST_F(EncodingsTest, EncodedInputStream) {
    TestEncodedInputStream<UTF8<>, UTF8<> >("utf8.json");
    TestEncodedInputStream<UTF8<>, UTF8<> >("utf8bom.json");
    TestEncodedInputStream<UTF16LE<>, UTF16<> >("utf16le.json");
    TestEncodedInputStream<UTF16LE<>, UTF16<> >("utf16lebom.json");
    TestEncodedInputStream<UTF16BE<>, UTF16<> >("utf16be.json");
    TestEncodedInputStream<UTF16BE<>, UTF16<> >("utf16bebom.json");
    TestEncodedInputStream<UTF32LE<>, UTF32<> >("utf32le.json");
    TestEncodedInputStream<UTF32LE<>, UTF32<> >("utf32lebom.json");
    TestEncodedInputStream<UTF32BE<>, UTF32<> >("utf32be.json");
    TestEncodedInputStream<UTF32BE<>, UTF32<> >("utf32bebom.json");
}
TEST_F(EncodingsTest, AutoUTFInputStream) {
#define TEST_FILE(filename) \
{ \
char buffer[16]; \
FILE *fp = Open(filename); \
ASSERT_TRUE(fp != 0); \
FileReadStream fs(fp, buffer, sizeof(buffer)); \
AutoUTFInputStream<wchar_t, FileReadStream> eis(fs); \
StringStream s(json_); \
while (eis.Peek() != '\0') { \
unsigned expected, actual; \
UTF8<>::Decode(s, &expected); \
AutoUTF<wchar_t>::Decode(eis, &actual); \
EXPECT_EQ(expected, actual); \
} \
EXPECT_EQ('\0', s.Peek()); \
fclose(fp); \
}
TestAutoUTFInputStream("utf8.json");
TestAutoUTFInputStream("utf8bom.json");
TestAutoUTFInputStream("utf16lebom.json");
TestAutoUTFInputStream("utf16bebom.json");
TestAutoUTFInputStream("utf32lebom.json");
TestAutoUTFInputStream("utf32bebom.json");
}
// Runs TestEncodedOutputStream for every encoding, once without and once with
// a byte order mark. Template closers are written "> >" because ">>" is lexed
// as a shift operator by pre-C++11 compilers.
TEST_F(EncodingsTest, EncodedOutputStream) {
    TestEncodedOutputStream<UTF8<>, UTF8<> >("utf8.json", false);
    TestEncodedOutputStream<UTF8<>, UTF8<> >("utf8bom.json", true);
    TestEncodedOutputStream<UTF16LE<>, UTF16<> >("utf16le.json", false);
    TestEncodedOutputStream<UTF16LE<>, UTF16<> >("utf16lebom.json", true);
    TestEncodedOutputStream<UTF16BE<>, UTF16<> >("utf16be.json", false);
    TestEncodedOutputStream<UTF16BE<>, UTF16<> >("utf16bebom.json", true);
    TestEncodedOutputStream<UTF32LE<>, UTF32<> >("utf32le.json", false);
    TestEncodedOutputStream<UTF32LE<>, UTF32<> >("utf32lebom.json", true);
    TestEncodedOutputStream<UTF32BE<>, UTF32<> >("utf32be.json", false);
    TestEncodedOutputStream<UTF32BE<>, UTF32<> >("utf32bebom.json", true);
}
TEST_FILE("utf8.json");
TEST_FILE("utf8bom.json");
TEST_FILE("utf16lebom.json");
TEST_FILE("utf16bebom.json");
#undef TEST_FILE
// Runs TestAutoUTFOutputStream for every encoding, once without and once with
// a byte order mark, in the order listed in the table below.
TEST_F(EncodingsTest, AutoUTFOutputStream) {
    struct Case {
        UTFType type;
        bool putBOM;
        const char* expectedFilename;
    };
    static const Case cases[] = {
        { kUTF8,    false, "utf8.json" },
        { kUTF8,    true,  "utf8bom.json" },
        { kUTF16LE, false, "utf16le.json" },
        { kUTF16LE, true,  "utf16lebom.json" },
        { kUTF16BE, false, "utf16be.json" },
        { kUTF16BE, true,  "utf16bebom.json" },
        { kUTF32LE, false, "utf32le.json" },
        { kUTF32LE, true,  "utf32lebom.json" },
        { kUTF32BE, false, "utf32be.json" },
        { kUTF32BE, true,  "utf32bebom.json" },
    };
    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i)
        TestAutoUTFOutputStream(cases[i].type, cases[i].putBOM, cases[i].expectedFilename);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment