Commit 5eac4489 authored by's avatar

Added EncodedInputStream, AutoUTFInputStream, AutoUTF

git-svn-id: c5894555-1306-4e8d-425f-1f6f381ee07c
parent a8d631fb
"en":"I can eat glass and it doesn't hurt me.",
"ko":"나는 유리를 먹을 수 있어요. 그래도 아프지 않아요"
\ No newline at end of file
"en":"I can eat glass and it doesn't hurt me.",
"ko":"나는 유리를 먹을 수 있어요. 그래도 아프지 않아요"
\ No newline at end of file
......@@ -19,7 +19,7 @@ int main(int argc, char* argv[]) {
PrettyWriter<FileWriteStream> writer(os);
// JSON reader parse from the input stream and let writer generate the output.
if (!reader.Parse<0>(is, writer)) {
if (!reader.Parse<kParseValidateEncodingFlag>(is, writer)) {
fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), reader.GetParseError());
return 1;
#include "rapidjson.h"
namespace rapidjson {
//! Adapts an input byte stream with a specified encoding.
template <typename Encoding, typename InputStream>
class EncodedInputStream {
typedef typename Encoding::Ch Ch;
EncodedInputStream(InputStream& is) : is_(is) {
Ch Peek() const { return current_; }
Ch Take() { Ch c = current_; Read(); return c; }
//! Returns the read position reported by the wrapped input stream.
// The result of is_.Tell() must be returned; without the return statement the
// function flows off its end, which is undefined behaviour for a non-void function.
size_t Tell() const { return is_.Tell(); }
// Not implemented
void Put(Ch c) { RAPIDJSON_ASSERT(false); }
void Flush() { RAPIDJSON_ASSERT(false); }
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
void Read() {
current_ = Encoding::Take(is_);
InputStream& is_;
Ch current_;
template <typename CharType, typename InputStream>
class AutoUTFInputStream {
typedef CharType Ch;
AutoUTFInputStream(InputStream& is, UTFType type = kUTF8) : is_(is), type_(type) {
Ch Peek() const { return current_; }
Ch Take() { Ch c = current_; Read(); return c; }
//! Returns the read position reported by the wrapped input stream.
// The result of is_.Tell() must be returned; without the return statement the
// function flows off its end, which is undefined behaviour for a non-void function.
size_t Tell() const { return is_.Tell(); }
// Not implemented
void Put(Ch c) { RAPIDJSON_ASSERT(false); }
void Flush() { RAPIDJSON_ASSERT(false); }
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
friend struct AutoUTF<Ch>;
void TakeBOM(InputStream& is) {
#define TAKE() is.Take()
#define PEEK(x) if ((unsigned char)is.Peek() != x) break
switch ((unsigned char)is.Peek()) {
case 0x00: TAKE(); PEEK(0x00); TAKE(); PEEK(0xFE); TAKE(); PEEK(0xFF); type_ = kUTF32BE; return;
case 0xEF: TAKE(); PEEK(0xBB); TAKE(); PEEK(0xBF); TAKE(); type_ = kUTF8; return;
case 0xFE: TAKE(); PEEK(0xFF); TAKE(); type_ = kUTF16BE; return;
case 0xFF: TAKE(); PEEK(0xFE); TAKE();
if (is.Peek() == 0x00) {
TAKE(); PEEK(0x00); TAKE(); type_ = kUTF32LE; return;
type_ = kUTF16LE;
#undef TAKE
#undef PEEK
void Read() {
typedef Ch (*TakeFunc)(InputStream& is);
static const TakeFunc f[] = {
current_ = f[type_](is_);
InputStream& is_;
UTFType type_;
Ch current_;
} // namespace rapidjson
#include "rapidjson.h"
namespace rapidjson {
// Encoding
/*! \class rapidjson::Encoding
\brief Concept for encoding of Unicode characters.
concept Encoding {
typename Ch; //! Type of character.
//! \brief Encode a Unicode codepoint to a stream.
//! \param os Output stream.
//! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
//! \brief Validate one Unicode codepoint from an encoded stream.
//! \param is Input stream to obtain codepoint.
//! \param os Output for copying one codepoint.
//! \return true if it is valid.
//! \note This function only validates and copies the codepoint; it does not actually decode it.
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
// UTF8
//! UTF-8 encoding.
\tparam CharType Type for storing 8-bit UTF-8 data. Default is char.
\implements Encoding
template<typename CharType = char>
struct UTF8 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0x7F)
os.Put(codepoint & 0xFF);
else if (codepoint <= 0x7FF) {
os.Put(0xC0 | ((codepoint >> 6) & 0xFF));
os.Put(0x80 | ((codepoint & 0x3F)));
else if (codepoint <= 0xFFFF) {
os.Put(0xE0 | ((codepoint >> 12) & 0xFF));
os.Put(0x80 | ((codepoint >> 6) & 0x3F));
os.Put(0x80 | (codepoint & 0x3F));
else {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
os.Put(0xF0 | ((codepoint >> 18) & 0xFF));
os.Put(0x80 | ((codepoint >> 12) & 0x3F));
os.Put(0x80 | ((codepoint >> 6) & 0x3F));
os.Put(0x80 | (codepoint & 0x3F));
template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
#define TRANS(mask) result &= ((GetType((unsigned char)c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70)
Ch c = is.Take();
if (!(c & 0x80)) {
*codepoint = (unsigned char)c;
return true;
unsigned char type = GetType((unsigned char)c);
*codepoint = (0xFF >> type) & (unsigned char)c;
bool result = true;
switch (type) {
case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result;
default: return false;
#undef COPY
#undef TRANS
#undef TAIL
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
#define COPY() os.Put(c = is.Take())
#define TRANS(mask) result &= ((GetType(c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70)
Ch c;
if (!(c & 0x80))
return true;
bool result = true;
switch (GetType(c)) {
case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result;
default: return false;
#undef COPY
#undef TRANS
#undef TAIL
RAPIDJSON_FORCEINLINE static unsigned char GetType(unsigned char c) {
// Referring to the DFA of Bjoern Hoehrmann's "Flexible and Economical UTF-8 Decoder".
// With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
static const unsigned char type[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
return type[c];
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xEF) return;
if ((unsigned char)is.Peek() != 0xBB) return;
if ((unsigned char)is.Peek() != 0xBF) return;
template <typename InputStream>
RAPIDJSON_FORCEINLINE static Ch Take(InputStream& is) {
return is.Take();
// UTF16
//! UTF-16 encoding.
\tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
\implements Encoding
template<typename CharType = wchar_t>
struct UTF16 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0xFFFF) {
RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
else {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
unsigned v = codepoint - 0x10000;
os.Put((v >> 10) | 0xD800);
os.Put((v & 0x3FF) | 0xDC00);
//! Decodes one Unicode codepoint from a stream of UTF-16 code units.
/*! \param is Input stream yielding UTF-16 code units.
    \param codepoint Receives the decoded codepoint.
    \return true if a valid codepoint was read; false if the first unit is a
        low surrogate or a high surrogate is not followed by a low surrogate.
*/
template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
    Ch c = is.Take();
    if (c < 0xD800 || c > 0xDFFF) {
        // Not a surrogate: the code unit is the codepoint itself.
        *codepoint = c;
        return true;
    }
    else if (c <= 0xDBFF) {
        // High surrogate (0xD800..0xDBFF inclusive): its low 10 bits are the
        // upper half of the offset; the following unit supplies the lower half.
        *codepoint = (c & 0x3FF) << 10;
        c = is.Take();
        *codepoint |= (c & 0x3FF);
        *codepoint += 0x10000;
        return c >= 0xDC00 && c <= 0xDFFF;
    }
    // Low surrogate without a preceding high surrogate.
    return false;
}
//! Validates one UTF-16 encoded codepoint while copying its code units.
/*! \param is Input stream yielding UTF-16 code units.
    \param os Output stream that receives every code unit taken from \c is.
    \return true if the unit(s) form a valid codepoint; false if the first unit
        is a low surrogate or a high surrogate is not followed by a low surrogate.
*/
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
    Ch c;
    os.Put(c = is.Take());
    if (c < 0xD800 || c > 0xDFFF)
        return true;    // Not a surrogate: a single unit is a full codepoint.
    else if (c <= 0xDBFF) {
        // High surrogate (0xD800..0xDBFF inclusive) must be followed by a low surrogate.
        os.Put(c = is.Take());
        return c >= 0xDC00 && c <= 0xDFFF;
    }
    // Low surrogate without a preceding high surrogate.
    return false;
}
template<typename CharType = wchar_t>
struct UTF16LE : UTF16<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFF) return;
if ((unsigned char)is.Peek() != 0xFE) return;
template <typename InputStream>
RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) {
CharType c = (unsigned char)is.Take();
c |= (unsigned char)is.Take() << 8;
return c;
template<typename CharType = wchar_t>
struct UTF16BE : UTF16<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFE) return;
if ((unsigned char)is.Peek() != 0xFF) return;
template <typename InputStream>
RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) {
CharType c = (unsigned char)is.Take() << 8;
c |= (unsigned char)is.Take();
return c;
// UTF32
//! UTF-32 encoding.
\tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
\implements Encoding
template<typename CharType = unsigned>
struct UTF32 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
Ch c = is.Take();
*codepoint = c;
return c <= 0x10FFFF;
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
Ch c;
os.Put(c = is.Take());
return c <= 0x10FFFF;
template<typename CharType = unsigned>
struct UTF32LE : UTF32<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0xFF) return;
if ((unsigned char)is.Peek() != 0xFE) return;
if ((unsigned char)is.Peek() != 0x00) return;
if ((unsigned char)is.Peek() != 0x00) return;
//! Reads one little-endian UTF-32 code unit from a byte stream.
/*! Takes four bytes from \c is, least-significant byte first, and combines them.
    Each byte is widened to unsigned before shifting: shifting the promoted int
    by 24 could otherwise reach the sign bit and sign-extend into a wider CharType.
    \param is Byte-oriented input stream.
    \return The assembled code unit.
*/
template <typename InputStream>
RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) {
    CharType c = static_cast<unsigned char>(is.Take());
    c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
    c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
    c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
    return c;
}
template<typename CharType = unsigned>
struct UTF32BE : UTF32<CharType> {
template <typename InputStream>
static void TakeBOM(InputStream& is) {
if ((unsigned char)is.Peek() != 0x00) return;
if ((unsigned char)is.Peek() != 0x00) return;
if ((unsigned char)is.Peek() != 0xFE) return;
if ((unsigned char)is.Peek() != 0xFF) return;
//! Reads one big-endian UTF-32 code unit from a byte stream.
/*! Takes four bytes from \c is, most-significant byte first, and combines them.
    Each byte is widened to unsigned before shifting: shifting the promoted int
    by 24 could otherwise reach the sign bit and sign-extend into a wider CharType.
    \param is Byte-oriented input stream.
    \return The assembled code unit.
*/
template <typename InputStream>
RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) {
    CharType c = static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 24;
    c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 16;
    c |= static_cast<unsigned>(static_cast<unsigned char>(is.Take())) << 8;
    c |= static_cast<unsigned char>(is.Take());
    return c;
}
// AutoUTF
enum UTFType {
kUTF8 = 0,
kUTF16LE = 1,
kUTF16BE = 2,
kUTF32LE = 3,
kUTF32BE = 4,
// Dynamically select encoding according to BOM or user setting.
template<typename CharType>
struct AutoUTF {
typedef CharType Ch;
template<typename OutputStream>
RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
typedef void (*EncodeFunc)(OutputStream&, unsigned);
static const EncodeFunc f[] = {
(*f[os.type_])(os, codepoint);
template <typename InputStream>
RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
typedef bool (*DecodeFunc)(InputStream&, unsigned*);
static const DecodeFunc f[] = {
return (*f[is.type_])(is, codepoint);
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
typedef bool (*ValidateFunc)(InputStream&, unsigned*);
static const ValidateFunc f[] = {
return (*f[is.type_])(is, os);
// Transcoder
template<typename SourceEncoding, typename TargetEncoding>
struct Transcoder {
template<typename InputStream, typename OutputStream>
static bool Transcode(InputStream& is, OutputStream& os) {
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
TargetEncoding::Encode(os, codepoint);
return true;
template<typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os) {
return Transcode(is, os);
//! Specialization of Transcoder with same source and target encoding.
template<typename Encoding>
struct Transcoder<Encoding, Encoding> {
template<typename InputStream, typename OutputStream>
static bool Transcode(InputStream& is, OutputStream& os) {
return true;
template<typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os) {
return Encoding::Validate(is, os);
} // namespace rapidjson
......@@ -82,455 +82,10 @@ typedef unsigned SizeType;
#define RAPIDJSON_ASSERT(x) assert(x)
namespace rapidjson {
// Allocator
/*! \class rapidjson::Allocator
\brief Concept for allocating, resizing and freeing memory block.
Note that Malloc() and Realloc() are non-static but Free() is static.
So if an allocator needs to support Free(), it needs to put its pointer in
the header of memory block.
concept Allocator {
static const bool kNeedFree; //!< Whether this allocator needs to call Free().
// Allocate a memory block.
// \param size of the memory block in bytes.
// \returns pointer to the memory block.
void* Malloc(size_t size);
// Resize a memory block.
// \param originalPtr The pointer to current memory block. Null pointer is permitted.
// \param originalSize The current size in bytes. (Design issue: since some allocators may not book-keep this, passing it explicitly can save memory.)
// \param newSize the new size in bytes.
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
// Free a memory block.
// \param pointer to the memory block. Null pointer is permitted.
static void Free(void *ptr);
// CrtAllocator
//! C-runtime library allocator.
/*! This class is just a wrapper for standard C library memory routines.
\implements Allocator
class CrtAllocator {
static const bool kNeedFree = true;
void* Malloc(size_t size) { return malloc(size); }
//! Resizes a memory block using the C runtime. (concept Allocator)
/*! \param originalPtr Block to resize. Null pointer is permitted.
    \param originalSize Unused; it is not forwarded to the C runtime.
    \param newSize New size in bytes. A size of zero frees the block and yields
        a null pointer, because realloc() with a zero size behaves differently
        across C runtimes.
    \return Pointer to the resized block, or null when newSize is 0 or realloc() fails.
*/
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
    (void)originalSize;
    if (newSize == 0) {
        free(originalPtr);
        return 0;
    }
    return realloc(originalPtr, newSize);
}
static void Free(void *ptr) { free(ptr); }
// MemoryPoolAllocator
//! Default memory allocator used by the parser and DOM.
/*! This allocator allocates memory blocks from pre-allocated memory chunks.
It does not free memory blocks, and Realloc() only allocates new memory.
The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default.
#include "allocators.h"
#include "encodings.h"
User may also supply a buffer as the first chunk.
If the user-buffer is full then additional chunks are allocated by BaseAllocator.
The user-buffer is not deallocated by this allocator.
\tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator.
\implements Allocator
template <typename BaseAllocator = CrtAllocator>
class MemoryPoolAllocator {
static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
//! Constructor with chunkSize.
/*! \param chunkSize The size of a memory chunk. The default is kDefaultChunkCapacity.
\param baseAllocator The allocator for allocating memory chunks.
MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
if (!baseAllocator_)
ownBaseAllocator_ = baseAllocator_ = new BaseAllocator();
//! Constructor with user-supplied buffer.
/*! The user buffer is used first. When it is full, the memory pool allocates new chunks of the given chunk size.
The user buffer will not be deallocated when this allocator is destructed.
\param buffer User supplied buffer.
\param size Size of the buffer in bytes. It must be larger than sizeof(ChunkHeader).
\param chunkSize The size of a memory chunk. The default is kDefaultChunkCapacity.
\param baseAllocator The allocator for allocating memory chunks.
MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
RAPIDJSON_ASSERT(buffer != 0);
RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
chunkHead_ = (ChunkHeader*)buffer;
chunkHead_->capacity = size - sizeof(ChunkHeader);
chunkHead_->size = 0;
chunkHead_->next = 0;
//! Destructor.
/*! This deallocates all memory chunks, excluding the user-supplied buffer.
~MemoryPoolAllocator() {
delete ownBaseAllocator_;
//! Deallocates all memory chunks, excluding the user-supplied buffer.
void Clear() {
while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) {
ChunkHeader* next = chunkHead_->next;
chunkHead_ = next;
//! Computes the total capacity of allocated memory chunks.
/*! \return total capacity in bytes.
size_t Capacity() {
size_t capacity = 0;
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
capacity += c->capacity;
return capacity;
//! Computes the memory blocks allocated.
/*! \return total used bytes.
size_t Size() {
size_t size = 0;
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
size += c->size;
return size;
//! Allocates a memory block. (concept Allocator)
void* Malloc(size_t size) {
if (chunkHead_->size + size > chunkHead_->capacity)
AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size);
char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size;
chunkHead_->size += size;
return buffer;
//! Resizes a memory block (concept Allocator)
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
if (originalPtr == 0)
return Malloc(newSize);
// Do not shrink if new size is smaller than original
if (originalSize >= newSize)
return originalPtr;
// Simply expand it if it is the last allocation and there is sufficient space
if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) {
size_t increment = newSize - originalSize;
if (chunkHead_->size + increment <= chunkHead_->capacity) {
chunkHead_->size += increment;
return originalPtr;
// Realloc process: allocate and copy memory, do not free original buffer.
void* newBuffer = Malloc(newSize);
RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly.
return memcpy(newBuffer, originalPtr, originalSize);
//! Frees a memory block (concept Allocator)
static void Free(void *ptr) {} // Do nothing
//! Creates a new chunk.
/*! \param capacity Capacity of the chunk in bytes.
void AddChunk(size_t capacity) {
ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity);
chunk->capacity = capacity;
chunk->size = 0;
chunk->next = chunkHead_;
chunkHead_ = chunk;
static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity.
//! Chunk header for prepending to each chunk.
/*! Chunks are stored as a singly linked list.
struct ChunkHeader {
size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself).
size_t size; //!< Current size of allocated memory in bytes.
ChunkHeader *next; //!< Next chunk in the linked list.
ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
size_t chunk_capacity_; //!< The minimum capacity of a chunk when it is allocated.
char *userBuffer_; //!< User supplied buffer.
BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object.
// Encoding
/*! \class rapidjson::Encoding
\brief Concept for encoding of Unicode characters.
concept Encoding {
typename Ch; //! Type of character.
//! \brief Encode a Unicode codepoint to a stream.
//! \param os Output stream.
//! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
//! \brief Validate one Unicode codepoint from an encoded stream.
//! \param is Input stream to obtain codepoint.
//! \param os Output for copying one codepoint.
//! \return true if it is valid.
//! \note This function only validates and copies the codepoint; it does not actually decode it.
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
// UTF8
//! UTF-8 encoding.
\tparam CharType Type for storing 8-bit UTF-8 data. Default is char.
\implements Encoding
template<typename CharType = char>
struct UTF8 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0x7F)
os.Put(codepoint & 0xFF);
else if (codepoint <= 0x7FF) {
os.Put(0xC0 | ((codepoint >> 6) & 0xFF));
os.Put(0x80 | ((codepoint & 0x3F)));
else if (codepoint <= 0xFFFF) {
os.Put(0xE0 | ((codepoint >> 12) & 0xFF));
os.Put(0x80 | ((codepoint >> 6) & 0x3F));
os.Put(0x80 | (codepoint & 0x3F));
else {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
os.Put(0xF0 | ((codepoint >> 18) & 0xFF));
os.Put(0x80 | ((codepoint >> 12) & 0x3F));
os.Put(0x80 | ((codepoint >> 6) & 0x3F));
os.Put(0x80 | (codepoint & 0x3F));
template <typename InputStream>
static bool Decode(InputStream& is, unsigned* codepoint) {
#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
#define TRANS(mask) result &= ((GetType(c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70)
Ch c = is.Take();
if (!(c & 0x80)) {
*codepoint = (unsigned char)c;
return true;
unsigned char type = GetType(c);
*codepoint = (0xFF >> type) & (unsigned char)c;
bool result = true;
switch (type) {
case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result;
default: return false;
#undef COPY
#undef TRANS
#undef TAIL
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
#define COPY() os.Put(c = is.Take())
#define TRANS(mask) result &= ((GetType(c) & mask) != 0)
#define TAIL() COPY(); TRANS(0x70)
Ch c;
if (!(c & 0x80))
return true;
bool result = true;
switch (GetType(c)) {
case 2: TAIL(); return result;
case 3: TAIL(); TAIL(); return result;
case 4: COPY(); TRANS(0x50); TAIL(); return result;
case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result;
case 6: TAIL(); TAIL(); TAIL(); return result;
case 10: COPY(); TRANS(0x20); TAIL(); return result;
case 11: COPY(); TRANS(0x60); TAIL(); return result;
default: return false;
#undef COPY
#undef TRANS
#undef TAIL
RAPIDJSON_FORCEINLINE static unsigned char GetType(unsigned char c) {
// Referring to the DFA of Bjoern Hoehrmann's "Flexible and Economical UTF-8 Decoder".
// With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
static const unsigned char type[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
return type[c];
// UTF16
//! UTF-16 encoding.
\tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
\implements Encoding
template<typename CharType = wchar_t>
struct UTF16 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0xFFFF) {
RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
else {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
unsigned v = codepoint - 0x10000;
os.Put((v >> 10) + 0xD800);
os.Put((v & 0x3FF) + 0xDC00);
//! Validates one UTF-16 encoded codepoint while copying its code units.
/*! \param is Input stream yielding UTF-16 code units.
    \param os Output stream that receives every code unit taken from \c is.
    \return true if the unit(s) form a valid codepoint; false if the first unit
        is a low surrogate or a high surrogate is not followed by a low surrogate.
*/
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
    Ch c;
    os.Put(c = is.Take());
    if (c < 0xD800 || c > 0xDFFF)
        return true;    // Not a surrogate: a single unit is a full codepoint.
    else if (c <= 0xDBFF) {
        // High surrogate (0xD800..0xDBFF inclusive) must be followed by a low surrogate.
        os.Put(c = is.Take());
        return c >= 0xDC00 && c <= 0xDFFF;
    }
    // Low surrogate without a preceding high surrogate.
    return false;
}
// UTF32
//! UTF-32 encoding.
\tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
\implements Encoding
template<typename CharType = unsigned>
struct UTF32 {
typedef CharType Ch;
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
template <typename InputStream, typename OutputStream>
RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
Ch c;
os.Put(c = is.Take());
return c <= 0x10FFFF;
// Transcoder
template<typename SourceEncoding, typename TargetEncoding>
struct Transcoder {
template<typename InputStream, typename OutputStream>
static bool Transcode(InputStream& is, OutputStream& os) {
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
TargetEncoding::Encode(os, codepoint);
return true;
template<typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os) {
return Transcode(is, os);
//! Specialization of Transcoder with same source and target encoding.
template<typename Encoding>
struct Transcoder<Encoding, Encoding> {
template<typename InputStream, typename OutputStream>
static bool Transcode(InputStream& is, OutputStream& os) {
return true;
template<typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os) {
return Encoding::Validate(is, os);
namespace rapidjson {
// Stream
......@@ -5,6 +5,7 @@
// Version 0.1
#include "rapidjson.h"
#include "encodings.h"
#include "internal/pow10.h"
#include "internal/stack.h"
#include <csetjmp>
......@@ -6,7 +6,7 @@
#define TEST_YAJL 0
#define TEST_MISC 1
#define TEST_MISC 0
//#define RAPIDJSON_SSE2
......@@ -2,6 +2,7 @@
#include "rapidjson/filestream.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/encodedstream.h"
using namespace rapidjson;
......@@ -31,22 +32,23 @@ protected:
size_t length_;
TEST_F(FileStreamTest, FileStream_Read) {
FILE *fp = fopen(filename_, "rb");
ASSERT_TRUE(fp != 0);
FileStream s(fp);
for (size_t i = 0; i < length_; i++) {
EXPECT_EQ(json_[i], s.Peek());
EXPECT_EQ(json_[i], s.Peek()); // 2nd time should be the same
EXPECT_EQ(json_[i], s.Take());
EXPECT_EQ(length_, s.Tell());
EXPECT_EQ('\0', s.Peek());
// Deprecated
//TEST_F(FileStreamTest, FileStream_Read) {
// FILE *fp = fopen(filename_, "rb");
// ASSERT_TRUE(fp != 0);
// FileStream s(fp);
// for (size_t i = 0; i < length_; i++) {
// EXPECT_EQ(json_[i], s.Peek());
// EXPECT_EQ(json_[i], s.Peek()); // 2nd time should be the same
// EXPECT_EQ(json_[i], s.Take());
// }
// EXPECT_EQ(length_, s.Tell());
// EXPECT_EQ('\0', s.Peek());
// fclose(fp);
TEST_F(FileStreamTest, FileReadStream) {
FILE *fp = fopen(filename_, "rb");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment