Commit 54a5d103 authored by miloyip's avatar miloyip

Merge branch 'issue69writeescapeunicode'

parents 2d732794 9974e355
......@@ -6,6 +6,7 @@
#ifdef _MSC_VER
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
RAPIDJSON_DIAG_OFF(4702) // unreachable code
#elif defined(__GNUC__)
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(effc++)
......@@ -23,6 +24,8 @@ namespace rapidjson {
concept Encoding {
typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
enum { supportUnicode = 1 }; // or 0 if not supporting unicode
//! \brief Encode a Unicode codepoint to an output stream.
//! \param os Output stream.
//! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
......@@ -78,6 +81,8 @@ template<typename CharType = char>
struct UTF8 {
typedef CharType Ch;
enum { supportUnicode = 1 };
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0x7F)
......@@ -222,6 +227,8 @@ struct UTF16 {
typedef CharType Ch;
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
enum { supportUnicode = 1 };
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
......@@ -351,6 +358,8 @@ struct UTF32 {
typedef CharType Ch;
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
enum { supportUnicode = 1 };
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
......@@ -447,6 +456,66 @@ struct UTF32BE : UTF32<CharType> {
}
};
///////////////////////////////////////////////////////////////////////////////
// ASCII
//! ASCII encoding.
/*! http://en.wikipedia.org/wiki/ASCII
\tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
\note implements Encoding concept
*/
template<typename CharType = char>
struct ASCII {
typedef CharType Ch;
enum { supportUnicode = 0 };
template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_ASSERT(codepoint <= 0x7F);
os.Put(static_cast<Ch>(codepoint & 0xFF));
}
template <typename InputStream>
static bool Decode(InputStream& is, unsigned* codepoint) {
unsigned char c = static_cast<unsigned char>(is.Take());
*codepoint = c;
return c <= 0X7F;
}
template <typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os) {
unsigned char c = is.Take();
os.Put(c);
return c <= 0x7F;
}
template <typename InputByteStream>
static CharType TakeBOM(InputByteStream& is) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
Ch c = Take(is);
return c;
}
template <typename InputByteStream>
static Ch Take(InputByteStream& is) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
return is.Take();
}
template <typename OutputByteStream>
static void PutBOM(OutputByteStream& os) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
(void)os;
}
template <typename OutputByteStream>
static void Put(OutputByteStream& os, Ch c) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
os.Put(static_cast<typename OutputByteStream::Ch>(c));
}
};
///////////////////////////////////////////////////////////////////////////////
// AutoUTF
......@@ -466,6 +535,8 @@ template<typename CharType>
struct AutoUTF {
typedef CharType Ch;
enum { supportUnicode = 1 };
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
template<typename OutputStream>
......
......@@ -22,6 +22,7 @@
#ifdef _MSC_VER
RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
RAPIDJSON_DIAG_OFF(4702) // unreachable code
#endif
#define RAPIDJSON_NOTHING /* deliberately empty */
......
......@@ -289,7 +289,39 @@ protected:
GenericStringStream<SourceEncoding> is(str);
while (is.Tell() < length) {
const Ch c = is.Peek();
if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) {
if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) {
// Unicode escaping
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
os_->Put('\\');
os_->Put('u');
if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
os_->Put(hexDigits[(codepoint >> 12) & 15]);
os_->Put(hexDigits[(codepoint >> 8) & 15]);
os_->Put(hexDigits[(codepoint >> 4) & 15]);
os_->Put(hexDigits[(codepoint ) & 15]);
}
else if (codepoint >= 0x010000 && codepoint <= 0x10FFFF) {
// Surrogate pair
unsigned s = codepoint - 0x010000;
unsigned lead = (s >> 10) + 0xD800;
unsigned trail = (s & 0x3FF) + 0xDC00;
os_->Put(hexDigits[(lead >> 12) & 15]);
os_->Put(hexDigits[(lead >> 8) & 15]);
os_->Put(hexDigits[(lead >> 4) & 15]);
os_->Put(hexDigits[(lead ) & 15]);
os_->Put('\\');
os_->Put('u');
os_->Put(hexDigits[(trail >> 12) & 15]);
os_->Put(hexDigits[(trail >> 8) & 15]);
os_->Put(hexDigits[(trail >> 4) & 15]);
os_->Put(hexDigits[(trail ) & 15]);
}
else
return false; // invalid code point
}
else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) {
is.Take();
os_->Put('\\');
os_->Put(escape[(unsigned char)c]);
......
......@@ -117,14 +117,34 @@ TEST(Writer,DoublePrecision) {
}
TEST(Writer, Transcode) {
const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}";
// UTF8 -> UTF16 -> UTF8
StringStream s("{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3], \"dollar\":\"\x24\", \"cents\":\"\xC2\xA2\", \"euro\":\"\xE2\x82\xAC\", \"gclef\":\"\xF0\x9D\x84\x9E\" } ");
{
StringStream s(json);
StringBuffer buffer;
Writer<StringBuffer, UTF16<>, UTF8<> > writer(buffer);
GenericReader<UTF8<>, UTF16<> > reader;
reader.Parse<0>(s, writer);
EXPECT_TRUE(writer.IsComplete());
EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}", buffer.GetString());
reader.Parse(s, writer);
EXPECT_STREQ(json, buffer.GetString());
}
// UTF8 -> UTF8 -> ASCII -> UTF8 -> UTF8
{
StringStream s(json);
StringBuffer buffer;
Writer<StringBuffer, UTF8<>, ASCII<> > writer(buffer);
Reader reader;
reader.Parse(s, writer);
StringBuffer buffer2;
Writer<StringBuffer> writer2(buffer2);
GenericReader<ASCII<>, UTF8<> > reader2;
StringStream s2(buffer.GetString());
reader2.Parse(s2, writer2);
EXPECT_STREQ(json, buffer2.GetString());
}
}
#include <sstream>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment