Commit 54a5d103 authored by miloyip

Merge branch 'issue69writeescapeunicode'

parents 2d732794 9974e355
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#ifdef _MSC_VER #ifdef _MSC_VER
RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
RAPIDJSON_DIAG_OFF(4702) // unreachable code
#elif defined(__GNUC__) #elif defined(__GNUC__)
RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(effc++) RAPIDJSON_DIAG_OFF(effc++)
...@@ -23,6 +24,8 @@ namespace rapidjson { ...@@ -23,6 +24,8 @@ namespace rapidjson {
concept Encoding { concept Encoding {
typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
enum { supportUnicode = 1 }; // or 0 if not supporting unicode
//! \brief Encode a Unicode codepoint to an output stream. //! \brief Encode a Unicode codepoint to an output stream.
//! \param os Output stream. //! \param os Output stream.
//! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
...@@ -78,6 +81,8 @@ template<typename CharType = char> ...@@ -78,6 +81,8 @@ template<typename CharType = char>
struct UTF8 { struct UTF8 {
typedef CharType Ch; typedef CharType Ch;
enum { supportUnicode = 1 };
template<typename OutputStream> template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) { static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0x7F) if (codepoint <= 0x7F)
...@@ -222,6 +227,8 @@ struct UTF16 { ...@@ -222,6 +227,8 @@ struct UTF16 {
typedef CharType Ch; typedef CharType Ch;
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
enum { supportUnicode = 1 };
template<typename OutputStream> template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) { static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
...@@ -351,6 +358,8 @@ struct UTF32 { ...@@ -351,6 +358,8 @@ struct UTF32 {
typedef CharType Ch; typedef CharType Ch;
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
enum { supportUnicode = 1 };
template<typename OutputStream> template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) { static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
...@@ -447,6 +456,66 @@ struct UTF32BE : UTF32<CharType> { ...@@ -447,6 +456,66 @@ struct UTF32BE : UTF32<CharType> {
} }
}; };
///////////////////////////////////////////////////////////////////////////////
// ASCII
//! ASCII encoding.
/*! http://en.wikipedia.org/wiki/ASCII
    \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
    \note implements Encoding concept
    \note supportUnicode is 0: only code points 0x00..0x7F are representable.
          The writer checks this flag and emits code units >= 0x80 as \\uXXXX
          escapes when the target encoding does not support Unicode.
*/
template<typename CharType = char>
struct ASCII {
    typedef CharType Ch;

    enum { supportUnicode = 0 };

    //! Write one code point to \c os as a single code unit.
    /*! \param os Output stream.
        \param codepoint Code point to write; asserted to be <= 0x7F.
    */
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint) {
        RAPIDJSON_ASSERT(codepoint <= 0x7F);
        os.Put(static_cast<Ch>(codepoint & 0xFF));
    }

    //! Take one code unit from \c is and report it as a code point.
    /*! \param is Input stream.
        \param codepoint Receives the low 8 bits of the code unit that was taken.
        \return true if the code unit is a 7-bit ASCII value, false otherwise.
    */
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint) {
        typedef typename InputStream::Ch InputCh;
        const InputCh unit = is.Take();
        const unsigned char c = static_cast<unsigned char>(unit);
        *codepoint = c;
        // Compare against the untruncated unit as well: with a code unit wider
        // than one byte, a value such as 0x0141 would otherwise be narrowed to
        // 0x41 and wrongly accepted as ASCII.
        return c <= 0x7F && static_cast<InputCh>(c) == unit;
    }

    //! Copy one code unit from \c is to \c os and check that it is ASCII.
    /*! \param is Input stream.
        \param os Output stream; receives the low 8 bits of the code unit taken.
        \return true if the code unit is a 7-bit ASCII value, false otherwise.
    */
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os) {
        typedef typename InputStream::Ch InputCh;
        const InputCh unit = is.Take();
        const unsigned char c = static_cast<unsigned char>(unit);
        os.Put(static_cast<typename OutputStream::Ch>(c));
        // Same wide-unit guard as in Decode().
        return c <= 0x7F && static_cast<InputCh>(c) == unit;
    }

    //! ASCII has no byte order mark; this simply takes the first character.
    /*! \param is Input byte stream (code unit size must be 1).
        \return The character taken from the stream.
    */
    template <typename InputByteStream>
    static CharType TakeBOM(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        return Take(is);
    }

    //! Take one character from a byte stream.
    /*! \param is Input byte stream (code unit size must be 1).
        \return The byte taken, converted to Ch.
    */
    template <typename InputByteStream>
    static Ch Take(InputByteStream& is) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
        return static_cast<Ch>(is.Take());
    }

    //! ASCII has no byte order mark; nothing is written.
    /*! \param os Output byte stream (code unit size must be 1); left untouched.
    */
    template <typename OutputByteStream>
    static void PutBOM(OutputByteStream& os) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        (void)os;
    }

    //! Write one character to a byte stream.
    /*! \param os Output byte stream (code unit size must be 1).
        \param c Character to write.
    */
    template <typename OutputByteStream>
    static void Put(OutputByteStream& os, Ch c) {
        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
        os.Put(static_cast<typename OutputByteStream::Ch>(c));
    }
};
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// AutoUTF // AutoUTF
...@@ -466,6 +535,8 @@ template<typename CharType> ...@@ -466,6 +535,8 @@ template<typename CharType>
struct AutoUTF { struct AutoUTF {
typedef CharType Ch; typedef CharType Ch;
enum { supportUnicode = 1 };
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
template<typename OutputStream> template<typename OutputStream>
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#ifdef _MSC_VER #ifdef _MSC_VER
RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PUSH
RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
RAPIDJSON_DIAG_OFF(4702) // unreachable code
#endif #endif
#define RAPIDJSON_NOTHING /* deliberately empty */ #define RAPIDJSON_NOTHING /* deliberately empty */
......
...@@ -289,7 +289,39 @@ protected: ...@@ -289,7 +289,39 @@ protected:
GenericStringStream<SourceEncoding> is(str); GenericStringStream<SourceEncoding> is(str);
while (is.Tell() < length) { while (is.Tell() < length) {
const Ch c = is.Peek(); const Ch c = is.Peek();
if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) { if (!TargetEncoding::supportUnicode && (unsigned)c >= 0x80) {
// Unicode escaping
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
os_->Put('\\');
os_->Put('u');
if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
os_->Put(hexDigits[(codepoint >> 12) & 15]);
os_->Put(hexDigits[(codepoint >> 8) & 15]);
os_->Put(hexDigits[(codepoint >> 4) & 15]);
os_->Put(hexDigits[(codepoint ) & 15]);
}
else if (codepoint >= 0x010000 && codepoint <= 0x10FFFF) {
// Surrogate pair
unsigned s = codepoint - 0x010000;
unsigned lead = (s >> 10) + 0xD800;
unsigned trail = (s & 0x3FF) + 0xDC00;
os_->Put(hexDigits[(lead >> 12) & 15]);
os_->Put(hexDigits[(lead >> 8) & 15]);
os_->Put(hexDigits[(lead >> 4) & 15]);
os_->Put(hexDigits[(lead ) & 15]);
os_->Put('\\');
os_->Put('u');
os_->Put(hexDigits[(trail >> 12) & 15]);
os_->Put(hexDigits[(trail >> 8) & 15]);
os_->Put(hexDigits[(trail >> 4) & 15]);
os_->Put(hexDigits[(trail ) & 15]);
}
else
return false; // invalid code point
}
else if ((sizeof(Ch) == 1 || (unsigned)c < 256) && escape[(unsigned char)c]) {
is.Take(); is.Take();
os_->Put('\\'); os_->Put('\\');
os_->Put(escape[(unsigned char)c]); os_->Put(escape[(unsigned char)c]);
......
...@@ -117,14 +117,34 @@ TEST(Writer,DoublePrecision) { ...@@ -117,14 +117,34 @@ TEST(Writer,DoublePrecision) {
} }
TEST(Writer, Transcode) { TEST(Writer, Transcode) {
const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}";
// UTF8 -> UTF16 -> UTF8 // UTF8 -> UTF16 -> UTF8
StringStream s("{ \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3], \"dollar\":\"\x24\", \"cents\":\"\xC2\xA2\", \"euro\":\"\xE2\x82\xAC\", \"gclef\":\"\xF0\x9D\x84\x9E\" } "); {
StringBuffer buffer; StringStream s(json);
Writer<StringBuffer, UTF16<>, UTF8<> > writer(buffer); StringBuffer buffer;
GenericReader<UTF8<>, UTF16<> > reader; Writer<StringBuffer, UTF16<>, UTF8<> > writer(buffer);
reader.Parse<0>(s, writer); GenericReader<UTF8<>, UTF16<> > reader;
EXPECT_TRUE(writer.IsComplete()); reader.Parse(s, writer);
EXPECT_STREQ("{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}", buffer.GetString()); EXPECT_STREQ(json, buffer.GetString());
}
// UTF8 -> UTF8 -> ASCII -> UTF8 -> UTF8
{
StringStream s(json);
StringBuffer buffer;
Writer<StringBuffer, UTF8<>, ASCII<> > writer(buffer);
Reader reader;
reader.Parse(s, writer);
StringBuffer buffer2;
Writer<StringBuffer> writer2(buffer2);
GenericReader<ASCII<>, UTF8<> > reader2;
StringStream s2(buffer.GetString());
reader2.Parse(s2, writer2);
EXPECT_STREQ(json, buffer2.GetString());
}
} }
#include <sstream> #include <sstream>
...@@ -270,4 +290,4 @@ TEST(Writer, RootArrayIsComplete) { ...@@ -270,4 +290,4 @@ TEST(Writer, RootArrayIsComplete) {
EXPECT_FALSE(writer.IsComplete()); EXPECT_FALSE(writer.IsComplete());
writer.EndArray(); writer.EndArray();
EXPECT_TRUE(writer.IsComplete()); EXPECT_TRUE(writer.IsComplete());
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment