Commit 3caa86c9 authored by Kosta's avatar Kosta

short string optimization

Since the payload (the `Data` union) of the current implementation of `GenericValue` occupies `12 bytes` (32-bit) or `16 bytes` (64-bit), it can hold `UTF-8`-encoded strings of up to `10` or `14` characters inline, together with the `terminating zero` character and a one-byte string length:
``` C++
    //! Inline ("short string") storage that shares the Data union with String.
    /*! SetStringRaw() copies a string here, instead of allocating, when its length is
        strictly less than MaxSize; the copied characters are followed by a terminating zero.
    */
    struct ShortString {
        //! Capacity of str in Ch units: the size of String expressed in Ch units, minus sizeof(unsigned char) (i.e. one unit) to leave room for the length field.
        enum { MaxSize = sizeof(GenericValue::String) / sizeof(Ch) - sizeof(unsigned char) };
        //! Inline character buffer (payload plus the terminating zero).
        Ch str[MaxSize];
        //! Number of characters stored, not counting the terminating zero.
        unsigned char length;
    };  // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode

```

This is achieved by introducing two additional flags: `kInlineStrFlag`, which marks a value whose characters live in the inline buffer, and `kShortStringFlag`, the combined initial flag set for such a value. When a new string value is set in `SetStringRaw(s, alloc)`, the function first checks whether the string is short enough to fit into the `inline string buffer`. If it is, the source string is copied into the new `ShortString` member; otherwise memory is allocated for it as before.
parent 2a4e0555
...@@ -1247,12 +1247,12 @@ int z = a[0u].GetInt(); // This works too. ...@@ -1247,12 +1247,12 @@ int z = a[0u].GetInt(); // This works too.
//!@name String //!@name String
//@{ //@{
const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return data_.s.str; } const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? data_.ss.str : data_.s.str); }
//! Get the length of string. //! Get the length of string.
/*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength().
*/ */
SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return data_.s.length; } SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((flags_ & kInlineStrFlag) ? data_.ss.length : data_.s.length); }
//! Set this value as a string without copying source string. //! Set this value as a string without copying source string.
/*! This version has better performance with supplied length, and also support string containing null character. /*! This version has better performance with supplied length, and also support string containing null character.
...@@ -1320,7 +1320,7 @@ int z = a[0u].GetInt(); // This works too. ...@@ -1320,7 +1320,7 @@ int z = a[0u].GetInt(); // This works too.
if (!handler.StartObject()) if (!handler.StartObject())
return false; return false;
for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) { for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) {
if (!handler.String(m->name.data_.s.str, m->name.data_.s.length, (m->name.flags_ & kCopyFlag) != 0)) if (!handler.String(m->name.GetString(), m->name.GetStringLength(), (m->name.flags_ & kCopyFlag) != 0))
return false; return false;
if (!m->value.Accept(handler)) if (!m->value.Accept(handler))
return false; return false;
...@@ -1336,7 +1336,7 @@ int z = a[0u].GetInt(); // This works too. ...@@ -1336,7 +1336,7 @@ int z = a[0u].GetInt(); // This works too.
return handler.EndArray(data_.a.size); return handler.EndArray(data_.a.size);
case kStringType: case kStringType:
return handler.String(data_.s.str, data_.s.length, (flags_ & kCopyFlag) != 0); return handler.String(data_.GetString(), data_.GetStringLength(), (flags_ & kCopyFlag) != 0);
case kNumberType: case kNumberType:
if (IsInt()) return handler.Int(data_.n.i.i); if (IsInt()) return handler.Int(data_.n.i.i);
...@@ -1365,6 +1365,7 @@ private: ...@@ -1365,6 +1365,7 @@ private:
kDoubleFlag = 0x4000, kDoubleFlag = 0x4000,
kStringFlag = 0x100000, kStringFlag = 0x100000,
kCopyFlag = 0x200000, kCopyFlag = 0x200000,
kInlineStrFlag = 0x400000,
// Initial flags of different types. // Initial flags of different types.
kNullFlag = kNullType, kNullFlag = kNullType,
...@@ -1378,6 +1379,7 @@ private: ...@@ -1378,6 +1379,7 @@ private:
kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag, kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag,
kConstStringFlag = kStringType | kStringFlag, kConstStringFlag = kStringType | kStringFlag,
kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, kCopyStringFlag = kStringType | kStringFlag | kCopyFlag,
kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag,
kObjectFlag = kObjectType, kObjectFlag = kObjectType,
kArrayFlag = kArrayType, kArrayFlag = kArrayType,
...@@ -1393,6 +1395,12 @@ private: ...@@ -1393,6 +1395,12 @@ private:
unsigned hashcode; //!< reserved unsigned hashcode; //!< reserved
}; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
struct ShortString {
enum { MaxSize = sizeof(String) / sizeof(Ch) - sizeof(unsigned char) };
Ch str[MaxSize];
unsigned char length;
}; // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
// By using proper binary layout, retrieval of different integer types do not need conversions. // By using proper binary layout, retrieval of different integer types do not need conversions.
union Number { union Number {
#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN #if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
...@@ -1433,6 +1441,7 @@ private: ...@@ -1433,6 +1441,7 @@ private:
union Data { union Data {
String s; String s;
ShortString ss;
Number n; Number n;
Object o; Object o;
Array a; Array a;
...@@ -1463,11 +1472,19 @@ private: ...@@ -1463,11 +1472,19 @@ private:
//! Initialize this value as copy string with initial data, without calling destructor. //! Initialize this value as copy string with initial data, without calling destructor.
void SetStringRaw(StringRefType s, Allocator& allocator) { void SetStringRaw(StringRefType s, Allocator& allocator) {
flags_ = kCopyStringFlag; Ch* str = NULL;
data_.s.str = (Ch *)allocator.Malloc((s.length + 1) * sizeof(Ch)); if(s.length < ShortString::MaxSize) {
data_.s.length = s.length; flags_ = kShortStringFlag;
memcpy(const_cast<Ch*>(data_.s.str), s, s.length * sizeof(Ch)); data_.ss.length = s.length;
const_cast<Ch*>(data_.s.str)[s.length] = '\0'; str = data_.ss.str;
} else {
flags_ = kCopyStringFlag;
data_.s.length = s.length;
str = (Ch *)allocator.Malloc((s.length + 1) * sizeof(Ch));
data_.s.str = str;
}
memcpy(str, s, s.length * sizeof(Ch));
str[s.length] = '\0';
} }
//! Assignment without calling destructor //! Assignment without calling destructor
...@@ -1480,9 +1497,16 @@ private: ...@@ -1480,9 +1497,16 @@ private:
bool StringEqual(const GenericValue& rhs) const { bool StringEqual(const GenericValue& rhs) const {
RAPIDJSON_ASSERT(IsString()); RAPIDJSON_ASSERT(IsString());
RAPIDJSON_ASSERT(rhs.IsString()); RAPIDJSON_ASSERT(rhs.IsString());
return data_.s.length == rhs.data_.s.length &&
(data_.s.str == rhs.data_.s.str // fast path for constant string const SizeType len1 = (flags_ == kShortStringFlag) ? data_.ss.length : data_.s.length;
|| memcmp(data_.s.str, rhs.data_.s.str, sizeof(Ch) * data_.s.length) == 0); const SizeType len2 = (rhs.flags_ == kShortStringFlag) ? rhs.data_.ss.length : rhs.data_.s.length;
if(len1 != len2) { return false; }
const Ch* const str1 = (flags_ == kShortStringFlag) ? data_.ss.str : data_.s.str;
const Ch* const str2 = (rhs.flags_ == kShortStringFlag) ? rhs.data_.ss.str : rhs.data_.s.str;
if(str1 == str2) { return true; } // fast path for constant string
return (memcmp(str1, str2, sizeof(Ch) * len1) == 0);
} }
Data data_; Data data_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment