Merge pull request #80 from miloyip/issue31optimizeitoa

Issue31optimizeitoa

Merge pull request #80 from miloyip/issue31optimizeitoa
Issue31optimizeitoa
c4ce48cd · Milo Yip · a7dca0d4 · 6f164613 · c4ce48cd · c4ce48cd
Commit c4ce48cd authored Jul 27, 2014 by Milo Yip
9 changed files
--- a/include/rapidjson/internal/itoa.h
+++ b/include/rapidjson/internal/itoa.h
+#ifndef RAPIDJSON_ITOA_
+#define RAPIDJSON_ITOA_
+namespace rapidjson {
+namespace internal {
+// Modified from https://github.com/miloyip/itoa-benchmark/blob/master/src/branchlut.cpp
+// The API is changed to return a pointer to the character past the end of the string, without writing '\0'
+// Returns the lookup table shared by the integer-to-text routines in this header.
+// For every k in [0, 99], entries 2*k and 2*k + 1 hold the tens and units digit
+// of k as ASCII characters.
+inline const char* GetDigitsLut() {
+    // One decade per row. Spelling the table as string literals appends an
+    // implicit '\0' at index 200, which the callers in this header never index.
+    static const char cDigitsLut[] =
+        "00010203040506070809"
+        "10111213141516171819"
+        "20212223242526272829"
+        "30313233343536373839"
+        "40414243444546474849"
+        "50515253545556575859"
+        "60616263646566676869"
+        "70717273747576777879"
+        "80818283848586878889"
+        "90919293949596979899";
+    return cDigitsLut;
+}
+// Writes the decimal digits of `value` to `buffer` and returns the position one
+// past the last character written. No terminating '\0' is stored. Between 1 and
+// 10 characters are produced, so `buffer` must have room for 10.
+inline char* u32toa(uint32_t value, char* buffer) {
+    const char* const lut = GetDigitsLut();
+    char* out = buffer;
+
+    if (value < 10000) {
+        // One to four digits: `hi` indexes the leading pair, `lo` the trailing pair.
+        const uint32_t hi = (value / 100) << 1;
+        const uint32_t lo = (value % 100) << 1;
+
+        // Leading zeros are suppressed by testing the magnitude of value.
+        if (value >= 1000)
+            *out++ = lut[hi];
+        if (value >= 100)
+            *out++ = lut[hi + 1];
+        if (value >= 10)
+            *out++ = lut[lo];
+        *out++ = lut[lo + 1];
+        return out;
+    }
+
+    if (value < 100000000) {
+        // value = bbbbcccc (five to eight digits)
+        const uint32_t upper = value / 10000;
+        const uint32_t lower = value % 10000;
+        const uint32_t p1 = (upper / 100) << 1;
+        const uint32_t p2 = (upper % 100) << 1;
+        const uint32_t p3 = (lower / 100) << 1;
+        const uint32_t p4 = (lower % 100) << 1;
+
+        // Only the first three digits can be leading zeros here.
+        if (value >= 10000000)
+            *out++ = lut[p1];
+        if (value >= 1000000)
+            *out++ = lut[p1 + 1];
+        if (value >= 100000)
+            *out++ = lut[p2];
+        *out++ = lut[p2 + 1];
+        *out++ = lut[p3];
+        *out++ = lut[p3 + 1];
+        *out++ = lut[p4];
+        *out++ = lut[p4 + 1];
+        return out;
+    }
+
+    // value = aabbbbcccc in decimal (nine or ten digits)
+    const uint32_t lead = value / 100000000; // 1 to 42
+    const uint32_t rest = value % 100000000;
+
+    if (lead < 10)
+        *out++ = static_cast<char>('0' + static_cast<char>(lead));
+    else {
+        const unsigned pair = lead << 1;
+        *out++ = lut[pair];
+        *out++ = lut[pair + 1];
+    }
+
+    // The remaining eight digits are always emitted in full.
+    const uint32_t upper = rest / 10000; // 0 to 9999
+    const uint32_t lower = rest % 10000; // 0 to 9999
+    const uint32_t p1 = (upper / 100) << 1;
+    const uint32_t p2 = (upper % 100) << 1;
+    const uint32_t p3 = (lower / 100) << 1;
+    const uint32_t p4 = (lower % 100) << 1;
+    *out++ = lut[p1];
+    *out++ = lut[p1 + 1];
+    *out++ = lut[p2];
+    *out++ = lut[p2 + 1];
+    *out++ = lut[p3];
+    *out++ = lut[p3 + 1];
+    *out++ = lut[p4];
+    *out++ = lut[p4 + 1];
+    return out;
+}
+// Writes the decimal text of `value` (with a leading '-' when negative) to
+// `buffer` and returns the position one past the last character written.
+// No terminating '\0' is stored. At most 11 characters are produced.
+inline char* i32toa(int32_t value, char* buffer) {
+	// Negate in unsigned arithmetic: `-value` on the signed type overflows
+	// (undefined behavior) for INT32_MIN, whereas ~u + 1 wraps and yields the
+	// correct magnitude 2147483648.
+	uint32_t magnitude = static_cast<uint32_t>(value);
+	if (value < 0) {
+		*buffer++ = '-';
+		magnitude = ~magnitude + 1;
+	}
+	return u32toa(magnitude, buffer);
+}
+// Writes the decimal digits of `value` to `buffer` and returns the position one
+// past the last character written. No terminating '\0' is stored. Between 1 and
+// 20 characters are produced, so `buffer` must have room for 20.
+inline char* u64toa(uint64_t value, char* buffer) {
+    const char* cDigitsLut = GetDigitsLut();
+
+    // Powers of ten as typed 64-bit constants, built up from a value that fits
+    // in 32 bits. This avoids unsuffixed integer literals that are too large
+    // for `long` on toolchains where such literals are not implicitly 64-bit.
+    const uint64_t kTen8 = 100000000;
+    const uint64_t kTen9 = kTen8 * 10;
+    const uint64_t kTen10 = kTen8 * 100;
+    const uint64_t kTen11 = kTen8 * 1000;
+    const uint64_t kTen12 = kTen8 * 10000;
+    const uint64_t kTen13 = kTen8 * 100000;
+    const uint64_t kTen14 = kTen8 * 1000000;
+    const uint64_t kTen15 = kTen8 * 10000000;
+    const uint64_t kTen16 = kTen8 * kTen8;
+
+    if (value < kTen8) {
+        // Fits in 32 bits: same scheme as u32toa for up to eight digits.
+        uint32_t v = static_cast<uint32_t>(value);
+        if (v < 10000) {
+            const uint32_t d1 = (v / 100) << 1;
+            const uint32_t d2 = (v % 100) << 1;
+
+            // Leading zeros are suppressed by testing the magnitude of v.
+            if (v >= 1000)
+                *buffer++ = cDigitsLut[d1];
+            if (v >= 100)
+                *buffer++ = cDigitsLut[d1 + 1];
+            if (v >= 10)
+                *buffer++ = cDigitsLut[d2];
+            *buffer++ = cDigitsLut[d2 + 1];
+        }
+        else {
+            // value = bbbbcccc
+            const uint32_t b = v / 10000;
+            const uint32_t c = v % 10000;
+
+            const uint32_t d1 = (b / 100) << 1;
+            const uint32_t d2 = (b % 100) << 1;
+
+            const uint32_t d3 = (c / 100) << 1;
+            const uint32_t d4 = (c % 100) << 1;
+
+            // Compare the 32-bit copy, consistent with the rest of this branch.
+            if (v >= 10000000)
+                *buffer++ = cDigitsLut[d1];
+            if (v >= 1000000)
+                *buffer++ = cDigitsLut[d1 + 1];
+            if (v >= 100000)
+                *buffer++ = cDigitsLut[d2];
+            *buffer++ = cDigitsLut[d2 + 1];
+
+            *buffer++ = cDigitsLut[d3];
+            *buffer++ = cDigitsLut[d3 + 1];
+            *buffer++ = cDigitsLut[d4];
+            *buffer++ = cDigitsLut[d4 + 1];
+        }
+    }
+    else if (value < kTen16) {
+        // Nine to sixteen digits: split into two groups of eight digits,
+        // v0 (high, leading zeros suppressed) and v1 (low, always full).
+        const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+        const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        if (value >= kTen15)
+            *buffer++ = cDigitsLut[d1];
+        if (value >= kTen14)
+            *buffer++ = cDigitsLut[d1 + 1];
+        if (value >= kTen13)
+            *buffer++ = cDigitsLut[d2];
+        if (value >= kTen12)
+            *buffer++ = cDigitsLut[d2 + 1];
+        if (value >= kTen11)
+            *buffer++ = cDigitsLut[d3];
+        if (value >= kTen10)
+            *buffer++ = cDigitsLut[d3 + 1];
+        if (value >= kTen9)
+            *buffer++ = cDigitsLut[d4];
+        if (value >= kTen8)
+            *buffer++ = cDigitsLut[d4 + 1];
+
+        *buffer++ = cDigitsLut[d5];
+        *buffer++ = cDigitsLut[d5 + 1];
+        *buffer++ = cDigitsLut[d6];
+        *buffer++ = cDigitsLut[d6 + 1];
+        *buffer++ = cDigitsLut[d7];
+        *buffer++ = cDigitsLut[d7 + 1];
+        *buffer++ = cDigitsLut[d8];
+        *buffer++ = cDigitsLut[d8 + 1];
+    }
+    else {
+        // Seventeen to twenty digits: a leading group `a` of one to four
+        // digits, followed by sixteen digits that are always emitted in full.
+        const uint32_t a = static_cast<uint32_t>(value / kTen16); // 1 to 1844
+        value %= kTen16;
+
+        if (a < 10)
+            *buffer++ = static_cast<char>('0' + static_cast<char>(a));
+        else if (a < 100) {
+            const uint32_t i = a << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+        }
+        else if (a < 1000) {
+            *buffer++ = static_cast<char>('0' + static_cast<char>(a / 100));
+
+            const uint32_t i = (a % 100) << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+        }
+        else {
+            const uint32_t i = (a / 100) << 1;
+            const uint32_t j = (a % 100) << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+            *buffer++ = cDigitsLut[j];
+            *buffer++ = cDigitsLut[j + 1];
+        }
+
+        const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+        const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        *buffer++ = cDigitsLut[d1];
+        *buffer++ = cDigitsLut[d1 + 1];
+        *buffer++ = cDigitsLut[d2];
+        *buffer++ = cDigitsLut[d2 + 1];
+        *buffer++ = cDigitsLut[d3];
+        *buffer++ = cDigitsLut[d3 + 1];
+        *buffer++ = cDigitsLut[d4];
+        *buffer++ = cDigitsLut[d4 + 1];
+        *buffer++ = cDigitsLut[d5];
+        *buffer++ = cDigitsLut[d5 + 1];
+        *buffer++ = cDigitsLut[d6];
+        *buffer++ = cDigitsLut[d6 + 1];
+        *buffer++ = cDigitsLut[d7];
+        *buffer++ = cDigitsLut[d7 + 1];
+        *buffer++ = cDigitsLut[d8];
+        *buffer++ = cDigitsLut[d8 + 1];
+    }
+
+    return buffer;
+}
+// Writes the decimal text of `value` (with a leading '-' when negative) to
+// `buffer` and returns the position one past the last character written.
+// No terminating '\0' is stored. At most 20 characters are produced.
+inline char* i64toa(int64_t value, char* buffer) {
+	// Negate in unsigned arithmetic: `-value` on the signed type overflows
+	// (undefined behavior) for INT64_MIN, whereas ~u + 1 wraps and yields the
+	// correct magnitude 9223372036854775808.
+	uint64_t magnitude = static_cast<uint64_t>(value);
+	if (value < 0) {
+		*buffer++ = '-';
+		magnitude = ~magnitude + 1;
+	}
+	return u64toa(magnitude, buffer);
+}
+} // namespace internal
+} // namespace rapidjson
+#endif // RAPIDJSON_ITOA_
--- a/include/rapidjson/msinttypes/stdint.h
+++ b/include/rapidjson/msinttypes/stdint.h
@@ -42,8 +42,44 @@
 #endif
 // miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010.
-#if _MSC_VER >= 1700 // [
+#if _MSC_VER >= 1600 // [
 #include <stdint.h>
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+#undef INT8_C
+#undef INT16_C
+#undef INT32_C
+#undef INT64_C
+#undef UINT8_C
+#undef UINT16_C
+#undef UINT32_C
+#undef UINT64_C
+// 7.18.4.1 Macros for minimum-width integer constants
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+// 7.18.4.2 Macros for greatest-width integer constants
+// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
+// Check out Issue 9 for the details.
+#ifndef INTMAX_C //   [
+#  define INTMAX_C   INT64_C
+#endif // INTMAX_C    ]
+#ifndef UINTMAX_C //  [
+#  define UINTMAX_C  UINT64_C
+#endif // UINTMAX_C   ]
+#endif // __STDC_CONSTANT_MACROS ]
 #else // ] _MSC_VER >= 1600 [
 #include <limits.h>

--- a/include/rapidjson/rapidjson.h
+++ b/include/rapidjson/rapidjson.h
@@ -360,10 +360,14 @@ struct GenericInsituStringStream {
 	size_t Tell() { return static_cast<size_t>(src_ - head_); }
 	// Write
-	Ch* PutBegin() { return dst_ = src_; }
 	void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
-	void Flush() {}
+	Ch* PutBegin() { return dst_ = src_; }
 	size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }
+	void Flush() {}
+	// Advances the write cursor by `count` characters and returns where the skipped region starts.
+	Ch* Push(size_t count) {
+		Ch* const reserved = dst_;
+		dst_ = reserved + count;
+		return reserved;
+	}
+	// Moves the write cursor back by `count` characters.
+	void Pop(size_t count) {
+		dst_ = dst_ - count;
+	}
 	Ch* src_;
 	Ch* dst_;

--- a/include/rapidjson/stringbuffer.h
+++ b/include/rapidjson/stringbuffer.h
@@ -22,6 +22,8 @@ struct GenericStringBuffer {
 	void Flush() {}
 	void Clear() { stack_.Clear(); }
+	// Forwards to stack_.Push<Ch>(count) and returns the pointer it yields.
+	Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
+	// Forwards to stack_.Pop<Ch>(count); the result of that call is discarded.
+	void Pop(size_t count) { stack_.template Pop<Ch>(count); }
 	const Ch* GetString() const {
 		// Push and pop a null terminator. This is safe.

--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -4,6 +4,8 @@
 #include "rapidjson.h"
 #include "internal/stack.h"
 #include "internal/strfunc.h"
+#include "internal/itoa.h"
+#include "stringbuffer.h"
 #include <cstdio>	// snprintf() or _sprintf_s()
 #include <new>		// placement new
@@ -44,6 +46,10 @@ public:
 		os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)),
 		doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {}
+	//! Constructs a writer with no output stream attached (os_ is initialized to null).
+	/*! NOTE(review): presumably a stream has to be supplied (e.g. via Reset) before
+		any value is written, since the write functions dereference os_ -- confirm.
+	*/
+	Writer(Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
+		os_(0), level_stack_(allocator, levelDepth * sizeof(Level)),
+		doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {}
 	//! Reset the writer with a new stream.
 	/*!
 		This function reset the writer with a new stream and default settings,
@@ -208,49 +214,34 @@ protected:
 	}
 	bool WriteInt(int i) {
-		if (i < 0) {
+		char buffer[11];
-			os_->Put('-');
+		const char* end = internal::i32toa(i, buffer);
-			i = -i;
+		for (const char* p = buffer; p != end; ++p)
-		}
+			os_->Put(*p);
-		return WriteUint((unsigned)i);
+		return true;
 	}
 	bool WriteUint(unsigned u) {
 		char buffer[10];
-		char *p = buffer;
+		const char* end = internal::u32toa(u, buffer);
-		do {
+		for (const char* p = buffer; p != end; ++p)
-			*p++ = char(u % 10) + '0';
-			u /= 10;
-		} while (u > 0);
-		do {
-			--p;
 			os_->Put(*p);
-		} while (p != buffer);
 		return true;
 	}
 	bool WriteInt64(int64_t i64) {
-		if (i64 < 0) {
+		char buffer[21];
-			os_->Put('-');
+		const char* end = internal::i64toa(i64, buffer);
-			i64 = -i64;
+		for (const char* p = buffer; p != end; ++p)
-		}
+			os_->Put(*p);
-		WriteUint64((uint64_t)i64);
 		return true;
 	}
 	bool WriteUint64(uint64_t u64) {
 		char buffer[20];
-		char *p = buffer;
+		const char* end = internal::u64toa(u64, buffer);
-		do {
+		for (const char* p = buffer; p != end; ++p)
-			*p++ = char(u64 % 10) + '0';
-			u64 /= 10;
-		} while (u64 > 0);
-		do {
-			--p;
 			os_->Put(*p);
-		} while (p != buffer);
 		return true;
 	}
@@ -378,6 +369,40 @@ private:
 	Writer& operator=(const Writer&);
 };
+// Full specializations for StringBuffer: format directly into the buffer to avoid an intermediate copy
+template<>
+inline bool Writer<StringBuffer>::WriteInt(int i) {
+	// Reserve the 11 characters i32toa may need, format in place, then hand
+	// back whatever part of the reservation was not used.
+	const size_t kMaxLength = 11;
+	char* const begin = os_->Push(kMaxLength);
+	const char* const finish = internal::i32toa(i, begin);
+	const size_t written = static_cast<size_t>(finish - begin);
+	os_->Pop(kMaxLength - written);
+	return true;
+}
+template<>
+inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
+	// Reserve the 10 characters u32toa may need, format in place, then hand
+	// back whatever part of the reservation was not used.
+	const size_t kMaxLength = 10;
+	char* const begin = os_->Push(kMaxLength);
+	const char* const finish = internal::u32toa(u, begin);
+	const size_t written = static_cast<size_t>(finish - begin);
+	os_->Pop(kMaxLength - written);
+	return true;
+}
+template<>
+inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
+	// Reserve 21 characters for i64toa, format in place, then hand back
+	// whatever part of the reservation was not used.
+	const size_t kMaxLength = 21;
+	char* const begin = os_->Push(kMaxLength);
+	const char* const finish = internal::i64toa(i64, begin);
+	const size_t written = static_cast<size_t>(finish - begin);
+	os_->Pop(kMaxLength - written);
+	return true;
+}
+template<>
+inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
+	// Reserve the 20 characters u64toa may need, format in place, then hand
+	// back whatever part of the reservation was not used.
+	const size_t kMaxLength = 20;
+	char* const begin = os_->Push(kMaxLength);
+	const char* const finish = internal::u64toa(u, begin);
+	const size_t written = static_cast<size_t>(finish - begin);
+	os_->Pop(kMaxLength - written);
+	return true;
+}
 } // namespace rapidjson
 #ifdef _MSC_VER

--- a/test/perftest/misctest.cpp
+++ b/test/perftest/misctest.cpp
--- a/thirdparty/cppformat/format.cc
+++ b/thirdparty/cppformat/format.cc
--- a/thirdparty/cppformat/format.h
+++ b/thirdparty/cppformat/format.h
--- a/thirdparty/strtk/strtk.hpp
+++ b/thirdparty/strtk/strtk.hpp