Commit 2498c577 authored by ylavic's avatar ylavic

Optimize FileReadStream and BasicIStreamWrapper.

On my Linux machine, perftest reports:
- ~40% gain for FileReadStream (Take() loop),
- ~10% gain for ReaderParse_DummyHandler_FileReadStream.

With the same logic applied to BasicIStreamWrapper, which can thus now
also be created with a user-supplied buffer, its performance aligns with
that of FileReadStream (for the same buffer size).

The "unbuffered" versions (added for FileReadStream) work solely with
the internal peekBuffer (Ch[4]) and are measured in perftest.  When
performance doesn't matter much, they avoid the need for large
stack/heap buffers.
parent 30d92a63
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "stream.h" #include "stream.h"
#include <cstdio> #include <cstdio>
#include <cstring>
#ifdef __clang__ #ifdef __clang__
RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PUSH
...@@ -35,21 +36,42 @@ class FileReadStream { ...@@ -35,21 +36,42 @@ class FileReadStream {
public: public:
typedef char Ch; //!< Character type (byte). typedef char Ch; //!< Character type (byte).
//! Constructor (no user buffer).
/*!
    The stream reads through its own small internal peek buffer, so the
    caller does not have to provide any storage.
    \param fp File pointer opened for read. Must not be null.
*/
FileReadStream(std::FILE* fp)
    : fp_(fp),
      buffer_(peekBuffer_),
      size_(sizeof(peekBuffer_) / sizeof(peekBuffer_[0])),
      pos_(0),
      len_(0),
      count_(0)
{
    RAPIDJSON_ASSERT(fp_ != 0);
}
//! Constructor. //! Constructor.
/*! /*!
\param fp File pointer opened for read. \param fp File pointer opened for read.
\param buffer user-supplied buffer. \param buffer user-supplied buffer.
\param bufferSize size of buffer in bytes. Must >=4 bytes. \param bufferSize size of buffer in bytes. Must >=4 bytes.
*/ */
FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { FileReadStream(std::FILE* fp, Ch *buffer, size_t size) : fp_(fp), buffer_(buffer), size_(size), pos_(), len_(), count_() {
RAPIDJSON_ASSERT(fp_ != 0); RAPIDJSON_ASSERT(fp_ != 0 && buffer_ != 0 && size_ > 0);
RAPIDJSON_ASSERT(bufferSize >= 4); if (RAPIDJSON_UNLIKELY(size_ < sizeof(peekBuffer_) / sizeof(Ch))) {
Read(); size_ = sizeof(peekBuffer_) / sizeof(Ch);
buffer_ = peekBuffer_;
}
} }
Ch Peek() const { return *current_; } Ch Peek() const {
Ch Take() { Ch c = *current_; Read(); return c; } if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); } return static_cast<Ch>('\0');
return buffer_[pos_];
}
Ch Take() {
if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return static_cast<Ch>('\0');
return buffer_[pos_++];
}
size_t Tell() const { return count_ + pos_; }
// Not implemented // Not implemented
void Put(Ch) { RAPIDJSON_ASSERT(false); } void Put(Ch) { RAPIDJSON_ASSERT(false); }
...@@ -59,35 +81,36 @@ public: ...@@ -59,35 +81,36 @@ public:
// For encoding detection only. // For encoding detection only.
const Ch* Peek4() const { const Ch* Peek4() const {
return (current_ + 4 <= bufferLast_) ? current_ : 0; if (len_ - pos_ < 4) {
if (pos_) {
len_ -= pos_;
std::memmove(buffer_, buffer_ + pos_, len_);
count_ += pos_;
pos_ = 0;
} }
len_ += std::fread(buffer_ + len_, sizeof(Ch), size_ - len_, fp_);
private: if (len_ < 4)
void Read() { return 0;
if (current_ < bufferLast_)
++current_;
else if (!eof_) {
count_ += readCount_;
readCount_ = std::fread(buffer_, 1, bufferSize_, fp_);
bufferLast_ = buffer_ + readCount_ - 1;
current_ = buffer_;
if (readCount_ < bufferSize_) {
buffer_[readCount_] = '\0';
++bufferLast_;
eof_ = true;
} }
return &buffer_[pos_];
} }
private:
FileReadStream();
FileReadStream(const FileReadStream&);
FileReadStream& operator=(const FileReadStream&);
size_t Read() const {
count_ += pos_;
pos_ = 0;
len_ = std::fread(buffer_, sizeof(Ch), size_, fp_);
return len_;
} }
std::FILE* fp_; std::FILE* fp_;
Ch *buffer_; Ch peekBuffer_[4], *buffer_;
size_t bufferSize_; size_t size_;
Ch *bufferLast_; mutable size_t pos_, len_, count_;
Ch *current_;
size_t readCount_;
size_t count_; //!< Number of characters read
bool eof_;
}; };
RAPIDJSON_NAMESPACE_END RAPIDJSON_NAMESPACE_END
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "stream.h" #include "stream.h"
#include <iosfwd> #include <iosfwd>
#include <cstring>
#ifdef __clang__ #ifdef __clang__
RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PUSH
...@@ -48,26 +49,33 @@ template <typename StreamType> ...@@ -48,26 +49,33 @@ template <typename StreamType>
class BasicIStreamWrapper { class BasicIStreamWrapper {
public: public:
typedef typename StreamType::char_type Ch; typedef typename StreamType::char_type Ch;
BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {}
//! Constructor (no user buffer).
/*!
    The wrapper reads through its own small internal peek buffer.
    \param stream Input stream to read from.
*/
BasicIStreamWrapper(StreamType& stream)
    : stream_(stream),
      buffer_(peekBuffer_),
      size_(sizeof(peekBuffer_) / sizeof(peekBuffer_[0])),
      pos_(0),
      len_(0),
      count_(0)
{
}
//! Constructor with a user-supplied read buffer.
/*!
    \param stream Input stream to read from.
    \param buffer user-supplied buffer. Must not be null.
    \param size capacity of buffer, counted in Ch elements. Must be positive
        when converted to std::streamsize.
    \note If the buffer is smaller than the internal peek buffer, it is
        ignored and the internal peek buffer is used instead.
*/
BasicIStreamWrapper(StreamType& stream, Ch *buffer, size_t size)
    : stream_(stream), buffer_(buffer), size_(size), pos_(0), len_(0), count_(0)
{
    RAPIDJSON_ASSERT(buffer_ != 0 && static_cast<std::streamsize>(size_) > 0);

    const size_t peekCapacity = sizeof(peekBuffer_) / sizeof(Ch);
    if (RAPIDJSON_UNLIKELY(size_ < peekCapacity)) {
        // Too small to be useful: fall back to the internal storage.
        buffer_ = peekBuffer_;
        size_ = peekCapacity;
    }
}
Ch Peek() const { Ch Peek() const {
typename StreamType::int_type c = stream_.peek(); if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : static_cast<Ch>('\0'); return static_cast<Ch>('\0');
return buffer_[pos_];
} }
Ch Take() { Ch Take() {
typename StreamType::int_type c = stream_.get(); if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) { return static_cast<Ch>('\0');
count_++; return buffer_[pos_++];
return static_cast<Ch>(c);
}
else
return '\0';
} }
// tellg() may return -1 when failed. So we count by ourself. // tellg() may return -1 when failed. So we count by ourself.
size_t Tell() const { return count_; } size_t Tell() const { return count_ + pos_; }
// Not implemented
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
void Put(Ch) { RAPIDJSON_ASSERT(false); } void Put(Ch) { RAPIDJSON_ASSERT(false); }
void Flush() { RAPIDJSON_ASSERT(false); } void Flush() { RAPIDJSON_ASSERT(false); }
...@@ -76,29 +84,42 @@ public: ...@@ -76,29 +84,42 @@ public:
// For encoding detection only. // For encoding detection only.
const Ch* Peek4() const { const Ch* Peek4() const {
RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream. RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream.
int i; if (len_ - pos_ < 4) {
bool hasError = false; if (pos_) {
for (i = 0; i < 4; ++i) { len_ -= pos_;
typename StreamType::int_type c = stream_.get(); std::memmove(buffer_, buffer_ + pos_, len_);
if (c == StreamType::traits_type::eof()) { count_ += pos_;
hasError = true; pos_ = 0;
stream_.clear();
break;
} }
peekBuffer_[i] = static_cast<Ch>(c); if (!stream_.read(buffer_ + len_, static_cast<std::streamsize>(size_ - len_))) {
len_ += static_cast<size_t>(stream_.gcount());
if (len_ < 4)
return 0;
} }
for (--i; i >= 0; --i) else
stream_.putback(peekBuffer_[i]); len_ = size_;
return !hasError ? peekBuffer_ : 0; }
return &buffer_[pos_];
} }
private: private:
BasicIStreamWrapper(const BasicIStreamWrapper&); BasicIStreamWrapper(const BasicIStreamWrapper&);
BasicIStreamWrapper& operator=(const BasicIStreamWrapper&); BasicIStreamWrapper& operator=(const BasicIStreamWrapper&);
//! Refill the buffer from the underlying stream.
/*!
    Adds the current position to the running character count, resets the
    position to the start of the buffer and reads up to size_ characters.
    \return Number of characters now available in the buffer.
*/
size_t Read() const {
    count_ += pos_;
    pos_ = 0;

    const bool shortRead = !stream_.read(buffer_, static_cast<std::streamsize>(size_));
    // On a failed/short read only gcount() characters were stored;
    // otherwise the whole buffer was filled.
    len_ = shortRead ? static_cast<size_t>(stream_.gcount()) : size_;
    return len_;
}
StreamType& stream_; StreamType& stream_;
size_t count_; //!< Number of characters read. Note: Ch peekBuffer_[4], *buffer_;
mutable Ch peekBuffer_[4]; size_t size_;
mutable size_t pos_, len_, count_;
}; };
typedef BasicIStreamWrapper<std::istream> IStreamWrapper; typedef BasicIStreamWrapper<std::istream> IStreamWrapper;
......
...@@ -21,9 +21,12 @@ ...@@ -21,9 +21,12 @@
#include "rapidjson/prettywriter.h" #include "rapidjson/prettywriter.h"
#include "rapidjson/stringbuffer.h" #include "rapidjson/stringbuffer.h"
#include "rapidjson/filereadstream.h" #include "rapidjson/filereadstream.h"
#include "rapidjson/istreamwrapper.h"
#include "rapidjson/encodedstream.h" #include "rapidjson/encodedstream.h"
#include "rapidjson/memorystream.h" #include "rapidjson/memorystream.h"
#include <fstream>
#ifdef RAPIDJSON_SSE2 #ifdef RAPIDJSON_SSE2
#define SIMD_SUFFIX(name) name##_SSE2 #define SIMD_SUFFIX(name) name##_SSE2
#elif defined(RAPIDJSON_SSE42) #elif defined(RAPIDJSON_SSE42)
...@@ -451,6 +454,16 @@ TEST_F(RapidJson, FileReadStream) { ...@@ -451,6 +454,16 @@ TEST_F(RapidJson, FileReadStream) {
} }
} }
// Benchmark: drain the file character by character through a FileReadStream
// constructed without a user buffer.
TEST_F(RapidJson, FileReadStream_Unbuffered) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        FILE *file = fopen(filename_, "rb");
        FileReadStream stream(file);
        // Consume characters until the first '\0' comes back from Take().
        for (;;) {
            if (stream.Take() == '\0')
                break;
        }
        fclose(file);
    }
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) { TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
for (size_t i = 0; i < kTrialCount; i++) { for (size_t i = 0; i < kTrialCount; i++) {
FILE *fp = fopen(filename_, "rb"); FILE *fp = fopen(filename_, "rb");
...@@ -463,6 +476,88 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) { ...@@ -463,6 +476,88 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
} }
} }
// Benchmark: run Reader::Parse with a BaseReaderHandler<> over a FileReadStream
// constructed without a user buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream_Unbuffered)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        FILE *file = fopen(filename_, "rb");
        FileReadStream stream(file);
        Reader parser;
        BaseReaderHandler<> handler;
        parser.Parse(stream, handler);
        fclose(file);
    }
}
// Benchmark: drain the file character by character through an IStreamWrapper
// that uses a 64 KiB user-supplied buffer.
TEST_F(RapidJson, IStreamWrapper) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes read.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        char buffer[65536];
        IStreamWrapper isw(is, buffer, sizeof(buffer));
        // Consume characters until the first '\0' comes back from Take().
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: drain the file character by character through an IStreamWrapper
// constructed without a user buffer.
TEST_F(RapidJson, IStreamWrapper_Unbuffered) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes read.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        // Consume characters until the first '\0' comes back from Take().
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: drain the file character by character through an IStreamWrapper
// constructed without a user buffer, after handing a 64 KiB buffer to the
// stream's own streambuf via pubsetbuf().
TEST_F(RapidJson, IStreamWrapper_Setbuffered) {
    for (size_t i = 0; i < kTrialCount; i++) {
        std::ifstream is;
        char buffer[65536];
        // pubsetbuf() is called before the file is opened.
        is.rdbuf()->pubsetbuf(buffer, sizeof(buffer));
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes read.
        is.open(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        // Consume characters until the first '\0' comes back from Take().
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: run Reader::Parse with a BaseReaderHandler<> over an
// IStreamWrapper that uses a 64 KiB user-supplied buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes parsed.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        char buffer[65536];
        IStreamWrapper isw(is, buffer, sizeof(buffer));
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
// Benchmark: run Reader::Parse with a BaseReaderHandler<> over an
// IStreamWrapper constructed without a user buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper_Unbuffered)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes parsed.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
// Benchmark: run Reader::Parse with a BaseReaderHandler<> over an
// IStreamWrapper constructed without a user buffer, after handing a 64 KiB
// buffer to the stream's own streambuf via pubsetbuf().
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper_Setbuffered)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        std::ifstream is;
        char buffer[65536];
        // pubsetbuf() is called before the file is opened.
        is.rdbuf()->pubsetbuf(buffer, sizeof(buffer));
        // Open in binary mode, like the "rb" used by the FileReadStream
        // benchmarks, so no text-mode translation alters the bytes parsed.
        is.open(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
TEST_F(RapidJson, StringBuffer) { TEST_F(RapidJson, StringBuffer) {
StringBuffer sb; StringBuffer sb;
for (int i = 0; i < 32 * 1024 * 1024; i++) for (int i = 0; i < 32 * 1024 * 1024; i++)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment