Commit 2498c577 authored by ylavic's avatar ylavic

Optimize FileReadStream and BasicIStreamWrapper.

On (my) Linux machine, perftest reports:
- ~40% gain for FileReadStream (Take() loop),
- ~10% gain for ReaderParse_DummyHandler_FileReadStream.

With the same logic applied to BasicIStreamWrapper, which can thus now
also be created with a user buffer, its performance aligns with that of
FileReadStream (for the same buffer size).

The "unbuffered" versions (added for FileReadStream) work solely with
the internal peekBuffer (Ch[4]) and are measured in perftest.  When
performance doesn't matter much, they avoid the need for large
stack/heap buffers.
parent 30d92a63
......@@ -17,6 +17,7 @@
#include "stream.h"
#include <cstdio>
#include <cstring>
#ifdef __clang__
RAPIDJSON_DIAG_PUSH
......@@ -35,21 +36,42 @@ class FileReadStream {
public:
typedef char Ch; //!< Character type (byte).
//! Constructor (no user buffer).
/*!
Reads through the internal peekBuffer_ instead of caller-provided storage.
Nothing is read from the file here; this constructor only initializes members.
\param fp File pointer opened for read. Must not be null (asserted).
*/
FileReadStream(std::FILE* fp) : fp_(fp), buffer_(peekBuffer_), size_(sizeof(peekBuffer_) / sizeof(Ch)), pos_(), len_(), count_()
{
RAPIDJSON_ASSERT(fp_ != 0);
}
//! Constructor.
/*!
\param fp File pointer opened for read.
\param buffer user-supplied buffer.
\param bufferSize size of buffer in bytes. Must be >= 4 bytes.
*/
FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) {
RAPIDJSON_ASSERT(fp_ != 0);
RAPIDJSON_ASSERT(bufferSize >= 4);
Read();
// Constructor taking a caller-provided buffer of `size` Ch elements.
// Nothing is read from the file here; this constructor only initializes members.
FileReadStream(std::FILE* fp, Ch *buffer, size_t size) : fp_(fp), buffer_(buffer), size_(size), pos_(), len_(), count_() {
// The file pointer and the buffer must be non-null and the size non-zero.
RAPIDJSON_ASSERT(fp_ != 0 && buffer_ != 0 && size_ > 0);
// A buffer with fewer elements than peekBuffer_ is ignored in favor of the
// internal peekBuffer_ (presumably so that 4 characters always fit for Peek4()).
if (RAPIDJSON_UNLIKELY(size_ < sizeof(peekBuffer_) / sizeof(Ch))) {
size_ = sizeof(peekBuffer_) / sizeof(Ch);
buffer_ = peekBuffer_;
}
}
Ch Peek() const { return *current_; }
Ch Take() { Ch c = *current_; Read(); return c; }
size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
//! Returns the current character without consuming it.
/*!
\return The character at the read position, or '\0' when the buffer is
exhausted and Read() delivers no further characters.
*/
Ch Peek() const {
// Every buffered character has been consumed: refill, and report '\0'
// if the refill produced nothing.
if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return static_cast<Ch>('\0');
return buffer_[pos_];
}
//! Returns the current character and advances the read position by one.
/*!
\return The consumed character, or '\0' when the buffer is exhausted and
Read() delivers no further characters (the position is not advanced then).
*/
Ch Take() {
// Every buffered character has been consumed: refill, and report '\0'
// if the refill produced nothing.
if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return static_cast<Ch>('\0');
return buffer_[pos_++];
}
//! Stream offset: characters accounted for in count_ plus the position inside the current buffer.
size_t Tell() const { return count_ + pos_; }
// Not implemented
void Put(Ch) { RAPIDJSON_ASSERT(false); }
......@@ -59,35 +81,36 @@ public:
// For encoding detection only.
const Ch* Peek4() const {
return (current_ + 4 <= bufferLast_) ? current_ : 0;
if (len_ - pos_ < 4) {
if (pos_) {
len_ -= pos_;
std::memmove(buffer_, buffer_ + pos_, len_);
count_ += pos_;
pos_ = 0;
}
private:
void Read() {
if (current_ < bufferLast_)
++current_;
else if (!eof_) {
count_ += readCount_;
readCount_ = std::fread(buffer_, 1, bufferSize_, fp_);
bufferLast_ = buffer_ + readCount_ - 1;
current_ = buffer_;
if (readCount_ < bufferSize_) {
buffer_[readCount_] = '\0';
++bufferLast_;
eof_ = true;
len_ += std::fread(buffer_ + len_, sizeof(Ch), size_ - len_, fp_);
if (len_ < 4)
return 0;
}
return &buffer_[pos_];
}
private:
FileReadStream();
FileReadStream(const FileReadStream&);
FileReadStream& operator=(const FileReadStream&);
//! Refills the buffer from fp_, overwriting its previous contents.
/*!
Declared const (pos_, len_ and count_ are mutable) so that Peek() const can call it.
\return Number of characters now available in the buffer, as reported by
std::fread; 0 means nothing could be read.
*/
size_t Read() const {
// Fold the consumed position into the running total before it is reset,
// so that Tell() (count_ + pos_) keeps its value across the refill.
count_ += pos_;
pos_ = 0;
// Request up to size_ characters, starting at the beginning of the buffer.
len_ = std::fread(buffer_, sizeof(Ch), size_, fp_);
return len_;
}
std::FILE* fp_;
Ch *buffer_;
size_t bufferSize_;
Ch *bufferLast_;
Ch *current_;
size_t readCount_;
size_t count_; //!< Number of characters read
bool eof_;
Ch peekBuffer_[4], *buffer_;
size_t size_;
mutable size_t pos_, len_, count_;
};
RAPIDJSON_NAMESPACE_END
......
......@@ -17,6 +17,7 @@
#include "stream.h"
#include <iosfwd>
#include <cstring>
#ifdef __clang__
RAPIDJSON_DIAG_PUSH
......@@ -48,26 +49,33 @@ template <typename StreamType>
class BasicIStreamWrapper {
public:
typedef typename StreamType::char_type Ch;
BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {}
BasicIStreamWrapper(StreamType& stream) : stream_(stream), buffer_(peekBuffer_), size_(sizeof(peekBuffer_) / sizeof(Ch)), pos_(), len_(), count_() {}
//! Constructor with a user-supplied buffer.
/*!
\param stream Input stream to wrap; it is held by reference.
\param buffer User-supplied buffer. Must not be null (asserted).
\param size Number of Ch elements in buffer. Must be positive when converted to std::streamsize (asserted).
*/
BasicIStreamWrapper(StreamType& stream, Ch *buffer, size_t size) : stream_(stream), buffer_(buffer), size_(size), pos_(), len_(), count_() {
RAPIDJSON_ASSERT(buffer_ != 0 && static_cast<std::streamsize>(size_) > 0);
// A buffer with fewer elements than peekBuffer_ is ignored in favor of the
// internal peekBuffer_ (presumably so that 4 characters always fit for Peek4()).
if (RAPIDJSON_UNLIKELY(size_ < sizeof(peekBuffer_) / sizeof(Ch))) {
size_ = sizeof(peekBuffer_) / sizeof(Ch);
buffer_ = peekBuffer_;
}
}
Ch Peek() const {
typename StreamType::int_type c = stream_.peek();
return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : static_cast<Ch>('\0');
if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return static_cast<Ch>('\0');
return buffer_[pos_];
}
Ch Take() {
typename StreamType::int_type c = stream_.get();
if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) {
count_++;
return static_cast<Ch>(c);
}
else
return '\0';
if (RAPIDJSON_UNLIKELY(pos_ == len_) && !Read())
return static_cast<Ch>('\0');
return buffer_[pos_++];
}
// tellg() may return -1 on failure, so we keep our own count.
size_t Tell() const { return count_; }
size_t Tell() const { return count_ + pos_; }
// Not implemented
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
void Put(Ch) { RAPIDJSON_ASSERT(false); }
void Flush() { RAPIDJSON_ASSERT(false); }
......@@ -76,29 +84,42 @@ public:
// For encoding detection only.
const Ch* Peek4() const {
RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream.
int i;
bool hasError = false;
for (i = 0; i < 4; ++i) {
typename StreamType::int_type c = stream_.get();
if (c == StreamType::traits_type::eof()) {
hasError = true;
stream_.clear();
break;
if (len_ - pos_ < 4) {
if (pos_) {
len_ -= pos_;
std::memmove(buffer_, buffer_ + pos_, len_);
count_ += pos_;
pos_ = 0;
}
peekBuffer_[i] = static_cast<Ch>(c);
if (!stream_.read(buffer_ + len_, static_cast<std::streamsize>(size_ - len_))) {
len_ += static_cast<size_t>(stream_.gcount());
if (len_ < 4)
return 0;
}
for (--i; i >= 0; --i)
stream_.putback(peekBuffer_[i]);
return !hasError ? peekBuffer_ : 0;
else
len_ = size_;
}
return &buffer_[pos_];
}
private:
BasicIStreamWrapper(const BasicIStreamWrapper&);
BasicIStreamWrapper& operator=(const BasicIStreamWrapper&);
//! Refills the buffer from stream_, overwriting its previous contents.
/*!
Declared const (pos_, len_ and count_ are mutable) so that Peek() const can call it.
\return Number of characters now available in the buffer; 0 means nothing could be read.
*/
size_t Read() const {
// Fold the consumed position into the running total before it is reset,
// so that Tell() (count_ + pos_) keeps its value across the refill.
count_ += pos_;
pos_ = 0;
// If read() leaves the stream in a failed state, only gcount() characters
// were stored; otherwise the whole buffer (size_ characters) was filled.
if (!stream_.read(buffer_, static_cast<std::streamsize>(size_)))
len_ = static_cast<size_t>(stream_.gcount());
else
len_ = size_;
return len_;
}
StreamType& stream_;
size_t count_; //!< Number of characters read. Note:
mutable Ch peekBuffer_[4];
Ch peekBuffer_[4], *buffer_;
size_t size_;
mutable size_t pos_, len_, count_;
};
typedef BasicIStreamWrapper<std::istream> IStreamWrapper;
......
......@@ -21,9 +21,12 @@
#include "rapidjson/prettywriter.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/istreamwrapper.h"
#include "rapidjson/encodedstream.h"
#include "rapidjson/memorystream.h"
#include <fstream>
#ifdef RAPIDJSON_SSE2
#define SIMD_SUFFIX(name) name##_SSE2
#elif defined(RAPIDJSON_SSE42)
......@@ -451,6 +454,16 @@ TEST_F(RapidJson, FileReadStream) {
}
}
// Benchmark: consume the test file one Take() at a time through a
// FileReadStream constructed without a user buffer.
TEST_F(RapidJson, FileReadStream_Unbuffered) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        FILE *file = fopen(filename_, "rb");
        FileReadStream stream(file);
        // Keep taking characters until the stream hands back '\0'.
        for (;;) {
            if (stream.Take() == '\0')
                break;
        }
        fclose(file);
    }
}
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
for (size_t i = 0; i < kTrialCount; i++) {
FILE *fp = fopen(filename_, "rb");
......@@ -463,6 +476,88 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
}
}
// Benchmark: parse the test file with a no-op handler, reading through a
// FileReadStream constructed without a user buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream_Unbuffered)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        FILE *file = fopen(filename_, "rb");
        FileReadStream stream(file);
        BaseReaderHandler<> handler;
        Reader parser;
        parser.Parse(stream, handler);
        fclose(file);
    }
}
// Benchmark: consume the test file one Take() at a time through an
// IStreamWrapper that reads via a 64 KiB user buffer.
TEST_F(RapidJson, IStreamWrapper) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        char buffer[65536];
        IStreamWrapper isw(is, buffer, sizeof(buffer));
        // Keep taking characters until the wrapper hands back '\0'.
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: consume the test file one Take() at a time through an
// IStreamWrapper constructed without a user buffer.
TEST_F(RapidJson, IStreamWrapper_Unbuffered) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        // Keep taking characters until the wrapper hands back '\0'.
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: consume the test file one Take() at a time through an
// IStreamWrapper without a user buffer, while the underlying ifstream is
// given a 64 KiB buffer via pubsetbuf().
TEST_F(RapidJson, IStreamWrapper_Setbuffered) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Declared before the stream so that the storage handed to the
        // stream buffer outlives the stream itself.
        char buffer[65536];
        std::ifstream is;
        // The buffer is installed before the file is opened.
        is.rdbuf()->pubsetbuf(buffer, sizeof(buffer));
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        is.open(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        // Keep taking characters until the wrapper hands back '\0'.
        while (isw.Take() != '\0')
            ;
        is.close();
    }
}
// Benchmark: parse the test file with a no-op handler, reading through an
// IStreamWrapper that uses a 64 KiB user buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        char buffer[65536];
        IStreamWrapper isw(is, buffer, sizeof(buffer));
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
// Benchmark: parse the test file with a no-op handler, reading through an
// IStreamWrapper constructed without a user buffer.
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper_Unbuffered)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        std::ifstream is(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
// Benchmark: parse the test file with a no-op handler, reading through an
// IStreamWrapper without a user buffer, while the underlying ifstream is
// given a 64 KiB buffer via pubsetbuf().
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_IStreamWrapper_Setbuffered)) {
    for (size_t i = 0; i < kTrialCount; i++) {
        // Declared before the stream so that the storage handed to the
        // stream buffer outlives the stream itself.
        char buffer[65536];
        std::ifstream is;
        // The buffer is installed before the file is opened.
        is.rdbuf()->pubsetbuf(buffer, sizeof(buffer));
        // Binary mode, like the "rb" used by the FileReadStream benchmarks, so
        // that both stream types read the same bytes on every platform.
        is.open(filename_, std::ios::in | std::ios::binary);
        IStreamWrapper isw(is);
        BaseReaderHandler<> h;
        Reader reader;
        reader.Parse(isw, h);
        is.close();
    }
}
TEST_F(RapidJson, StringBuffer) {
StringBuffer sb;
for (int i = 0; i < 32 * 1024 * 1024; i++)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment