Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
R
rapidjson
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
rapidjson
Commits
dd25c965
Commit
dd25c965
authored
Feb 20, 2016
by
Milo Yip
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #553 from miloyip/issue158_parsestdstring
Issue158 parsestdstring
parents
ff12c04a
3595b1f6
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
243 additions
and
2 deletions
+243
-2
document.h
include/rapidjson/document.h
+38
-0
encodedstream.h
include/rapidjson/encodedstream.h
+25
-0
memorystream.h
include/rapidjson/memorystream.h
+2
-2
reader.h
include/rapidjson/reader.h
+77
-0
perftest.h
test/perftest/perftest.h
+2
-0
rapidjsontest.cpp
test/perftest/rapidjsontest.cpp
+19
-0
documenttest.cpp
test/unittest/documenttest.cpp
+58
-0
simdtest.cpp
test/unittest/simdtest.cpp
+22
-0
No files found.
include/rapidjson/document.h
View file @
dd25c965
...
...
@@ -20,6 +20,8 @@
#include "reader.h"
#include "internal/meta.h"
#include "internal/strfunc.h"
#include "memorystream.h"
#include "encodedstream.h"
#include <new> // placement new
#ifdef _MSC_VER
...
...
@@ -2224,6 +2226,42 @@ public:
//! Parse JSON text from a read-only string with the default parse flags.
/*! \param str Read-only source string; forwarded unchanged to Parse<kParseDefaultFlags>().
    \return The result of Parse<kParseDefaultFlags>(str).
*/
GenericDocument& Parse(const Ch* str) {
    return Parse<kParseDefaultFlags>(str);
}
//! Parse JSON text from a length-delimited buffer, decoding it as \c SourceEncoding.
/*! \tparam parseFlags Combination of parse flags; must not contain \c kParseInsituFlag (asserted).
    \tparam SourceEncoding Encoding of the input buffer.
    \param str Start of the input buffer. The buffer is bounded by \c length.
    \param length Number of \c SourceEncoding::Ch code units in \c str (not bytes).
    \return \c *this.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
    RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));

    // MemoryStream is built from a const char* plus a byte count, so the code-unit
    // count is scaled by the code-unit size.
    const size_t byteCount = length * sizeof(typename SourceEncoding::Ch);

    // reinterpret_cast, not static_cast: when SourceEncoding::Ch is not char
    // (e.g. wchar_t for UTF16<>), static_cast between the unrelated pointer types
    // does not compile.
    MemoryStream ms(reinterpret_cast<const char*>(str), byteCount);
    EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
    ParseStream<parseFlags, SourceEncoding>(is);
    return *this;
}
//! Parse JSON text from a length-delimited buffer held in the document's own encoding.
/*! \tparam parseFlags Combination of parse flags.
    \param str Start of the input buffer.
    \param length Number of \c Ch code units in \c str.
    \return \c *this (the two-parameter overload it delegates to returns \c *this).
*/
template <unsigned parseFlags>
GenericDocument& Parse(const Ch* str, size_t length) {
    Parse<parseFlags, Encoding>(str, length);
    return *this;
}
//! Parse JSON text from a length-delimited buffer with the default parse flags.
/*! \param str Start of the input buffer.
    \param length Number of \c Ch code units in \c str.
    \return The result of Parse<kParseDefaultFlags>(str, length).
*/
GenericDocument& Parse(const Ch* str, size_t length) {
    return Parse<kParseDefaultFlags>(str, length);
}
#if RAPIDJSON_HAS_STDSTRING
//! Parse JSON text held in a std::basic_string, decoding it as \c SourceEncoding.
/*! \tparam parseFlags Combination of parse flags.
    \tparam SourceEncoding Encoding of the characters in \c str.
    \param str Source string.
    \return The result of the pointer-only Parse<parseFlags, SourceEncoding>() overload.
    \note NOTE(review): only the pointer is forwarded, not str.size(); presumably parsing
          stops at the first terminator, so embedded NULs would truncate input - confirm.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
    // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
    const typename SourceEncoding::Ch* const text = str.c_str();
    return Parse<parseFlags, SourceEncoding>(text);
}
//! Parse JSON text held in a std::basic_string that uses the document's own encoding.
/*! \tparam parseFlags Combination of parse flags.
    \param str Source string.
    \return The result of Parse<parseFlags, Encoding>(str).
*/
template <unsigned parseFlags>
GenericDocument& Parse(const std::basic_string<Ch>& str) {
    return Parse<parseFlags, Encoding>(str);
}
//! Parse JSON text held in a std::basic_string with the default parse flags.
/*! \param str Source string.
    \return The result of Parse<kParseDefaultFlags>(str).
*/
GenericDocument& Parse(const std::basic_string<Ch>& str) {
    return Parse<kParseDefaultFlags>(str);
}
#endif // RAPIDJSON_HAS_STDSTRING
//!@}
//!@name Handling parse errors
...
...
include/rapidjson/encodedstream.h
View file @
dd25c965
...
...
@@ -16,6 +16,7 @@
#define RAPIDJSON_ENCODEDSTREAM_H_
#include "stream.h"
#include "memorystream.h"
#ifdef __GNUC__
RAPIDJSON_DIAG_PUSH
...
...
@@ -62,6 +63,30 @@ private:
Ch
current_
;
};
//! Specialized for UTF8 MemoryStream.
/*! Forwards Peek/Take/Tell straight to the wrapped MemoryStream. The output-side
    members are empty stubs. The wrapped stream is held by reference in the public
    member \c is_.
*/
template <>
class EncodedInputStream<UTF8<>, MemoryStream> {
public:
    typedef UTF8<>::Ch Ch;

    //! Wrap \c is and consume leading UTF-8 byte-order-mark bytes.
    /*! Each of the bytes EF, BB, BF is compared, in that order, against the current
        head of the stream and consumed only when it matches. The three checks are
        independent of one another.
    */
    EncodedInputStream(MemoryStream& is) : is_(is) {
        const unsigned char bom[3] = { 0xEFu, 0xBBu, 0xBFu };
        for (int i = 0; i < 3; ++i) {
            if (static_cast<unsigned char>(is_.Peek()) == bom[i])
                is_.Take();
        }
    }

    //! Current character of the wrapped stream, without consuming it.
    Ch Peek() const { return is_.Peek(); }
    //! Consume and return the current character of the wrapped stream.
    Ch Take() { return is_.Take(); }
    //! Position reported by the wrapped stream.
    size_t Tell() const { return is_.Tell(); }

    // Not implemented
    void Put(Ch) {}
    void Flush() {}
    Ch* PutBegin() { return 0; }
    size_t PutEnd(Ch*) { return 0; }

    MemoryStream& is_;  //!< Wrapped stream, held by reference.
};
//! Output byte stream wrapper with statically bound encoding.
/*!
\tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
...
...
include/rapidjson/memorystream.h
View file @
dd25c965
...
...
@@ -42,8 +42,8 @@ struct MemoryStream {
//! Construct a stream over the \c size characters starting at \c src.
/*! \param src Start of the buffer; also recorded as the beginning used by Tell().
    \param size Number of characters in the buffer; the end is \c src + \c size.
*/
MemoryStream(const Ch *src, size_t size)
    : src_(src),
      begin_(src),
      end_(src + size),
      size_(size)
{}
//! Return the current character without consuming it; yields '\0' once the end is reached.
/*! The end-of-buffer test is wrapped in RAPIDJSON_UNLIKELY (presumably a
    branch-prediction hint - confirm against its definition).
*/
Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; }

//! Return the current character and advance; yields '\0' without advancing once the end is reached.
Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; }
//! Number of characters consumed so far: the distance from \c begin_ to the read cursor.
size_t Tell() const {
    return static_cast<size_t>(src_ - begin_);
}
//! Unsupported write operation: always trips RAPIDJSON_ASSERT(false), then returns a null pointer.
Ch* PutBegin() {
    RAPIDJSON_ASSERT(false);
    return 0;
}
...
...
include/rapidjson/reader.h
View file @
dd25c965
...
...
@@ -19,6 +19,7 @@
#include "allocators.h"
#include "stream.h"
#include "encodedstream.h"
#include "internal/meta.h"
#include "internal/stack.h"
#include "internal/strtod.h"
...
...
@@ -259,6 +260,12 @@ void SkipWhitespace(InputStream& is) {
s
.
Take
();
}
//! Skip leading whitespace in the half-open range [p, end).
/*! Whitespace is one of: space, line feed, carriage return, horizontal tab.
    \param p Start of the range.
    \param end One past the last readable character; never dereferenced.
    \return Pointer to the first non-whitespace character, or \c end if none is found.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char c = *p;
        if (c != ' ' && c != '\n' && c != '\r' && c != '\t')
            break;
    }
    return p;
}
#ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
...
...
@@ -295,6 +302,34 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
}
}
//! Skip whitespace in [p, end) with the SSE 4.2 pcmpistrm instruction, testing 16 bytes per iteration.
/*! \param p Start of the range.
    \param end One past the last readable byte.
    \return Pointer to the first non-whitespace byte found, or the result of the
            scalar SkipWhitespace(p, end) for the final partial chunk.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    // The middle of string using SIMD
    // The set must hold all four whitespace characters, including the leading space.
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    // Compare the remaining length instead of forming `end - 16`, which would point
    // before the buffer (undefined behaviour) when fewer than 16 bytes are left.
    for (; end - p >= 16; p += 16) {
        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
        // Negative polarity: a set bit marks a byte that is NOT in the whitespace set.
        const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
        if (r != 0) {   // some of characters is non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }

    // Fewer than 16 bytes remain: finish with the scalar, bounds-checked loop.
    return SkipWhitespace(p, end);
}
#elif defined(RAPIDJSON_SSE2)
//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
...
...
@@ -342,6 +377,44 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
}
}
//! Skip whitespace in [p, end) with SSE2 instructions, testing 16 bytes per iteration.
/*! \param p Start of the range.
    \param end One past the last readable byte.
    \return Pointer to the first non-whitespace byte found, or the result of the
            scalar SkipWhitespace(p, end) for the final partial chunk.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    // The rest of string
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    // One 16-byte vector per whitespace character, each filled with that character.
    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    // Compare the remaining length instead of forming `end - 16`, which would point
    // before the buffer (undefined behaviour) when fewer than 16 bytes are left.
    for (; end - p >= 16; p += 16) {
        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // movemask yields one bit per whitespace byte; inverting leaves the non-whitespace bits.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }

    // Fewer than 16 bytes remain: finish with the scalar, bounds-checked loop.
    return SkipWhitespace(p, end);
}
#endif // RAPIDJSON_SSE2
#ifdef RAPIDJSON_SIMD
...
...
@@ -354,6 +427,10 @@ template<> inline void SkipWhitespace(InsituStringStream& is) {
//! SIMD-accelerated whitespace skipping for StringStream: moves the stream's read cursor
//! to wherever SkipWhitespace_SIMD() stops.
template<> inline void SkipWhitespace(StringStream& is) {
    const char* const next = SkipWhitespace_SIMD(is.src_);
    is.src_ = next;
}
//! SIMD-accelerated whitespace skipping for a UTF-8 EncodedInputStream over a MemoryStream.
/*! Reaches through the wrapper to the underlying MemoryStream and advances its read
    cursor with the bounded SkipWhitespace_SIMD(p, end) overload.
*/
template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
    MemoryStream& inner = is.is_;
    inner.src_ = SkipWhitespace_SIMD(inner.src_, inner.end_);
}
#endif // RAPIDJSON_SIMD
///////////////////////////////////////////////////////////////////////////////
...
...
test/perftest/perftest.h
View file @
dd25c965
...
...
@@ -30,6 +30,8 @@
# define RAPIDJSON_SSE2
#endif
#define RAPIDJSON_HAS_STDSTRING 1
////////////////////////////////////////////////////////////////////////////////
// Google Test
...
...
test/perftest/rapidjsontest.cpp
View file @
dd25c965
...
...
@@ -187,6 +187,25 @@ TEST_F(RapidJson, SIMD_SUFFIX(DocumentParse_MemoryPoolAllocator)) {
}
}
// Perf test: parse the fixture JSON through the (pointer, length) overload kTrialCount
// times, using a fresh Document for every trial.
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseLength_MemoryPoolAllocator)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        Document doc;
        doc.Parse(json_, length_);
        ASSERT_TRUE(doc.IsObject());
    }
}
#if RAPIDJSON_HAS_STDSTRING
// Perf test: parse the fixture JSON through the std::string overload kTrialCount times.
// The string is built once, outside the loop.
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseStdString_MemoryPoolAllocator)) {
    const std::string s(json_, length_);
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        Document doc;
        doc.Parse(s);
        ASSERT_TRUE(doc.IsObject());
    }
}
#endif
TEST_F
(
RapidJson
,
SIMD_SUFFIX
(
DocumentParseIterative_MemoryPoolAllocator
))
{
for
(
size_t
i
=
0
;
i
<
kTrialCount
;
i
++
)
{
Document
doc
;
...
...
test/unittest/documenttest.cpp
View file @
dd25c965
...
...
@@ -34,6 +34,8 @@ void ParseCheck(DocumentType& doc) {
typedef
typename
DocumentType
::
ValueType
ValueType
;
EXPECT_FALSE
(
doc
.
HasParseError
());
if
(
doc
.
HasParseError
())
printf
(
"Error: %d at %zu
\n
"
,
static_cast
<
int
>
(
doc
.
GetParseError
()),
doc
.
GetErrorOffset
());
EXPECT_TRUE
(
static_cast
<
ParseResult
>
(
doc
));
EXPECT_TRUE
(
doc
.
IsObject
());
...
...
@@ -93,6 +95,26 @@ void ParseTest() {
doc
.
ParseInsitu
(
buffer
);
ParseCheck
(
doc
);
free
(
buffer
);
// Parse(const Ch*, size_t)
size_t
length
=
strlen
(
json
);
buffer
=
reinterpret_cast
<
char
*>
(
malloc
(
length
*
2
));
memcpy
(
buffer
,
json
,
length
);
memset
(
buffer
+
length
,
'X'
,
length
);
#if RAPIDJSON_HAS_STDSTRING
std
::
string
s2
(
buffer
,
length
);
// backup buffer
#endif
doc
.
SetNull
();
doc
.
Parse
(
buffer
,
length
);
free
(
buffer
);
ParseCheck
(
doc
);
#if RAPIDJSON_HAS_STDSTRING
// Parse(std::string)
doc
.
SetNull
();
doc
.
Parse
(
s2
);
ParseCheck
(
doc
);
#endif
}
TEST
(
Document
,
Parse
)
{
...
...
@@ -140,6 +162,42 @@ static FILE* OpenEncodedFile(const char* filename) {
return
0
;
}
// Checks that a UTF-16 document can be parsed from UTF-8 input through the
// (pointer, length) overload and, when enabled, the std::string overload.
TEST(Document, Parse_Encoding) {
    const char* json = " {\"hello\":\"world\",\"t\": true ,\"f\": false,\"n\": null,\"i\":123,\"pi\": 3.1416,\"a\":[1, 2, 3, 4] } ";

    typedef GenericDocument<UTF16<> > DocumentType;
    DocumentType doc;

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*)
    // doc.Parse<kParseDefaultFlags, UTF8<> >(json);
    // EXPECT_FALSE(doc.HasParseError());
    // EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*, size_t)
    // The buffer is twice the JSON length; the second half is filled with 'X' so that
    // reading past `jsonLength` would hit non-JSON bytes rather than a terminator.
    const size_t jsonLength = strlen(json);
    char* padded = reinterpret_cast<char*>(malloc(jsonLength * 2));
    memcpy(padded, json, jsonLength);
    memset(padded + jsonLength, 'X', jsonLength);
#if RAPIDJSON_HAS_STDSTRING
    std::string s2(padded, jsonLength); // backup buffer
#endif
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(padded, jsonLength);
    free(padded);
    EXPECT_FALSE(doc.HasParseError());
    if (doc.HasParseError())
        printf("Error: %d at %zu\n", static_cast<int>(doc.GetParseError()), doc.GetErrorOffset());
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

#if RAPIDJSON_HAS_STDSTRING
    // Parse<unsigned, SourceEncoding>(std::string)
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(s2);
    EXPECT_FALSE(doc.HasParseError());
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));
#endif
}
TEST
(
Document
,
ParseStream_EncodedInputStream
)
{
// UTF8 -> UTF16
FILE
*
fp
=
OpenEncodedFile
(
"utf8.json"
);
...
...
test/unittest/simdtest.cpp
View file @
dd25c965
...
...
@@ -73,6 +73,28 @@ TEST(SIMD, SIMD_SUFFIX(SkipWhitespace)) {
TestSkipWhitespace
<
InsituStringStream
>
();
}
// Exercises SkipWhitespace() on a UTF-8 EncodedInputStream over a MemoryStream.
// For every step in [1, 32) the 1024-byte buffer is filled with whitespace and an 'X'
// is planted every `step` bytes; each skip must then land exactly on an 'X'.
TEST(SIMD, SIMD_SUFFIX(SkipWhitespace_EncodedMemoryStream)) {
    for (size_t step = 1; step < 32; step++) {
        char buffer[1024];
        // The fill pattern must be four real whitespace characters. Without the
        // leading space, index 3 would select the literal's terminating '\0' and
        // the loop below would stop at the first such byte.
        for (size_t i = 0; i < 1024; i++)
            buffer[i] = " \t\r\n"[i % 4];
        for (size_t i = 0; i < 1024; i += step)
            buffer[i] = 'X';

        MemoryStream ms(buffer, 1024);
        EncodedInputStream<UTF8<>, MemoryStream> s(ms);
        size_t i = 0;   // expected offset of the next 'X'; only used by the disabled check below
        for (;;) {
            SkipWhitespace(s);
            if (s.Peek() == '\0')   // MemoryStream reports '\0' at the end of the buffer
                break;
            //EXPECT_EQ(i, s.Tell());
            EXPECT_EQ('X', s.Take());
            i += step;
        }
    }
}
struct
ScanCopyUnescapedStringHandler
:
BaseReaderHandler
<
UTF8
<>
,
ScanCopyUnescapedStringHandler
>
{
bool
String
(
const
char
*
str
,
size_t
length
,
bool
)
{
memcpy
(
buffer
,
str
,
length
+
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment