Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
R
rapidjson
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
rapidjson
Commits
dd25c965
Commit
dd25c965
authored
9 years ago
by
Milo Yip
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #553 from miloyip/issue158_parsestdstring
Issue158 parsestdstring
parents
ff12c04a
3595b1f6
master
v1.1.0
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
243 additions
and
2 deletions
+243
-2
document.h
include/rapidjson/document.h
+38
-0
encodedstream.h
include/rapidjson/encodedstream.h
+25
-0
memorystream.h
include/rapidjson/memorystream.h
+2
-2
reader.h
include/rapidjson/reader.h
+77
-0
perftest.h
test/perftest/perftest.h
+2
-0
rapidjsontest.cpp
test/perftest/rapidjsontest.cpp
+19
-0
documenttest.cpp
test/unittest/documenttest.cpp
+58
-0
simdtest.cpp
test/unittest/simdtest.cpp
+22
-0
No files found.
include/rapidjson/document.h
View file @
dd25c965
...
...
@@ -20,6 +20,8 @@
#include "reader.h"
#include "internal/meta.h"
#include "internal/strfunc.h"
#include "memorystream.h"
#include "encodedstream.h"
#include <new> // placement new
#ifdef _MSC_VER
...
...
@@ -2224,6 +2226,42 @@ public:
//! Parse JSON text from a read-only string with the default parse flags.
/*! \param str Read-only source string. No length is passed, so the delegated overload
        presumably expects it to be null-terminated.
    \return Whatever the kParseDefaultFlags overload returns (a reference to a GenericDocument).
*/
GenericDocument& Parse(const Ch* str) {
    return Parse<kParseDefaultFlags>(str);
}
//! Parse JSON text from a length-delimited buffer, with explicit source encoding.
/*! \tparam parseFlags Combination of parse flags. Must not contain kParseInsituFlag (asserted).
    \tparam SourceEncoding Encoding of the input buffer.
    \param str Read-only buffer. It is read through a MemoryStream bounded by \c length.
    \param length Number of \c SourceEncoding::Ch units in \c str (converted to bytes below).
    \return Reference to this document.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
    RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
    // MemoryStream works on bytes. SourceEncoding::Ch may be wider than char, and a
    // static_cast between unrelated object pointer types is ill-formed, so the
    // byte view has to be obtained with reinterpret_cast.
    const char* bytes = reinterpret_cast<const char*>(str);
    MemoryStream ms(bytes, length * sizeof(typename SourceEncoding::Ch));
    EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
    ParseStream<parseFlags, SourceEncoding>(is);
    return *this;
}
//! Parse a length-delimited buffer, using the document's own Encoding as source encoding.
/*! \tparam parseFlags Combination of parse flags, forwarded unchanged.
    \param str Read-only buffer.
    \param length Number of characters in \c str.
*/
template <unsigned parseFlags>
GenericDocument& Parse(const Ch* str, size_t length) {
    return Parse<parseFlags, Encoding>(str, length);
}
//! Parse a length-delimited buffer with the default parse flags.
/*! \param str Read-only buffer.
    \param length Number of characters in \c str.
*/
GenericDocument& Parse(const Ch* str, size_t length) {
    return Parse<kParseDefaultFlags>(str, length);
}
#if RAPIDJSON_HAS_STDSTRING
//! Parse JSON text held in a std::basic_string, with explicit source encoding.
/*! \tparam parseFlags Combination of parse flags, forwarded unchanged.
    \tparam SourceEncoding Encoding of the characters stored in \c str.
    \param str Source string. Only its c_str() pointer is forwarded, not its size.
    \note NOTE(review): because the size is not forwarded, content after an embedded
        NUL is presumably not parsed - confirm against the pointer overload.
*/
template <unsigned parseFlags, typename SourceEncoding>
GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
    // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
    const typename SourceEncoding::Ch* text = str.c_str();
    return Parse<parseFlags, SourceEncoding>(text);
}
//! Parse a std::basic_string, using the document's own Encoding as source encoding.
/*! \tparam parseFlags Combination of parse flags, forwarded unchanged.
    \param str Source string.
*/
template <unsigned parseFlags>
GenericDocument& Parse(const std::basic_string<Ch>& str) {
    return Parse<parseFlags, Encoding>(str);
}
//! Parse a std::basic_string with the default parse flags.
/*! \param str Source string. */
GenericDocument& Parse(const std::basic_string<Ch>& str) {
    return Parse<kParseDefaultFlags>(str);
}
#endif // RAPIDJSON_HAS_STDSTRING
//!@}
//!@name Handling parse errors
...
...
This diff is collapsed.
Click to expand it.
include/rapidjson/encodedstream.h
View file @
dd25c965
...
...
@@ -16,6 +16,7 @@
#define RAPIDJSON_ENCODEDSTREAM_H_
#include "stream.h"
#include "memorystream.h"
#ifdef __GNUC__
RAPIDJSON_DIAG_PUSH
...
...
@@ -62,6 +63,30 @@ private:
Ch
current_
;
};
//! Specialized for UTF8 MemoryStream.
/*! Peek(), Take() and Tell() forward directly to the wrapped MemoryStream.
    The constructor consumes a leading UTF-8 byte order mark (EF BB BF) if present.
    The output operations are empty stubs.
*/
template <>
class EncodedInputStream<UTF8<>, MemoryStream> {
public:
    typedef UTF8<>::Ch Ch;

    //! Wrap \c is and skip a UTF-8 BOM if the stream starts with one.
    /*! \param is Underlying stream. Held by reference, so it must outlive this object. */
    EncodedInputStream(MemoryStream& is) : is_(is) {
        // Skip only when all three BOM bytes are present and in order. Testing each
        // byte independently would also swallow a stray 0xEF, 0xBB or 0xBF at the
        // start of the input, silently dropping bytes that are not a BOM.
        const MemoryStream::Ch* p = is_.src_;
        if (is_.end_ - p >= 3 &&
            static_cast<unsigned char>(p[0]) == 0xEFu &&
            static_cast<unsigned char>(p[1]) == 0xBBu &&
            static_cast<unsigned char>(p[2]) == 0xBFu)
            is_.src_ = p + 3;
    }

    //! Current character of the underlying stream, without consuming it.
    Ch Peek() const { return is_.Peek(); }
    //! Consume and return the current character of the underlying stream.
    Ch Take() { return is_.Take(); }
    //! Position reported by the underlying stream.
    size_t Tell() const { return is_.Tell(); }

    // Not implemented
    void Put(Ch) {}
    void Flush() {}
    Ch* PutBegin() { return 0; }
    size_t PutEnd(Ch*) { return 0; }

    // Left publicly accessible: the SIMD SkipWhitespace specialization reads and
    // writes is_.src_ / is_.end_ directly.
    MemoryStream& is_;
};
//! Output byte stream wrapper with statically bound encoding.
/*!
\tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
...
...
This diff is collapsed.
Click to expand it.
include/rapidjson/memorystream.h
View file @
dd25c965
...
...
@@ -42,8 +42,8 @@ struct MemoryStream {
//! Construct a stream over the range [src, src + size).
/*! \param src First character of the buffer; also recorded as the start position.
    \param size Number of characters in the buffer.
*/
MemoryStream(const Ch* src, size_t size)
    : src_(src),
      begin_(src),
      end_(src + size),
      size_(size)
{}
//! Return the current character without consuming it, or '\0' at the end of the buffer.
Ch Peek() const {
    if (src_ == end_)
        return '\0';
    return *src_;
}
//! Consume and return the current character; at the end of the buffer return '\0' without advancing.
Ch Take() {
    if (src_ == end_)
        return '\0';
    return *src_++;
}
//! Return the current character without consuming it, or '\0' at the end of the buffer.
/*! The end-of-buffer test is wrapped in RAPIDJSON_UNLIKELY. */
Ch Peek() const {
    if (RAPIDJSON_UNLIKELY(src_ == end_))
        return '\0';
    return *src_;
}
//! Consume and return the current character; at the end of the buffer return '\0' without advancing.
/*! The end-of-buffer test is wrapped in RAPIDJSON_UNLIKELY. */
Ch Take() {
    if (RAPIDJSON_UNLIKELY(src_ == end_))
        return '\0';
    return *src_++;
}
//! Number of characters between the start of the buffer and the current read position.
size_t Tell() const {
    return static_cast<size_t>(src_ - begin_);
}
//! Output is not supported: asserts unconditionally, then returns a null pointer.
Ch* PutBegin() {
    RAPIDJSON_ASSERT(false);
    return 0;
}
...
...
This diff is collapsed.
Click to expand it.
include/rapidjson/reader.h
View file @
dd25c965
...
...
@@ -19,6 +19,7 @@
#include "allocators.h"
#include "stream.h"
#include "encodedstream.h"
#include "internal/meta.h"
#include "internal/stack.h"
#include "internal/strtod.h"
...
...
@@ -259,6 +260,12 @@ void SkipWhitespace(InputStream& is) {
s
.
Take
();
}
//! Skip JSON whitespace inside the range [p, end).
/*! \param p First character to examine.
    \param end One past the last character that may be read.
    \return Pointer to the first character that is not space, LF, CR or tab,
        or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        switch (*p) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            continue;   // whitespace: advance to the next character
        default:
            return p;   // first non-whitespace character
        }
    }
    return p;
}
#ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
...
...
@@ -295,6 +302,34 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
}
}
//! Skip whitespace in the range [p, end) using the SSE 4.2 pcmpistrm instruction.
/*! The first character is tested on its own, then the input is examined in
    16-byte steps while at least 16 bytes remain; the remainder is handed to the
    scalar SkipWhitespace(p, end).
    \param p First character to examine.
    \param end One past the last character that may be read.
    \return Pointer to the first non-whitespace character, or \c end if there is none.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    // The middle of string using SIMD
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    // Compare the remaining length instead of computing `end - 16`: when fewer
    // than 16 bytes remain, that expression would form a pointer before the
    // buffer, which is undefined behaviour.
    for (; end - p >= 16; p += 16) {
        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
        const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
        if (r != 0) {   // some of characters is non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }

    // Fewer than 16 bytes left: finish with the scalar loop.
    return SkipWhitespace(p, end);
}
#elif defined(RAPIDJSON_SSE2)
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
...
...
@@ -342,6 +377,44 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
}
}
//! Skip whitespace in the range [p, end) using SSE2 byte comparisons.
/*! The first character is tested on its own, then the input is examined in
    16-byte steps while at least 16 bytes remain; the remainder is handed to the
    scalar SkipWhitespace(p, end).
    \param p First character to examine.
    \param end One past the last character that may be read.
    \return Pointer to the first non-whitespace character, or \c end if there is none.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    // The rest of string
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    // Compare the remaining length instead of computing `end - 16`: when fewer
    // than 16 bytes remain, that expression would form a pointer before the
    // buffer, which is undefined behaviour.
    for (; end - p >= 16; p += 16) {
        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
        // x has 0xFF in every byte lane that equals one of the four whitespace characters.
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Inverted mask: a set bit marks a non-whitespace byte.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }

    // Fewer than 16 bytes left: finish with the scalar loop.
    return SkipWhitespace(p, end);
}
#endif // RAPIDJSON_SSE2
#ifdef RAPIDJSON_SIMD
...
...
@@ -354,6 +427,10 @@ template<> inline void SkipWhitespace(InsituStringStream& is) {
//! Specialization for StringStream: advance the read pointer with the SIMD routine.
template<> inline void SkipWhitespace(StringStream& is) {
    const char* next = SkipWhitespace_SIMD(is.src_);
    is.src_ = next;
}
//! Specialization for a UTF8 EncodedInputStream over a MemoryStream.
/*! Moves the read pointer of the wrapped MemoryStream with the bounded SIMD
    routine, passing the stream's own end pointer as the limit.
*/
template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
    MemoryStream& ms = is.is_;
    ms.src_ = SkipWhitespace_SIMD(ms.src_, ms.end_);
}
#endif // RAPIDJSON_SIMD
///////////////////////////////////////////////////////////////////////////////
...
...
This diff is collapsed.
Click to expand it.
test/perftest/perftest.h
View file @
dd25c965
...
...
@@ -30,6 +30,8 @@
# define RAPIDJSON_SSE2
#endif
#define RAPIDJSON_HAS_STDSTRING 1
////////////////////////////////////////////////////////////////////////////////
// Google Test
...
...
This diff is collapsed.
Click to expand it.
test/perftest/rapidjsontest.cpp
View file @
dd25c965
...
...
@@ -187,6 +187,25 @@ TEST_F(RapidJson, SIMD_SUFFIX(DocumentParse_MemoryPoolAllocator)) {
}
}
// Perf test: parse the fixture's JSON kTrialCount times through Parse(const Ch*, size_t).
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseLength_MemoryPoolAllocator)) {
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        Document doc;
        doc.Parse(json_, length_);
        ASSERT_TRUE(doc.IsObject());
    }
}
#if RAPIDJSON_HAS_STDSTRING
// Perf test: parse the fixture's JSON kTrialCount times through Parse(const std::string&).
// The std::string is built once, outside the timed loop.
TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseStdString_MemoryPoolAllocator)) {
    const std::string s(json_, length_);
    for (size_t trial = 0; trial < kTrialCount; ++trial) {
        Document doc;
        doc.Parse(s);
        ASSERT_TRUE(doc.IsObject());
    }
}
#endif
TEST_F
(
RapidJson
,
SIMD_SUFFIX
(
DocumentParseIterative_MemoryPoolAllocator
))
{
for
(
size_t
i
=
0
;
i
<
kTrialCount
;
i
++
)
{
Document
doc
;
...
...
This diff is collapsed.
Click to expand it.
test/unittest/documenttest.cpp
View file @
dd25c965
...
...
@@ -34,6 +34,8 @@ void ParseCheck(DocumentType& doc) {
typedef
typename
DocumentType
::
ValueType
ValueType
;
EXPECT_FALSE
(
doc
.
HasParseError
());
if
(
doc
.
HasParseError
())
printf
(
"Error: %d at %zu
\n
"
,
static_cast
<
int
>
(
doc
.
GetParseError
()),
doc
.
GetErrorOffset
());
EXPECT_TRUE
(
static_cast
<
ParseResult
>
(
doc
));
EXPECT_TRUE
(
doc
.
IsObject
());
...
...
@@ -93,6 +95,26 @@ void ParseTest() {
doc
.
ParseInsitu
(
buffer
);
ParseCheck
(
doc
);
free
(
buffer
);
// Parse(const Ch*, size_t)
size_t
length
=
strlen
(
json
);
buffer
=
reinterpret_cast
<
char
*>
(
malloc
(
length
*
2
));
memcpy
(
buffer
,
json
,
length
);
memset
(
buffer
+
length
,
'X'
,
length
);
#if RAPIDJSON_HAS_STDSTRING
std
::
string
s2
(
buffer
,
length
);
// backup buffer
#endif
doc
.
SetNull
();
doc
.
Parse
(
buffer
,
length
);
free
(
buffer
);
ParseCheck
(
doc
);
#if RAPIDJSON_HAS_STDSTRING
// Parse(std::string)
doc
.
SetNull
();
doc
.
Parse
(
s2
);
ParseCheck
(
doc
);
#endif
}
TEST
(
Document
,
Parse
)
{
...
...
@@ -140,6 +162,42 @@ static FILE* OpenEncodedFile(const char* filename) {
return
0
;
}
// Parses UTF-8 input into a UTF-16 document through the Parse overloads that take
// an explicit source encoding, and checks the "hello" member afterwards.
TEST(Document, Parse_Encoding) {
    const char* json = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } ";

    typedef GenericDocument<UTF16<> > DocumentType;
    DocumentType doc;

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*)
    // doc.Parse<kParseDefaultFlags, UTF8<> >(json);
    // EXPECT_FALSE(doc.HasParseError());
    // EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

    // Parse<unsigned, SourceEncoding>(const SourceEncoding::Ch*, size_t)
    size_t length = strlen(json);
    // Twice the JSON length: the second half is filled with 'X' so that reading
    // past `length` would hit non-JSON bytes and show up as a parse error.
    char* buffer = reinterpret_cast<char*>(malloc(length * 2));
    ASSERT_TRUE(buffer != 0);   // do not memcpy into a failed allocation
    memcpy(buffer, json, length);
    memset(buffer + length, 'X', length);
#if RAPIDJSON_HAS_STDSTRING
    std::string s2(buffer, length); // backup buffer
#endif
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(buffer, length);
    free(buffer);
    EXPECT_FALSE(doc.HasParseError());
    if (doc.HasParseError())
        printf("Error: %d at %zu\n", static_cast<int>(doc.GetParseError()), doc.GetErrorOffset());
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));

#if RAPIDJSON_HAS_STDSTRING
    // Parse<unsigned, SourceEncoding>(std::string)
    doc.SetNull();
    doc.Parse<kParseDefaultFlags, UTF8<> >(s2);
    EXPECT_FALSE(doc.HasParseError());
    EXPECT_EQ(0, StrCmp(doc[L"hello"].GetString(), L"world"));
#endif
}
TEST
(
Document
,
ParseStream_EncodedInputStream
)
{
// UTF8 -> UTF16
FILE
*
fp
=
OpenEncodedFile
(
"utf8.json"
);
...
...
This diff is collapsed.
Click to expand it.
test/unittest/simdtest.cpp
View file @
dd25c965
...
...
@@ -73,6 +73,28 @@ TEST(SIMD, SIMD_SUFFIX(SkipWhitespace)) {
TestSkipWhitespace
<
InsituStringStream
>
();
}
// Fills a 1024-byte buffer with whitespace, plants an 'X' every `step` bytes, and
// checks that SkipWhitespace on a UTF8 EncodedInputStream over a MemoryStream
// always stops on an 'X' until the stream reports '\0'.
TEST(SIMD, SIMD_SUFFIX(SkipWhitespace_EncodedMemoryStream)) {
    for (size_t step = 1; step < 32; step++) {
        char buffer[1024];
        for (size_t i = 0; i < 1024; i++)
            buffer[i] = " \t\r\n"[i % 4];
        for (size_t i = 0; i < 1024; i += step)
            buffer[i] = 'X';

        MemoryStream ms(buffer, 1024);
        EncodedInputStream<UTF8<>, MemoryStream> s(ms);
        // Expected offset of the next 'X'; only used by the disabled Tell() check below.
        size_t i = 0;
        for (;;) {
            SkipWhitespace(s);
            if (s.Peek() == '\0')
                break;
            //EXPECT_EQ(i, s.Tell());
            EXPECT_EQ('X', s.Take());
            i += step;
        }
    }
}
struct
ScanCopyUnescapedStringHandler
:
BaseReaderHandler
<
UTF8
<>
,
ScanCopyUnescapedStringHandler
>
{
bool
String
(
const
char
*
str
,
size_t
length
,
bool
)
{
memcpy
(
buffer
,
str
,
length
+
1
);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment