Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
R
rapidjson
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
rapidjson
Commits
ca5000ba
Commit
ca5000ba
authored
Aug 21, 2014
by
Milo Yip
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #108 from miloyip/ParsingOptimization
Parsing optimization
parents
df70ee82
4f81c873
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
101 additions
and
108 deletions
+101
-108
rapidjson.h
include/rapidjson/rapidjson.h
+12
-0
reader.h
include/rapidjson/reader.h
+89
-108
No files found.
include/rapidjson/rapidjson.h
View file @
ca5000ba
...
@@ -147,6 +147,18 @@
...
@@ -147,6 +147,18 @@
# endif
# endif
#endif // RAPIDJSON_ENDIAN
#endif // RAPIDJSON_ENDIAN
///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_64BIT
//! Whether using 64-bit architecture
#ifndef RAPIDJSON_64BIT
#if defined(__LP64__) || defined(_WIN64)
#define RAPIDJSON_64BIT 1
#else
#define RAPIDJSON_64BIT 0
#endif
#endif // RAPIDJSON_64BIT
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// RAPIDJSON_ALIGN
// RAPIDJSON_ALIGN
...
...
include/rapidjson/reader.h
View file @
ca5000ba
...
@@ -242,57 +242,34 @@ void SkipWhitespace(InputStream& is) {
...
@@ -242,57 +242,34 @@ void SkipWhitespace(InputStream& is) {
#ifdef RAPIDJSON_SSE42
#ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
static
const
char
whitespace
[
16
]
=
"
\n\r\t
"
;
// Fast return for single non-whitespace
static
const
char
whitespaces
[
4
][
17
]
=
{
if
(
*
p
==
' '
||
*
p
==
'\n'
||
*
p
==
'\r'
||
*
p
==
'\t'
)
" "
,
++
p
;
"
\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
"
,
else
"
\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r
"
,
return
p
;
"
\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t
"
};
// 16-byte align to the next boundary
// 16-byte align to the lower boundary
const
char
*
nextAligned
=
reinterpret_cast
<
const
char
*>
((
reinterpret_cast
<
size_t
>
(
p
)
+
15
)
&
~
15
);
const
char
*
ap
=
reinterpret_cast
<
const
char
*>
(
reinterpret_cast
<
size_t
>
(
p
)
&
~
15
);
while
(
p
!=
nextAligned
)
if
(
*
p
==
' '
||
*
p
==
'\n'
||
*
p
==
'\r'
||
*
p
==
'\t'
)
// Test first unaligned characters
++
p
;
// Cannot make use of _mm_cmpistrm() because it stops when encounters '\0' before p
else
if
(
ap
!=
p
)
{
return
p
;
const
__m128i
w0
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
0
][
0
]);
const
__m128i
w1
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
1
][
0
]);
// The rest of string using SIMD
const
__m128i
w2
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
2
][
0
]);
static
const
char
whitespace
[
16
]
=
"
\n\r\t
"
;
const
__m128i
w3
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
3
][
0
]);
const
__m128i
w
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespace
[
0
]);
unsigned
char
shift
=
reinterpret_cast
<
size_t
>
(
p
)
&
15
;
for
(;;
p
+=
16
)
{
const
__m128i
s
=
_mm_load_si128
(
reinterpret_cast
<
const
__m128i
*>
(
ap
));
const
__m128i
s
=
_mm_load_si128
((
const
__m128i
*
)
p
);
__m128i
x
=
_mm_cmpeq_epi8
(
s
,
w0
);
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w1
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w2
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w3
));
unsigned
short
r
=
(
unsigned
short
)
~
_mm_movemask_epi8
(
x
);
r
=
r
>>
shift
<<
shift
;
// Clear results before p
if
(
r
!=
0
)
{
#ifdef _MSC_VER // Find the index of first non-whitespace
unsigned
long
offset
;
_BitScanForward
(
&
offset
,
r
);
return
ap
+
offset
;
#else
return
ap
+
__builtin_ffs
(
r
)
-
1
;
#endif
}
ap
+=
16
;
}
const
__m128i
w
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespace
[
0
]);
// The rest of string
for
(;;
ap
+=
16
)
{
const
__m128i
s
=
_mm_load_si128
((
const
__m128i
*
)
ap
);
const
unsigned
r
=
_mm_cvtsi128_si32
(
_mm_cmpistrm
(
w
,
s
,
_SIDD_UBYTE_OPS
|
_SIDD_CMP_EQUAL_ANY
|
_SIDD_BIT_MASK
|
_SIDD_NEGATIVE_POLARITY
));
const
unsigned
r
=
_mm_cvtsi128_si32
(
_mm_cmpistrm
(
w
,
s
,
_SIDD_UBYTE_OPS
|
_SIDD_CMP_EQUAL_ANY
|
_SIDD_BIT_MASK
|
_SIDD_NEGATIVE_POLARITY
));
if
(
r
!=
0
)
{
// some of characters is non-whitespace
if
(
r
!=
0
)
{
// some of characters is non-whitespace
#ifdef _MSC_VER // Find the index of first non-whitespace
#ifdef _MSC_VER // Find the index of first non-whitespace
unsigned
long
offset
;
unsigned
long
offset
;
_BitScanForward
(
&
offset
,
r
);
_BitScanForward
(
&
offset
,
r
);
return
a
p
+
offset
;
return
p
+
offset
;
#else
#else
return
a
p
+
__builtin_ffs
(
r
)
-
1
;
return
p
+
__builtin_ffs
(
r
)
-
1
;
#endif
#endif
}
}
}
}
...
@@ -302,45 +279,34 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
...
@@ -302,45 +279,34 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
inline
const
char
*
SkipWhitespace_SIMD
(
const
char
*
p
)
{
static
const
char
whitespaces
[
4
][
17
]
=
{
// Fast return for single non-whitespace
" "
,
if
(
*
p
==
' '
||
*
p
==
'\n'
||
*
p
==
'\r'
||
*
p
==
'\t'
)
"
\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
"
,
++
p
;
"
\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r
"
,
else
"
\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t
"
};
return
p
;
const
__m128i
w0
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
0
][
0
]);
// 16-byte align to the next boundary
const
__m128i
w1
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
1
][
0
]);
const
char
*
nextAligned
=
reinterpret_cast
<
const
char
*>
((
reinterpret_cast
<
size_t
>
(
p
)
+
15
)
&
~
15
);
const
__m128i
w2
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
2
][
0
]);
while
(
p
!=
nextAligned
)
const
__m128i
w3
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
3
][
0
]);
if
(
*
p
==
' '
||
*
p
==
'\n'
||
*
p
==
'\r'
||
*
p
==
'\t'
)
++
p
;
// 16-byte align to the lower boundary
else
const
char
*
ap
=
reinterpret_cast
<
const
char
*>
(
reinterpret_cast
<
size_t
>
(
p
)
&
~
15
);
return
p
;
// Test first unaligned characters
if
(
ap
!=
p
)
{
unsigned
char
shift
=
reinterpret_cast
<
size_t
>
(
p
)
&
15
;
const
__m128i
s
=
_mm_load_si128
(
reinterpret_cast
<
const
__m128i
*>
(
ap
));
__m128i
x
=
_mm_cmpeq_epi8
(
s
,
w0
);
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w1
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w2
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w3
));
unsigned
short
r
=
(
unsigned
short
)
~
_mm_movemask_epi8
(
x
);
r
=
r
>>
shift
<<
shift
;
// Clear results before p
if
(
r
!=
0
)
{
#ifdef _MSC_VER // Find the index of first non-whitespace
unsigned
long
offset
;
_BitScanForward
(
&
offset
,
r
);
return
ap
+
offset
;
#else
return
ap
+
__builtin_ffs
(
r
)
-
1
;
#endif
}
ap
+=
16
;
}
// The rest of string
// The rest of string
for
(;;
ap
+=
16
)
{
static
const
char
whitespaces
[
4
][
17
]
=
{
const
__m128i
s
=
_mm_load_si128
((
const
__m128i
*
)
ap
);
" "
,
"
\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
"
,
"
\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r
"
,
"
\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t
"
};
const
__m128i
w0
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
0
][
0
]);
const
__m128i
w1
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
1
][
0
]);
const
__m128i
w2
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
2
][
0
]);
const
__m128i
w3
=
_mm_loadu_si128
((
const
__m128i
*
)
&
whitespaces
[
3
][
0
]);
for
(;;
p
+=
16
)
{
const
__m128i
s
=
_mm_load_si128
((
const
__m128i
*
)
p
);
__m128i
x
=
_mm_cmpeq_epi8
(
s
,
w0
);
__m128i
x
=
_mm_cmpeq_epi8
(
s
,
w0
);
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w1
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w1
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w2
));
x
=
_mm_or_si128
(
x
,
_mm_cmpeq_epi8
(
s
,
w2
));
...
@@ -350,9 +316,9 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
...
@@ -350,9 +316,9 @@ inline const char *SkipWhitespace_SIMD(const char* p) {
#ifdef _MSC_VER // Find the index of first non-whitespace
#ifdef _MSC_VER // Find the index of first non-whitespace
unsigned
long
offset
;
unsigned
long
offset
;
_BitScanForward
(
&
offset
,
r
);
_BitScanForward
(
&
offset
,
r
);
return
a
p
+
offset
;
return
p
+
offset
;
#else
#else
return
a
p
+
__builtin_ffs
(
r
)
-
1
;
return
p
+
__builtin_ffs
(
r
)
-
1
;
#endif
#endif
}
}
}
}
...
@@ -760,7 +726,8 @@ private:
...
@@ -760,7 +726,8 @@ private:
// Parse int: zero / ( digit1-9 *DIGIT )
// Parse int: zero / ( digit1-9 *DIGIT )
unsigned
i
=
0
;
unsigned
i
=
0
;
bool
try64bit
=
false
;
uint64_t
i64
=
0
;
bool
use64bit
=
false
;
if
(
s
.
Peek
()
==
'0'
)
{
if
(
s
.
Peek
()
==
'0'
)
{
i
=
0
;
i
=
0
;
s
.
Take
();
s
.
Take
();
...
@@ -772,7 +739,8 @@ private:
...
@@ -772,7 +739,8 @@ private:
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
i
>=
214748364
)
{
// 2^31 = 2147483648
if
(
i
>=
214748364
)
{
// 2^31 = 2147483648
if
(
i
!=
214748364
||
s
.
Peek
()
>
'8'
)
{
if
(
i
!=
214748364
||
s
.
Peek
()
>
'8'
)
{
try64bit
=
true
;
i64
=
i
;
use64bit
=
true
;
break
;
break
;
}
}
}
}
...
@@ -782,7 +750,8 @@ private:
...
@@ -782,7 +750,8 @@ private:
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
i
>=
429496729
)
{
// 2^32 - 1 = 4294967295
if
(
i
>=
429496729
)
{
// 2^32 - 1 = 4294967295
if
(
i
!=
429496729
||
s
.
Peek
()
>
'5'
)
{
if
(
i
!=
429496729
||
s
.
Peek
()
>
'5'
)
{
try64bit
=
true
;
i64
=
i
;
use64bit
=
true
;
break
;
break
;
}
}
}
}
...
@@ -793,14 +762,14 @@ private:
...
@@ -793,14 +762,14 @@ private:
RAPIDJSON_PARSE_ERROR
(
kParseErrorValueInvalid
,
s
.
Tell
());
RAPIDJSON_PARSE_ERROR
(
kParseErrorValueInvalid
,
s
.
Tell
());
// Parse 64bit int
// Parse 64bit int
uint64_t
i64
=
0
;
double
d
=
0
.
0
;
bool
useDouble
=
false
;
bool
useDouble
=
false
;
if
(
try64bit
)
{
if
(
use64bit
)
{
i64
=
i
;
if
(
minus
)
if
(
minus
)
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
i64
>=
RAPIDJSON_UINT64_C2
(
0x0CCCCCCC
,
0xCCCCCCCC
))
// 2^63 = 9223372036854775808
if
(
i64
>=
RAPIDJSON_UINT64_C2
(
0x0CCCCCCC
,
0xCCCCCCCC
))
// 2^63 = 9223372036854775808
if
(
i64
!=
RAPIDJSON_UINT64_C2
(
0x0CCCCCCC
,
0xCCCCCCCC
)
||
s
.
Peek
()
>
'8'
)
{
if
(
i64
!=
RAPIDJSON_UINT64_C2
(
0x0CCCCCCC
,
0xCCCCCCCC
)
||
s
.
Peek
()
>
'8'
)
{
d
=
(
double
)
i64
;
useDouble
=
true
;
useDouble
=
true
;
break
;
break
;
}
}
...
@@ -810,6 +779,7 @@ private:
...
@@ -810,6 +779,7 @@ private:
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
i64
>=
RAPIDJSON_UINT64_C2
(
0x19999999
,
0x99999999
))
// 2^64 - 1 = 18446744073709551615
if
(
i64
>=
RAPIDJSON_UINT64_C2
(
0x19999999
,
0x99999999
))
// 2^64 - 1 = 18446744073709551615
if
(
i64
!=
RAPIDJSON_UINT64_C2
(
0x19999999
,
0x99999999
)
||
s
.
Peek
()
>
'5'
)
{
if
(
i64
!=
RAPIDJSON_UINT64_C2
(
0x19999999
,
0x99999999
)
||
s
.
Peek
()
>
'5'
)
{
d
=
(
double
)
i64
;
useDouble
=
true
;
useDouble
=
true
;
break
;
break
;
}
}
...
@@ -818,9 +788,7 @@ private:
...
@@ -818,9 +788,7 @@ private:
}
}
// Force double for big integer
// Force double for big integer
double
d
=
0
.
0
;
if
(
useDouble
)
{
if
(
useDouble
)
{
d
=
(
double
)
i64
;
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
d
>=
1.7976931348623157e307
)
// DBL_MAX / 10.0
if
(
d
>=
1.7976931348623157e307
)
// DBL_MAX / 10.0
RAPIDJSON_PARSE_ERROR
(
kParseErrorNumberTooBig
,
s
.
Tell
());
RAPIDJSON_PARSE_ERROR
(
kParseErrorNumberTooBig
,
s
.
Tell
());
...
@@ -831,33 +799,46 @@ private:
...
@@ -831,33 +799,46 @@ private:
// Parse frac = decimal-point 1*DIGIT
// Parse frac = decimal-point 1*DIGIT
int
expFrac
=
0
;
int
expFrac
=
0
;
if
(
s
.
Peek
()
==
'.'
)
{
if
(
s
.
Peek
()
==
'.'
)
{
s
.
Take
();
#if RAPIDJSON_64BIT
// Use i64 to store significand in 64-bit architecture
if
(
!
useDouble
)
{
if
(
!
useDouble
)
{
d
=
try64bit
?
(
double
)
i64
:
(
double
)
i
;
if
(
!
use64bit
)
useDouble
=
true
;
i64
=
i
;
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
i64
>=
RAPIDJSON_UINT64_C2
(
0x19999999
,
0x99999999
))
break
;
else
{
i64
=
i64
*
10
+
static_cast
<
unsigned
>
(
s
.
Take
()
-
'0'
);
--
expFrac
;
}
}
d
=
(
double
)
i64
;
}
}
s
.
Take
();
#else
// Use double to store significand in 32-bit architecture
if
(
!
useDouble
)
d
=
use64bit
?
(
double
)
i64
:
(
double
)
i
;
#endif
useDouble
=
true
;
if
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
d
=
d
*
10
+
(
s
.
Take
()
-
'0'
);
d
=
d
*
10
+
(
s
.
Take
()
-
'0'
);
--
expFrac
;
--
expFrac
;
}
}
else
RAPIDJSON_PARSE_ERROR
(
kParseErrorNumberMissFraction
,
s
.
Tell
());
while
(
s
.
Peek
()
>=
'0'
&&
s
.
Peek
()
<=
'9'
)
{
if
(
expFrac
==
0
)
if
(
expFrac
>
-
16
)
{
RAPIDJSON_PARSE_ERROR
(
kParseErrorNumberMissFraction
,
s
.
Tell
());
d
=
d
*
10
+
(
s
.
Peek
()
-
'0'
);
--
expFrac
;
}
s
.
Take
();
}
}
}
// Parse exp = e [ minus / plus ] 1*DIGIT
// Parse exp = e [ minus / plus ] 1*DIGIT
int
exp
=
0
;
int
exp
=
0
;
if
(
s
.
Peek
()
==
'e'
||
s
.
Peek
()
==
'E'
)
{
if
(
s
.
Peek
()
==
'e'
||
s
.
Peek
()
==
'E'
)
{
if
(
!
useDouble
)
{
if
(
!
useDouble
)
{
d
=
try
64bit
?
(
double
)
i64
:
(
double
)
i
;
d
=
use
64bit
?
(
double
)
i64
:
(
double
)
i
;
useDouble
=
true
;
useDouble
=
true
;
}
}
s
.
Take
();
s
.
Take
();
...
@@ -900,7 +881,7 @@ private:
...
@@ -900,7 +881,7 @@ private:
cont
=
handler
.
Double
(
minus
?
-
d
:
d
);
cont
=
handler
.
Double
(
minus
?
-
d
:
d
);
}
}
else
{
else
{
if
(
try
64bit
)
{
if
(
use
64bit
)
{
if
(
minus
)
if
(
minus
)
cont
=
handler
.
Int64
(
-
(
int64_t
)
i64
);
cont
=
handler
.
Int64
(
-
(
int64_t
)
i64
);
else
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment