Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
B
brpc
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
brpc
Commits
29ab8982
Unverified
Commit
29ab8982
authored
Nov 30, 2020
by
jamesge
Committed by
GitHub
Nov 30, 2020
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1295 from lrita/l_s
make butil::BasicStringPiece<T> support string split functions-family
parents
e64eb3ce
32c05be9
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
218 additions
and
16 deletions
+218
-16
string_piece.h
src/butil/strings/string_piece.h
+7
-0
string_split.cc
src/butil/strings/string_split.cc
+89
-14
string_split.h
src/butil/strings/string_split.h
+28
-0
string_util.cc
src/butil/strings/string_util.cc
+20
-1
string_util.h
src/butil/strings/string_util.h
+10
-1
string_split_unittest.cc
test/string_split_unittest.cc
+64
-0
No files found.
src/butil/strings/string_piece.h
View file @
29ab8982
...
...
@@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
:
ptr_
(
str
.
data
()),
length_
(
str
.
size
())
{}
BasicStringPiece
(
const
value_type
*
offset
,
size_type
len
)
:
ptr_
(
offset
),
length_
(
len
)
{}
BasicStringPiece
(
const
BasicStringPiece
&
str
,
size_type
pos
,
size_type
len
=
npos
)
:
ptr_
(
str
.
data
()
+
pos
),
length_
(
std
::
min
(
len
,
str
.
length
()
-
pos
))
{}
BasicStringPiece
(
const
typename
STRING_TYPE
::
const_iterator
&
begin
,
const
typename
STRING_TYPE
::
const_iterator
&
end
)
:
ptr_
((
end
>
begin
)
?
&
(
*
begin
)
:
NULL
),
...
...
@@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
ptr_
=
NULL
;
length_
=
0
;
}
BasicStringPiece
&
assign
(
const
BasicStringPiece
&
str
,
size_type
pos
,
size_type
len
=
npos
)
{
ptr_
=
str
.
data
()
+
pos
;
length_
=
std
::
min
(
len
,
str
.
length
()
-
pos
);
return
*
this
;
}
void
set
(
const
value_type
*
data
,
size_type
len
)
{
ptr_
=
data
;
length_
=
len
;
...
...
src/butil/strings/string_split.cc
View file @
29ab8982
...
...
@@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
}
}
bool
SplitStringIntoKeyValue
(
const
std
::
string
&
line
,
char
key_value_delimiter
,
std
::
string
*
key
,
std
::
string
*
value
)
{
template
<
typename
STR
>
bool
SplitStringIntoKeyValueT
(
const
STR
&
line
,
typename
STR
::
value_type
key_value_delimiter
,
STR
*
key
,
STR
*
value
)
{
key
->
clear
();
value
->
clear
();
// Find the delimiter.
size_t
end_key_pos
=
line
.
find_first_of
(
key_value_delimiter
);
if
(
end_key_pos
==
std
::
string
::
npos
)
{
if
(
end_key_pos
==
STR
::
npos
)
{
DVLOG
(
1
)
<<
"cannot find delimiter in: "
<<
line
;
return
false
;
// no delimiter
}
key
->
assign
(
line
,
0
,
end_key_pos
);
// Find the value string.
std
::
string
remains
(
line
,
end_key_pos
,
line
.
size
()
-
end_key_pos
);
STR
remains
(
line
,
end_key_pos
,
line
.
size
()
-
end_key_pos
);
size_t
begin_value_pos
=
remains
.
find_first_not_of
(
key_value_delimiter
);
if
(
begin_value_pos
==
std
::
string
::
npos
)
{
if
(
begin_value_pos
==
STR
::
npos
)
{
DVLOG
(
1
)
<<
"cannot parse value from line: "
<<
line
;
return
false
;
// no value
}
...
...
@@ -134,6 +135,13 @@ void SplitString(const string16& str,
SplitStringT
(
str
,
c
,
true
,
r
);
}
void
SplitString
(
const
butil
::
StringPiece16
&
str
,
char16
c
,
std
::
vector
<
butil
::
StringPiece16
>*
r
)
{
DCHECK
(
CBU16_IS_SINGLE
(
c
));
SplitStringT
(
str
,
c
,
true
,
r
);
}
void
SplitString
(
const
std
::
string
&
str
,
char
c
,
std
::
vector
<
std
::
string
>*
r
)
{
...
...
@@ -144,13 +152,24 @@ void SplitString(const std::string& str,
SplitStringT
(
str
,
c
,
true
,
r
);
}
bool
SplitStringIntoKeyValuePairs
(
const
std
::
string
&
line
,
void
SplitString
(
const
StringPiece
&
str
,
char
c
,
std
::
vector
<
StringPiece
>*
r
)
{
#if CHAR_MIN < 0
DCHECK
(
c
>=
0
);
#endif
DCHECK
(
c
<
0x7F
);
SplitStringT
(
str
,
c
,
true
,
r
);
}
template
<
typename
STR
>
bool
SplitStringIntoKeyValuePairsT
(
const
STR
&
line
,
char
key_value_delimiter
,
char
key_value_pair_delimiter
,
StringPairs
*
key_value_pairs
)
{
std
::
vector
<
std
::
pair
<
STR
,
STR
>
>
*
key_value_pairs
)
{
key_value_pairs
->
clear
();
std
::
vector
<
std
::
string
>
pairs
;
std
::
vector
<
STR
>
pairs
;
SplitString
(
line
,
key_value_pair_delimiter
,
&
pairs
);
bool
success
=
true
;
...
...
@@ -159,30 +178,58 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
if
(
pairs
[
i
].
empty
())
continue
;
std
::
string
key
;
std
::
string
value
;
if
(
!
SplitStringIntoKeyValue
(
pairs
[
i
],
key_value_delimiter
,
&
key
,
&
value
))
{
STR
key
;
STR
value
;
if
(
!
SplitStringIntoKeyValue
T
(
pairs
[
i
],
key_value_delimiter
,
&
key
,
&
value
))
{
// Don't return here, to allow for pairs without associated
// value or key; just record that the split failed.
success
=
false
;
}
key_value_pairs
->
push_back
(
make_pair
(
key
,
value
));
key_value_pairs
->
push_back
(
std
::
make_pair
(
key
,
value
));
}
return
success
;
}
bool
SplitStringIntoKeyValuePairs
(
const
std
::
string
&
line
,
char
key_value_delimiter
,
char
key_value_pair_delimiter
,
StringPairs
*
key_value_pairs
)
{
return
SplitStringIntoKeyValuePairsT
(
line
,
key_value_delimiter
,
key_value_pair_delimiter
,
key_value_pairs
);
}
bool
SplitStringIntoKeyValuePairs
(
const
butil
::
StringPiece
&
line
,
char
key_value_delimiter
,
char
key_value_pair_delimiter
,
StringPiecePairs
*
key_value_pairs
)
{
return
SplitStringIntoKeyValuePairsT
(
line
,
key_value_delimiter
,
key_value_pair_delimiter
,
key_value_pairs
);
}
void
SplitStringUsingSubstr
(
const
string16
&
str
,
const
string16
&
s
,
std
::
vector
<
string16
>*
r
)
{
SplitStringUsingSubstrT
(
str
,
s
,
r
);
}
void
SplitStringUsingSubstr
(
const
butil
::
StringPiece16
&
str
,
const
butil
::
StringPiece16
&
s
,
std
::
vector
<
butil
::
StringPiece16
>*
r
)
{
SplitStringUsingSubstrT
(
str
,
s
,
r
);
}
void
SplitStringUsingSubstr
(
const
std
::
string
&
str
,
const
std
::
string
&
s
,
std
::
vector
<
std
::
string
>*
r
)
{
SplitStringUsingSubstrT
(
str
,
s
,
r
);
}
void
SplitStringUsingSubstr
(
const
butil
::
StringPiece
&
str
,
const
butil
::
StringPiece
&
s
,
std
::
vector
<
butil
::
StringPiece
>*
r
)
{
SplitStringUsingSubstrT
(
str
,
s
,
r
);
}
void
SplitStringDontTrim
(
const
string16
&
str
,
char16
c
,
std
::
vector
<
string16
>*
r
)
{
...
...
@@ -190,6 +237,13 @@ void SplitStringDontTrim(const string16& str,
SplitStringT
(
str
,
c
,
false
,
r
);
}
void
SplitStringDontTrim
(
const
butil
::
StringPiece16
&
str
,
char16
c
,
std
::
vector
<
butil
::
StringPiece16
>*
r
)
{
DCHECK
(
CBU16_IS_SINGLE
(
c
));
SplitStringT
(
str
,
c
,
false
,
r
);
}
void
SplitStringDontTrim
(
const
std
::
string
&
str
,
char
c
,
std
::
vector
<
std
::
string
>*
r
)
{
...
...
@@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
SplitStringT
(
str
,
c
,
false
,
r
);
}
void
SplitStringDontTrim
(
const
butil
::
StringPiece
&
str
,
char
c
,
std
::
vector
<
butil
::
StringPiece
>*
r
)
{
DCHECK
(
IsStringUTF8
(
str
));
#if CHAR_MIN < 0
DCHECK
(
c
>=
0
);
#endif
DCHECK
(
c
<
0x7F
);
SplitStringT
(
str
,
c
,
false
,
r
);
}
void
SplitStringAlongWhitespace
(
const
string16
&
str
,
std
::
vector
<
string16
>*
result
)
{
SplitStringAlongWhitespaceT
(
str
,
result
);
}
void
SplitStringAlongWhitespace
(
const
butil
::
StringPiece16
&
str
,
std
::
vector
<
butil
::
StringPiece16
>*
result
)
{
SplitStringAlongWhitespaceT
(
str
,
result
);
}
void
SplitStringAlongWhitespace
(
const
std
::
string
&
str
,
std
::
vector
<
std
::
string
>*
result
)
{
SplitStringAlongWhitespaceT
(
str
,
result
);
}
void
SplitStringAlongWhitespace
(
const
butil
::
StringPiece
&
str
,
std
::
vector
<
butil
::
StringPiece
>*
result
)
{
SplitStringAlongWhitespaceT
(
str
,
result
);
}
}
// namespace butil
src/butil/strings/string_split.h
View file @
29ab8982
...
...
@@ -11,6 +11,7 @@
#include "butil/base_export.h"
#include "butil/strings/string16.h"
#include "butil/strings/string_piece.h"
namespace
butil
{
...
...
@@ -23,6 +24,9 @@ namespace butil {
BUTIL_EXPORT
void
SplitString
(
const
string16
&
str
,
char16
c
,
std
::
vector
<
string16
>*
r
);
BUTIL_EXPORT
void
SplitString
(
const
butil
::
StringPiece16
&
str
,
char16
c
,
std
::
vector
<
butil
::
StringPiece16
>*
r
);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
...
...
@@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
BUTIL_EXPORT
void
SplitString
(
const
std
::
string
&
str
,
char
c
,
std
::
vector
<
std
::
string
>*
r
);
BUTIL_EXPORT
void
SplitString
(
const
butil
::
StringPiece
&
str
,
char
c
,
std
::
vector
<
butil
::
StringPiece
>*
r
);
typedef
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>
StringPairs
;
typedef
std
::
vector
<
std
::
pair
<
butil
::
StringPiece
,
butil
::
StringPiece
>
>
StringPiecePairs
;
// Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
...
...
@@ -42,20 +50,33 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char
key_value_delimiter
,
char
key_value_pair_delimiter
,
StringPairs
*
key_value_pairs
);
BUTIL_EXPORT
bool
SplitStringIntoKeyValuePairs
(
const
butil
::
StringPiece
&
line
,
char
key_value_delimiter
,
char
key_value_pair_delimiter
,
StringPiecePairs
*
key_value_pairs
);
// The same as SplitString, but use a substring delimiter instead of a char.
BUTIL_EXPORT
void
SplitStringUsingSubstr
(
const
string16
&
str
,
const
string16
&
s
,
std
::
vector
<
string16
>*
r
);
BUTIL_EXPORT
void
SplitStringUsingSubstr
(
const
butil
::
StringPiece16
&
str
,
const
butil
::
StringPiece16
&
s
,
std
::
vector
<
butil
::
StringPiece16
>*
r
);
BUTIL_EXPORT
void
SplitStringUsingSubstr
(
const
std
::
string
&
str
,
const
std
::
string
&
s
,
std
::
vector
<
std
::
string
>*
r
);
BUTIL_EXPORT
void
SplitStringUsingSubstr
(
const
butil
::
StringPiece
&
str
,
const
butil
::
StringPiece
&
s
,
std
::
vector
<
butil
::
StringPiece
>*
r
);
// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BUTIL_EXPORT
void
SplitStringDontTrim
(
const
string16
&
str
,
char16
c
,
std
::
vector
<
string16
>*
r
);
BUTIL_EXPORT
void
SplitStringDontTrim
(
const
butil
::
StringPiece16
&
str
,
char16
c
,
std
::
vector
<
butil
::
StringPiece16
>*
r
);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
...
...
@@ -63,6 +84,9 @@ BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
BUTIL_EXPORT
void
SplitStringDontTrim
(
const
std
::
string
&
str
,
char
c
,
std
::
vector
<
std
::
string
>*
r
);
BUTIL_EXPORT
void
SplitStringDontTrim
(
const
butil
::
StringPiece
&
str
,
char
c
,
std
::
vector
<
butil
::
StringPiece
>*
r
);
// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then
...
...
@@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
// characters is added to result.
BUTIL_EXPORT
void
SplitStringAlongWhitespace
(
const
string16
&
str
,
std
::
vector
<
string16
>*
result
);
BUTIL_EXPORT
void
SplitStringAlongWhitespace
(
const
butil
::
StringPiece16
&
str
,
std
::
vector
<
butil
::
StringPiece16
>*
result
);
BUTIL_EXPORT
void
SplitStringAlongWhitespace
(
const
std
::
string
&
str
,
std
::
vector
<
std
::
string
>*
result
);
BUTIL_EXPORT
void
SplitStringAlongWhitespace
(
const
butil
::
StringPiece
&
str
,
std
::
vector
<
butil
::
StringPiece
>*
result
);
}
// namespace butil
...
...
src/butil/strings/string_util.cc
View file @
29ab8982
...
...
@@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
output
);
}
TrimPositions
TrimWhitespace
(
const
butil
::
StringPiece16
&
input
,
TrimPositions
positions
,
butil
::
StringPiece16
*
output
)
{
return
TrimStringT
(
input
,
butil
::
StringPiece16
(
kWhitespaceUTF16
),
positions
,
output
);
}
TrimPositions
TrimWhitespaceASCII
(
const
std
::
string
&
input
,
TrimPositions
positions
,
std
::
string
*
output
)
{
return
TrimStringT
(
input
,
std
::
string
(
kWhitespaceASCII
),
positions
,
output
);
}
TrimPositions
TrimWhitespaceASCII
(
const
butil
::
StringPiece
&
input
,
TrimPositions
positions
,
butil
::
StringPiece
*
output
)
{
return
TrimStringT
(
input
,
butil
::
StringPiece
(
kWhitespaceASCII
),
positions
,
output
);
}
// This function is only for backward-compatibility.
// To be removed when all callers are updated.
TrimPositions
TrimWhitespace
(
const
std
::
string
&
input
,
...
...
@@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
return
TrimWhitespaceASCII
(
input
,
positions
,
output
);
}
TrimPositions
TrimWhitespace
(
const
butil
::
StringPiece
&
input
,
TrimPositions
positions
,
butil
::
StringPiece
*
output
)
{
return
TrimWhitespaceASCII
(
input
,
positions
,
output
);
}
template
<
typename
STR
>
STR
CollapseWhitespaceT
(
const
STR
&
text
,
bool
trim_sequences_with_line_breaks
)
{
...
...
@@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
return
DoIsStringASCII
(
str
);
}
bool
IsStringUTF8
(
const
std
::
string
&
str
)
{
bool
IsStringUTF8
(
const
StringPiece
&
str
)
{
const
char
*
src
=
str
.
data
();
int32_t
src_len
=
static_cast
<
int32_t
>
(
str
.
length
());
int32_t
char_index
=
0
;
...
...
src/butil/strings/string_util.h
View file @
29ab8982
...
...
@@ -202,15 +202,24 @@ enum TrimPositions {
BUTIL_EXPORT
TrimPositions
TrimWhitespace
(
const
string16
&
input
,
TrimPositions
positions
,
butil
::
string16
*
output
);
BUTIL_EXPORT
TrimPositions
TrimWhitespace
(
const
butil
::
StringPiece16
&
input
,
TrimPositions
positions
,
butil
::
StringPiece16
*
output
);
BUTIL_EXPORT
TrimPositions
TrimWhitespaceASCII
(
const
std
::
string
&
input
,
TrimPositions
positions
,
std
::
string
*
output
);
BUTIL_EXPORT
TrimPositions
TrimWhitespaceASCII
(
const
butil
::
StringPiece
&
input
,
TrimPositions
positions
,
butil
::
StringPiece
*
output
);
// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
BUTIL_EXPORT
TrimPositions
TrimWhitespace
(
const
std
::
string
&
input
,
TrimPositions
positions
,
std
::
string
*
output
);
BUTIL_EXPORT
TrimPositions
TrimWhitespace
(
const
butil
::
StringPiece
&
input
,
TrimPositions
positions
,
butil
::
StringPiece
*
output
);
// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
...
...
@@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
BUTIL_EXPORT
bool
IsStringUTF8
(
const
std
::
string
&
str
);
BUTIL_EXPORT
bool
IsStringUTF8
(
const
StringPiece
&
str
);
BUTIL_EXPORT
bool
IsStringASCII
(
const
StringPiece
&
str
);
BUTIL_EXPORT
bool
IsStringASCII
(
const
string16
&
str
);
...
...
test/string_split_unittest.cc
View file @
29ab8982
...
...
@@ -239,6 +239,70 @@ TEST(StringUtilTest, SplitString) {
r
.
clear
();
}
TEST
(
StringUtilTest
,
SplitStringStringPiece
)
{
std
::
vector
<
butil
::
StringPiece
>
r
;
SplitString
(
butil
::
StringPiece
(),
','
,
&
r
);
EXPECT_EQ
(
0U
,
r
.
size
());
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"a,b,c"
),
','
,
&
r
);
ASSERT_EQ
(
3U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"a"
);
EXPECT_EQ
(
r
[
1
],
"b"
);
EXPECT_EQ
(
r
[
2
],
"c"
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"a, b, c"
),
','
,
&
r
);
ASSERT_EQ
(
3U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"a"
);
EXPECT_EQ
(
r
[
1
],
"b"
);
EXPECT_EQ
(
r
[
2
],
"c"
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"a,,c"
),
','
,
&
r
);
ASSERT_EQ
(
3U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"a"
);
EXPECT_EQ
(
r
[
1
],
""
);
EXPECT_EQ
(
r
[
2
],
"c"
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
" "
),
'*'
,
&
r
);
EXPECT_EQ
(
0U
,
r
.
size
());
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"foo"
),
'*'
,
&
r
);
ASSERT_EQ
(
1U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"foo"
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"foo ,"
),
','
,
&
r
);
ASSERT_EQ
(
2U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"foo"
);
EXPECT_EQ
(
r
[
1
],
""
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
","
),
','
,
&
r
);
ASSERT_EQ
(
2U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
""
);
EXPECT_EQ
(
r
[
1
],
""
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"
\t\t
a
\t
"
),
'\t'
,
&
r
);
ASSERT_EQ
(
4U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
""
);
EXPECT_EQ
(
r
[
1
],
""
);
EXPECT_EQ
(
r
[
2
],
"a"
);
EXPECT_EQ
(
r
[
3
],
""
);
r
.
clear
();
SplitString
(
butil
::
StringPiece
(
"
\t
a
\t\n
b
\t
cc"
),
'\n'
,
&
r
);
ASSERT_EQ
(
2U
,
r
.
size
());
EXPECT_EQ
(
r
[
0
],
"a"
);
EXPECT_EQ
(
r
[
1
],
"b
\t
cc"
);
r
.
clear
();
}
TEST
(
SplitStringUsingSubstrTest
,
StringWithNoDelimiter
)
{
std
::
vector
<
std
::
string
>
results
;
SplitStringUsingSubstr
(
"alongwordwithnodelimiter"
,
"DELIMITER"
,
&
results
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment