Unverified Commit 29ab8982 authored by jamesge's avatar jamesge Committed by GitHub

Merge pull request #1295 from lrita/l_s

make butil::BasicStringPiece<T> support string split functions-family
parents e64eb3ce 32c05be9
...@@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece { ...@@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
: ptr_(str.data()), length_(str.size()) {} : ptr_(str.data()), length_(str.size()) {}
BasicStringPiece(const value_type* offset, size_type len) BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {} : ptr_(offset), length_(len) {}
// Constructs a view of the part of |str| that starts at offset |pos| and
// spans at most |len| characters (through the end of |str| when |len| is npos
// or larger than what remains).
// |pos| is clamped to str.length(): size_type is unsigned, so an unclamped
// `str.length() - pos` would wrap around for an out-of-range |pos| and produce
// a pointer and length outside |str|. With the clamp, an out-of-range |pos|
// yields an empty piece positioned at the end of |str|.
BasicStringPiece(const BasicStringPiece& str, size_type pos, size_type len = npos)
    : ptr_(str.data() + std::min(pos, str.length())),
      length_(std::min(len, str.length() - std::min(pos, str.length()))) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end) const typename STRING_TYPE::const_iterator& end)
: ptr_((end > begin) ? &(*begin) : NULL), : ptr_((end > begin) ? &(*begin) : NULL),
...@@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece { ...@@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
ptr_ = NULL; ptr_ = NULL;
length_ = 0; length_ = 0;
} }
// Re-points this piece at the part of |str| that starts at offset |pos| and
// spans at most |len| characters (through the end of |str| when |len| is npos
// or larger than what remains). Returns *this.
// |str| may be *this: the start pointer is updated first, but the length is
// only overwritten after str.length() has been read.
BasicStringPiece& assign(const BasicStringPiece& str, size_type pos, size_type len = npos) {
  // size_type is unsigned, so `str.length() - pos` would wrap around for an
  // out-of-range |pos|. Clamp it so that case yields an empty piece at the end
  // of |str| instead of a pointer/length pair outside the buffer.
  const size_type start = std::min(pos, str.length());
  ptr_ = str.data() + start;
  length_ = std::min(len, str.length() - start);
  return *this;
}
void set(const value_type* data, size_type len) { void set(const value_type* data, size_type len) {
ptr_ = data; ptr_ = data;
length_ = len; length_ = len;
......
...@@ -35,25 +35,26 @@ void SplitStringT(const STR& str, ...@@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
} }
} }
bool SplitStringIntoKeyValue(const std::string& line, template <typename STR>
char key_value_delimiter, bool SplitStringIntoKeyValueT(const STR& line,
std::string* key, typename STR::value_type key_value_delimiter,
std::string* value) { STR* key,
STR* value) {
key->clear(); key->clear();
value->clear(); value->clear();
// Find the delimiter. // Find the delimiter.
size_t end_key_pos = line.find_first_of(key_value_delimiter); size_t end_key_pos = line.find_first_of(key_value_delimiter);
if (end_key_pos == std::string::npos) { if (end_key_pos == STR::npos) {
DVLOG(1) << "cannot find delimiter in: " << line; DVLOG(1) << "cannot find delimiter in: " << line;
return false; // no delimiter return false; // no delimiter
} }
key->assign(line, 0, end_key_pos); key->assign(line, 0, end_key_pos);
// Find the value string. // Find the value string.
std::string remains(line, end_key_pos, line.size() - end_key_pos); STR remains(line, end_key_pos, line.size() - end_key_pos);
size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter); size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
if (begin_value_pos == std::string::npos) { if (begin_value_pos == STR::npos) {
DVLOG(1) << "cannot parse value from line: " << line; DVLOG(1) << "cannot parse value from line: " << line;
return false; // no value return false; // no value
} }
...@@ -134,6 +135,13 @@ void SplitString(const string16& str, ...@@ -134,6 +135,13 @@ void SplitString(const string16& str,
SplitStringT(str, c, true, r); SplitStringT(str, c, true, r);
} }
// StringPiece16 overload of SplitString: splits |str| on the UTF-16 code unit
// |c| via SplitStringT, with whitespace trimming enabled, writing into |*r|.
// Debug builds check that |c| is a single code unit (CBU16_IS_SINGLE).
// NOTE(review): the resulting pieces presumably point into |str|'s buffer, so
// that buffer would have to outlive |*r| - confirm against SplitStringT.
void SplitString(const butil::StringPiece16& str,
                 char16 c,
                 std::vector<butil::StringPiece16>* r) {
  DCHECK(CBU16_IS_SINGLE(c));
  // SplitStringDontTrim passes false in this position.
  const bool trim_pieces = true;
  SplitStringT(str, c, trim_pieces, r);
}
void SplitString(const std::string& str, void SplitString(const std::string& str,
char c, char c,
std::vector<std::string>* r) { std::vector<std::string>* r) {
...@@ -144,13 +152,24 @@ void SplitString(const std::string& str, ...@@ -144,13 +152,24 @@ void SplitString(const std::string& str,
SplitStringT(str, c, true, r); SplitStringT(str, c, true, r);
} }
bool SplitStringIntoKeyValuePairs(const std::string& line, void SplitString(const StringPiece& str,
char c,
std::vector<StringPiece>* r) {
// Debug-only checks on the delimiter: |c| must be non-negative (only
// checkable where plain char is signed, hence the CHAR_MIN guard) and
// strictly below 0x7F.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
DCHECK(c < 0x7F);
// Third argument true: the SplitStringDontTrim variants pass false here.
SplitStringT(str, c, true, r);
}
template<typename STR>
bool SplitStringIntoKeyValuePairsT(const STR& line,
char key_value_delimiter, char key_value_delimiter,
char key_value_pair_delimiter, char key_value_pair_delimiter,
StringPairs* key_value_pairs) { std::vector<std::pair<STR, STR> >* key_value_pairs) {
key_value_pairs->clear(); key_value_pairs->clear();
std::vector<std::string> pairs; std::vector<STR> pairs;
SplitString(line, key_value_pair_delimiter, &pairs); SplitString(line, key_value_pair_delimiter, &pairs);
bool success = true; bool success = true;
...@@ -159,30 +178,58 @@ bool SplitStringIntoKeyValuePairs(const std::string& line, ...@@ -159,30 +178,58 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
if (pairs[i].empty()) if (pairs[i].empty())
continue; continue;
std::string key; STR key;
std::string value; STR value;
if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) { if (!SplitStringIntoKeyValueT(pairs[i], key_value_delimiter, &key, &value)) {
// Don't return here, to allow for pairs without associated // Don't return here, to allow for pairs without associated
// value or key; just record that the split failed. // value or key; just record that the split failed.
success = false; success = false;
} }
key_value_pairs->push_back(make_pair(key, value)); key_value_pairs->push_back(std::make_pair(key, value));
} }
return success; return success;
} }
// std::string overload: forwards unchanged to SplitStringIntoKeyValuePairsT
// and returns its result. That template clears |key_value_pairs| first and
// reports false when any non-empty pair could not be split into key and value
// (such pairs are still appended).
bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}
// StringPiece overload: forwards unchanged to SplitStringIntoKeyValuePairsT
// and returns its result. That template clears |key_value_pairs| first and
// reports false when any non-empty pair could not be split into key and value
// (such pairs are still appended).
// NOTE(review): the stored keys/values are StringPieces, presumably pointing
// into |line|'s buffer, so that buffer would have to outlive
// |*key_value_pairs| - confirm.
bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}
void SplitStringUsingSubstr(const string16& str, void SplitStringUsingSubstr(const string16& str,
const string16& s, const string16& s,
std::vector<string16>* r) { std::vector<string16>* r) {
SplitStringUsingSubstrT(str, s, r); SplitStringUsingSubstrT(str, s, r);
} }
// StringPiece16 overload: splits |str| using the substring |s| as the
// delimiter by forwarding to SplitStringUsingSubstrT; results go to |*r|.
void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringUsingSubstr(const std::string& str, void SplitStringUsingSubstr(const std::string& str,
const std::string& s, const std::string& s,
std::vector<std::string>* r) { std::vector<std::string>* r) {
SplitStringUsingSubstrT(str, s, r); SplitStringUsingSubstrT(str, s, r);
} }
// StringPiece overload: splits |str| using the substring |s| as the delimiter
// by forwarding to SplitStringUsingSubstrT; results go to |*r|.
void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringDontTrim(const string16& str, void SplitStringDontTrim(const string16& str,
char16 c, char16 c,
std::vector<string16>* r) { std::vector<string16>* r) {
...@@ -190,6 +237,13 @@ void SplitStringDontTrim(const string16& str, ...@@ -190,6 +237,13 @@ void SplitStringDontTrim(const string16& str,
SplitStringT(str, c, false, r); SplitStringT(str, c, false, r);
} }
// StringPiece16 overload of SplitStringDontTrim: splits |str| on the UTF-16
// code unit |c| via SplitStringT with whitespace trimming disabled, writing
// into |*r|. Debug builds check that |c| is a single code unit
// (CBU16_IS_SINGLE).
void SplitStringDontTrim(const butil::StringPiece16& str,
                         char16 c,
                         std::vector<butil::StringPiece16>* r) {
  DCHECK(CBU16_IS_SINGLE(c));
  // SplitString passes true in this position.
  const bool trim_pieces = false;
  SplitStringT(str, c, trim_pieces, r);
}
void SplitStringDontTrim(const std::string& str, void SplitStringDontTrim(const std::string& str,
char c, char c,
std::vector<std::string>* r) { std::vector<std::string>* r) {
...@@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str, ...@@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
SplitStringT(str, c, false, r); SplitStringT(str, c, false, r);
} }
// StringPiece overload of SplitStringDontTrim: splits |str| on |c| via
// SplitStringT with whitespace trimming disabled, writing into |*r|.
// Debug builds check that |str| passes IsStringUTF8 and that |c| lies in
// [0, 0x7F).
void SplitStringDontTrim(const butil::StringPiece& str,
                         char c,
                         std::vector<butil::StringPiece>* r) {
  DCHECK(IsStringUTF8(str));
  // The lower-bound check is only meaningful where plain char is signed.
#if CHAR_MIN < 0
  DCHECK(c >= 0);
#endif
  DCHECK(c < 0x7F);
  // SplitString passes true in this position.
  const bool trim_pieces = false;
  SplitStringT(str, c, trim_pieces, r);
}
void SplitStringAlongWhitespace(const string16& str, void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) { std::vector<string16>* result) {
SplitStringAlongWhitespaceT(str, result); SplitStringAlongWhitespaceT(str, result);
} }
// StringPiece16 overload: forwards to SplitStringAlongWhitespaceT, which
// writes the split pieces of |str| into |*result|.
void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result) {
SplitStringAlongWhitespaceT(str, result);
}
void SplitStringAlongWhitespace(const std::string& str, void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) { std::vector<std::string>* result) {
SplitStringAlongWhitespaceT(str, result); SplitStringAlongWhitespaceT(str, result);
} }
// StringPiece overload: forwards to SplitStringAlongWhitespaceT, which writes
// the split pieces of |str| into |*result|.
void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result) {
SplitStringAlongWhitespaceT(str, result);
}
} // namespace butil } // namespace butil
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "butil/base_export.h" #include "butil/base_export.h"
#include "butil/strings/string16.h" #include "butil/strings/string16.h"
#include "butil/strings/string_piece.h"
namespace butil { namespace butil {
...@@ -23,6 +24,9 @@ namespace butil { ...@@ -23,6 +24,9 @@ namespace butil {
BUTIL_EXPORT void SplitString(const string16& str, BUTIL_EXPORT void SplitString(const string16& str,
char16 c, char16 c,
std::vector<string16>* r); std::vector<string16>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range. // the trailing byte of a multi-byte character can be in the ASCII range.
...@@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str, ...@@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
BUTIL_EXPORT void SplitString(const std::string& str, BUTIL_EXPORT void SplitString(const std::string& str,
char c, char c,
std::vector<std::string>* r); std::vector<std::string>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);
typedef std::vector<std::pair<std::string, std::string> > StringPairs; typedef std::vector<std::pair<std::string, std::string> > StringPairs;
typedef std::vector<std::pair<butil::StringPiece, butil::StringPiece> > StringPiecePairs;
// Splits |line| into key value pairs according to the given delimiters and // Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true // removes whitespace leading each key and trailing each value. Returns true
...@@ -42,20 +50,33 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line, ...@@ -42,20 +50,33 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter, char key_value_delimiter,
char key_value_pair_delimiter, char key_value_pair_delimiter,
StringPairs* key_value_pairs); StringPairs* key_value_pairs);
BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs);
// The same as SplitString, but use a substring delimiter instead of a char. // The same as SplitString, but use a substring delimiter instead of a char.
BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str, BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s, const string16& s,
std::vector<string16>* r); std::vector<string16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str, BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s, const std::string& s,
std::vector<std::string>* r); std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r);
// The same as SplitString, but don't trim white space. // The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane) // NOTE: |c| must be in BMP (Basic Multilingual Plane)
BUTIL_EXPORT void SplitStringDontTrim(const string16& str, BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
char16 c, char16 c,
std::vector<string16>* r); std::vector<string16>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range. // the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
...@@ -63,6 +84,9 @@ BUTIL_EXPORT void SplitStringDontTrim(const string16& str, ...@@ -63,6 +84,9 @@ BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
BUTIL_EXPORT void SplitStringDontTrim(const std::string& str, BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
char c, char c,
std::vector<std::string>* r); std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);
// WARNING: this uses whitespace as defined by the HTML5 spec. If you need // WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then // a function similar to this but want to trim all types of whitespace, then
...@@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str, ...@@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
// characters is added to result. // characters is added to result.
BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str, BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result); std::vector<string16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str, BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result); std::vector<std::string>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result);
} // namespace butil } // namespace butil
......
...@@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input, ...@@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
output); output);
} }
// StringPiece16 overload of TrimWhitespace: trims the characters listed in
// kWhitespaceUTF16 from |input| at the ends selected by |positions|, via
// TrimStringT, and returns TrimStringT's result. The trimmed view is written
// to |*output|.
TrimPositions TrimWhitespace(const butil::StringPiece16& input,
                             TrimPositions positions,
                             butil::StringPiece16* output) {
  const butil::StringPiece16 whitespace_chars(kWhitespaceUTF16);
  return TrimStringT(input, whitespace_chars, positions, output);
}
TrimPositions TrimWhitespaceASCII(const std::string& input, TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions, TrimPositions positions,
std::string* output) { std::string* output) {
return TrimStringT(input, std::string(kWhitespaceASCII), positions, output); return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
} }
// StringPiece overload of TrimWhitespaceASCII: trims the characters listed in
// kWhitespaceASCII from |input| at the ends selected by |positions|, via
// TrimStringT, and returns TrimStringT's result. The trimmed view is written
// to |*output|.
TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
                                  TrimPositions positions,
                                  butil::StringPiece* output) {
  const butil::StringPiece whitespace_chars(kWhitespaceASCII);
  return TrimStringT(input, whitespace_chars, positions, output);
}
// This function is only for backward-compatibility. // This function is only for backward-compatibility.
// To be removed when all callers are updated. // To be removed when all callers are updated.
TrimPositions TrimWhitespace(const std::string& input, TrimPositions TrimWhitespace(const std::string& input,
...@@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input, ...@@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
return TrimWhitespaceASCII(input, positions, output); return TrimWhitespaceASCII(input, positions, output);
} }
// StringPiece overload of TrimWhitespace. Like the std::string overload of the
// same name, it simply forwards to TrimWhitespaceASCII and returns its result.
TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimWhitespaceASCII(input, positions, output);
}
template<typename STR> template<typename STR>
STR CollapseWhitespaceT(const STR& text, STR CollapseWhitespaceT(const STR& text,
bool trim_sequences_with_line_breaks) { bool trim_sequences_with_line_breaks) {
...@@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) { ...@@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
return DoIsStringASCII(str); return DoIsStringASCII(str);
} }
bool IsStringUTF8(const std::string& str) { bool IsStringUTF8(const StringPiece& str) {
const char *src = str.data(); const char *src = str.data();
int32_t src_len = static_cast<int32_t>(str.length()); int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0; int32_t char_index = 0;
......
...@@ -202,15 +202,24 @@ enum TrimPositions { ...@@ -202,15 +202,24 @@ enum TrimPositions {
BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input, BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input,
TrimPositions positions, TrimPositions positions,
butil::string16* output); butil::string16* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions, TrimPositions positions,
std::string* output); std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);
// Deprecated. This function is only for backward compatibility and calls // Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII(). // TrimWhitespaceASCII().
BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input, BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions, TrimPositions positions,
std::string* output); std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);
// Searches for CR or LF characters. Removes all contiguous whitespace // Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text // strings that contain them. This is useful when trying to deal with text
...@@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input, ...@@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
// to have the maximum 'discriminating' power from other encodings. If // to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to // there's a use case for just checking the structural validity, we have to
// add a new function for that. // add a new function for that.
BUTIL_EXPORT bool IsStringUTF8(const std::string& str); BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const StringPiece& str); BUTIL_EXPORT bool IsStringASCII(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const string16& str); BUTIL_EXPORT bool IsStringASCII(const string16& str);
......
...@@ -239,6 +239,70 @@ TEST(StringUtilTest, SplitString) { ...@@ -239,6 +239,70 @@ TEST(StringUtilTest, SplitString) {
r.clear(); r.clear();
} }
// Exercises the StringPiece overload of SplitString: empty input, plain
// splitting, whitespace trimming of pieces, and empty pieces around
// delimiters.
TEST(StringUtilTest, SplitStringStringPiece) {
std::vector<butil::StringPiece> r;
// A default-constructed (empty) piece produces no results.
SplitString(butil::StringPiece(), ',', &r);
EXPECT_EQ(0U, r.size());
r.clear();
// Plain comma-separated input.
SplitString(butil::StringPiece("a,b,c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b");
EXPECT_EQ(r[2], "c");
r.clear();
// Whitespace next to a delimiter is trimmed from each piece.
SplitString(butil::StringPiece("a, b, c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b");
EXPECT_EQ(r[2], "c");
r.clear();
// Adjacent delimiters yield an empty piece between them.
SplitString(butil::StringPiece("a,,c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "");
EXPECT_EQ(r[2], "c");
r.clear();
// Whitespace-only input with no delimiter present produces no results.
SplitString(butil::StringPiece(" "), '*', &r);
EXPECT_EQ(0U, r.size());
r.clear();
// Input without the delimiter comes back as a single piece.
SplitString(butil::StringPiece("foo"), '*', &r);
ASSERT_EQ(1U, r.size());
EXPECT_EQ(r[0], "foo");
r.clear();
// A trailing delimiter yields a trailing empty piece; the space before it
// is trimmed from "foo".
SplitString(butil::StringPiece("foo ,"), ',', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "foo");
EXPECT_EQ(r[1], "");
r.clear();
// A lone delimiter yields two empty pieces.
SplitString(butil::StringPiece(","), ',', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "");
EXPECT_EQ(r[1], "");
r.clear();
// The delimiter may itself be a whitespace character (tab): every tab still
// separates pieces.
SplitString(butil::StringPiece("\t\ta\t"), '\t', &r);
ASSERT_EQ(4U, r.size());
EXPECT_EQ(r[0], "");
EXPECT_EQ(r[1], "");
EXPECT_EQ(r[2], "a");
EXPECT_EQ(r[3], "");
r.clear();
// Splitting on newline: tabs at the ends of a piece are trimmed, while the
// tab inside "b\tcc" is preserved.
SplitString(butil::StringPiece("\ta\t\nb\tcc"), '\n', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b\tcc");
r.clear();
}
TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) { TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
std::vector<std::string> results; std::vector<std::string> results;
SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results); SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment