Unverified Commit 29ab8982 authored by jamesge's avatar jamesge Committed by GitHub

Merge pull request #1295 from lrita/l_s

make butil::BasicStringPiece<T> support string split functions-family
parents e64eb3ce 32c05be9
......@@ -185,6 +185,8 @@ template <typename STRING_TYPE> class BasicStringPiece {
: ptr_(str.data()), length_(str.size()) {}
BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
BasicStringPiece(const BasicStringPiece& str, size_type pos, size_type len = npos)
: ptr_(str.data() + pos), length_(std::min(len, str.length() - pos)) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end)
: ptr_((end > begin) ? &(*begin) : NULL),
......@@ -203,6 +205,11 @@ template <typename STRING_TYPE> class BasicStringPiece {
ptr_ = NULL;
length_ = 0;
}
BasicStringPiece& assign(const BasicStringPiece& str, size_type pos, size_type len = npos) {
ptr_ = str.data() + pos;
length_ = std::min(len, str.length() - pos);
return *this;
}
void set(const value_type* data, size_type len) {
ptr_ = data;
length_ = len;
......
......@@ -35,25 +35,26 @@ void SplitStringT(const STR& str,
}
}
bool SplitStringIntoKeyValue(const std::string& line,
char key_value_delimiter,
std::string* key,
std::string* value) {
template <typename STR>
bool SplitStringIntoKeyValueT(const STR& line,
typename STR::value_type key_value_delimiter,
STR* key,
STR* value) {
key->clear();
value->clear();
// Find the delimiter.
size_t end_key_pos = line.find_first_of(key_value_delimiter);
if (end_key_pos == std::string::npos) {
if (end_key_pos == STR::npos) {
DVLOG(1) << "cannot find delimiter in: " << line;
return false; // no delimiter
}
key->assign(line, 0, end_key_pos);
// Find the value string.
std::string remains(line, end_key_pos, line.size() - end_key_pos);
STR remains(line, end_key_pos, line.size() - end_key_pos);
size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
if (begin_value_pos == std::string::npos) {
if (begin_value_pos == STR::npos) {
DVLOG(1) << "cannot parse value from line: " << line;
return false; // no value
}
......@@ -134,6 +135,13 @@ void SplitString(const string16& str,
SplitStringT(str, c, true, r);
}
void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, true, r);
}
void SplitString(const std::string& str,
char c,
std::vector<std::string>* r) {
......@@ -144,13 +152,24 @@ void SplitString(const std::string& str,
SplitStringT(str, c, true, r);
}
bool SplitStringIntoKeyValuePairs(const std::string& line,
void SplitString(const StringPiece& str,
char c,
std::vector<StringPiece>* r) {
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
DCHECK(c < 0x7F);
SplitStringT(str, c, true, r);
}
template<typename STR>
bool SplitStringIntoKeyValuePairsT(const STR& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
std::vector<std::pair<STR, STR> >* key_value_pairs) {
key_value_pairs->clear();
std::vector<std::string> pairs;
std::vector<STR> pairs;
SplitString(line, key_value_pair_delimiter, &pairs);
bool success = true;
......@@ -159,30 +178,58 @@ bool SplitStringIntoKeyValuePairs(const std::string& line,
if (pairs[i].empty())
continue;
std::string key;
std::string value;
if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
STR key;
STR value;
if (!SplitStringIntoKeyValueT(pairs[i], key_value_delimiter, &key, &value)) {
// Don't return here, to allow for pairs without associated
// value or key; just record that the split failed.
success = false;
}
key_value_pairs->push_back(make_pair(key, value));
key_value_pairs->push_back(std::make_pair(key, value));
}
return success;
}
bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}
bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsT(line, key_value_delimiter,
key_value_pair_delimiter, key_value_pairs);
}
void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r) {
SplitStringUsingSubstrT(str, s, r);
}
void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r) {
......@@ -190,6 +237,13 @@ void SplitStringDontTrim(const string16& str,
SplitStringT(str, c, false, r);
}
void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r) {
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}
void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r) {
......@@ -201,14 +255,35 @@ void SplitStringDontTrim(const std::string& str,
SplitStringT(str, c, false, r);
}
void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r) {
DCHECK(IsStringUTF8(str));
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
DCHECK(c < 0x7F);
SplitStringT(str, c, false, r);
}
void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) {
SplitStringAlongWhitespaceT(str, result);
}
void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result) {
SplitStringAlongWhitespaceT(str, result);
}
void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) {
SplitStringAlongWhitespaceT(str, result);
}
void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result) {
SplitStringAlongWhitespaceT(str, result);
}
} // namespace butil
......@@ -11,6 +11,7 @@
#include "butil/base_export.h"
#include "butil/strings/string16.h"
#include "butil/strings/string_piece.h"
namespace butil {
......@@ -23,6 +24,9 @@ namespace butil {
BUTIL_EXPORT void SplitString(const string16& str,
char16 c,
std::vector<string16>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
......@@ -31,8 +35,12 @@ BUTIL_EXPORT void SplitString(const string16& str,
BUTIL_EXPORT void SplitString(const std::string& str,
char c,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitString(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);
typedef std::vector<std::pair<std::string, std::string> > StringPairs;
typedef std::vector<std::pair<butil::StringPiece, butil::StringPiece> > StringPiecePairs;
// Splits |line| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
......@@ -42,20 +50,33 @@ BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);
BUTIL_EXPORT bool SplitStringIntoKeyValuePairs(const butil::StringPiece& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPiecePairs* key_value_pairs);
// The same as SplitString, but use a substring delimiter instead of a char.
BUTIL_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece16& str,
const butil::StringPiece16& s,
std::vector<butil::StringPiece16>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringUsingSubstr(const butil::StringPiece& str,
const butil::StringPiece& s,
std::vector<butil::StringPiece>* r);
// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece16& str,
char16 c,
std::vector<butil::StringPiece16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
......@@ -63,6 +84,9 @@ BUTIL_EXPORT void SplitStringDontTrim(const string16& str,
BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r);
BUTIL_EXPORT void SplitStringDontTrim(const butil::StringPiece& str,
char c,
std::vector<butil::StringPiece>* r);
// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then
......@@ -74,8 +98,12 @@ BUTIL_EXPORT void SplitStringDontTrim(const std::string& str,
// characters is added to result.
BUTIL_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece16& str,
std::vector<butil::StringPiece16>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result);
BUTIL_EXPORT void SplitStringAlongWhitespace(const butil::StringPiece& str,
std::vector<butil::StringPiece>* result);
} // namespace butil
......
......@@ -246,12 +246,25 @@ TrimPositions TrimWhitespace(const string16& input,
output);
}
TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output) {
return TrimStringT(input, butil::StringPiece16(kWhitespaceUTF16), positions,
output);
}
TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output) {
return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
}
TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimStringT(input, butil::StringPiece(kWhitespaceASCII), positions, output);
}
// This function is only for backward-compatibility.
// To be removed when all callers are updated.
TrimPositions TrimWhitespace(const std::string& input,
......@@ -260,6 +273,12 @@ TrimPositions TrimWhitespace(const std::string& input,
return TrimWhitespaceASCII(input, positions, output);
}
TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output) {
return TrimWhitespaceASCII(input, positions, output);
}
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
bool trim_sequences_with_line_breaks) {
......@@ -340,7 +359,7 @@ bool IsStringASCII(const string16& str) {
return DoIsStringASCII(str);
}
bool IsStringUTF8(const std::string& str) {
bool IsStringUTF8(const StringPiece& str) {
const char *src = str.data();
int32_t src_len = static_cast<int32_t>(str.length());
int32_t char_index = 0;
......
......@@ -202,15 +202,24 @@ enum TrimPositions {
BUTIL_EXPORT TrimPositions TrimWhitespace(const string16& input,
TrimPositions positions,
butil::string16* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece16& input,
TrimPositions positions,
butil::StringPiece16* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespaceASCII(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);
// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
BUTIL_EXPORT TrimPositions TrimWhitespace(const std::string& input,
TrimPositions positions,
std::string* output);
BUTIL_EXPORT TrimPositions TrimWhitespace(const butil::StringPiece& input,
TrimPositions positions,
butil::StringPiece* output);
// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
......@@ -245,7 +254,7 @@ BUTIL_EXPORT bool ContainsOnlyChars(const StringPiece16& input,
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
BUTIL_EXPORT bool IsStringUTF8(const std::string& str);
BUTIL_EXPORT bool IsStringUTF8(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const StringPiece& str);
BUTIL_EXPORT bool IsStringASCII(const string16& str);
......
......@@ -239,6 +239,70 @@ TEST(StringUtilTest, SplitString) {
r.clear();
}
TEST(StringUtilTest, SplitStringStringPiece) {
std::vector<butil::StringPiece> r;
SplitString(butil::StringPiece(), ',', &r);
EXPECT_EQ(0U, r.size());
r.clear();
SplitString(butil::StringPiece("a,b,c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b");
EXPECT_EQ(r[2], "c");
r.clear();
SplitString(butil::StringPiece("a, b, c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b");
EXPECT_EQ(r[2], "c");
r.clear();
SplitString(butil::StringPiece("a,,c"), ',', &r);
ASSERT_EQ(3U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "");
EXPECT_EQ(r[2], "c");
r.clear();
SplitString(butil::StringPiece(" "), '*', &r);
EXPECT_EQ(0U, r.size());
r.clear();
SplitString(butil::StringPiece("foo"), '*', &r);
ASSERT_EQ(1U, r.size());
EXPECT_EQ(r[0], "foo");
r.clear();
SplitString(butil::StringPiece("foo ,"), ',', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "foo");
EXPECT_EQ(r[1], "");
r.clear();
SplitString(butil::StringPiece(","), ',', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "");
EXPECT_EQ(r[1], "");
r.clear();
SplitString(butil::StringPiece("\t\ta\t"), '\t', &r);
ASSERT_EQ(4U, r.size());
EXPECT_EQ(r[0], "");
EXPECT_EQ(r[1], "");
EXPECT_EQ(r[2], "a");
EXPECT_EQ(r[3], "");
r.clear();
SplitString(butil::StringPiece("\ta\t\nb\tcc"), '\n', &r);
ASSERT_EQ(2U, r.size());
EXPECT_EQ(r[0], "a");
EXPECT_EQ(r[1], "b\tcc");
r.clear();
}
TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
std::vector<std::string> results;
SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment