Commit c43d848c authored by zhujiashun

support_kv_pair_splitter: refactor key()/value() in StringSplitter && add comments

parent 75da0bec
...@@ -66,7 +66,10 @@ public: ...@@ -66,7 +66,10 @@ public:
// if str_end is not NULL. // if str_end is not NULL.
inline StringSplitter(const char* str_begin, const char* str_end, inline StringSplitter(const char* str_begin, const char* str_end,
char separator, char separator,
EmptyFieldAction = SKIP_EMPTY_FIELD); EmptyFieldAction action = SKIP_EMPTY_FIELD);
// The input may contain embedded '\0' characters, and the separator may itself be '\0'.
inline StringSplitter(const StringPiece& input, char separator,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// Move splitter forward. // Move splitter forward.
inline StringSplitter& operator++(); inline StringSplitter& operator++();
...@@ -79,6 +82,7 @@ public: ...@@ -79,6 +82,7 @@ public:
// not be '\0' because we don't modify `input'. // not be '\0' because we don't modify `input'.
inline const char* field() const; inline const char* field() const;
inline size_t length() const; inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'. // Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise. // Returns 0 on success, -1 otherwise.
...@@ -133,6 +137,7 @@ public: ...@@ -133,6 +137,7 @@ public:
// not be '\0' because we don't modify `input'. // not be '\0' because we don't modify `input'.
inline const char* field() const; inline const char* field() const;
inline size_t length() const; inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'. // Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise. // Returns 0 on success, -1 otherwise.
...@@ -161,8 +166,14 @@ private: ...@@ -161,8 +166,14 @@ private:
}; };
// Split query in the format according to the given delimiters. // Split query in the format according to the given delimiters.
// This class can also handle some exceptional cases, such as // This class can also handle some exceptional cases.
// consecutive ampersand, only equal sign, only key and so on. // 1. consecutive key_value_pair_delimiter are omitted, for example,
// suppose key_value_delimiter is '=' and key_value_pair_delimiter
// is '&', then k1=v1&&&k2=v2 is normalized to k1=v1&k2=v2.
// 2. key or value can be empty or both can be empty
// 3. consecutive key_value_delimiter are not omitted, for example,
// suppose input is k1===v2 and key_value_delimiter is '=', then
// key() returns 'k1', value() returns '==v2'.
class KeyValuePairsSplitter { class KeyValuePairsSplitter {
public: public:
inline KeyValuePairsSplitter(const char* str_begin, inline KeyValuePairsSplitter(const char* str_begin,
...@@ -170,38 +181,29 @@ public: ...@@ -170,38 +181,29 @@ public:
char key_value_delimiter, char key_value_delimiter,
char key_value_pair_delimiter) char key_value_pair_delimiter)
: _sp(str_begin, str_end, key_value_pair_delimiter) : _sp(str_begin, str_end, key_value_pair_delimiter)
, _is_split(false) , _deli_pos(StringPiece::npos)
, _key_value_delimiter(key_value_delimiter) { , _key_value_delimiter(key_value_delimiter) {
UpdateDelimiterPos();
} }
inline KeyValuePairsSplitter(const char* str_begin, inline KeyValuePairsSplitter(const char* str_begin,
char key_value_delimiter, char key_value_delimiter,
char key_value_pair_delimiter) char key_value_pair_delimiter)
: _sp(str_begin, key_value_pair_delimiter) : KeyValuePairsSplitter(str_begin, NULL,
, _is_split(false) key_value_delimiter, key_value_pair_delimiter) {}
, _key_value_delimiter(key_value_delimiter) {
}
inline KeyValuePairsSplitter(const StringPiece &sp, inline KeyValuePairsSplitter(const StringPiece &sp,
char key_value_delimiter, char key_value_delimiter,
char key_value_pair_delimiter) char key_value_pair_delimiter)
: _sp(sp.begin(), sp.end(), key_value_pair_delimiter) : KeyValuePairsSplitter(sp.begin(), sp.end(),
, _is_split(false) key_value_delimiter, key_value_pair_delimiter) {}
, _key_value_delimiter(key_value_delimiter) {
}
inline const StringPiece& key() { inline StringPiece key() {
if (!_is_split) { return StringPiece(_sp.field(), _sp.length()).substr(0, _deli_pos);
split();
}
return _key;
} }
inline const StringPiece& value() { inline StringPiece value() {
if (!_is_split) { return StringPiece(_sp.field(), _sp.length()).substr(_deli_pos + 1);
split();
}
return _value;
} }
// Get the current value of key and value // Get the current value of key and value
...@@ -213,7 +215,7 @@ public: ...@@ -213,7 +215,7 @@ public:
// Move splitter forward. // Move splitter forward.
inline KeyValuePairsSplitter& operator++() { inline KeyValuePairsSplitter& operator++() {
++_sp; ++_sp;
_is_split = false; UpdateDelimiterPos();
return *this; return *this;
} }
...@@ -226,13 +228,11 @@ public: ...@@ -226,13 +228,11 @@ public:
inline operator const void*() const { return _sp; } inline operator const void*() const { return _sp; }
private: private:
inline void split(); inline void UpdateDelimiterPos();
private: private:
StringSplitter _sp; StringSplitter _sp;
StringPiece _key; StringPiece::size_type _deli_pos;
StringPiece _value;
bool _is_split;
const char _key_value_delimiter; const char _key_value_delimiter;
}; };
......
...@@ -22,15 +22,6 @@ ...@@ -22,15 +22,6 @@
namespace butil { namespace butil {
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: _head(str)
, _str_tail(NULL)
, _sep(sep)
, _empty_field_action(action) {
init();
}
StringSplitter::StringSplitter(const char* str_begin, StringSplitter::StringSplitter(const char* str_begin,
const char* str_end, const char* str_end,
const char sep, const char sep,
...@@ -42,6 +33,14 @@ StringSplitter::StringSplitter(const char* str_begin, ...@@ -42,6 +33,14 @@ StringSplitter::StringSplitter(const char* str_begin,
init(); init();
} }
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: StringSplitter(str, NULL, sep, action) {}
StringSplitter::StringSplitter(const StringPiece& input, char sep,
EmptyFieldAction action)
: StringSplitter(input.data(), input.data() + input.length(), sep, action) {}
void StringSplitter::init() { void StringSplitter::init() {
// Find the starting _head and _tail. // Find the starting _head and _tail.
if (__builtin_expect(_head != NULL, 1)) { if (__builtin_expect(_head != NULL, 1)) {
...@@ -86,6 +85,10 @@ size_t StringSplitter::length() const { ...@@ -86,6 +85,10 @@ size_t StringSplitter::length() const {
return static_cast<size_t>(_tail - _head); return static_cast<size_t>(_tail - _head);
} }
StringPiece StringSplitter::field_sp() const {
return StringPiece(field(), length());
}
bool StringSplitter::not_end(const char* p) const { bool StringSplitter::not_end(const char* p) const {
return (_str_tail == NULL) ? *p : (p != _str_tail); return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
...@@ -233,6 +236,10 @@ size_t StringMultiSplitter::length() const { ...@@ -233,6 +236,10 @@ size_t StringMultiSplitter::length() const {
return static_cast<size_t>(_tail - _head); return static_cast<size_t>(_tail - _head);
} }
StringPiece StringMultiSplitter::field_sp() const {
return StringPiece(field(), length());
}
bool StringMultiSplitter::not_end(const char* p) const { bool StringMultiSplitter::not_end(const char* p) const {
return (_str_tail == NULL) ? *p : (p != _str_tail); return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
...@@ -309,17 +316,12 @@ int StringMultiSplitter::to_double(double* pv) const { ...@@ -309,17 +316,12 @@ int StringMultiSplitter::to_double(double* pv) const {
return (endptr == field() + length()) ? 0 : -1; return (endptr == field() + length()) ? 0 : -1;
} }
void KeyValuePairsSplitter::split() { void KeyValuePairsSplitter::UpdateDelimiterPos() {
StringPiece query_pair(_sp.field(), _sp.length()); StringPiece key_value_pair(_sp.field(), _sp.length());
const size_t pos = query_pair.find('='); _deli_pos = key_value_pair.find(_key_value_delimiter);
if (pos == StringPiece::npos) { if (_deli_pos == StringPiece::npos) {
_key = query_pair; _deli_pos = key_value_pair.length();
_value.clear();
} else {
_key= query_pair.substr(0, pos);
_value = query_pair.substr(pos + 1);
} }
_is_split = true;
} }
} // namespace butil } // namespace butil
......
...@@ -319,10 +319,26 @@ TEST_F(StringSplitterTest, split_limit_len) { ...@@ -319,10 +319,26 @@ TEST_F(StringSplitterTest, split_limit_len) {
++ss2; ++ss2;
ASSERT_FALSE(ss2); ASSERT_FALSE(ss2);
butil::StringPiece sp(str, 5);
// Allows using '\0' as separator
butil::StringSplitter ss3(sp, '\0');
ASSERT_TRUE(ss3);
ASSERT_EQ(3ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "1\t1", ss3.length()));
++ss3;
ASSERT_TRUE(ss3);
ASSERT_EQ(1ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "3", ss3.length()));
++ss3;
ASSERT_FALSE(ss3);
} }
TEST_F(StringSplitterTest, key_value_pairs_splitter_sanity) { TEST_F(StringSplitterTest, key_value_pairs_splitter_sanity) {
std::string kvstr = "key1=value1&key2=value2&key3=value3"; std::string kvstr = "key1=value1&&&key2=value2&key3=value3";
{ {
butil::KeyValuePairsSplitter splitter(kvstr, '=', '&'); butil::KeyValuePairsSplitter splitter(kvstr, '=', '&');
ASSERT_TRUE(splitter); ASSERT_TRUE(splitter);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment