Unverified Commit f9558a09 authored by Ge Jun's avatar Ge Jun Committed by GitHub

Merge pull request #732 from zyearn/support_kv_pair_splitter

Support kv pair splitter
parents 1ebba7f7 f08558d2
...@@ -407,19 +407,6 @@ void URI::SetH2Path(const char* h2_path) { ...@@ -407,19 +407,6 @@ void URI::SetH2Path(const char* h2_path) {
} }
} }
void QuerySplitter::split() {
butil::StringPiece query_pair(_sp.field(), _sp.length());
const size_t pos = query_pair.find('=');
if (pos == butil::StringPiece::npos) {
_key = query_pair;
_value.clear();
} else {
_key= query_pair.substr(0, pos);
_value = query_pair.substr(pos + 1);
}
_is_split = true;
}
QueryRemover::QueryRemover(const std::string* str) QueryRemover::QueryRemover(const std::string* str)
: _query(str) : _query(str)
, _qs(str->data(), str->data() + str->size()) , _qs(str->data(), str->data() + str->size())
......
...@@ -195,68 +195,19 @@ inline std::ostream& operator<<(std::ostream& os, const URI& uri) { ...@@ -195,68 +195,19 @@ inline std::ostream& operator<<(std::ostream& os, const URI& uri) {
} }
// Split query in the format of "key1=value1&key2&key3=value3" // Split query in the format of "key1=value1&key2&key3=value3"
// This class can also handle some exceptional cases, such as class QuerySplitter : public butil::KeyValuePairsSplitter {
// consecutive ampersand, only equal sign, only key and so on.
class QuerySplitter {
public: public:
QuerySplitter(const char* str_begin, const char* str_end) inline QuerySplitter(const char* str_begin, const char* str_end)
: _sp(str_begin, str_end, '&') : KeyValuePairsSplitter(str_begin, str_end, '=', '&')
, _is_split(false) { {}
}
QuerySplitter(const char* str_begin)
: _sp(str_begin, '&')
, _is_split(false) {
}
QuerySplitter(const butil::StringPiece &sp) inline QuerySplitter(const char* str_begin)
: _sp(sp.begin(), sp.end(), '&') : KeyValuePairsSplitter(str_begin, '=', '&')
, _is_split(false) { {}
}
const butil::StringPiece& key() { inline QuerySplitter(const butil::StringPiece &sp)
if (!_is_split) { : KeyValuePairsSplitter(sp, '=', '&')
split(); {}
}
return _key;
}
const butil::StringPiece& value() {
if (!_is_split) {
split();
}
return _value;
}
// Get the current value of key and value
// in the format of "key=value"
butil::StringPiece key_and_value(){
return butil::StringPiece(_sp.field(), _sp.length());
}
// Move splitter forward.
QuerySplitter& operator++() {
++_sp;
_is_split = false;
return *this;
}
QuerySplitter operator++(int) {
QuerySplitter tmp = *this;
operator++();
return tmp;
}
operator const void*() const { return _sp; }
private:
void split();
private:
butil::StringSplitter _sp;
butil::StringPiece _key;
butil::StringPiece _value;
bool _is_split;
}; };
// A class to remove some specific keys in a query string, // A class to remove some specific keys in a query string,
...@@ -266,8 +217,8 @@ class QueryRemover { ...@@ -266,8 +217,8 @@ class QueryRemover {
public: public:
QueryRemover(const std::string* str); QueryRemover(const std::string* str);
const butil::StringPiece& key() { return _qs.key();} butil::StringPiece key() { return _qs.key();}
const butil::StringPiece& value() { return _qs.value(); } butil::StringPiece value() { return _qs.value(); }
butil::StringPiece key_and_value() { return _qs.key_and_value(); } butil::StringPiece key_and_value() { return _qs.key_and_value(); }
// Move splitter forward. // Move splitter forward.
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include "butil/strings/string_piece.h"
// It's common to encode data into strings separated by special characters // It's common to encode data into strings separated by special characters
// and decode them back, but functions such as `split_string' has to modify // and decode them back, but functions such as `split_string' has to modify
...@@ -65,7 +66,10 @@ public: ...@@ -65,7 +66,10 @@ public:
// if str_end is not NULL. // if str_end is not NULL.
inline StringSplitter(const char* str_begin, const char* str_end, inline StringSplitter(const char* str_begin, const char* str_end,
char separator, char separator,
EmptyFieldAction = SKIP_EMPTY_FIELD); EmptyFieldAction action = SKIP_EMPTY_FIELD);
// `input' may contain embedded '\0' characters, and the separator can be '\0' too.
inline StringSplitter(const StringPiece& input, char separator,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// Move splitter forward. // Move splitter forward.
inline StringSplitter& operator++(); inline StringSplitter& operator++();
...@@ -78,6 +82,7 @@ public: ...@@ -78,6 +82,7 @@ public:
// not be '\0' because we don't modify `input'. // not be '\0' because we don't modify `input'.
inline const char* field() const; inline const char* field() const;
inline size_t length() const; inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'. // Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise. // Returns 0 on success, -1 otherwise.
...@@ -132,6 +137,7 @@ public: ...@@ -132,6 +137,7 @@ public:
// not be '\0' because we don't modify `input'. // not be '\0' because we don't modify `input'.
inline const char* field() const; inline const char* field() const;
inline size_t length() const; inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'. // Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise. // Returns 0 on success, -1 otherwise.
...@@ -159,6 +165,77 @@ private: ...@@ -159,6 +165,77 @@ private:
const EmptyFieldAction _empty_field_action; const EmptyFieldAction _empty_field_action;
}; };
// Split key-value pairs in the format according to the given delimiters.
// This class can also handle some exceptional cases.
// 1. consecutive key_value_pair_delimiter are omitted, for example,
//    suppose key_value_delimiter is '=' and key_value_pair_delimiter
//    is '&', then 'k1=v1&&&k2=v2' is normalized to 'k1=v1&k2=v2'.
// 2. key or value can be empty or both can be empty.
// 3. consecutive key_value_delimiter are not omitted, for example,
//    suppose input is 'k1===v2' and key_value_delimiter is '=', then
//    key() returns 'k1', value() returns '==v2'.
// The splitter only references the input; it never copies or modifies it.
class KeyValuePairsSplitter {
public:
    // Splits the range [str_begin, str_end) into pairs separated by
    // `key_value_pair_delimiter'; inside each pair the first
    // `key_value_delimiter' separates the key from the value.
    inline KeyValuePairsSplitter(const char* str_begin,
                                 const char* str_end,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : _sp(str_begin, str_end, key_value_pair_delimiter)
        , _delim_pos(StringPiece::npos)
        , _key_value_delim(key_value_delimiter) {
        // Locate the delimiter of the first pair right away so that
        // key()/value() are usable immediately after construction.
        UpdateDelimiterPosition();
    }

    // Same as above, but forwards a NULL end pointer so that the underlying
    // StringSplitter stops at the terminating '\0' of `str_begin'.
    inline KeyValuePairsSplitter(const char* str_begin,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : KeyValuePairsSplitter(str_begin, NULL,
                                key_value_delimiter, key_value_pair_delimiter) {}

    // Splits exactly the bytes referenced by `sp'.
    inline KeyValuePairsSplitter(const StringPiece &sp,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : KeyValuePairsSplitter(sp.begin(), sp.end(),
                                key_value_delimiter, key_value_pair_delimiter) {}

    // Key of the current pair: everything before the first
    // key_value_delimiter, or the whole pair when it has no delimiter.
    inline StringPiece key() const {
        return key_and_value().substr(0, _delim_pos);
    }

    // Value of the current pair: everything after the first
    // key_value_delimiter; empty when the pair has no delimiter.
    inline StringPiece value() const {
        const StringPiece pair = key_and_value();
        // _delim_pos equals pair.length() when no delimiter was found.
        // Keep the start offset inside the pair instead of asking substr()
        // for a position one past the end.
        const StringPiece::size_type start =
            (_delim_pos < pair.length()) ? _delim_pos + 1 : pair.length();
        return pair.substr(start);
    }

    // Get the current value of key and value
    // in the format of "key=value"
    inline StringPiece key_and_value() const {
        return _sp.field_sp();
    }

    // Move splitter forward.
    inline KeyValuePairsSplitter& operator++() {
        ++_sp;
        UpdateDelimiterPosition();
        return *this;
    }

    // Post-increment: returns a copy taken before moving forward.
    inline KeyValuePairsSplitter operator++(int) {
        KeyValuePairsSplitter tmp = *this;
        operator++();
        return tmp;
    }

    // Forwards the validity check of the underlying StringSplitter, so the
    // object can be tested in a boolean context while iterating.
    inline operator const void*() const { return _sp; }

private:
    // Recomputes _delim_pos for the pair that _sp currently points at.
    inline void UpdateDelimiterPosition();

private:
    StringSplitter _sp;                 // iterates over the pairs
    StringPiece::size_type _delim_pos;  // offset of the delimiter in the
                                        // current pair, or the pair's length
                                        // when there is none
    const char _key_value_delim;
};
} // namespace butil } // namespace butil
#include "butil/string_splitter_inl.h" #include "butil/string_splitter_inl.h"
......
...@@ -22,15 +22,6 @@ ...@@ -22,15 +22,6 @@
namespace butil { namespace butil {
// Constructs a splitter over `str' whose fields are separated by `sep'.
// No end pointer is given: _str_tail stays NULL, in which case not_end()
// detects the end of input by the '\0' character.
// `action' is stored as the empty-field policy of this splitter.
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: _head(str)
, _str_tail(NULL)
, _sep(sep)
, _empty_field_action(action) {
// Position the splitter on the first field.
init();
}
StringSplitter::StringSplitter(const char* str_begin, StringSplitter::StringSplitter(const char* str_begin,
const char* str_end, const char* str_end,
const char sep, const char sep,
...@@ -42,6 +33,14 @@ StringSplitter::StringSplitter(const char* str_begin, ...@@ -42,6 +33,14 @@ StringSplitter::StringSplitter(const char* str_begin,
init(); init();
} }
// Constructs a splitter over `str' whose fields are separated by `sep'.
// Delegates to the (begin, end) constructor with a NULL end pointer; with a
// NULL end, not_end() detects the end of input by the '\0' character.
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: StringSplitter(str, NULL, sep, action) {}
// Constructs a splitter over the bytes referenced by `input'.
// Delegates to the (begin, end) constructor with an explicit end pointer of
// data() + length(), so the end of input is decided by that pointer rather
// than by a '\0' character.
StringSplitter::StringSplitter(const StringPiece& input, char sep,
EmptyFieldAction action)
: StringSplitter(input.data(), input.data() + input.length(), sep, action) {}
void StringSplitter::init() { void StringSplitter::init() {
// Find the starting _head and _tail. // Find the starting _head and _tail.
if (__builtin_expect(_head != NULL, 1)) { if (__builtin_expect(_head != NULL, 1)) {
...@@ -86,6 +85,10 @@ size_t StringSplitter::length() const { ...@@ -86,6 +85,10 @@ size_t StringSplitter::length() const {
return static_cast<size_t>(_tail - _head); return static_cast<size_t>(_tail - _head);
} }
// Returns the current field as a StringPiece built from field() and
// length(). The piece references the input; nothing is copied.
StringPiece StringSplitter::field_sp() const {
    const char* const field_begin = field();
    const size_t field_size = length();
    return StringPiece(field_begin, field_size);
}
bool StringSplitter::not_end(const char* p) const { bool StringSplitter::not_end(const char* p) const {
return (_str_tail == NULL) ? *p : (p != _str_tail); return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
...@@ -233,6 +236,10 @@ size_t StringMultiSplitter::length() const { ...@@ -233,6 +236,10 @@ size_t StringMultiSplitter::length() const {
return static_cast<size_t>(_tail - _head); return static_cast<size_t>(_tail - _head);
} }
// Wraps the current field (field() plus length()) into a StringPiece
// without copying any bytes.
StringPiece StringMultiSplitter::field_sp() const {
    StringPiece current(field(), length());
    return current;
}
bool StringMultiSplitter::not_end(const char* p) const { bool StringMultiSplitter::not_end(const char* p) const {
return (_str_tail == NULL) ? *p : (p != _str_tail); return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
...@@ -309,6 +316,14 @@ int StringMultiSplitter::to_double(double* pv) const { ...@@ -309,6 +316,14 @@ int StringMultiSplitter::to_double(double* pv) const {
return (endptr == field() + length()) ? 0 : -1; return (endptr == field() + length()) ? 0 : -1;
} }
void KeyValuePairsSplitter::UpdateDelimiterPosition() {
const StringPiece key_value_pair(key_and_value());
_delim_pos = key_value_pair.find(_key_value_delim);
if (_delim_pos == StringPiece::npos) {
_delim_pos = key_value_pair.length();
}
}
} // namespace butil } // namespace butil
#endif // BUTIL_STRING_SPLITTER_INL_H #endif // BUTIL_STRING_SPLITTER_INL_H
...@@ -319,6 +319,71 @@ TEST_F(StringSplitterTest, split_limit_len) { ...@@ -319,6 +319,71 @@ TEST_F(StringSplitterTest, split_limit_len) {
++ss2; ++ss2;
ASSERT_FALSE(ss2); ASSERT_FALSE(ss2);
butil::StringPiece sp(str, 5);
// Allows using '\0' as separator
butil::StringSplitter ss3(sp, '\0');
ASSERT_TRUE(ss3);
ASSERT_EQ(3ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "1\t1", ss3.length()));
++ss3;
ASSERT_TRUE(ss3);
ASSERT_EQ(1ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "3", ss3.length()));
++ss3;
ASSERT_FALSE(ss3);
}
TEST_F(StringSplitterTest, key_value_pairs_splitter_sanity) {
std::string kvstr = "key1=value1&&&key2=value2&key3=value3&===&key4=&=&=value5";
for (int i = 0 ; i < 3; ++i) {
// Test three constructors
butil::KeyValuePairsSplitter* psplitter = NULL;
if (i == 0) {
psplitter = new butil::KeyValuePairsSplitter(kvstr, '=', '&');
} else if (i == 1) {
psplitter = new butil::KeyValuePairsSplitter(
kvstr.data(), kvstr.data() + kvstr.size(), '=', '&');
} else if (i == 2) {
psplitter = new butil::KeyValuePairsSplitter(kvstr.c_str(), '=', '&');
}
butil::KeyValuePairsSplitter& splitter = *psplitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key1");
ASSERT_EQ(splitter.value(), "value1");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key2");
ASSERT_EQ(splitter.value(), "value2");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key3");
ASSERT_EQ(splitter.value(), "value3");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "==");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key4");
ASSERT_EQ(splitter.value(), "");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "value5");
++splitter;
ASSERT_FALSE(splitter);
delete psplitter;
}
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment