Unverified Commit f9558a09 authored by Ge Jun's avatar Ge Jun Committed by GitHub

Merge pull request #732 from zyearn/support_kv_pair_splitter

Support kv pair splitter
parents 1ebba7f7 f08558d2
......@@ -407,19 +407,6 @@ void URI::SetH2Path(const char* h2_path) {
}
}
void QuerySplitter::split() {
butil::StringPiece query_pair(_sp.field(), _sp.length());
const size_t pos = query_pair.find('=');
if (pos == butil::StringPiece::npos) {
_key = query_pair;
_value.clear();
} else {
_key= query_pair.substr(0, pos);
_value = query_pair.substr(pos + 1);
}
_is_split = true;
}
QueryRemover::QueryRemover(const std::string* str)
: _query(str)
, _qs(str->data(), str->data() + str->size())
......
......@@ -195,68 +195,19 @@ inline std::ostream& operator<<(std::ostream& os, const URI& uri) {
}
// Split query in the format of "key1=value1&key2&key3=value3"
// This class can also handle some exceptional cases, such as
// consecutive ampersand, only equal sign, only key and so on.
class QuerySplitter {
class QuerySplitter : public butil::KeyValuePairsSplitter {
public:
QuerySplitter(const char* str_begin, const char* str_end)
: _sp(str_begin, str_end, '&')
, _is_split(false) {
}
QuerySplitter(const char* str_begin)
: _sp(str_begin, '&')
, _is_split(false) {
}
inline QuerySplitter(const char* str_begin, const char* str_end)
: KeyValuePairsSplitter(str_begin, str_end, '=', '&')
{}
QuerySplitter(const butil::StringPiece &sp)
: _sp(sp.begin(), sp.end(), '&')
, _is_split(false) {
}
inline QuerySplitter(const char* str_begin)
: KeyValuePairsSplitter(str_begin, '=', '&')
{}
const butil::StringPiece& key() {
if (!_is_split) {
split();
}
return _key;
}
const butil::StringPiece& value() {
if (!_is_split) {
split();
}
return _value;
}
// Get the current value of key and value
// in the format of "key=value"
butil::StringPiece key_and_value(){
return butil::StringPiece(_sp.field(), _sp.length());
}
// Move splitter forward.
QuerySplitter& operator++() {
++_sp;
_is_split = false;
return *this;
}
QuerySplitter operator++(int) {
QuerySplitter tmp = *this;
operator++();
return tmp;
}
operator const void*() const { return _sp; }
private:
void split();
private:
butil::StringSplitter _sp;
butil::StringPiece _key;
butil::StringPiece _value;
bool _is_split;
inline QuerySplitter(const butil::StringPiece &sp)
: KeyValuePairsSplitter(sp, '=', '&')
{}
};
// A class to remove some specific keys in a query string,
......@@ -266,8 +217,8 @@ class QueryRemover {
public:
QueryRemover(const std::string* str);
const butil::StringPiece& key() { return _qs.key();}
const butil::StringPiece& value() { return _qs.value(); }
butil::StringPiece key() { return _qs.key();}
butil::StringPiece value() { return _qs.value(); }
butil::StringPiece key_and_value() { return _qs.key_and_value(); }
// Move splitter forward.
......
......@@ -22,6 +22,7 @@
#include <stdlib.h>
#include <stdint.h>
#include "butil/strings/string_piece.h"
// It's common to encode data into strings separated by special characters
// and decode them back, but functions such as `split_string' have to modify
......@@ -65,7 +66,10 @@ public:
// if str_end is not NULL.
inline StringSplitter(const char* str_begin, const char* str_end,
char separator,
EmptyFieldAction = SKIP_EMPTY_FIELD);
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// `input' may contain embedded '\0' characters and the separator can be '\0'.
inline StringSplitter(const StringPiece& input, char separator,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// Move splitter forward.
inline StringSplitter& operator++();
......@@ -78,6 +82,7 @@ public:
// not be '\0' because we don't modify `input'.
inline const char* field() const;
inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise.
......@@ -132,6 +137,7 @@ public:
// not be '\0' because we don't modify `input'.
inline const char* field() const;
inline size_t length() const;
inline StringPiece field_sp() const;
// Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise.
......@@ -159,6 +165,77 @@ private:
const EmptyFieldAction _empty_field_action;
};
// Split query in the format according to the given delimiters.
// This class can also handle some exceptional cases.
// 1. consecutive key_value_pair_delimiter are omitted, for example,
// suppose key_value_delimiter is '=' and key_value_pair_delimiter
// is '&', then 'k1=v1&&&k2=v2' is normalized to 'k1=v1&k2=v2'.
// 2. key or value can be empty or both can be empty.
// 3. consecutive key_value_delimiter are not omitted, for example,
// suppose input is 'k1===v2' and key_value_delimiter is '=', then
// key() returns 'k1', value() returns '==v2'.
// Iterates over the key/value pairs of a string without copying or modifying
// it. Pairs are separated by `key_value_pair_delimiter'; inside a pair the
// FIRST `key_value_delimiter' separates the key from the value.
// All returned StringPieces point into the caller's buffer, which must stay
// alive while they are in use.
class KeyValuePairsSplitter {
public:
    // Splits the range starting at `str_begin'. `str_end' is forwarded to the
    // underlying StringSplitter; the constructor below passes NULL for a
    // NUL-terminated string.
    inline KeyValuePairsSplitter(const char* str_begin,
                                 const char* str_end,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : _sp(str_begin, str_end, key_value_pair_delimiter)
        , _delim_pos(StringPiece::npos)
        , _key_value_delim(key_value_delimiter) {
        // Locate the delimiter of the first pair right away so that
        // key()/value() are usable immediately after construction.
        UpdateDelimiterPosition();
    }

    // Splits a string given only by its beginning (no explicit end pointer).
    inline KeyValuePairsSplitter(const char* str_begin,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : KeyValuePairsSplitter(str_begin, NULL,
                key_value_delimiter, key_value_pair_delimiter) {}

    // Splits the bytes referenced by `sp'.
    inline KeyValuePairsSplitter(const StringPiece &sp,
                                 char key_value_delimiter,
                                 char key_value_pair_delimiter)
        : KeyValuePairsSplitter(sp.begin(), sp.end(),
                key_value_delimiter, key_value_pair_delimiter) {}

    // Key of the current pair: everything before the first key/value
    // delimiter, or the whole pair when it contains no delimiter.
    inline StringPiece key() const {
        return key_and_value().substr(0, _delim_pos);
    }

    // Value of the current pair: everything after the first key/value
    // delimiter, or an empty piece when the pair contains no delimiter.
    inline StringPiece value() const {
        const StringPiece pair = key_and_value();
        // _delim_pos == pair.length() encodes "no delimiter"; start at the
        // end explicitly instead of asking substr() for an offset that is
        // one past the end.
        const StringPiece::size_type value_pos =
            (_delim_pos < pair.length()) ? _delim_pos + 1 : pair.length();
        return pair.substr(value_pos);
    }

    // Get the current value of key and value
    // in the format of "key=value"
    inline StringPiece key_and_value() const {
        return _sp.field_sp();
    }

    // Move splitter forward.
    inline KeyValuePairsSplitter& operator++() {
        ++_sp;
        UpdateDelimiterPosition();
        return *this;
    }

    // Post-increment: advances this splitter and returns a copy of its
    // state before the advance.
    inline KeyValuePairsSplitter operator++(int) {
        KeyValuePairsSplitter tmp = *this;
        operator++();
        return tmp;
    }

    // Forwards the underlying StringSplitter's conversion so that the
    // splitter can be tested in a boolean context (`while (splitter)').
    inline operator const void*() const { return _sp; }

private:
    // Recomputes _delim_pos for the pair the splitter currently points at.
    inline void UpdateDelimiterPosition();

private:
    StringSplitter _sp;                 // yields one "key=value" pair per field
    StringPiece::size_type _delim_pos;  // offset of the key/value delimiter in
                                        // the current pair, or the pair's
                                        // length when there is none
    const char _key_value_delim;
};
} // namespace butil
#include "butil/string_splitter_inl.h"
......
......@@ -22,15 +22,6 @@
namespace butil {
// Constructs a splitter over `str' without an explicit end pointer
// (_str_tail is NULL, so not_end() stops at the first '\0').
// `sep' is the field separator and `action' is stored as the empty-field
// policy. init() then positions the splitter on the first field.
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: _head(str)
, _str_tail(NULL)
, _sep(sep)
, _empty_field_action(action) {
init();
}
StringSplitter::StringSplitter(const char* str_begin,
const char* str_end,
const char sep,
......@@ -42,6 +33,14 @@ StringSplitter::StringSplitter(const char* str_begin,
init();
}
// Splits `str' without an explicit end pointer: delegates to the
// (str_begin, str_end) constructor with a NULL `str_end'.
StringSplitter::StringSplitter(const char* str, char sep,
EmptyFieldAction action)
: StringSplitter(str, NULL, sep, action) {}
// Splits the bytes referenced by `input': delegates to the
// (str_begin, str_end) constructor with an explicit end pointer
// (data() + length()) instead of relying on a terminating '\0'.
StringSplitter::StringSplitter(const StringPiece& input, char sep,
EmptyFieldAction action)
: StringSplitter(input.data(), input.data() + input.length(), sep, action) {}
void StringSplitter::init() {
// Find the starting _head and _tail.
if (__builtin_expect(_head != NULL, 1)) {
......@@ -86,6 +85,10 @@ size_t StringSplitter::length() const {
return static_cast<size_t>(_tail - _head);
}
// Returns the current field as a StringPiece built from field() and length().
// The piece refers to the original input; nothing is copied.
StringPiece StringSplitter::field_sp() const {
    const char* const field_begin = field();
    const size_t field_len = length();
    return StringPiece(field_begin, field_len);
}
// Tells whether `p' is still inside the input.
// With an explicit end pointer the check is a pointer comparison; without
// one (_str_tail is NULL) the input ends at the first '\0'.
bool StringSplitter::not_end(const char* p) const {
    if (_str_tail != NULL) {
        return p != _str_tail;
    }
    return *p != '\0';
}
......@@ -233,6 +236,10 @@ size_t StringMultiSplitter::length() const {
return static_cast<size_t>(_tail - _head);
}
// Returns the current field as a StringPiece built from field() and length().
// The piece refers to the original input; nothing is copied.
StringPiece StringMultiSplitter::field_sp() const {
    const char* const field_begin = field();
    const size_t field_len = length();
    return StringPiece(field_begin, field_len);
}
// Tells whether `p' is still inside the input.
// With an explicit end pointer the check is a pointer comparison; without
// one (_str_tail is NULL) the input ends at the first '\0'.
bool StringMultiSplitter::not_end(const char* p) const {
    if (_str_tail != NULL) {
        return p != _str_tail;
    }
    return *p != '\0';
}
......@@ -309,6 +316,14 @@ int StringMultiSplitter::to_double(double* pv) const {
return (endptr == field() + length()) ? 0 : -1;
}
void KeyValuePairsSplitter::UpdateDelimiterPosition() {
const StringPiece key_value_pair(key_and_value());
_delim_pos = key_value_pair.find(_key_value_delim);
if (_delim_pos == StringPiece::npos) {
_delim_pos = key_value_pair.length();
}
}
} // namespace butil
#endif // BUTIL_STRING_SPLITTER_INL_H
......@@ -319,6 +319,71 @@ TEST_F(StringSplitterTest, split_limit_len) {
++ss2;
ASSERT_FALSE(ss2);
butil::StringPiece sp(str, 5);
// Allows using '\0' as separator
butil::StringSplitter ss3(sp, '\0');
ASSERT_TRUE(ss3);
ASSERT_EQ(3ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "1\t1", ss3.length()));
++ss3;
ASSERT_TRUE(ss3);
ASSERT_EQ(1ul, ss3.length());
ASSERT_FALSE(strncmp(ss3.field(), "3", ss3.length()));
++ss3;
ASSERT_FALSE(ss3);
}
TEST_F(StringSplitterTest, key_value_pairs_splitter_sanity) {
std::string kvstr = "key1=value1&&&key2=value2&key3=value3&===&key4=&=&=value5";
for (int i = 0 ; i < 3; ++i) {
// Test three constructors
butil::KeyValuePairsSplitter* psplitter = NULL;
if (i == 0) {
psplitter = new butil::KeyValuePairsSplitter(kvstr, '=', '&');
} else if (i == 1) {
psplitter = new butil::KeyValuePairsSplitter(
kvstr.data(), kvstr.data() + kvstr.size(), '=', '&');
} else if (i == 2) {
psplitter = new butil::KeyValuePairsSplitter(kvstr.c_str(), '=', '&');
}
butil::KeyValuePairsSplitter& splitter = *psplitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key1");
ASSERT_EQ(splitter.value(), "value1");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key2");
ASSERT_EQ(splitter.value(), "value2");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key3");
ASSERT_EQ(splitter.value(), "value3");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "==");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "key4");
ASSERT_EQ(splitter.value(), "");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "");
++splitter;
ASSERT_TRUE(splitter);
ASSERT_EQ(splitter.key(), "");
ASSERT_EQ(splitter.value(), "value5");
++splitter;
ASSERT_FALSE(splitter);
delete psplitter;
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment