Commit 1c73d2a3 (unverified), authored by Ge Jun, committed by GitHub

Merge pull request #567 from wanglun/support_null_separator

string_splitter supports embedded '\0' characters and separator can be '\0'
parents dde83f0c cd02be84
...@@ -400,7 +400,7 @@ static void LoadSymbols() { ...@@ -400,7 +400,7 @@ static void LoadSymbols() {
size_t line_len = 0; size_t line_len = 0;
ssize_t nr = 0; ssize_t nr = 0;
while ((nr = getline(&line, &line_len, fp.get())) != -1) { while ((nr = getline(&line, &line_len, fp.get())) != -1) {
butil::StringSplitter sp(line, line + line_len, ' '); butil::StringSplitter sp(line, line + nr, ' ');
if (sp == NULL) { if (sp == NULL) {
continue; continue;
} }
......
...@@ -61,6 +61,8 @@ public: ...@@ -61,6 +61,8 @@ public:
// length() field() will be skipped. // length() field() will be skipped.
inline StringSplitter(const char* input, char separator, inline StringSplitter(const char* input, char separator,
EmptyFieldAction action = SKIP_EMPTY_FIELD); EmptyFieldAction action = SKIP_EMPTY_FIELD);
// The input may contain embedded '\0' characters, and the separator itself
// may be '\0', provided that str_end is not NULL.
inline StringSplitter(const char* str_begin, const char* str_end, inline StringSplitter(const char* str_begin, const char* str_end,
char separator, char separator,
EmptyFieldAction = SKIP_EMPTY_FIELD); EmptyFieldAction = SKIP_EMPTY_FIELD);
...@@ -113,6 +115,8 @@ public: ...@@ -113,6 +115,8 @@ public:
// longer than this utility. // longer than this utility.
inline StringMultiSplitter(const char* input, const char* separators, inline StringMultiSplitter(const char* input, const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD); EmptyFieldAction action = SKIP_EMPTY_FIELD);
// The input may contain embedded '\0' characters, provided that str_end is not NULL.
// NOTE: `separators` cannot contain embedded '\0' characters.
inline StringMultiSplitter(const char* str_begin, const char* str_end, inline StringMultiSplitter(const char* str_begin, const char* str_end,
const char* separators, const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD); EmptyFieldAction action = SKIP_EMPTY_FIELD);
......
...@@ -87,7 +87,7 @@ size_t StringSplitter::length() const { ...@@ -87,7 +87,7 @@ size_t StringSplitter::length() const {
} }
bool StringSplitter::not_end(const char* p) const { bool StringSplitter::not_end(const char* p) const {
return *p && p != _str_tail; return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
int StringSplitter::to_int8(int8_t* pv) const { int StringSplitter::to_int8(int8_t* pv) const {
...@@ -234,7 +234,7 @@ size_t StringMultiSplitter::length() const { ...@@ -234,7 +234,7 @@ size_t StringMultiSplitter::length() const {
} }
bool StringMultiSplitter::not_end(const char* p) const { bool StringMultiSplitter::not_end(const char* p) const {
return *p && p != _str_tail; return (_str_tail == NULL) ? *p : (p != _str_tail);
} }
int StringMultiSplitter::to_int8(int8_t* pv) const { int StringMultiSplitter::to_int8(int8_t* pv) const {
......
...@@ -165,7 +165,7 @@ TEST_F(StringSplitterTest, site_id_as_example) { ...@@ -165,7 +165,7 @@ TEST_F(StringSplitterTest, site_id_as_example) {
} }
TEST_F(StringSplitterTest, number_list) { TEST_F(StringSplitterTest, number_list) {
const char* str = " 123,,12,1, 21 4321"; const char* str = " 123,,12,1, 21 4321\00056";
butil::StringMultiSplitter ss(str, ", "); butil::StringMultiSplitter ss(str, ", ");
ASSERT_TRUE(ss); ASSERT_TRUE(ss);
ASSERT_EQ(3ul, ss.length()); ASSERT_EQ(3ul, ss.length());
...@@ -195,6 +195,38 @@ TEST_F(StringSplitterTest, number_list) { ...@@ -195,6 +195,38 @@ TEST_F(StringSplitterTest, number_list) {
ASSERT_FALSE(ss); ASSERT_FALSE(ss);
ASSERT_EQ(0ul, ss.length()); ASSERT_EQ(0ul, ss.length());
ASSERT_EQ(ss.field(), str + strlen(str)); ASSERT_EQ(ss.field(), str + strlen(str));
// contains embedded '\0'
const size_t str_len = 23;
butil::StringMultiSplitter ss2(str, str + str_len, ", ");
ASSERT_TRUE(ss2);
ASSERT_EQ(3ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "123", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(2ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "12", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(1ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "1", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(2ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "21", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(7ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "4321\00056", ss2.length()));
++ss2;
ASSERT_FALSE(ss2);
ASSERT_EQ(0ul, ss2.length());
ASSERT_EQ(ss2.field(), str + str_len);
} }
TEST_F(StringSplitterTest, cast_type) { TEST_F(StringSplitterTest, cast_type) {
...@@ -258,7 +290,7 @@ TEST_F(StringSplitterTest, cast_type) { ...@@ -258,7 +290,7 @@ TEST_F(StringSplitterTest, cast_type) {
} }
TEST_F(StringSplitterTest, split_limit_len) { TEST_F(StringSplitterTest, split_limit_len) {
const char* str = "1\t123\t111\t1\t10\t11\t1.3\t3.1415926"; const char* str = "1\t1\0003\t111\t1\t10\t11\t1.3\t3.1415926";
butil::StringSplitter ss(str, str + 5, '\t'); butil::StringSplitter ss(str, str + 5, '\t');
ASSERT_TRUE(ss); ASSERT_TRUE(ss);
...@@ -268,10 +300,25 @@ TEST_F(StringSplitterTest, split_limit_len) { ...@@ -268,10 +300,25 @@ TEST_F(StringSplitterTest, split_limit_len) {
++ss; ++ss;
ASSERT_TRUE(ss); ASSERT_TRUE(ss);
ASSERT_EQ(3ul, ss.length()); ASSERT_EQ(3ul, ss.length());
ASSERT_FALSE(strncmp(ss.field(), "123", ss.length())); ASSERT_FALSE(strncmp(ss.field(), "1\0003", ss.length()));
++ss; ++ss;
ASSERT_FALSE(ss); ASSERT_FALSE(ss);
// Allows using '\0' as separator
butil::StringSplitter ss2(str, str + 5, '\0');
ASSERT_TRUE(ss2);
ASSERT_EQ(3ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "1\t1", ss2.length()));
++ss2;
ASSERT_TRUE(ss2);
ASSERT_EQ(1ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "3", ss2.length()));
++ss2;
ASSERT_FALSE(ss2);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment