Unverified Commit 1c73d2a3 authored by Ge Jun's avatar Ge Jun Committed by GitHub

Merge pull request #567 from wanglun/support_null_separator

string_splitter supports embedded '\0' characters and separator can be '\0'
parents dde83f0c cd02be84
......@@ -400,7 +400,7 @@ static void LoadSymbols() {
size_t line_len = 0;
ssize_t nr = 0;
while ((nr = getline(&line, &line_len, fp.get())) != -1) {
butil::StringSplitter sp(line, line + line_len, ' ');
butil::StringSplitter sp(line, line + nr, ' ');
if (sp == NULL) {
continue;
}
......
......@@ -61,6 +61,8 @@ public:
// length() field() will be skipped.
inline StringSplitter(const char* input, char separator,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// If str_end is not NULL, the input may contain embedded '\0' characters
// and the separator itself may be '\0'.
inline StringSplitter(const char* str_begin, const char* str_end,
char separator,
EmptyFieldAction = SKIP_EMPTY_FIELD);
......@@ -113,6 +115,8 @@ public:
// longer than this utility.
inline StringMultiSplitter(const char* input, const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// If str_end is not NULL, the input may contain embedded '\0' characters.
// NOTE: `separators` itself cannot contain an embedded '\0' character.
inline StringMultiSplitter(const char* str_begin, const char* str_end,
const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
......
......@@ -87,7 +87,7 @@ size_t StringSplitter::length() const {
}
bool StringSplitter::not_end(const char* p) const {
return *p && p != _str_tail;
return (_str_tail == NULL) ? *p : (p != _str_tail);
}
int StringSplitter::to_int8(int8_t* pv) const {
......@@ -234,7 +234,7 @@ size_t StringMultiSplitter::length() const {
}
bool StringMultiSplitter::not_end(const char* p) const {
return *p && p != _str_tail;
return (_str_tail == NULL) ? *p : (p != _str_tail);
}
int StringMultiSplitter::to_int8(int8_t* pv) const {
......
......@@ -165,7 +165,7 @@ TEST_F(StringSplitterTest, site_id_as_example) {
}
TEST_F(StringSplitterTest, number_list) {
const char* str = " 123,,12,1, 21 4321";
const char* str = " 123,,12,1, 21 4321\00056";
butil::StringMultiSplitter ss(str, ", ");
ASSERT_TRUE(ss);
ASSERT_EQ(3ul, ss.length());
......@@ -195,6 +195,38 @@ TEST_F(StringSplitterTest, number_list) {
ASSERT_FALSE(ss);
ASSERT_EQ(0ul, ss.length());
ASSERT_EQ(ss.field(), str + strlen(str));
// contains embedded '\0'
const size_t str_len = 23;
butil::StringMultiSplitter ss2(str, str + str_len, ", ");
ASSERT_TRUE(ss2);
ASSERT_EQ(3ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "123", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(2ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "12", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(1ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "1", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(2ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "21", ss2.length()));
ss2++;
ASSERT_TRUE(ss2);
ASSERT_EQ(7ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "4321\00056", ss2.length()));
++ss2;
ASSERT_FALSE(ss2);
ASSERT_EQ(0ul, ss2.length());
ASSERT_EQ(ss2.field(), str + str_len);
}
TEST_F(StringSplitterTest, cast_type) {
......@@ -258,7 +290,7 @@ TEST_F(StringSplitterTest, cast_type) {
}
TEST_F(StringSplitterTest, split_limit_len) {
const char* str = "1\t123\t111\t1\t10\t11\t1.3\t3.1415926";
const char* str = "1\t1\0003\t111\t1\t10\t11\t1.3\t3.1415926";
butil::StringSplitter ss(str, str + 5, '\t');
ASSERT_TRUE(ss);
......@@ -268,10 +300,25 @@ TEST_F(StringSplitterTest, split_limit_len) {
++ss;
ASSERT_TRUE(ss);
ASSERT_EQ(3ul, ss.length());
ASSERT_FALSE(strncmp(ss.field(), "123", ss.length()));
ASSERT_FALSE(strncmp(ss.field(), "1\0003", ss.length()));
++ss;
ASSERT_FALSE(ss);
// Allows using '\0' as separator
butil::StringSplitter ss2(str, str + 5, '\0');
ASSERT_TRUE(ss2);
ASSERT_EQ(3ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "1\t1", ss2.length()));
++ss2;
ASSERT_TRUE(ss2);
ASSERT_EQ(1ul, ss2.length());
ASSERT_FALSE(strncmp(ss2.field(), "3", ss2.length()));
++ss2;
ASSERT_FALSE(ss2);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment