// Copyright (c) 2011 Baidu, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: Ge,Jun (gejun@baidu.com)
// Date: Mon. Apr. 18 19:52:34 CST 2011
// Iteratively split a string by one or multiple separators.
#ifndef BAIDU_BASE_STRING_SPLITTER_H
#define BAIDU_BASE_STRING_SPLITTER_H
#include <stdlib.h>
#include <stdint.h>
// It's common to encode data into strings separated by special characters
// and decode them back, but functions such as `split_string' have to modify
// the input string, which is bad. If we parse the string from scratch, the
// code will be filled with pointer operations and become hard to understand.
//
// What we want is:
// - Scan the string once: just do simple things efficiently.
// - Do not modify input string: Changing input is bad, it may bring hidden
// bugs, concurrency issues and non-const propagations.
// - Split the string in-place without additional buffer/array.
//
// StringSplitter does meet these requirements.
// Usage:
// const char* the_string_to_split = ...;
// for (StringSplitter s(the_string_to_split, '\t'); s; ++s) {
// printf("%.*s\n", (int)s.length(), s.field());
// }
//
// "s" behaves as an iterator and evaluates to true before ending.
// "s.field()" and "s.length()" are address and length of current field
// respectively. Notice that "s.field()" may not end with '\0' because
// we don't modify input. You can copy the field to a dedicated buffer
// or apply a function supporting length.
namespace butil {
// Policy applied by the splitters to zero-length fields, i.e. the empty
// text between two adjacent separators.
enum EmptyFieldAction {
    SKIP_EMPTY_FIELD = 0,   // zero-length fields are skipped
    ALLOW_EMPTY_FIELD = 1   // zero-length fields are not skipped
};
// Split a string by a single separator character.
// The splitter behaves as an iterator over the fields of the input and never
// modifies the input, so a field is generally not terminated by '\0'.
// Member functions are only declared here; their definitions are pulled in
// from butil/string_splitter_inl.h at the end of this header.
class StringSplitter {
public:
    // Split `input' with `separator'. If `action' is SKIP_EMPTY_FIELD, zero-
    // length() field() will be skipped.
    // NOTE(review): `input' is presumably a NUL-terminated C string; confirm
    // against the definition in string_splitter_inl.h.
    inline StringSplitter(const char* input, char separator,
                          EmptyFieldAction action = SKIP_EMPTY_FIELD);

    // Same as above, but the text to split is delimited by `str_begin' and
    // `str_end' instead of a terminator.
    // NOTE(review): presumably the half-open range [str_begin, str_end);
    // confirm against the definition in string_splitter_inl.h.
    inline StringSplitter(const char* str_begin, const char* str_end,
                          char separator,
                          EmptyFieldAction action = SKIP_EMPTY_FIELD);

    // Move splitter forward.
    // The prefix form returns a reference to a splitter, the postfix form
    // returns a splitter by value (presumably the state before advancing).
    inline StringSplitter& operator++();
    inline StringSplitter operator++(int);

    // True iff field() is valid. Lets the splitter be used directly as a
    // loop condition: `for (StringSplitter s(str, ','); s; ++s)'.
    inline operator const void*() const;

    // Beginning address and length of the field. *(field() + length()) may
    // not be '\0' because we don't modify `input'.
    inline const char* field() const;
    inline size_t length() const;

    // Cast field to specific type, and write the value into `pv'.
    // Returns 0 on success, -1 otherwise.
    // NOTE: If separator is a digit, casting functions always return -1.
    inline int to_int8(int8_t *pv) const;
    inline int to_uint8(uint8_t *pv) const;
    inline int to_int(int *pv) const;
    inline int to_uint(unsigned int *pv) const;
    inline int to_long(long *pv) const;
    inline int to_ulong(unsigned long *pv) const;
    inline int to_longlong(long long *pv) const;
    inline int to_ulonglong(unsigned long long *pv) const;
    inline int to_float(float *pv) const;
    inline int to_double(double *pv) const;

private:
    // NOTE(review): presumably tells whether `p' is still before the end of
    // the input; see string_splitter_inl.h.
    inline bool not_end(const char* p) const;
    // NOTE(review): presumably positions the splitter on the first field;
    // see string_splitter_inl.h.
    inline void init();

    // NOTE(review): presumably `_head'/`_tail' bound the current field and
    // `_str_tail' marks the end of the input (if one was given); confirm
    // against string_splitter_inl.h.
    const char* _head;
    const char* _tail;
    const char* _str_tail;
    // The two members below are const: a splitter can be copied but the
    // implicit copy assignment operator is deleted.
    const char _sep;
    const EmptyFieldAction _empty_field_action;
};
// Split a string with one of the separators.
// Any single character found in `separators' ends a field. The input is
// never modified, so a field is generally not terminated by '\0'.
// Member functions are only declared here; their definitions are pulled in
// from butil/string_splitter_inl.h at the end of this header.
class StringMultiSplitter {
public:
// Split `input' with one character of `separators'. If `action' is
// SKIP_EMPTY_FIELD, zero-length() field() will be skipped.
// NOTE: This utility stores pointer of `separators' directly rather than
// copying the content because this utility is intended to be used
// in ad-hoc manner where lifetime of `separators' is generally
// longer than this utility. The caller must therefore keep `separators'
// alive for as long as the splitter is used.
// NOTE(review): `input' and `separators' are presumably NUL-terminated C
// strings; confirm against string_splitter_inl.h.
inline StringMultiSplitter(const char* input, const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// Same as above, but the text to split is delimited by `str_begin' and
// `str_end' instead of a terminator.
// NOTE(review): presumably the half-open range [str_begin, str_end);
// confirm against string_splitter_inl.h.
inline StringMultiSplitter(const char* str_begin, const char* str_end,
const char* separators,
EmptyFieldAction action = SKIP_EMPTY_FIELD);
// Move splitter forward.
// The prefix form returns a reference to a splitter, the postfix form
// returns a splitter by value (presumably the state before advancing).
inline StringMultiSplitter& operator++();
inline StringMultiSplitter operator++(int);
// True iff field() is valid. Lets the splitter be used directly as a
// loop condition.
inline operator const void*() const;
// Beginning address and length of the field. *(field() + length()) may
// not be '\0' because we don't modify `input'.
inline const char* field() const;
inline size_t length() const;
// Cast field to specific type, and write the value into `pv'.
// Returns 0 on success, -1 otherwise.
// NOTE: If separators contains digit, casting functions always return -1.
inline int to_int8(int8_t *pv) const;
inline int to_uint8(uint8_t *pv) const;
inline int to_int(int *pv) const;
inline int to_uint(unsigned int *pv) const;
inline int to_long(long *pv) const;
inline int to_ulong(unsigned long *pv) const;
inline int to_longlong(long long *pv) const;
inline int to_ulonglong(unsigned long long *pv) const;
inline int to_float(float *pv) const;
inline int to_double(double *pv) const;
private:
// NOTE(review): presumably true iff `c' is one of the characters in `_seps';
// see string_splitter_inl.h.
inline bool is_sep(char c) const;
// NOTE(review): presumably tells whether `p' is still before the end of
// the input; see string_splitter_inl.h.
inline bool not_end(const char* p) const;
// NOTE(review): presumably positions the splitter on the first field;
// see string_splitter_inl.h.
inline void init();
// NOTE(review): presumably `_head'/`_tail' bound the current field and
// `_str_tail' marks the end of the input (if one was given); confirm
// against string_splitter_inl.h.
const char* _head;
const char* _tail;
const char* _str_tail;
// Borrowed pointer to the caller's separator set; not copied (see the
// constructor note above).
const char* const _seps;
// `_seps' and `_empty_field_action' are const: a splitter can be copied but
// the implicit copy assignment operator is deleted.
const EmptyFieldAction _empty_field_action;
};
} // namespace butil
#include "butil/string_splitter_inl.h"
#endif // BAIDU_BASE_STRING_SPLITTER_H