// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef KJ_STRING_H_
#define KJ_STRING_H_

#include <initializer_list>
#include "array.h"
#include <string.h>

namespace kj {

// Forward declarations of the string types declared in this header.
class StringPtr;
class String;

class StringTree;   // string-tree.h

// Our STL string SFINAE trick does not work with GCC 4.7, but it works with Clang and GCC 4.8, so
// we'll just preprocess it out if not supported.
#if __clang__ || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
#define KJ_COMPILER_SUPPORTS_STL_STRING_INTEROP 1
#endif

// =======================================================================================
// StringPtr -- A NUL-terminated ArrayPtr<const char> containing UTF-8 text.
//
// NUL bytes are allowed to appear before the end of the string.  The only requirement is that
// a NUL byte appear immediately after the last byte of the content.  This terminator byte is not
// counted in the string's size.

class StringPtr {
public:
  inline StringPtr(): content("", 1) {}
  inline StringPtr(decltype(nullptr)): content("", 1) {}
  inline StringPtr(const char* value): content(value, strlen(value) + 1) {}
Kenton Varda's avatar
Kenton Varda committed
54 55 56
  inline StringPtr(const char* value, size_t size): content(value, size + 1) {
    KJ_IREQUIRE(value[size] == '\0', "StringPtr must be NUL-terminated.");
  }
57
  inline StringPtr(const char* begin, const char* end): StringPtr(begin, end - begin) {}
Kenton Varda's avatar
Kenton Varda committed
58 59
  inline StringPtr(const String& value);

60 61 62 63 64 65 66 67 68 69 70 71 72 73
#if KJ_COMPILER_SUPPORTS_STL_STRING_INTEROP
  template <typename T, typename = decltype(instance<T>().c_str())>
  inline StringPtr(const T& t): StringPtr(t.c_str()) {}
  // Allow implicit conversion from any class that has a c_str() method (namely, std::string).
  // We use a template trick to detect std::string in order to avoid including the header for
  // those who don't want it.

  template <typename T, typename = decltype(instance<T>().c_str())>
  inline operator T() const { return cStr(); }
  // Allow implicit conversion to any class that has a c_str() method (namely, std::string).
  // We use a template trick to detect std::string in order to avoid including the header for
  // those who don't want it.
#endif

Kenton Varda's avatar
Kenton Varda committed
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
  inline operator ArrayPtr<const char>() const;
  inline ArrayPtr<const char> asArray() const;
  // Result does not include NUL terminator.

  inline const char* cStr() const { return content.begin(); }
  // Returns NUL-terminated string.

  inline size_t size() const { return content.size() - 1; }
  // Result does not include NUL terminator.

  inline char operator[](size_t index) const { return content[index]; }

  inline const char* begin() const { return content.begin(); }
  inline const char* end() const { return content.end() - 1; }

  inline bool operator==(decltype(nullptr)) const { return content.size() <= 1; }
  inline bool operator!=(decltype(nullptr)) const { return content.size() > 1; }

Kenton Varda's avatar
Kenton Varda committed
92 93 94 95 96 97
  inline bool operator==(const StringPtr& other) const;
  inline bool operator!=(const StringPtr& other) const { return !(*this == other); }
  inline bool operator< (const StringPtr& other) const;
  inline bool operator> (const StringPtr& other) const { return other < *this; }
  inline bool operator<=(const StringPtr& other) const { return !(other < *this); }
  inline bool operator>=(const StringPtr& other) const { return !(*this < other); }
Kenton Varda's avatar
Kenton Varda committed
98 99 100 101 102 103

  inline StringPtr slice(size_t start) const;
  inline ArrayPtr<const char> slice(size_t start, size_t end) const;
  // A string slice is only NUL-terminated if it is a suffix, so slice() has a one-parameter
  // version that assumes end = size().

104 105 106
  inline bool startsWith(const StringPtr& other) const;
  inline bool endsWith(const StringPtr& other) const;

107
  inline Maybe<size_t> findFirst(char c) const;
108
  inline Maybe<size_t> findLast(char c) const;
109

Kenton Varda's avatar
Kenton Varda committed
110 111 112 113 114 115 116 117
private:
  inline StringPtr(ArrayPtr<const char> content): content(content) {}

  ArrayPtr<const char> content;
};

// Allow a C string on the left-hand side of an (in)equality test against a StringPtr.  The C
// string is viewed as a StringPtr and the member comparison on `b` does the work.
inline bool operator==(const char* a, const StringPtr& b) {
  return b == StringPtr(a);
}
inline bool operator!=(const char* a, const StringPtr& b) {
  return b != StringPtr(a);
}

// =======================================================================================
// String -- A NUL-terminated Array<char> containing UTF-8 text.
//
// NUL bytes are allowed to appear before the end of the string.  The only requirement is that
// a NUL byte appear immediately after the last byte of the content.  This terminator byte is not
// counted in the string's size.
//
// To allocate a String, you must call kj::heapString().  We do not implement implicit copying to
// the heap because this hides potential inefficiency from the developer.

class String {
public:
  String() = default;
Kenton Varda's avatar
Kenton Varda committed
132 133 134
  inline String(decltype(nullptr)): content(nullptr) {}
  inline String(char* value, size_t size, const ArrayDisposer& disposer);
  // Does not copy.  `size` does not include NUL terminator, but `value` must be NUL-terminated.
135 136
  inline explicit String(Array<char> buffer);
  // Does not copy.  Requires `buffer` ends with `\0`.
137

Kenton Varda's avatar
Kenton Varda committed
138 139
  inline operator ArrayPtr<char>();
  inline operator ArrayPtr<const char>() const;
140 141
  inline ArrayPtr<char> asArray();
  inline ArrayPtr<const char> asArray() const;
Kenton Varda's avatar
Kenton Varda committed
142 143 144 145 146 147 148 149 150
  // Result does not include NUL terminator.

  inline const char* cStr() const;

  inline size_t size() const;
  // Result does not include NUL terminator.

  inline char operator[](size_t index) const;
  inline char& operator[](size_t index);
151

Kenton Varda's avatar
Kenton Varda committed
152 153 154 155
  inline char* begin();
  inline char* end();
  inline const char* begin() const;
  inline const char* end() const;
156

Kenton Varda's avatar
Kenton Varda committed
157 158 159
  inline bool operator==(decltype(nullptr)) const { return content.size() <= 1; }
  inline bool operator!=(decltype(nullptr)) const { return content.size() > 1; }

Kenton Varda's avatar
Kenton Varda committed
160
  inline bool operator==(const StringPtr& other) const { return StringPtr(*this) == other; }
161 162 163 164 165
  inline bool operator!=(const StringPtr& other) const { return StringPtr(*this) != other; }
  inline bool operator< (const StringPtr& other) const { return StringPtr(*this) <  other; }
  inline bool operator> (const StringPtr& other) const { return StringPtr(*this) >  other; }
  inline bool operator<=(const StringPtr& other) const { return StringPtr(*this) <= other; }
  inline bool operator>=(const StringPtr& other) const { return StringPtr(*this) >= other; }
166

167 168 169
  inline bool startsWith(const StringPtr& other) const { return StringPtr(*this).startsWith(other);}
  inline bool endsWith(const StringPtr& other) const { return StringPtr(*this).endsWith(other); }

170 171 172 173 174
  inline StringPtr slice(size_t start) const { return StringPtr(*this).slice(start); }
  inline ArrayPtr<const char> slice(size_t start, size_t end) const {
    return StringPtr(*this).slice(start, end);
  }

175
  inline Maybe<size_t> findFirst(char c) const { return StringPtr(*this).findFirst(c); }
176
  inline Maybe<size_t> findLast(char c) const { return StringPtr(*this).findLast(c); }
177

178 179 180 181
private:
  Array<char> content;
};

// Allow a C string on the left-hand side of an (in)equality test against a String.  The C string
// is viewed as a StringPtr and the member comparison on `b` does the work.
inline bool operator==(const char* a, const String& b) {
  return b == StringPtr(a);
}
inline bool operator!=(const char* a, const String& b) {
  return b != StringPtr(a);
}

String heapString(size_t size);
// Allocate a String of the given size on the heap, not including NUL terminator.  The NUL
// terminator will be initialized automatically but the rest of the content is not initialized.

String heapString(const char* value);
String heapString(const char* value, size_t size);
String heapString(StringPtr value);
String heapString(const String& value);
String heapString(ArrayPtr<const char> value);
// Allocates a copy of the given value on the heap.
// Allocates a copy of the given value on the heap.

// =======================================================================================
// Magic str() function which transforms parameters to text and concatenates them into one big
// String.

namespace _ {  // private

inline size_t sum(std::initializer_list<size_t> nums) {
  // Adds up every value in `nums`.  An empty list yields 0.
  size_t total = 0;
  for (const size_t* it = nums.begin(); it != nums.end(); ++it) {
    total += *it;
  }
  return total;
}

inline char* fill(char* ptr) {
  // Terminal case of the variadic fill() recursion: there is nothing left to copy, so the
  // current output position is returned unchanged.
  return ptr;
}

template <typename... Rest>
char* fill(char* __restrict__ target, const StringTree& first, Rest&&... rest);
// Make str() work with stringifiers that return StringTree by patching fill().
//
// Defined in string-tree.h.
//
// `target` is the output position, `first` the StringTree piece to write next, and `rest` the
// pieces that follow it.  Presumably, like the other fill() overloads in this header, it returns
// the position just past the last byte written -- confirm against string-tree.h.

template <typename First, typename... Rest>
char* fill(char* __restrict__ target, const First& first, Rest&&... rest) {
  // Copies each element of `first` to `target`, then recurses on the remaining containers.  The
  // recursion ends at the single-argument fill(), so the result is the position just past the
  // last character written.
  auto cursor = first.begin();
  auto limit = first.end();
  for (; cursor != limit;) {
    *target++ = *cursor++;
  }
  return fill(target, kj::fwd<Rest>(rest)...);
}

template <typename... Params>
String concat(Params&&... params) {
  // Joins any number of containers into one String.  Each container must be iterable, have a
  // size() method, and hold elements convertible to `char`.

  // Size the destination once, from the sum of all input sizes, then copy every piece into it.
  const size_t totalSize = sum({params.size()...});
  String joined = heapString(totalSize);
  fill(joined.begin(), kj::fwd<Params>(params)...);
  return joined;
}

inline String concat(String&& arr) {
  // Single-String case: hand the argument back by move rather than building a copy.
  return String(kj::mv(arr));
}

struct Stringifier {
  // This is a dummy type with only one instance: STR (below).  To make an arbitrary type
  // stringifiable, define `operator*(Stringifier, T)` to return an iterable container of `char`.
  // The container type must have a `size()` method.  Be sure to declare the operator in the same
  // namespace as `T` **or** in the global scope.
  //
  // A more usual way to accomplish what we're doing here would be to require that you define
  // a function like `toString(T)` and then rely on argument-dependent lookup.  However, this has
  // the problem that it pollutes other people's namespaces and even the global namespace.  For
  // example, some other project may already have functions called `toString` which do something
  // different.  Declaring `operator*` with `Stringifier` as the left operand cannot conflict with
  // anything.

  inline ArrayPtr<const char> operator*(ArrayPtr<const char> s) const { return s; }
  inline ArrayPtr<const char> operator*(const Array<const char>& s) const { return s; }
  inline ArrayPtr<const char> operator*(const Array<char>& s) const { return s; }
  template<size_t n>
  inline ArrayPtr<const char> operator*(const CappedArray<char, n>& s) const { return s; }
  inline ArrayPtr<const char> operator*(const char* s) const { return arrayPtr(s, strlen(s)); }
  inline ArrayPtr<const char> operator*(const String& s) const { return s.asArray(); }
  inline ArrayPtr<const char> operator*(const StringPtr& s) const { return s.asArray(); }

Kenton Varda's avatar
Kenton Varda committed
264 265 266
  inline Range<char> operator*(const Range<char>& r) const { return r; }
  inline Repeat<char> operator*(const Repeat<char>& r) const { return r; }

Kenton Varda's avatar
Kenton Varda committed
267 268 269 270 271 272 273 274
  inline FixedArray<char, 1> operator*(char c) const {
    FixedArray<char, 1> result;
    result[0] = c;
    return result;
  }

  StringPtr operator*(bool b) const;

275 276 277 278 279 280 281 282 283 284
  CappedArray<char, 5> operator*(signed char i) const;
  CappedArray<char, 5> operator*(unsigned char i) const;
  CappedArray<char, sizeof(short) * 3 + 2> operator*(short i) const;
  CappedArray<char, sizeof(unsigned short) * 3 + 2> operator*(unsigned short i) const;
  CappedArray<char, sizeof(int) * 3 + 2> operator*(int i) const;
  CappedArray<char, sizeof(unsigned int) * 3 + 2> operator*(unsigned int i) const;
  CappedArray<char, sizeof(long) * 3 + 2> operator*(long i) const;
  CappedArray<char, sizeof(unsigned long) * 3 + 2> operator*(unsigned long i) const;
  CappedArray<char, sizeof(long long) * 3 + 2> operator*(long long i) const;
  CappedArray<char, sizeof(unsigned long long) * 3 + 2> operator*(unsigned long long i) const;
Kenton Varda's avatar
Kenton Varda committed
285 286
  CappedArray<char, 24> operator*(float f) const;
  CappedArray<char, 32> operator*(double f) const;
287
  CappedArray<char, sizeof(const void*) * 3 + 2> operator*(const void* s) const;
Kenton Varda's avatar
Kenton Varda committed
288 289

  template <typename T>
290
  String operator*(ArrayPtr<T> arr) const;
Kenton Varda's avatar
Kenton Varda committed
291
  template <typename T>
292
  String operator*(const Array<T>& arr) const;
Kenton Varda's avatar
Kenton Varda committed
293 294 295
};
static constexpr Stringifier STR = Stringifier();

}  // namespace _ (private)

template <typename T>
299
auto toCharSequence(T&& value) -> decltype(_::STR * kj::fwd<T>(value)) {
Kenton Varda's avatar
Kenton Varda committed
300 301 302 303 304 305 306 307
  // Returns an iterable of chars that represent a textual representation of the value, suitable
  // for debugging.
  //
  // Most users should use str() instead, but toCharSequence() may occasionally be useful to avoid
  // heap allocation overhead that str() implies.
  //
  // To specialize this function for your type, see KJ_STRINGIFY.

308
  return _::STR * kj::fwd<T>(value);
Kenton Varda's avatar
Kenton Varda committed
309 310
}

// Overloads of hex() for each unsigned integer type.  Each returns a CappedArray<char> whose
// capacity is two characters per byte of the argument type, plus one.
// NOTE(review): presumably the result is the value's hexadecimal text; the definitions are not
// part of this header, so confirm against the implementation.
CappedArray<char, sizeof(unsigned char) * 2 + 1> hex(unsigned char i);
CappedArray<char, sizeof(unsigned short) * 2 + 1> hex(unsigned short i);
CappedArray<char, sizeof(unsigned int) * 2 + 1> hex(unsigned int i);
CappedArray<char, sizeof(unsigned long) * 2 + 1> hex(unsigned long i);
CappedArray<char, sizeof(unsigned long long) * 2 + 1> hex(unsigned long long i);

template <typename... Params>
String str(Params&&... params) {
  // Magic function which builds a string from a bunch of arbitrary values.  Example:
  //     str(1, " / ", 2, " = ", 0.5)
  // returns:
  //     "1 / 2 = 0.5"
  // To teach `str` how to stringify a type, see `Stringifier`.

  // Each parameter is first turned into a char sequence; the sequences are then concatenated.
  return _::concat(toCharSequence(kj::fwd<Params>(params))...);
}

inline String str(String&& s) {
  // Overload to prevent redundant allocation: an rvalue String is simply moved through.
  return kj::mv(s);
}

template <typename T>
String strArray(T&& arr, const char* delim) {
  // Stringifies every element of `arr` and joins the results, placing `delim` between
  // consecutive elements.  `arr` must support size() and operator[].

  size_t delimLen = strlen(delim);
  KJ_STACK_ARRAY(decltype(_::STR * arr[0]), pieces, arr.size(), 8, 32);

  // First pass: stringify each element and total up the output length, delimiters included.
  size_t size = 0;
  for (size_t i = 0; i < arr.size(); i++) {
    if (i > 0) size += delimLen;
    pieces[i] = _::STR * arr[i];
    size += pieces[i].size();
  }

  // Second pass: copy the delimiters and pieces into a String of exactly that length.
  String result = heapString(size);
  char* pos = result.begin();
  for (size_t i = 0; i < arr.size(); i++) {
    if (i > 0) {
      memcpy(pos, delim, delimLen);
      pos += delimLen;
    }
    pos = _::fill(pos, pieces[i]);
  }
  return result;
}

namespace _ {  // private

template <typename T>
357
inline String Stringifier::operator*(ArrayPtr<T> arr) const {
Kenton Varda's avatar
Kenton Varda committed
358 359 360 361
  return strArray(arr, ", ");
}

template <typename T>
362
inline String Stringifier::operator*(const Array<T>& arr) const {
Kenton Varda's avatar
Kenton Varda committed
363 364 365
  return strArray(arr, ", ");
}

}  // namespace _ (private)

#define KJ_STRINGIFY(...) operator*(::kj::_::Stringifier, __VA_ARGS__)
// Defines a stringifier for a custom type.  Example:
//
//    class Foo {...};
//    inline StringPtr KJ_STRINGIFY(const Foo& foo) { return foo.name(); }
//
// This allows Foo to be passed to str().
//
// The function should be declared either in the same namespace as the target type or in the global
// namespace.  It can return any type which is an iterable container of chars.

// =======================================================================================
// Inline implementation details.

inline StringPtr::StringPtr(const String& value)
    : content(value.cStr(), value.size() + 1) {}
// Views the text of `value`, including its NUL terminator.
//
// cStr() is used rather than begin() because a null String reports begin() == nullptr while
// size() == 0; that combination would produce a one-byte view over a null pointer, making cStr()
// return null and handing null to memcmp()/memchr().  String::cStr() substitutes "" for a null
// String, so the view is always backed by a real NUL byte.

inline StringPtr::operator ArrayPtr<const char>() const {
  // Exclude the trailing NUL so the result covers only the text.
  const size_t textLength = content.size() - 1;
  return content.slice(0, textLength);
}

inline ArrayPtr<const char> StringPtr::asArray() const {
  // View of the text only; the final byte of `content` (the NUL terminator) is left out.
  const size_t withoutTerminator = content.size() - 1;
  return content.slice(0, withoutTerminator);
}

inline bool StringPtr::operator==(const StringPtr& other) const {
  // Equal when the lengths match and the text bytes are identical.  The last byte of `content`
  // (the terminator) is left out of the memcmp().
  return content.size() == other.content.size() &&
      memcmp(content.begin(), other.content.begin(), content.size() - 1) == 0;
}

inline bool StringPtr::operator<(const StringPtr& other) const {
  // Byte-wise ordering via memcmp() over the common length (terminator included in both
  // lengths).  When the compared bytes tie, the shorter string orders first.
  const size_t mine = content.size();
  const size_t theirs = other.content.size();
  const bool shorter = mine < theirs;
  const size_t common = shorter ? mine : theirs;

  const int cmp = memcmp(content.begin(), other.content.begin(), common);
  if (cmp != 0) {
    return cmp < 0;
  }
  return shorter;
}

inline StringPtr StringPtr::slice(size_t start) const {
  // Suffix slice: runs to the end of `content`, so the terminator is kept and the result is
  // still a valid StringPtr.
  ArrayPtr<const char> tail = content.slice(start, content.size());
  return StringPtr(tail);
}
inline ArrayPtr<const char> StringPtr::slice(size_t start, size_t end) const {
  // General slice [start, end) of `content`; returned as a plain ArrayPtr, not a StringPtr.
  ArrayPtr<const char> piece = content.slice(start, end);
  return piece;
}

inline bool StringPtr::startsWith(const StringPtr& other) const {
  // A prefix can never be longer than the string it prefixes.
  if (other.content.size() > content.size()) {
    return false;
  }
  // Compare only the text of `other` (its terminator excluded) against our leading bytes.
  return memcmp(content.begin(), other.content.begin(), other.size()) == 0;
}
inline bool StringPtr::endsWith(const StringPtr& other) const {
  // A suffix can never be longer than the string it ends.
  if (other.content.size() > content.size()) {
    return false;
  }
  // Compare the text of `other` against our last other.size() bytes before the terminator.
  const char* tail = end() - other.size();
  return memcmp(tail, other.content.begin(), other.size()) == 0;
}

inline Maybe<size_t> StringPtr::findFirst(char c) const {
  // memchr() searches the text only (size() bytes, terminator excluded).  A hit is converted
  // into an index relative to the start of the string; a miss yields nullptr.
  const char* hit = reinterpret_cast<const char*>(memchr(content.begin(), c, size()));
  if (hit != nullptr) {
    return hit - content.begin();
  }
  return nullptr;
}

inline Maybe<size_t> StringPtr::findLast(char c) const {
  // Scans backward from the last text byte.  `i` stays one above the index being examined so
  // that the unsigned counter never has to go below zero.  Returns nullptr if `c` is absent.
  for (size_t i = size(); i > 0; --i) {
    if (content[i-1] == c) {
      return i-1;
    }
  }
  return nullptr;
}

inline String::operator ArrayPtr<char>() {
  // A null String converts to a null ArrayPtr; otherwise the view drops the trailing NUL.
  if (content == nullptr) {
    return ArrayPtr<char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}
inline String::operator ArrayPtr<const char>() const {
  // A null String converts to a null ArrayPtr; otherwise the view drops the trailing NUL.
  if (content == nullptr) {
    return ArrayPtr<const char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}

inline ArrayPtr<char> String::asArray() {
  // A null String yields a null ArrayPtr; otherwise the view drops the trailing NUL.
  if (content == nullptr) {
    return ArrayPtr<char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}
inline ArrayPtr<const char> String::asArray() const {
  // A null String yields a null ArrayPtr; otherwise the view drops the trailing NUL.
  return content == nullptr ? ArrayPtr<const char>(nullptr) : content.slice(0, content.size() - 1);
}

inline const char* String::cStr() const {
  // Never returns null: a null String is reported as the empty C string.
  if (content == nullptr) {
    return "";
  }
  return content.begin();
}

inline size_t String::size() const {
  // Length of the text, not counting the NUL terminator; a null String has length 0.
  if (content == nullptr) {
    return 0;
  }
  return content.size() - 1;
}

// Element access forwards straight to the underlying array.  Unlike the other accessors there
// is no special case for a null String here.
inline char String::operator[](size_t index) const {
  return content[index];
}
inline char& String::operator[](size_t index) {
  return content[index];
}

// Iteration bounds over the text.  end() stops at the NUL terminator (one before the end of
// `content`).  For a null String both begin() and end() are nullptr.
inline char* String::begin() {
  if (content == nullptr) return nullptr;
  return content.begin();
}
inline char* String::end() {
  if (content == nullptr) return nullptr;
  return content.end() - 1;
}
inline const char* String::begin() const {
  if (content == nullptr) return nullptr;
  return content.begin();
}
inline const char* String::end() const {
  if (content == nullptr) return nullptr;
  return content.end() - 1;
}

inline String::String(char* value, size_t size, const ArrayDisposer& disposer)
    : content(value, size + 1, disposer) {
  // `content` covers size + 1 bytes so that it includes the terminator verified here.
  KJ_IREQUIRE(value[size] == '\0', "String must be NUL-terminated.");
}

// Takes over `buffer` by move.  The buffer must be non-empty and its last element must be the
// NUL terminator; KJ_IREQUIRE checks both conditions.
inline String::String(Array<char> buffer): content(kj::mv(buffer)) {
  KJ_IREQUIRE(content.size() > 0 && content.back() == '\0', "String must be NUL-terminated.");
}

inline String heapString(const char* value) {
  // Measure the C string with strlen(), then defer to the (pointer, size) overload.
  const size_t length = strlen(value);
  return heapString(value, length);
}
inline String heapString(StringPtr value) {
  // Forward the view's start and length (terminator excluded) to the (pointer, size) overload.
  const char* text = value.begin();
  return heapString(text, value.size());
}
inline String heapString(const String& value) {
  // Forward the string's start and length (terminator excluded) to the (pointer, size) overload.
  const char* text = value.begin();
  return heapString(text, value.size());
}
inline String heapString(ArrayPtr<const char> value) {
  // Forward the array's start and element count to the (pointer, size) overload.
  return heapString(value.begin(), value.size());
}

}  // namespace kj

#endif  // KJ_STRING_H_