string.h 19.4 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
3
//
Kenton Varda's avatar
Kenton Varda committed
4 5 6 7 8 9
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
10
//
Kenton Varda's avatar
Kenton Varda committed
11 12
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
13
//
Kenton Varda's avatar
Kenton Varda committed
14 15 16 17 18 19 20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
21 22 23 24

#ifndef KJ_STRING_H_
#define KJ_STRING_H_

Kenton Varda's avatar
Kenton Varda committed
25
#include <initializer_list>
26 27 28 29 30
#include "array.h"
#include <string.h>

namespace kj {

Kenton Varda's avatar
Kenton Varda committed
31 32 33
class StringPtr;
class String;

34 35
class StringTree;   // string-tree.h

36 37 38 39 40 41
// Our STL string SFINAE trick does not work with GCC 4.7, but it works with Clang and GCC 4.8, so
// we'll just preprocess it out if not supported.
#if __clang__ || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
#define KJ_COMPILER_SUPPORTS_STL_STRING_INTEROP 1
#endif

Kenton Varda's avatar
Kenton Varda committed
42 43 44 45 46 47 48 49 50 51 52 53
// =======================================================================================
// StringPtr -- A NUL-terminated ArrayPtr<const char> containing UTF-8 text.
//
// NUL bytes are allowed to appear before the end of the string.  The only requirement is that
// a NUL byte appear immediately after the last byte of the content.  This terminator byte is not
// counted in the string's size.

class StringPtr {
public:
  inline StringPtr(): content("", 1) {}
  inline StringPtr(decltype(nullptr)): content("", 1) {}
  inline StringPtr(const char* value): content(value, strlen(value) + 1) {}
Kenton Varda's avatar
Kenton Varda committed
54 55 56
  inline StringPtr(const char* value, size_t size): content(value, size + 1) {
    KJ_IREQUIRE(value[size] == '\0', "StringPtr must be NUL-terminated.");
  }
57
  inline StringPtr(const char* begin, const char* end): StringPtr(begin, end - begin) {}
Kenton Varda's avatar
Kenton Varda committed
58 59
  inline StringPtr(const String& value);

60 61 62 63 64 65 66 67 68 69 70 71 72 73
#if KJ_COMPILER_SUPPORTS_STL_STRING_INTEROP
  template <typename T, typename = decltype(instance<T>().c_str())>
  inline StringPtr(const T& t): StringPtr(t.c_str()) {}
  // Allow implicit conversion from any class that has a c_str() method (namely, std::string).
  // We use a template trick to detect std::string in order to avoid including the header for
  // those who don't want it.

  template <typename T, typename = decltype(instance<T>().c_str())>
  inline operator T() const { return cStr(); }
  // Allow implicit conversion to any class that has a c_str() method (namely, std::string).
  // We use a template trick to detect std::string in order to avoid including the header for
  // those who don't want it.
#endif

Kenton Varda's avatar
Kenton Varda committed
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
  inline operator ArrayPtr<const char>() const;
  inline ArrayPtr<const char> asArray() const;
  // Result does not include NUL terminator.

  inline const char* cStr() const { return content.begin(); }
  // Returns NUL-terminated string.

  inline size_t size() const { return content.size() - 1; }
  // Result does not include NUL terminator.

  inline char operator[](size_t index) const { return content[index]; }

  inline const char* begin() const { return content.begin(); }
  inline const char* end() const { return content.end() - 1; }

  inline bool operator==(decltype(nullptr)) const { return content.size() <= 1; }
  inline bool operator!=(decltype(nullptr)) const { return content.size() > 1; }

Kenton Varda's avatar
Kenton Varda committed
92 93 94 95 96 97
  inline bool operator==(const StringPtr& other) const;
  inline bool operator!=(const StringPtr& other) const { return !(*this == other); }
  inline bool operator< (const StringPtr& other) const;
  inline bool operator> (const StringPtr& other) const { return other < *this; }
  inline bool operator<=(const StringPtr& other) const { return !(other < *this); }
  inline bool operator>=(const StringPtr& other) const { return !(*this < other); }
Kenton Varda's avatar
Kenton Varda committed
98 99 100 101 102 103

  inline StringPtr slice(size_t start) const;
  inline ArrayPtr<const char> slice(size_t start, size_t end) const;
  // A string slice is only NUL-terminated if it is a suffix, so slice() has a one-parameter
  // version that assumes end = size().

104 105 106
  inline bool startsWith(const StringPtr& other) const;
  inline bool endsWith(const StringPtr& other) const;

107
  inline Maybe<size_t> findFirst(char c) const;
108
  inline Maybe<size_t> findLast(char c) const;
109

Kenton Varda's avatar
Kenton Varda committed
110 111 112 113 114 115 116 117
private:
  inline StringPtr(ArrayPtr<const char> content): content(content) {}

  ArrayPtr<const char> content;
};

inline bool operator==(const char* a, const StringPtr& b) {
  // Lets a C string appear on the left-hand side by swapping the operands.
  return b == a;
}
inline bool operator!=(const char* a, const StringPtr& b) {
  // Lets a C string appear on the left-hand side by swapping the operands.
  return b != a;
}
118 119

// =======================================================================================
Kenton Varda's avatar
Kenton Varda committed
120 121 122 123 124 125 126 127
// String -- A NUL-terminated Array<char> containing UTF-8 text.
//
// NUL bytes are allowed to appear before the end of the string.  The only requirement is that
// a NUL byte appear immediately after the last byte of the content.  This terminator byte is not
// counted in the string's size.
//
// To allocate a String, you must call kj::heapString().  We do not implement implicit copying to
// the heap because this hides potential inefficiency from the developer.
128 129 130 131

class String {
public:
  String() = default;
Kenton Varda's avatar
Kenton Varda committed
132 133 134
  inline String(decltype(nullptr)): content(nullptr) {}
  inline String(char* value, size_t size, const ArrayDisposer& disposer);
  // Does not copy.  `size` does not include NUL terminator, but `value` must be NUL-terminated.
135 136
  inline explicit String(Array<char> buffer);
  // Does not copy.  Requires `buffer` ends with `\0`.
137

Kenton Varda's avatar
Kenton Varda committed
138 139
  inline operator ArrayPtr<char>();
  inline operator ArrayPtr<const char>() const;
140 141
  inline ArrayPtr<char> asArray();
  inline ArrayPtr<const char> asArray() const;
Kenton Varda's avatar
Kenton Varda committed
142 143 144 145 146 147 148 149 150
  // Result does not include NUL terminator.

  inline const char* cStr() const;

  inline size_t size() const;
  // Result does not include NUL terminator.

  inline char operator[](size_t index) const;
  inline char& operator[](size_t index);
151

Kenton Varda's avatar
Kenton Varda committed
152 153 154 155
  inline char* begin();
  inline char* end();
  inline const char* begin() const;
  inline const char* end() const;
156

Kenton Varda's avatar
Kenton Varda committed
157 158 159
  inline bool operator==(decltype(nullptr)) const { return content.size() <= 1; }
  inline bool operator!=(decltype(nullptr)) const { return content.size() > 1; }

Kenton Varda's avatar
Kenton Varda committed
160
  inline bool operator==(const StringPtr& other) const { return StringPtr(*this) == other; }
161 162 163 164 165
  inline bool operator!=(const StringPtr& other) const { return StringPtr(*this) != other; }
  inline bool operator< (const StringPtr& other) const { return StringPtr(*this) <  other; }
  inline bool operator> (const StringPtr& other) const { return StringPtr(*this) >  other; }
  inline bool operator<=(const StringPtr& other) const { return StringPtr(*this) <= other; }
  inline bool operator>=(const StringPtr& other) const { return StringPtr(*this) >= other; }
166

167 168 169
  inline bool startsWith(const StringPtr& other) const { return StringPtr(*this).startsWith(other);}
  inline bool endsWith(const StringPtr& other) const { return StringPtr(*this).endsWith(other); }

170 171 172 173 174
  inline StringPtr slice(size_t start) const { return StringPtr(*this).slice(start); }
  inline ArrayPtr<const char> slice(size_t start, size_t end) const {
    return StringPtr(*this).slice(start, end);
  }

175
  inline Maybe<size_t> findFirst(char c) const { return StringPtr(*this).findFirst(c); }
176
  inline Maybe<size_t> findLast(char c) const { return StringPtr(*this).findLast(c); }
177

178 179 180 181
private:
  Array<char> content;
};

Kenton Varda's avatar
Kenton Varda committed
182 183 184 185 186 187 188 189 190 191
inline bool operator==(const char* a, const String& b) {
  // Lets a C string appear on the left-hand side by swapping the operands.
  return b == a;
}
inline bool operator!=(const char* a, const String& b) {
  // Lets a C string appear on the left-hand side by swapping the operands.
  return b != a;
}

String heapString(size_t size);
// Allocate a String of the given size on the heap, not including NUL terminator.  The NUL
// terminator will be initialized automatically but the rest of the content is not initialized.

String heapString(const char* value);
String heapString(const char* value, size_t size);
String heapString(StringPtr value);
String heapString(const String& value);
String heapString(ArrayPtr<const char> value);
// Allocates a copy of the given value on the heap.  In the (value, size) form, `size` is the
// number of bytes to copy.

Kenton Varda's avatar
Kenton Varda committed
196 197 198 199
// =======================================================================================
// Magic str() function which transforms parameters to text and concatenates them into one big
// String.

200
namespace _ {  // private
Kenton Varda's avatar
Kenton Varda committed
201 202 203 204 205 206 207 208 209 210 211

inline size_t sum(std::initializer_list<size_t> nums) {
  // Adds up every value in `nums`.  An empty list yields zero.
  size_t total = 0;
  for (const size_t* cursor = nums.begin(); cursor != nums.end(); ++cursor) {
    total += *cursor;
  }
  return total;
}

inline char* fill(char* ptr) {
  // Base case of the variadic fill(): nothing left to copy, so the write position is returned
  // unchanged.
  return ptr;
}

212 213 214 215 216 217
template <typename... Rest>
char* fill(char* __restrict__ target, const StringTree& first, Rest&&... rest);
// Make str() work with stringifiers that return StringTree by patching fill().
//
// Only declared here; the definition lives in string-tree.h (StringTree is merely
// forward-declared in this header).

Kenton Varda's avatar
Kenton Varda committed
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
template <typename First, typename... Rest>
char* fill(char* __restrict__ target, const First& first, Rest&&... rest) {
  // Copies every element of `first` to `target`, then recurses on the remaining containers.
  // Returns the position just past the last byte written.
  auto cursor = first.begin();
  auto stop = first.end();
  for (; cursor != stop; ++target) {
    *target = *cursor++;
  }
  return fill(target, kj::fwd<Rest>(rest)...);
}

template <typename... Params>
String concat(Params&&... params) {
  // Joins any number of containers into one String.  Each container must be iterable, provide
  // size(), and have elements convertible to `char`.

  // First pass: total up the sizes so that exactly one allocation is made.
  size_t totalSize = sum({params.size()...});
  String joined = heapString(totalSize);

  // Second pass: copy each container's contents into place, in argument order.
  fill(joined.begin(), kj::fwd<Params>(params)...);
  return joined;
}

inline String concat(String&& arr) {
  // Concatenating a single String is a no-op: hand the same String back instead of copying it
  // into a fresh allocation.
  return kj::mv(arr);
}

struct Stringifier {
  // This is a dummy type with only one instance: STR (below).  To make an arbitrary type
  // stringifiable, define `operator*(Stringifier, T)` to return an iterable container of `char`.
  // The container type must have a `size()` method.  Be sure to declare the operator in the same
  // namespace as `T` **or** in the global scope.
  //
  // A more usual way to accomplish what we're doing here would be to require that you define
  // a function like `toString(T)` and then rely on argument-dependent lookup.  However, this has
  // the problem that it pollutes other people's namespaces and even the global namespace.  For
  // example, some other project may already have functions called `toString` which do something
  // different.  Declaring `operator*` with `Stringifier` as the left operand cannot conflict with
  // anything.

  inline ArrayPtr<const char> operator*(ArrayPtr<const char> s) const { return s; }
  inline ArrayPtr<const char> operator*(const Array<const char>& s) const { return s; }
  inline ArrayPtr<const char> operator*(const Array<char>& s) const { return s; }
  template<size_t n>
  inline ArrayPtr<const char> operator*(const CappedArray<char, n>& s) const { return s; }
  inline ArrayPtr<const char> operator*(const char* s) const { return arrayPtr(s, strlen(s)); }
  inline ArrayPtr<const char> operator*(const String& s) const { return s.asArray(); }
  inline ArrayPtr<const char> operator*(const StringPtr& s) const { return s.asArray(); }

Kenton Varda's avatar
Kenton Varda committed
264 265 266
  inline Range<char> operator*(const Range<char>& r) const { return r; }
  inline Repeat<char> operator*(const Repeat<char>& r) const { return r; }

Kenton Varda's avatar
Kenton Varda committed
267 268 269 270 271 272 273 274
  inline FixedArray<char, 1> operator*(char c) const {
    FixedArray<char, 1> result;
    result[0] = c;
    return result;
  }

  StringPtr operator*(bool b) const;

275 276 277 278 279 280 281 282 283 284
  CappedArray<char, 5> operator*(signed char i) const;
  CappedArray<char, 5> operator*(unsigned char i) const;
  CappedArray<char, sizeof(short) * 3 + 2> operator*(short i) const;
  CappedArray<char, sizeof(unsigned short) * 3 + 2> operator*(unsigned short i) const;
  CappedArray<char, sizeof(int) * 3 + 2> operator*(int i) const;
  CappedArray<char, sizeof(unsigned int) * 3 + 2> operator*(unsigned int i) const;
  CappedArray<char, sizeof(long) * 3 + 2> operator*(long i) const;
  CappedArray<char, sizeof(unsigned long) * 3 + 2> operator*(unsigned long i) const;
  CappedArray<char, sizeof(long long) * 3 + 2> operator*(long long i) const;
  CappedArray<char, sizeof(unsigned long long) * 3 + 2> operator*(unsigned long long i) const;
Kenton Varda's avatar
Kenton Varda committed
285 286
  CappedArray<char, 24> operator*(float f) const;
  CappedArray<char, 32> operator*(double f) const;
287
  CappedArray<char, sizeof(const void*) * 3 + 2> operator*(const void* s) const;
Kenton Varda's avatar
Kenton Varda committed
288 289

  template <typename T>
290
  String operator*(ArrayPtr<T> arr) const;
Kenton Varda's avatar
Kenton Varda committed
291
  template <typename T>
292
  String operator*(const Array<T>& arr) const;
293 294 295 296 297

#if KJ_COMPILER_SUPPORTS_STL_STRING_INTEROP  // supports expression SFINAE?
  template <typename T, typename Result = decltype(instance<T>().toString())>
  inline Result operator*(T&& value) const { return kj::fwd<T>(value).toString(); }
#endif
Kenton Varda's avatar
Kenton Varda committed
298 299 300
};
// The Stringifier instance used as the left operand of `_::STR * value` expressions in this
// header.
static constexpr Stringifier STR = Stringifier();

301
}  // namespace _ (private)
Kenton Varda's avatar
Kenton Varda committed
302 303

template <typename T>
304
auto toCharSequence(T&& value) -> decltype(_::STR * kj::fwd<T>(value)) {
Kenton Varda's avatar
Kenton Varda committed
305 306 307 308 309 310 311 312
  // Returns an iterable of chars that represent a textual representation of the value, suitable
  // for debugging.
  //
  // Most users should use str() instead, but toCharSequence() may occasionally be useful to avoid
  // heap allocation overhead that str() implies.
  //
  // To specialize this function for your type, see KJ_STRINGIFY.

313
  return _::STR * kj::fwd<T>(value);
Kenton Varda's avatar
Kenton Varda committed
314 315
}

316 317 318 319 320
// Declared here, defined elsewhere.  Each overload returns a fixed-capacity char buffer whose
// capacity is 2 * sizeof(type) + 1.
// NOTE(review): presumably the hexadecimal text of `i` (two digits per byte) -- confirm against
// the implementation.
CappedArray<char, sizeof(unsigned char) * 2 + 1> hex(unsigned char i);
CappedArray<char, sizeof(unsigned short) * 2 + 1> hex(unsigned short i);
CappedArray<char, sizeof(unsigned int) * 2 + 1> hex(unsigned int i);
CappedArray<char, sizeof(unsigned long) * 2 + 1> hex(unsigned long i);
CappedArray<char, sizeof(unsigned long long) * 2 + 1> hex(unsigned long long i);
Kenton Varda's avatar
Kenton Varda committed
321 322 323 324 325 326 327 328 329

template <typename... Params>
String str(Params&&... params) {
  // Magic function which builds a string from a bunch of arbitrary values.  Example:
  //     str(1, " / ", 2, " = ", 0.5)
  // returns:
  //     "1 / 2 = 0.5"
  // To teach `str` how to stringify a type, see `Stringifier`.
  //
  // Each parameter is first turned into a char sequence with toCharSequence(); the sequences are
  // then joined by _::concat().

  return _::concat(toCharSequence(kj::fwd<Params>(params))...);
}

inline String str(String&& s) {
  // Overload to prevent redundant allocation: a lone String rvalue is moved through untouched.
  return mv(s);
}

template <typename T>
String strArray(T&& arr, const char* delim) {
  // Stringifies every element of `arr` and joins the results with `delim` between consecutive
  // elements.  `arr` must provide size() and operator[]; `delim` must be a NUL-terminated C
  // string.  An empty `arr` yields an empty String.

  size_t delimLen = strlen(delim);

  // Pass 1: stringify each element once, remembering the pieces and totalling their sizes so
  // that the result can be allocated in one go.
  // NOTE(review): the meaning of the trailing 8 and 32 arguments is defined by KJ_STACK_ARRAY,
  // which is not visible in this header.
  KJ_STACK_ARRAY(decltype(_::STR * arr[0]), pieces, arr.size(), 8, 32);
  size_t size = 0;
  for (size_t i = 0; i < arr.size(); i++) {
    if (i > 0) size += delimLen;
    pieces[i] = _::STR * arr[i];
    size += pieces[i].size();
  }

  // Pass 2: copy delimiter and pieces into the result buffer.
  String result = heapString(size);
  char* pos = result.begin();
  for (size_t i = 0; i < arr.size(); i++) {
    if (i > 0) {
      memcpy(pos, delim, delimLen);
      pos += delimLen;
    }
    pos = _::fill(pos, pieces[i]);
  }
  return result;
}

359
namespace _ {  // private
Kenton Varda's avatar
Kenton Varda committed
360 361

template <typename T>
362
inline String Stringifier::operator*(ArrayPtr<T> arr) const {
Kenton Varda's avatar
Kenton Varda committed
363 364 365 366
  return strArray(arr, ", ");
}

template <typename T>
367
inline String Stringifier::operator*(const Array<T>& arr) const {
Kenton Varda's avatar
Kenton Varda committed
368 369 370
  return strArray(arr, ", ");
}

371
}  // namespace _ (private)
Kenton Varda's avatar
Kenton Varda committed
372

373
#define KJ_STRINGIFY(...) operator*(::kj::_::Stringifier, __VA_ARGS__)
Kenton Varda's avatar
Kenton Varda committed
374 375 376 377 378 379 380 381 382 383
// Defines a stringifier for a custom type.  Example:
//
//    class Foo {...};
//    inline StringPtr KJ_STRINGIFY(const Foo& foo) { return foo.name(); }
//
// This allows Foo to be passed to str().
//
// The function should be declared either in the same namespace as the target type or in the global
// namespace.  It can return any type which is an iterable container of chars.

Kenton Varda's avatar
Kenton Varda committed
384 385 386 387 388 389 390 391 392 393 394 395 396
// =======================================================================================
// Inline implementation details.

// Views the contents of `value` without copying.
//
// Uses cStr() rather than begin(): for a null String begin() is nullptr while size() is 0, which
// would produce a StringPtr wrapping (nullptr, 1) -- cStr() would then return null and
// operator< / operator[] would read through a null pointer.  cStr() yields "" in that case, so
// a null String converts to a valid empty StringPtr.
inline StringPtr::StringPtr(const String& value): content(value.cStr(), value.size() + 1) {}

inline StringPtr::operator ArrayPtr<const char>() const {
  // Drop the trailing NUL so that only the text bytes are exposed.
  const size_t textLength = content.size() - 1;
  return content.slice(0, textLength);
}

inline ArrayPtr<const char> StringPtr::asArray() const {
  // Drop the trailing NUL so that only the text bytes are exposed.
  const size_t textLength = content.size() - 1;
  return content.slice(0, textLength);
}

Kenton Varda's avatar
Kenton Varda committed
397
inline bool StringPtr::operator==(const StringPtr& other) const {
  // Equal when the lengths match and the text bytes match.  The terminator is left out of the
  // memcmp() because it is NUL on both sides by construction.
  return content.size() == other.content.size() &&
      memcmp(content.begin(), other.content.begin(), content.size() - 1) == 0;
}

Kenton Varda's avatar
Kenton Varda committed
402 403 404 405 406 407 408
inline bool StringPtr::operator<(const StringPtr& other) const {
  // Byte-wise ordering via memcmp() over the shorter of the two buffers (terminator included).
  // If that common span is identical, the shorter string sorts first.
  const size_t mine = content.size();
  const size_t theirs = other.content.size();
  const bool isShorter = mine < theirs;
  const size_t common = isShorter ? mine : theirs;

  const int order = memcmp(content.begin(), other.content.begin(), common);
  if (order != 0) {
    return order < 0;
  }
  return isShorter;
}

Kenton Varda's avatar
Kenton Varda committed
409 410 411 412 413 414 415
inline StringPtr StringPtr::slice(size_t start) const {
  // A suffix keeps the original terminator, so it can remain a StringPtr.
  ArrayPtr<const char> suffix = content.slice(start, content.size());
  return StringPtr(suffix);
}
inline ArrayPtr<const char> StringPtr::slice(size_t start, size_t end) const {
  // An arbitrary sub-range is not NUL-terminated in general, hence the plain ArrayPtr result.
  ArrayPtr<const char> piece = content.slice(start, end);
  return piece;
}

416 417 418 419 420 421 422 423 424
inline bool StringPtr::startsWith(const StringPtr& other) const {
  // A prefix can never be longer than the string itself.
  if (other.content.size() > content.size()) {
    return false;
  }
  // Compare only other's text bytes (its terminator excluded) against our leading bytes.
  return memcmp(content.begin(), other.content.begin(), other.size()) == 0;
}
inline bool StringPtr::endsWith(const StringPtr& other) const {
  // A suffix can never be longer than the string itself.
  if (other.content.size() > content.size()) {
    return false;
  }
  // Compare other's text bytes against our final other.size() bytes.
  return memcmp(end() - other.size(), other.content.begin(), other.size()) == 0;
}

425 426 427 428 429 430 431 432 433
inline Maybe<size_t> StringPtr::findFirst(char c) const {
  // Scan only the text bytes (terminator excluded) for the first occurrence of `c`.
  const void* hit = memchr(content.begin(), c, size());
  if (hit != nullptr) {
    // Convert the match address into an index relative to the start of the string.
    return static_cast<const char*>(hit) - content.begin();
  }
  return nullptr;
}

434
inline Maybe<size_t> StringPtr::findLast(char c) const {
  // Walk backwards from the last text byte.  `i` runs one ahead of the index being examined so
  // that the unsigned counter never has to go below zero.
  for (size_t i = size(); i > 0; --i) {
    if (content[i-1] == c) {
      return i-1;
    }
  }
  return nullptr;
}

Kenton Varda's avatar
Kenton Varda committed
443 444 445 446 447 448 449
inline String::operator ArrayPtr<char>() {
  // A null String converts to a null ArrayPtr; otherwise expose the text without its NUL.
  if (content == nullptr) {
    return ArrayPtr<char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}
inline String::operator ArrayPtr<const char>() const {
  // A null String converts to a null ArrayPtr; otherwise expose the text without its NUL.
  if (content == nullptr) {
    return ArrayPtr<const char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}

450 451 452 453
inline ArrayPtr<char> String::asArray() {
  // A null String yields a null ArrayPtr; otherwise expose the text without its NUL.
  if (content == nullptr) {
    return ArrayPtr<char>(nullptr);
  }
  return content.slice(0, content.size() - 1);
}
inline ArrayPtr<const char> String::asArray() const {
  // A null String yields a null ArrayPtr; otherwise expose the text without its NUL.
  return content == nullptr ? ArrayPtr<const char>(nullptr) : content.slice(0, content.size() - 1);
}

inline const char* String::cStr() const {
  // A null String has no buffer, so hand back an empty string literal instead.
  if (content == nullptr) {
    return "";
  }
  return content.begin();
}

inline size_t String::size() const {
  // A null String is empty; otherwise subtract the NUL terminator from the buffer size.
  if (content == nullptr) {
    return 0;
  }
  return content.size() - 1;
}

inline char String::operator[](size_t index) const {
  // Read access; forwards directly to the underlying array.
  return content[index];
}
inline char& String::operator[](size_t index) {
  // Write access; forwards directly to the underlying array.
  return content[index];
}

inline char* String::begin() {
  // Null String: there is no buffer to point into.
  if (content == nullptr) return nullptr;
  return content.begin();
}
inline char* String::end() {
  // Points at the NUL terminator, i.e. one past the last byte of text.
  if (content == nullptr) return nullptr;
  return content.end() - 1;
}
inline const char* String::begin() const {
  // Null String: there is no buffer to point into.
  if (content == nullptr) return nullptr;
  return content.begin();
}
inline const char* String::end() const {
  // Points at the NUL terminator, i.e. one past the last byte of text.
  if (content == nullptr) return nullptr;
  return content.end() - 1;
}

inline String::String(char* value, size_t size, const ArrayDisposer& disposer)
    : content(value, size + 1, disposer) {
  // Takes over `value` without copying.  `size` excludes the terminator, so the array is built
  // with size + 1 elements and `value[size]` must be the NUL byte.
  KJ_IREQUIRE(value[size] == '\0', "String must be NUL-terminated.");
}

474 475 476 477
// Takes over `buffer` without copying.  The buffer must be non-empty and its final element must
// be the NUL terminator; this is checked with KJ_IREQUIRE.
inline String::String(Array<char> buffer): content(kj::mv(buffer)) {
  KJ_IREQUIRE(content.size() > 0 && content.back() == '\0', "String must be NUL-terminated.");
}

Kenton Varda's avatar
Kenton Varda committed
478 479 480 481 482 483
inline String heapString(const char* value) {
  // Measure the C string, then defer to the (pointer, size) overload.
  const size_t length = strlen(value);
  return heapString(value, length);
}
inline String heapString(StringPtr value) {
  // Defer to the (pointer, size) overload using the view's text bytes.
  const char* text = value.begin();
  return heapString(text, value.size());
}
484 485 486
inline String heapString(const String& value) {
  // Defer to the (pointer, size) overload using the source string's text bytes.
  const char* text = value.begin();
  return heapString(text, value.size());
}
Kenton Varda's avatar
Kenton Varda committed
487 488
inline String heapString(ArrayPtr<const char> value) {
  // Defer to the (pointer, size) overload; all bytes of `value` are treated as text.
  return heapString(value.begin(), value.size());
}

}  // namespace kj

#endif  // KJ_STRING_H_