Merge pull request #483 from sandstorm-io/kj-encoding

Add KJ utility functions to encode/decode blobs in common formats.

Merge pull request #483 from sandstorm-io/kj-encoding
Add KJ utility functions to encode/decode blobs in common formats.
c45bd150 · Kenton Varda · GitHub · 35ffb4cd · 745049be · c45bd150
Commit c45bd150 authored May 23, 2017 by Kenton Varda Committed by GitHub May 23, 2017
9 changed files
--- a/c++/Makefile.am
+++ b/c++/Makefile.am
@@ -128,6 +128,7 @@ includekj_HEADERS =                                            \
  src/kj/vector.h                                              \
  src/kj/string.h                                              \
  src/kj/string-tree.h                                         \
+  src/kj/encoding.h                                            \
  src/kj/exception.h                                           \
  src/kj/debug.h                                               \
  src/kj/arena.h                                               \
@@ -218,6 +219,7 @@ libkj_la_SOURCES=                                              \
  src/kj/array.c++                                             \
  src/kj/string.c++                                            \
  src/kj/string-tree.c++                                       \
+  src/kj/encoding.c++                                          \
  src/kj/exception.c++                                         \
  src/kj/debug.c++                                             \
  src/kj/arena.c++                                             \
@@ -451,6 +453,7 @@ capnp_test_SOURCES =                                           \
  src/kj/array-test.c++                                        \
  src/kj/string-test.c++                                       \
  src/kj/string-tree-test.c++                                  \
+  src/kj/encoding-test.c++                                     \
  src/kj/exception-test.c++                                    \
  src/kj/debug-test.c++                                        \
  src/kj/arena-test.c++                                        \

--- a/c++/src/capnp/compat/json.h
+++ b/c++/src/capnp/compat/json.h
@@ -193,6 +193,20 @@ public:
  void addFieldHandler(StructSchema::Field field, Handler<T>& handler);
  // Matches only the specific field. T can be a dynamic type. T must match the field's type.

+  // ---------------------------------------------------------------------------
+  // Hack to support string literal parameters
+
+  template <size_t size, typename... Params>
+  auto decode(const char (&input)[size], Params&&... params) const
+      -> decltype(decode(kj::arrayPtr(input, size), kj::fwd<Params>(params)...)) {
+    return decode(kj::arrayPtr(input, size - 1), kj::fwd<Params>(params)...);
+  }
+  template <size_t size, typename... Params>
+  auto decodeRaw(const char (&input)[size], Params&&... params) const
+      -> decltype(decodeRaw(kj::arrayPtr(input, size), kj::fwd<Params>(params)...)) {
+    return decodeRaw(kj::arrayPtr(input, size - 1), kj::fwd<Params>(params)...);
+  }
+
 private:
  class HandlerBase;
  struct Impl;

--- a/c++/src/capnp/compiler/node-translator.c++
+++ b/c++/src/capnp/compiler/node-translator.c++
@@ -24,6 +24,7 @@
 #include <capnp/serialize.h>
 #include <kj/debug.h>
 #include <kj/arena.h>
+#include <kj/encoding.h>
 #include <set>
 #include <map>
 #include <stdlib.h>
@@ -2408,36 +2409,7 @@ uint64_t NodeTranslator::compileParamList(
 static const char HEXDIGITS[] = "0123456789abcdef";

 static kj::StringTree stringLiteral(kj::StringPtr chars) {
-  // TODO(cleanup): This code keeps coming up. Put somewhere common?
-
-  kj::Vector<char> escaped(chars.size());
-
-  for (char c: chars) {
-    switch (c) {
-      case '\a': escaped.addAll(kj::StringPtr("\\a")); break;
-      case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
-      case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
-      case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
-      case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
-      case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
-      case '\v': escaped.addAll(kj::StringPtr("\\v")); break;
-      case '\'': escaped.addAll(kj::StringPtr("\\\'")); break;
-      case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
-      case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
-      default:
-        if (c < 0x20) {
-          escaped.add('\\');
-          escaped.add('x');
-          uint8_t c2 = c;
-          escaped.add(HEXDIGITS[c2 / 16]);
-          escaped.add(HEXDIGITS[c2 % 16]);
-        } else {
-          escaped.add(c);
-        }
-        break;
-    }
-  }
-  return kj::strTree('"', escaped, '"');
+  return kj::strTree('"', kj::encodeCEscape(chars), '"');
 }

 static kj::StringTree binaryLiteral(Data::Reader data) {

--- a/c++/src/capnp/stringify.c++
+++ b/c++/src/capnp/stringify.c++
@@ -22,13 +22,12 @@
 #include "dynamic.h"
 #include <kj/debug.h>
 #include <kj/vector.h>
+#include <kj/encoding.h>

 namespace capnp {

 namespace {

-static const char HEXDIGITS[] = "0123456789abcdef";
-
 enum PrintMode {
  BARE,
  // The value is planned to be printed on its own line, unless it is very short and contains
@@ -150,34 +149,7 @@ static kj::StringTree print(const DynamicValue::Reader& value,
        chars = value.as<Text>();
      }

-      kj::Vector<char> escaped(chars.size());
-
-      for (char c: chars) {
-        switch (c) {
-          case '\a': escaped.addAll(kj::StringPtr("\\a")); break;
-          case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
-          case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
-          case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
-          case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
-          case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
-          case '\v': escaped.addAll(kj::StringPtr("\\v")); break;
-          case '\'': escaped.addAll(kj::StringPtr("\\\'")); break;
-          case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
-          case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
-          default:
-            if (c < 0x20) {
-              escaped.add('\\');
-              escaped.add('x');
-              uint8_t c2 = c;
-              escaped.add(HEXDIGITS[c2 / 16]);
-              escaped.add(HEXDIGITS[c2 % 16]);
-            } else {
-              escaped.add(c);
-            }
-            break;
-        }
-      }
-      return kj::strTree('"', escaped, '"');
+      return kj::strTree('"', kj::encodeCEscape(chars), '"');
    }
    case DynamicValue::LIST: {
      auto listValue = value.as<DynamicList>();

--- a/c++/src/kj/CMakeLists.txt
+++ b/c++/src/kj/CMakeLists.txt
@@ -19,6 +19,7 @@ set(kj_sources_heavy
  units.c++
  refcount.c++
  string-tree.c++
+  encoding.c++
  parse/char.c++
 )
 if(NOT CAPNP_LITE)
@@ -36,6 +37,7 @@ set(kj_headers
  vector.h
  string.h
  string-tree.h
+  encoding.h
  exception.h
  debug.h
  arena.h
@@ -170,6 +172,7 @@ if(BUILD_TESTING)
      async-io-test.c++
      refcount-test.c++
      string-tree-test.c++
+      encoding-test.c++
      arena-test.c++
      units-test.c++
      tuple-test.c++

--- a/c++/src/kj/common.h
+++ b/c++/src/kj/common.h
@@ -455,6 +455,10 @@ T refIfLvalue(T&&);
 //     KJ_DECLTYPE_REF(i) i3(i);                  // i3 has type int&.
 //     KJ_DECLTYPE_REF(kj::mv(i)) i4(kj::mv(i));  // i4 has type int.

+template <typename T, typename U> struct IsSameType_ { static constexpr bool value = false; };
+template <typename T> struct IsSameType_<T, T> { static constexpr bool value = true; };
+template <typename T, typename U> constexpr bool isSameType() { return IsSameType_<T, U>::value; }
+
 template <typename T>
 struct CanConvert_ {
  static int sfinae(T);
@@ -911,7 +915,6 @@ public:
    return value;
  }

-private:  // internal interface used by friends only
  inline NullableValue() noexcept: isSet(false) {}
  inline NullableValue(T&& t) noexcept(noexcept(T(instance<T&&>())))
      : isSet(true) {
@@ -1244,8 +1247,31 @@ public:
      : ptr(init.begin()), size_(init.size()) {}

  template <size_t size>
-  inline constexpr ArrayPtr(T (&native)[size]): ptr(native), size_(size) {}
+  inline constexpr ArrayPtr(T (&native)[size]): ptr(native), size_(size) {
    // Construct an ArrayPtr from a native C-style array.
+    //
+    // We disable this constructor for const char arrays because otherwise you would be able to
+    // implicitly convert a string literal to ArrayPtr<const char>, which sounds really great,
+    // except that the NUL terminator would be included, which probably isn't what you intended.
+    //
+    // TODO(someday): Maybe we should support string literals but explicitly chop off the NUL
+    //   terminator. This could do the wrong thing if someone tries to construct an
+    //   ArrayPtr<const char> from a non-NUL-terminated char array, but evidence suggests that all
+    //   real use cases are in fact intending to remove the NUL terminator. It's convenient to be
+    //   able to specify ArrayPtr<const char> as a parameter type and be able to accept strings
+    //   as input in addition to arrays. Currently, you'll need overloading to support string
+    //   literals in this case, but if you overload StringPtr, then you'll find that several
+    //   conversions (e.g. from String and from a literal char array) become ambiguous! You end up
+    //   having to overload for literal char arrays specifically which is cumbersome.
+
+    static_assert(!isSameType<T, const char>(),
+        "Can't implicitly convert literal char array to ArrayPtr because we don't know if "
+        "you meant to include the NUL terminator. We may change this in the future to "
+        "automatically drop the NUL terminator. For now, try explicitly converting to StringPtr, "
+        "which can in turn implicitly convert to ArrayPtr<const char>.");
+    static_assert(!isSameType<T, const char16_t>(), "see above");
+    static_assert(!isSameType<T, const char32_t>(), "see above");
+  }

  inline operator ArrayPtr<const T>() const {
    return ArrayPtr<const T>(ptr, size_);

--- a/c++/src/kj/encoding-test.c++
+++ b/c++/src/kj/encoding-test.c++
+// Copyright (c) 2017 Cloudflare, Inc. and contributors
+// Licensed under the MIT License:
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "encoding.h"
+#include <kj/test.h>
+#include <stdint.h>
+
+namespace kj {
+namespace {
+
+CappedArray<char, sizeof(char) * 2 + 1> hex(byte value) {
+  return kj::hex(static_cast<uint8_t>(value));
+}
+CappedArray<char, sizeof(char) * 2 + 1> hex(char value) {
+  return kj::hex(static_cast<uint8_t>(value));
+}
+CappedArray<char, sizeof(char16_t) * 2 + 1> hex(char16_t value) {
+  return kj::hex(static_cast<uint16_t>(value));
+}
+CappedArray<char, sizeof(char32_t) * 2 + 1> hex(char32_t value) {
+  return kj::hex(static_cast<uint32_t>(value));
+}
+// Hexify chars correctly.
+//
+// TODO(cleanup): Should this go into string.h with the other definitions of hex()?
+
+template <typename T, typename U>
+void expectResImpl(EncodingResult<T> result,
+                   ArrayPtr<const U> expected,
+                   bool errors = false) {
+  // Checks `result` against `expected`: the hadErrors flag must equal `errors`, the sizes must
+  // agree, and the elements must match pairwise.
+  if (errors) {
+    KJ_EXPECT(result.hadErrors);
+  } else {
+    KJ_EXPECT(!result.hadErrors);
+  }
+
+  KJ_EXPECT(result.size() == expected.size(), result.size(), expected.size());
+  // Compare only the common prefix so that a size mismatch (already reported above) cannot
+  // cause an out-of-range index.
+  for (auto i: kj::zeroTo(kj::min(result.size(), expected.size()))) {
+    KJ_EXPECT(result[i] == expected[i], i, hex(result[i]), hex(expected[i]));
+  }
+}
+
+template <typename T, typename U, size_t s>
+void expectRes(EncodingResult<T> result,
+               const U (&expected)[s],
+               bool errors = false) {
+  // Overload for const arrays, which the tests in this file call with string literals. The
+  // final array element (the literal's NUL terminator) is excluded from the comparison.
+  expectResImpl(kj::mv(result), arrayPtr(expected, s - 1), errors);
+}
+
+template <typename T, size_t s>
+void expectRes(EncodingResult<T> result,
+               byte (&expected)[s],
+               bool errors = false) {
+  // Overload for non-const byte arrays: unlike the const-array overload, all `s` elements are
+  // compared, since there is no terminator to drop.
+  expectResImpl(kj::mv(result), arrayPtr<const byte>(expected, s), errors);
+}
+
+KJ_TEST("encode UTF-8 to UTF-16") {
+  expectRes(encodeUtf16(u8"foo"), u"foo");
+  expectRes(encodeUtf16(u8"Здравствуйте"), u"Здравствуйте");
+  expectRes(encodeUtf16(u8"中国网络"), u"中国网络");
+  expectRes(encodeUtf16(u8"😺☁☄🐵"), u"😺☁☄🐵");
+}
+
+KJ_TEST("invalid UTF-8 to UTF-16") {
+  // Disembodied continuation byte.
+  expectRes(encodeUtf16("\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("f\xbfo"), u"f\ufffdo", true);
+  expectRes(encodeUtf16("f\xbf\x80\xb0o"), u"f\ufffdo", true);
+
+  // Missing continuation bytes.
+  expectRes(encodeUtf16("\xc2x"), u"\ufffdx", true);
+  expectRes(encodeUtf16("\xe0x"), u"\ufffdx", true);
+  expectRes(encodeUtf16("\xe0\xa0x"), u"\ufffdx", true);
+  expectRes(encodeUtf16("\xf0x"), u"\ufffdx", true);
+  expectRes(encodeUtf16("\xf0\x90x"), u"\ufffdx", true);
+  expectRes(encodeUtf16("\xf0\x90\x80x"), u"\ufffdx", true);
+
+  // Overlong sequences.
+  expectRes(encodeUtf16("\xc0\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xc1\xbf"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xc2\x80"), u"\u0080", false);
+  expectRes(encodeUtf16("\xdf\xbf"), u"\u07ff", false);
+
+  expectRes(encodeUtf16("\xe0\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xe0\x9f\xbf"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xe0\xa0\x80"), u"\u0800", false);
+  expectRes(encodeUtf16("\xef\xbf\xbe"), u"\ufffe", false);
+
+  // Due to a classic off-by-one error, GCC 4.x rather hilariously encodes '\uffff' as the
+  // "surrogate pair" 0xd7ff, 0xdfff: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41698
+  if (kj::size(u"\uffff") == 2) {
+    expectRes(encodeUtf16("\xef\xbf\xbf"), u"\uffff", false);
+  }
+
+  expectRes(encodeUtf16("\xf0\x80\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xf0\x8f\xbf\xbf"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xf0\x90\x80\x80"), u"\U00010000", false);
+  expectRes(encodeUtf16("\xf4\x8f\xbf\xbf"), u"\U0010ffff", false);
+
+  // Out of Unicode range.
+  expectRes(encodeUtf16("\xf5\x80\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xf8\xbf\x80\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xfc\xbf\x80\x80\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xfe\xbf\x80\x80\x80\x80\x80"), u"\ufffd", true);
+  expectRes(encodeUtf16("\xff\xbf\x80\x80\x80\x80\x80\x80"), u"\ufffd", true);
+}
+
+KJ_TEST("encode UTF-8 to UTF-32") {
+  expectRes(encodeUtf32(u8"foo"), U"foo");
+  expectRes(encodeUtf32(u8"Здравствуйте"), U"Здравствуйте");
+  expectRes(encodeUtf32(u8"中国网络"), U"中国网络");
+  expectRes(encodeUtf32(u8"😺☁☄🐵"), U"😺☁☄🐵");
+}
+
+KJ_TEST("invalid UTF-8 to UTF-32") {
+  // Disembodied continuation byte.
+  expectRes(encodeUtf32("\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("f\xbfo"), U"f\ufffdo", true);
+  expectRes(encodeUtf32("f\xbf\x80\xb0o"), U"f\ufffdo", true);
+
+  // Missing continuation bytes.
+  expectRes(encodeUtf32("\xc2x"), U"\ufffdx", true);
+  expectRes(encodeUtf32("\xe0x"), U"\ufffdx", true);
+  expectRes(encodeUtf32("\xe0\xa0x"), U"\ufffdx", true);
+  expectRes(encodeUtf32("\xf0x"), U"\ufffdx", true);
+  expectRes(encodeUtf32("\xf0\x90x"), U"\ufffdx", true);
+  expectRes(encodeUtf32("\xf0\x90\x80x"), U"\ufffdx", true);
+
+  // Overlong sequences.
+  expectRes(encodeUtf32("\xc0\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xc1\xbf"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xc2\x80"), U"\u0080", false);
+  expectRes(encodeUtf32("\xdf\xbf"), U"\u07ff", false);
+
+  expectRes(encodeUtf32("\xe0\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xe0\x9f\xbf"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xe0\xa0\x80"), U"\u0800", false);
+  expectRes(encodeUtf32("\xef\xbf\xbf"), U"\uffff", false);
+
+  expectRes(encodeUtf32("\xf0\x80\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xf0\x8f\xbf\xbf"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xf0\x90\x80\x80"), U"\U00010000", false);
+  expectRes(encodeUtf32("\xf4\x8f\xbf\xbf"), U"\U0010ffff", false);
+
+  // Out of Unicode range.
+  expectRes(encodeUtf32("\xf5\x80\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xf8\xbf\x80\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xfc\xbf\x80\x80\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xfe\xbf\x80\x80\x80\x80\x80"), U"\ufffd", true);
+  expectRes(encodeUtf32("\xff\xbf\x80\x80\x80\x80\x80\x80"), U"\ufffd", true);
+}
+
+KJ_TEST("decode UTF-16 to UTF-8") {
+  expectRes(decodeUtf16(u"foo"), u8"foo");
+  expectRes(decodeUtf16(u"Здравствуйте"), u8"Здравствуйте");
+  expectRes(decodeUtf16(u"中国网络"), u8"中国网络");
+  expectRes(decodeUtf16(u"😺☁☄🐵"), u8"😺☁☄🐵");
+}
+
+KJ_TEST("invalid UTF-16 to UTF-8") {
+  // Surrogates in wrong order.
+  expectRes(decodeUtf16(u"\xd7ff\xdc00\xdfff\xe000"), u8"\ud7ff\ufffd\ufffd\ue000", true);
+
+  // Missing second surrogate.
+  expectRes(decodeUtf16(u"f\xd800"), u8"f\ufffd", true);
+  expectRes(decodeUtf16(u"f\xd800x"), u8"f\ufffdx", true);
+  expectRes(decodeUtf16(u"f\xd800\xd800x"), u8"f\ufffd\ufffdx", true);
+}
+
+KJ_TEST("decode UTF-32 to UTF-8") {
+  expectRes(decodeUtf32(U"foo"), u8"foo");
+  expectRes(decodeUtf32(U"Здравствуйте"), u8"Здравствуйте");
+  expectRes(decodeUtf32(U"中国网络"), u8"中国网络");
+  expectRes(decodeUtf32(U"😺☁☄🐵"), u8"😺☁☄🐵");
+}
+
+KJ_TEST("invalid UTF-32 to UTF-8") {
+  // Surrogates rejected.
+  expectRes(decodeUtf32(U"\xd7ff\xdc00\xdfff\xe000"), u8"\ud7ff\ufffd\ufffd\ue000", true);
+
+  // Even if it would be a valid surrogate pair in UTF-16.
+  expectRes(decodeUtf32(U"\xd7ff\xd800\xdfff\xe000"), u8"\ud7ff\ufffd\ufffd\ue000", true);
+}
+
+KJ_TEST("EncodingResult as a Maybe") {
+  KJ_IF_MAYBE(result, encodeUtf16("\x80")) {
+    KJ_FAIL_EXPECT("expected failure");
+  }
+
+  KJ_IF_MAYBE(result, encodeUtf16("foo")) {
+    // good
+  } else {
+    KJ_FAIL_EXPECT("expected success");
+  }
+
+  KJ_EXPECT(KJ_ASSERT_NONNULL(decodeUtf16(u"foo")) == "foo");
+}
+
+// =======================================================================================
+
+KJ_TEST("hex encoding/decoding") {
+  byte bytes[] = {0x12, 0x34, 0xab, 0xf2};
+
+  KJ_EXPECT(encodeHex(bytes) == "1234abf2");
+
+  expectRes(decodeHex("1234abf2"), bytes);
+
+  expectRes(decodeHex("1234abf21"), bytes, true);
+
+  bytes[2] = 0xa0;
+  expectRes(decodeHex("1234axf2"), bytes, true);
+
+  bytes[2] = 0x0b;
+  expectRes(decodeHex("1234xbf2"), bytes, true);
+}
+
+KJ_TEST("URI encoding/decoding") {
+  KJ_EXPECT(encodeUriComponent("foo") == "foo");
+  KJ_EXPECT(encodeUriComponent("foo bar") == "foo%20bar");
+  KJ_EXPECT(encodeUriComponent("\xab\xba") == "%ab%ba");
+  KJ_EXPECT(encodeUriComponent(StringPtr("foo\0bar", 7)) == "foo%00bar");
+
+  expectRes(decodeUriComponent("foo%20bar"), "foo bar");
+  expectRes(decodeUriComponent("%ab%BA"), "\xab\xba");
+
+  expectRes(decodeUriComponent("foo%1xxx"), "foo\1xxx", true);
+  expectRes(decodeUriComponent("foo%1"), "foo\1", true);
+  expectRes(decodeUriComponent("foo%xxx"), "fooxxx", true);
+  expectRes(decodeUriComponent("foo%"), "foo", true);
+
+  byte bytes[] = {12, 34, 56};
+  KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes);
+}
+
+KJ_TEST("C escape encoding/decoding") {
+  KJ_EXPECT(encodeCEscape("fooo\a\b\f\n\r\t\v\'\"\\bar") ==
+      "fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar");
+  KJ_EXPECT(encodeCEscape("foo\x01\x7fxxx") ==
+      "foo\\001\\177xxx");
+
+  expectRes(decodeCEscape("fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar"),
+      "fooo\a\b\f\n\r\t\v\'\"\\bar");
+  expectRes(decodeCEscape("foo\\x01\\x7fxxx"), "foo\x01\x7fxxx");
+  expectRes(decodeCEscape("foo\\001\\177234"), "foo\001\177234");
+  expectRes(decodeCEscape("foo\\x1"), "foo\x1");
+  expectRes(decodeCEscape("foo\\1"), "foo\1");
+
+  expectRes(decodeCEscape("foo\\u1234bar"), u8"foo\u1234bar");
+  expectRes(decodeCEscape("foo\\U00045678bar"), u8"foo\U00045678bar");
+
+  // Error cases.
+  expectRes(decodeCEscape("foo\\"), "foo", true);
+  expectRes(decodeCEscape("foo\\x123x"), u8"foo\x23x", true);
+  expectRes(decodeCEscape("foo\\u12"), u8"foo\u0012", true);
+  expectRes(decodeCEscape("foo\\u12xxx"), u8"foo\u0012xxx", true);
+  expectRes(decodeCEscape("foo\\U12"), u8"foo\u0012", true);
+  expectRes(decodeCEscape("foo\\U12xxxxxxxx"), u8"foo\u0012xxxxxxxx", true);
+}
+
+KJ_TEST("base64 encoding/decoding") {
+  {
+    auto encoded = encodeBase64(StringPtr("foo").asBytes(), false);
+    KJ_EXPECT(encoded == "Zm9v", encoded, encoded.size());
+    KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == "foo");
+  }
+
+  {
+    auto encoded = encodeBase64(StringPtr("corge").asBytes(), false);
+    KJ_EXPECT(encoded == "Y29yZ2U=", encoded);
+    KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == "corge");
+  }
+
+  KJ_EXPECT(heapString(decodeBase64("Y29yZ2U").asChars()) == "corge");
+  KJ_EXPECT(heapString(decodeBase64("Y\n29y Z@2U=\n").asChars()) == "corge");
+
+  {
+    auto encoded = encodeBase64(StringPtr("corge").asBytes(), true);
+    KJ_EXPECT(encoded == "Y29yZ2U=\n", encoded);
+  }
+
+  StringPtr fullLine = "012345678901234567890123456789012345678901234567890123";
+  {
+    auto encoded = encodeBase64(fullLine.asBytes(), false);
+    KJ_EXPECT(
+        encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz",
+        encoded);
+  }
+  {
+    auto encoded = encodeBase64(fullLine.asBytes(), true);
+    KJ_EXPECT(
+        encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz\n",
+        encoded);
+  }
+
+  String multiLine = str(fullLine, "456");
+  {
+    auto encoded = encodeBase64(multiLine.asBytes(), false);
+    KJ_EXPECT(
+        encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2",
+        encoded);
+  }
+  {
+    auto encoded = encodeBase64(multiLine.asBytes(), true);
+    KJ_EXPECT(
+        encoded == "MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIz\n"
+                   "NDU2\n",
+        encoded);
+  }
+}
+
+}  // namespace
+}  // namespace kj
--- a/c++/src/kj/encoding.c++
+++ b/c++/src/kj/encoding.c++
+// Copyright (c) 2017 Cloudflare, Inc.; Sandstorm Development Group, Inc.; and contributors
+// Licensed under the MIT License:
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "encoding.h"
+#include "vector.h"
+#include "debug.h"
+
+namespace kj {
+
+namespace {
+
+#define GOTO_ERROR_IF(cond) if (KJ_UNLIKELY(cond)) goto error
+
+inline void addChar32(Vector<char16_t>& vec, char32_t u) {
+  // UTF-16 output: split the code point into a high and a low surrogate. The caller in this
+  // file only passes code points in the range 0x10000..0x10ffff.
+  char32_t offset = u - 0x10000;
+  char16_t highSurrogate = static_cast<char16_t>(0xd800 | (offset >> 10));
+  char16_t lowSurrogate = static_cast<char16_t>(0xdc00 | (offset & 0x03ff));
+  vec.add(highSurrogate);
+  vec.add(lowSurrogate);
+}
+
+inline void addChar32(Vector<char32_t>& vec, char32_t u) {
+  // UTF-32 output: the code point is stored as-is.
+  vec.add(u);
+}
+
+template <typename T>
+EncodingResult<Array<T>> encodeUtf(ArrayPtr<const char> text, bool nulTerminate) {
+  // Converts UTF-8 `text` into an array of code units of type T; encodeUtf16() and encodeUtf32()
+  // instantiate this with char16_t and char32_t. Every malformed sequence produces one U+FFFD
+  // in the output and sets hadErrors in the result. If `nulTerminate` is true, a zero element is
+  // appended after the converted text.
+  Vector<T> result(text.size() + nulTerminate);
+  bool hadErrors = false;
+
+  size_t i = 0;
+  while (i < text.size()) {
+    byte c = text[i++];
+    if (c < 0x80) {
+      // 0xxxxxxx -- ASCII
+      result.add(c);
+      continue;
+    } else if (KJ_UNLIKELY(c < 0xc0)) {
+      // 10xxxxxx -- malformed continuation byte
+      goto error;
+    } else if (c < 0xe0) {
+      // 110xxxxx -- 2-byte
+      byte c2;
+      // Fetch the continuation byte. On failure `i` has not been advanced, so the offending
+      // byte is examined again as the start of the next sequence.
+      GOTO_ERROR_IF(i == text.size() || ((c2 = text[i]) & 0xc0) != 0x80); ++i;
+      char16_t u = (static_cast<char16_t>(c  & 0x1f) <<  6)
+                 | (static_cast<char16_t>(c2 & 0x3f)      );
+
+      // Disallow overlong sequence.
+      GOTO_ERROR_IF(u < 0x80);
+
+      result.add(u);
+      continue;
+    } else if (c < 0xf0) {
+      // 1110xxxx -- 3-byte
+      byte c2, c3;
+      // Same continuation-byte handling as in the 2-byte case, once per trailing byte.
+      GOTO_ERROR_IF(i == text.size() || ((c2 = text[i]) & 0xc0) != 0x80); ++i;
+      GOTO_ERROR_IF(i == text.size() || ((c3 = text[i]) & 0xc0) != 0x80); ++i;
+      char16_t u = (static_cast<char16_t>(c  & 0x0f) << 12)
+                 | (static_cast<char16_t>(c2 & 0x3f) <<  6)
+                 | (static_cast<char16_t>(c3 & 0x3f)      );
+
+      // Disallow overlong sequence.
+      GOTO_ERROR_IF(u < 0x0800);
+
+      // Disallow surrogate pair code points.
+      GOTO_ERROR_IF((u & 0xf800) == 0xd800);
+
+      result.add(u);
+      continue;
+    } else if (c < 0xf8) {
+      // 11110xxx -- 4-byte
+      byte c2, c3, c4;
+      // Same continuation-byte handling as in the 2-byte case, once per trailing byte.
+      GOTO_ERROR_IF(i == text.size() || ((c2 = text[i]) & 0xc0) != 0x80); ++i;
+      GOTO_ERROR_IF(i == text.size() || ((c3 = text[i]) & 0xc0) != 0x80); ++i;
+      GOTO_ERROR_IF(i == text.size() || ((c4 = text[i]) & 0xc0) != 0x80); ++i;
+      char32_t u = (static_cast<char32_t>(c  & 0x07) << 18)
+                 | (static_cast<char32_t>(c2 & 0x3f) << 12)
+                 | (static_cast<char32_t>(c3 & 0x3f) <<  6)
+                 | (static_cast<char32_t>(c4 & 0x3f)      );
+
+      // Disallow overlong sequence.
+      GOTO_ERROR_IF(u < 0x10000);
+
+      // Unicode ends at U+10FFFF
+      GOTO_ERROR_IF(u >= 0x110000);
+
+      // The overload chosen by T writes either a surrogate pair (char16_t) or the code point
+      // itself (char32_t).
+      addChar32(result, u);
+      continue;
+    } else {
+      // 5-byte and 6-byte sequences are not legal as they'd result in codepoints outside the
+      // range of Unicode.
+      goto error;
+    }
+
+  error:
+    // Every successful branch above ends in `continue`, so this point is reached only via goto.
+    // Emit a single U+FFFD for the whole malformed sequence.
+    result.add(0xfffd);
+    hadErrors = true;
+    // Ignore all continuation bytes.
+    while (i < text.size() && (text[i] & 0xc0) == 0x80) {
+      ++i;
+    }
+  }
+
+  if (nulTerminate) result.add(0);
+
+  return { result.releaseAsArray(), hadErrors };
+}
+
+}  // namespace
+
+EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate) {
+  // UTF-8 -> UTF-16: forwards to the encodeUtf() template instantiated with char16_t.
+  return encodeUtf<char16_t>(text, nulTerminate);
+}
+
+EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate) {
+  // UTF-8 -> UTF-32: forwards to the encodeUtf() template instantiated with char32_t.
+  return encodeUtf<char32_t>(text, nulTerminate);
+}
+
+EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16) {
+  // Converts UTF-16 to NUL-terminated UTF-8. A surrogate that is not part of a well-formed
+  // high/low pair is replaced with U+FFFD and reported through hadErrors.
+  Vector<char> out(utf16.size() + 1);
+  bool sawInvalid = false;
+
+  for (size_t pos = 0; pos < utf16.size();) {
+    char16_t unit = utf16[pos++];
+
+    if (unit < 0x80) {
+      // ASCII: one byte.
+      out.add(unit);
+    } else if (unit < 0x0800) {
+      // Two-byte sequence.
+      out.addAll<std::initializer_list<char>>({
+        static_cast<char>(((unit >>  6)       ) | 0xc0),
+        static_cast<char>(((unit      ) & 0x3f) | 0x80)
+      });
+    } else if ((unit & 0xf800) != 0xd800) {
+      // Not a surrogate: three-byte sequence.
+      out.addAll<std::initializer_list<char>>({
+        static_cast<char>(((unit >> 12)       ) | 0xe0),
+        static_cast<char>(((unit >>  6) & 0x3f) | 0x80),
+        static_cast<char>(((unit      ) & 0x3f) | 0x80)
+      });
+    } else if (pos != utf16.size()                    // a second half exists
+            && (unit & 0x0400) == 0                   // first half is a high surrogate
+            && (utf16[pos] & 0xfc00) == 0xdc00) {     // second half is a low surrogate
+      // Well-formed surrogate pair: combine into one code point, four-byte sequence.
+      char16_t low = utf16[pos++];
+      char32_t codePoint = (((unit & 0x03ff) << 10) | (low & 0x03ff)) + 0x10000;
+      out.addAll<std::initializer_list<char>>({
+        static_cast<char>(((codePoint >> 18)       ) | 0xf0),
+        static_cast<char>(((codePoint >> 12) & 0x3f) | 0x80),
+        static_cast<char>(((codePoint >>  6) & 0x3f) | 0x80),
+        static_cast<char>(((codePoint      ) & 0x3f) | 0x80)
+      });
+    } else {
+      // Lone or misordered surrogate. Only `unit` is consumed; the following code unit, if any,
+      // is decoded on its own in the next iteration.
+      out.addAll(StringPtr(u8"\ufffd"));
+      sawInvalid = true;
+    }
+  }
+
+  out.add(0);
+  return { String(out.releaseAsArray()), sawInvalid };
+}
+
+EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32) {
+  // Converts UTF-32 to NUL-terminated UTF-8. Surrogate code points (U+D800..U+DFFF) and values
+  // at or above 0x110000 are each replaced with U+FFFD and reported through hadErrors.
+  Vector<char> result(utf32.size() + 1);
+  bool hadErrors = false;
+
+  size_t i = 0;
+  while (i < utf32.size()) {
+    char32_t u = utf32[i++];
+
+    if (u < 0x80) {
+      // ASCII: one byte.
+      result.add(u);
+      continue;
+    } else if (u < 0x0800) {
+      // Two-byte sequence.
+      result.addAll<std::initializer_list<char>>({
+        static_cast<char>(((u >>  6)       ) | 0xc0),
+        static_cast<char>(((u      ) & 0x3f) | 0x80)
+      });
+      continue;
+    } else if (u < 0x10000) {
+      // Three-byte sequence.
+      GOTO_ERROR_IF((u & 0xfffff800) == 0xd800);  // no surrogates allowed in utf-32
+      result.addAll<std::initializer_list<char>>({
+        static_cast<char>(((u >> 12)       ) | 0xe0),
+        static_cast<char>(((u >>  6) & 0x3f) | 0x80),
+        static_cast<char>(((u      ) & 0x3f) | 0x80)
+      });
+      continue;
+    } else {
+      // Four-byte sequence.
+      GOTO_ERROR_IF(u >= 0x110000);  // outside Unicode range
+      result.addAll<std::initializer_list<char>>({
+        static_cast<char>(((u >> 18)       ) | 0xf0),
+        static_cast<char>(((u >> 12) & 0x3f) | 0x80),
+        static_cast<char>(((u >>  6) & 0x3f) | 0x80),
+        static_cast<char>(((u      ) & 0x3f) | 0x80)
+      });
+      continue;
+    }
+
+  error:
+    // Every successful branch above ends in `continue`, so this point is reached only via goto.
+    result.addAll(StringPtr(u8"\ufffd"));
+    hadErrors = true;
+  }
+
+  result.add(0);
+  return { String(result.releaseAsArray()), hadErrors };
+}
+
+// =======================================================================================
+
+namespace {
+
+const char HEX_DIGITS[] = "0123456789abcdef";
+
+static Maybe<uint> tryFromHexDigit(char c) {
+  // Returns the value (0-15) of the hex digit `c`, accepting either letter case, or null if `c`
+  // is not a hex digit.
+  if (c >= '0' && c <= '9') return c - '0';
+  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  return nullptr;
+}
+
+static Maybe<uint> tryFromOctDigit(char c) {
+  // Returns the value (0-7) of the octal digit `c`, or null if `c` is not an octal digit.
+  if (c < '0' || c > '7') return nullptr;
+  return c - '0';
+}
+
+}  // namespace
+
+String encodeHex(ArrayPtr<const byte> input) {
+  // Encodes `input` as lowercase hex, two digits per byte, most significant nibble first.
+  //
+  // The digits are appended to a single buffer (the same approach encodeUriComponent() uses)
+  // instead of allocating a temporary two-character heap array for every input byte and then
+  // joining them.
+  Vector<char> result(input.size() * 2 + 1);
+  for (byte b: input) {
+    result.add(HEX_DIGITS[b / 16]);
+    result.add(HEX_DIGITS[b % 16]);
+  }
+  result.add('\0');  // String expects its backing array to end with a NUL terminator.
+  return String(result.releaseAsArray());
+}
+
+EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text) {
+  // Decodes pairs of hex digits into bytes. A digit that fails to parse contributes zero bits
+  // and sets the error flag. With an odd-length input the final unpaired digit is ignored and
+  // the error flag is set.
+  bool sawInvalid = (text.size() % 2) != 0;
+  auto decoded = heapArray<byte>(text.size() / 2);
+
+  for (size_t n = 0; n < decoded.size(); ++n) {
+    byte value = 0;
+
+    KJ_IF_MAYBE(high, tryFromHexDigit(text[n * 2])) {
+      value = *high << 4;
+    } else {
+      sawInvalid = true;
+    }
+
+    KJ_IF_MAYBE(low, tryFromHexDigit(text[n * 2 + 1])) {
+      value |= *low;
+    } else {
+      sawInvalid = true;
+    }
+
+    decoded[n] = value;
+  }
+
+  return { kj::mv(decoded), sawInvalid };
+}
+
+String encodeUriComponent(ArrayPtr<const byte> bytes) {
+  // Percent-encodes every byte except ASCII letters, ASCII digits and the characters
+  // - _ . ! ~ * ' ( ), which are copied through unchanged. Escapes are written as '%' followed by
+  // two lowercase hex digits.
+  Vector<char> encoded(bytes.size() + 1);
+
+  for (byte b: bytes) {
+    bool passThrough;
+    switch (b) {
+      case '-': case '_': case '.': case '!': case '~': case '*': case '\'': case '(': case ')':
+        passThrough = true;
+        break;
+      default:
+        passThrough = ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z') || ('0' <= b && b <= '9');
+        break;
+    }
+
+    if (passThrough) {
+      encoded.add(b);
+    } else {
+      encoded.add('%');
+      encoded.add(HEX_DIGITS[b >> 4]);
+      encoded.add(HEX_DIGITS[b & 0x0f]);
+    }
+  }
+
+  // NUL-terminate so the buffer can be adopted as a String.
+  encoded.add('\0');
+  return String(encoded.releaseAsArray());
+}
+
+EncodingResult<Array<byte>> decodeBinaryUriComponent(
+    ArrayPtr<const char> text, bool nulTerminate) {
+  // Decodes %XX escapes in `text` into raw bytes; all other characters are copied unchanged.
+  //
+  // Malformed escapes set `hadErrors` on the result: a '%' followed by no hex digit produces no
+  // output, and a '%' followed by a single hex digit produces that digit's value. If
+  // `nulTerminate` is true, a zero byte is appended to the output.
+  Vector<byte> decoded(text.size() + nulTerminate);
+  bool sawError = false;
+
+  const size_t len = text.size();
+  size_t pos = 0;
+  while (pos < len) {
+    if (text[pos] != '%') {
+      decoded.add(text[pos++]);
+      continue;
+    }
+
+    ++pos;  // consume the '%'
+    if (pos == len) {
+      // '%' was the last character.
+      sawError = true;
+      continue;
+    }
+
+    KJ_IF_MAYBE(hi, tryFromHexDigit(text[pos])) {
+      byte value = *hi;
+      ++pos;
+      if (pos == len) {
+        sawError = true;
+      } else KJ_IF_MAYBE(lo, tryFromHexDigit(text[pos])) {
+        value = (value << 4) | *lo;
+        ++pos;
+      } else {
+        sawError = true;
+      }
+      decoded.add(value);
+    } else {
+      // Not a hex digit: leave it in place to be handled as an ordinary character.
+      sawError = true;
+    }
+  }
+
+  if (nulTerminate) decoded.add(0);
+  return { decoded.releaseAsArray(), sawError };
+}
+
+// =======================================================================================
+
+String encodeCEscape(ArrayPtr<const byte> bytes) {
+  // Escapes `bytes` using C-style backslash escapes. The control characters \a \b \f \n \r \t \v
+  // and the characters ' " \ get their single-letter escapes; any other byte below 0x20, and
+  // 0x7f, becomes a three-digit octal escape. All remaining bytes (including those >= 0x80) are
+  // copied through unchanged.
+
+  // Reserve room for the trailing NUL too, matching encodeUriComponent(), so that input which
+  // needs no escaping fits in the initial capacity.
+  Vector<char> escaped(bytes.size() + 1);
+
+  for (byte b: bytes) {
+    switch (b) {
+      case '\a': escaped.addAll(StringPtr("\\a")); break;
+      case '\b': escaped.addAll(StringPtr("\\b")); break;
+      case '\f': escaped.addAll(StringPtr("\\f")); break;
+      case '\n': escaped.addAll(StringPtr("\\n")); break;
+      case '\r': escaped.addAll(StringPtr("\\r")); break;
+      case '\t': escaped.addAll(StringPtr("\\t")); break;
+      case '\v': escaped.addAll(StringPtr("\\v")); break;
+      case '\'': escaped.addAll(StringPtr("\\\'")); break;
+      case '\"': escaped.addAll(StringPtr("\\\"")); break;
+      case '\\': escaped.addAll(StringPtr("\\\\")); break;
+      default:
+        if (b < 0x20 || b == 0x7f) {
+          // Use octal escape, not hex, because hex escapes technically have no length limit and
+          // so can create ambiguity with subsequent characters.
+          //
+          // Only indices 0-7 of HEX_DIGITS are used here, i.e. the octal digits.
+          escaped.add('\\');
+          escaped.add(HEX_DIGITS[b / 64]);
+          escaped.add(HEX_DIGITS[(b / 8) % 8]);
+          escaped.add(HEX_DIGITS[b % 8]);
+        } else {
+          escaped.add(b);
+        }
+        break;
+    }
+  }
+
+  // NUL-terminate so the buffer can be adopted as a String.
+  escaped.add(0);
+  return String(escaped.releaseAsArray());
+}
+
+EncodingResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate) {
+  // Decodes C-style backslash escapes in `text` into raw bytes; characters outside an escape are
+  // copied unchanged. Recognized escapes:
+  // - \a \b \f \n \r \t \v \' \" \\
+  // - octal: a backslash followed by one to three octal digits
+  // - \x followed by any number of hex digits
+  // - \uXXXX and \UXXXXXXXX, whose code point is emitted as UTF-8
+  // A backslash followed by any other character yields that character.
+  //
+  // Malformed input never aborts decoding; instead `hadErrors` is set on the result. If
+  // `nulTerminate` is true, a zero byte is appended to the output.
+  Vector<byte> result(text.size() + nulTerminate);
+  bool hadErrors = false;
+
+  size_t i = 0;
+  while (i < text.size()) {
+    char c = text[i++];
+    if (c == '\\') {
+      if (i == text.size()) {
+        // Lone backslash at the end of the input: drop it.
+        hadErrors = true;
+        continue;
+      }
+      char c2 = text[i++];
+      switch (c2) {
+        case 'a' : result.add('\a'); break;
+        case 'b' : result.add('\b'); break;
+        case 'f' : result.add('\f'); break;
+        case 'n' : result.add('\n'); break;
+        case 'r' : result.add('\r'); break;
+        case 't' : result.add('\t'); break;
+        case 'v' : result.add('\v'); break;
+        case '\'': result.add('\''); break;
+        case '\"': result.add('\"'); break;
+        case '\\': result.add('\\'); break;
+
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7': {
+          // Octal escape: the digit just read plus at most two more.
+          uint value = c2 - '0';
+          for (uint j = 0; j < 2 && i < text.size(); j++) {
+            KJ_IF_MAYBE(d, tryFromOctDigit(text[i])) {
+              ++i;
+              value = (value << 3) | *d;
+            } else {
+              break;
+            }
+          }
+          // Three octal digits can encode up to 0777, which does not fit in a byte.
+          if (value >= 0x100) hadErrors = true;
+          result.add(value);
+          break;
+        }
+
+        case 'x': {
+          // Hex escape: consumes every hex digit that follows.
+          uint value = 0;
+          while (i < text.size()) {
+            KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
+              ++i;
+              value = (value << 4) | *d;
+              if (value >= 0x100) {
+                // Out of range for a byte. Flag it now and keep only the low 8 bits (the byte
+                // that is emitted), so that a long digit run cannot wrap the accumulator back
+                // into range and hide the error.
+                hadErrors = true;
+                value &= 0xff;
+              }
+            } else {
+              break;
+            }
+          }
+          result.add(value);
+          break;
+        }
+
+        case 'u': {
+          // Exactly four hex digits naming one UTF-16 code unit; fewer is an error, and whatever
+          // was accumulated so far is still decoded.
+          char16_t value = 0;
+          for (uint j = 0; j < 4; j++) {
+            if (i == text.size()) {
+              hadErrors = true;
+              break;
+            } else KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
+              ++i;
+              value = (value << 4) | *d;
+            } else {
+              hadErrors = true;
+              break;
+            }
+          }
+          auto utf = decodeUtf16(arrayPtr(&value, 1));
+          if (utf.hadErrors) hadErrors = true;
+          result.addAll(utf.asBytes());
+          break;
+        }
+
+        case 'U': {
+          // Exactly eight hex digits naming one UTF-32 code point; fewer is an error, and
+          // whatever was accumulated so far is still decoded.
+          char32_t value = 0;
+          for (uint j = 0; j < 8; j++) {
+            if (i == text.size()) {
+              hadErrors = true;
+              break;
+            } else KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
+              ++i;
+              value = (value << 4) | *d;
+            } else {
+              hadErrors = true;
+              break;
+            }
+          }
+          auto utf = decodeUtf32(arrayPtr(&value, 1));
+          if (utf.hadErrors) hadErrors = true;
+          result.addAll(utf.asBytes());
+          break;
+        }
+
+        default:
+          // Unrecognized escape: emit the escaped character itself.
+          result.add(c2);
+      }
+    } else {
+      result.add(c);
+    }
+  }
+
+  if (nulTerminate) result.add(0);
+  return { result.releaseAsArray(), hadErrors };
+}
+
+// =======================================================================================
+// This code is derived from libb64 which has been placed in the public domain.
+// For details, see http://sourceforge.net/projects/libb64
+
+// -------------------------------------------------------------------
+// Encoder
+
+namespace {
+
+enum base64_encodestep {
+  step_A, step_B, step_C
+};
+// Position of the next input byte within its group of three: first, second or third.
+
+struct base64_encodestate {
+  base64_encodestep step;
+  // Where encoding resumes on the next call.
+  char result;
+  // Bits left over from the previous input byte, not yet emitted.
+  int stepcount;
+  // Number of four-character groups written to the current output line.
+};
+
+const int CHARS_PER_LINE = 72;
+
+void base64_init_encodestate(base64_encodestate* state_in) {
+  // Resets `state_in` to the start of a fresh encoding.
+  state_in->stepcount = 0;
+  state_in->result = 0;
+  state_in->step = step_A;
+}
+
+char base64_encode_value(char value_in) {
+  // Maps a 6-bit value to its base64 character; values above 63 map to '='.
+  static const char* const alphabet =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  return value_in > 63 ? '=' : alphabet[static_cast<int>(value_in)];
+}
+
+int base64_encode_block(const char* plaintext_in, int length_in,
+                        char* code_out, base64_encodestate* state_in, bool breakLines) {
+  // Encodes `length_in` bytes from `plaintext_in` into `code_out` and returns the number of
+  // characters written. Bits that do not yet fill an output character are kept in `state_in`, so
+  // encoding can continue with another call or be finished with base64_encode_blockend().
+  //
+  // If `breakLines` is true, a newline is written after every CHARS_PER_LINE output characters.
+  const char* src = plaintext_in;
+  const char* const srcEnd = plaintext_in + length_in;
+  char* dst = code_out;
+  char carry = state_in->result;
+  base64_encodestep step = state_in->step;
+
+  while (src != srcEnd) {
+    const char fragment = *src++;
+    switch (step) {
+      case step_A:
+        // First byte of a group: emit its top six bits, keep the low two.
+        *dst++ = base64_encode_value((fragment & 0x0fc) >> 2);
+        carry = (fragment & 0x003) << 4;
+        step = step_B;
+        break;
+
+      case step_B:
+        // Second byte: complete the pending character with its top four bits, keep the low four.
+        carry |= (fragment & 0x0f0) >> 4;
+        *dst++ = base64_encode_value(carry);
+        carry = (fragment & 0x00f) << 2;
+        step = step_C;
+        break;
+
+      case step_C:
+        // Third byte: complete the pending character, then emit the remaining six bits.
+        carry |= (fragment & 0x0c0) >> 6;
+        *dst++ = base64_encode_value(carry);
+        carry = (fragment & 0x03f) >> 0;
+        *dst++ = base64_encode_value(carry);
+
+        ++(state_in->stepcount);
+        if (breakLines && state_in->stepcount == CHARS_PER_LINE/4) {
+          *dst++ = '\n';
+          state_in->stepcount = 0;
+        }
+        step = step_A;
+        break;
+    }
+  }
+
+  state_in->result = carry;
+  state_in->step = step;
+  return dst - code_out;
+}
+
+int base64_encode_blockend(char* code_out, base64_encodestate* state_in, bool breakLines) {
+  // Flushes the bits pending in `state_in`, padding the final group with '=' to four characters,
+  // and returns the number of characters written. If `breakLines` is true and the current line
+  // is non-empty, a final newline is written.
+  char* out = code_out;
+
+  if (state_in->step == step_B) {
+    // One byte in the final group: one data character, two pad characters.
+    *out++ = base64_encode_value(state_in->result);
+    *out++ = '=';
+    *out++ = '=';
+    ++state_in->stepcount;
+  } else if (state_in->step == step_C) {
+    // Two bytes in the final group: one data character, one pad character.
+    *out++ = base64_encode_value(state_in->result);
+    *out++ = '=';
+    ++state_in->stepcount;
+  }
+  // step_A: the input ended on a group boundary, nothing is pending.
+
+  if (breakLines && state_in->stepcount > 0) {
+    *out++ = '\n';
+  }
+
+  return out - code_out;
+}
+
+}  // namespace
+
+String encodeBase64(ArrayPtr<const byte> input, bool breakLines) {
+  // Encodes `input` as base64 text, padded with '=' to a multiple of four characters. If
+  // `breakLines` is true, a newline follows every CHARS_PER_LINE characters and the final
+  // (possibly partial) line.
+
+  // Four output characters per group of three input bytes, rounding up.
+  size_t encodedSize = (input.size() + 2) / 3 * 4;
+  if (breakLines) {
+    // One newline per output line, counting a trailing partial line.
+    uint lines = encodedSize / CHARS_PER_LINE + (encodedSize % CHARS_PER_LINE > 0 ? 1 : 0);
+    encodedSize += lines;
+  }
+
+  auto output = heapString(encodedSize);
+  char* cursor = output.begin();
+  size_t total = 0;
+
+  base64_encodestate state;
+  base64_init_encodestate(&state);
+
+  // Encode the whole input in one go...
+  int written = base64_encode_block(
+      (const char *)input.begin(), input.size(), cursor, &state, breakLines);
+  cursor += written;
+  total += written;
+
+  // ...then flush the leftover bits and padding.
+  written = base64_encode_blockend(cursor, &state, breakLines);
+  total += written;
+
+  // The size computed above must match what the encoder actually produced.
+  KJ_ASSERT(total == output.size(), total, output.size());
+
+  return output;
+}
+
+// -------------------------------------------------------------------
+// Decoder
+
+namespace {
+
+enum base64_decodestep {
+  step_a, step_b, step_c, step_d
+};
+// Which of the four characters of a base64 group the decoder expects next.
+
+struct base64_decodestate {
+  base64_decodestep step;
+  // Where decoding resumes on the next call.
+  char plainchar;
+  // Partially assembled output byte carried over between calls.
+};
+
+int base64_decode_value(char value_in) {
+  // Maps a base64 character to its 6-bit value. Returns -2 for the padding character '=' and -1
+  // for every other character outside the base64 alphabet.
+  //
+  // The table covers the 80 characters from '+' (43) through 'z' (122). It is `signed char` so
+  // that the negative markers keep their value on platforms where plain `char` is unsigned.
+  static const signed char decoding[] = {
+    62,-1,-1,-1,63,52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-2,-1,-1,-1,
+    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,-1,
+    26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51};
+
+  // Compute the index in `int` so that neither bytes >= 0x80 nor the subtraction can wrap, and
+  // reject index == table size as well ('{' would otherwise read one past the end).
+  const int index = static_cast<unsigned char>(value_in) - 43;
+  if (index < 0 || index >= static_cast<int>(sizeof(decoding))) return -1;
+  return decoding[index];
+}
+
+void base64_init_decodestate(base64_decodestate* state_in) {
+  // Resets `state_in` to the start of a fresh decoding.
+  state_in->step = step_a;
+  state_in->plainchar = 0;
+}
+
+int base64_decode_block(const char* code_in, const int length_in,
+                        char* plaintext_out, base64_decodestate* state_in) {
+  // Decodes `length_in` characters from `code_in` into `plaintext_out` and returns the number of
+  // complete bytes produced. Characters for which base64_decode_value() returns a negative value
+  // (padding and anything outside the alphabet) are skipped. A partially assembled byte is saved
+  // in `state_in` so that decoding can continue with another call.
+  //
+  // `plaintext_out` needs room for every byte that the input starts, including a trailing
+  // partial one: ceil(length_in * 6 / 8) bytes is sufficient.
+  const char* codechar = code_in;
+  const char* const codeend = code_in + length_in;
+  char* plainchar = plaintext_out;
+  int fragment;  // `int`, so the "< 0" tests work regardless of the signedness of `char`
+
+  // Restore the partial byte only if one is actually pending. At step_a nothing is pending, and
+  // the output buffer may legitimately be empty.
+  if (state_in->step != step_a) {
+    *plainchar = state_in->plainchar;
+  }
+
+  // The switch jumps into the middle of the loop so that decoding resumes at the step where the
+  // previous call ran out of input.
+  switch (state_in->step)
+  {
+    while (1)
+    {
+  case step_a:
+      do {
+        if (codechar == codeend) {
+          state_in->step = step_a;
+          // No partial byte exists here; `plainchar` may point one past the end of the output,
+          // so it must not be read.
+          state_in->plainchar = 0;
+          return plainchar - plaintext_out;
+        }
+        fragment = base64_decode_value(*codechar++);
+      } while (fragment < 0);
+      *plainchar    = (fragment & 0x03f) << 2;
+  case step_b:
+      do {
+        if (codechar == codeend) {
+          state_in->step = step_b;
+          state_in->plainchar = *plainchar;
+          return plainchar - plaintext_out;
+        }
+        fragment = base64_decode_value(*codechar++);
+      } while (fragment < 0);
+      *plainchar++ |= (fragment & 0x030) >> 4;
+      *plainchar    = (fragment & 0x00f) << 4;
+  case step_c:
+      do {
+        if (codechar == codeend) {
+          state_in->step = step_c;
+          state_in->plainchar = *plainchar;
+          return plainchar - plaintext_out;
+        }
+        fragment = base64_decode_value(*codechar++);
+      } while (fragment < 0);
+      *plainchar++ |= (fragment & 0x03c) >> 2;
+      *plainchar    = (fragment & 0x003) << 6;
+  case step_d:
+      do {
+        if (codechar == codeend) {
+          state_in->step = step_d;
+          state_in->plainchar = *plainchar;
+          return plainchar - plaintext_out;
+        }
+        fragment = base64_decode_value(*codechar++);
+      } while (fragment < 0);
+      *plainchar++   |= (fragment & 0x03f);
+    }
+  }
+  /* control should not reach here */
+  return plainchar - plaintext_out;
+}
+
+}  // namespace
+
+Array<byte> decodeBase64(ArrayPtr<const char> input) {
+  // Decodes base64 text into bytes. Characters the block decoder does not recognize, and padding,
+  // are skipped, so the result may be shorter than the input length suggests.
+  if (input.size() == 0) {
+    // Nothing to decode. Returning here also means the block decoder is never handed a
+    // zero-length output buffer.
+    return heapArray<byte>(0);
+  }
+
+  base64_decodestate state;
+  base64_init_decodestate(&state);
+
+  // Upper bound on the output size: six bits per input character, rounded up to whole bytes.
+  auto output = heapArray<byte>((input.size() * 6 + 7) / 8);
+
+  size_t n = base64_decode_block(input.begin(), input.size(),
+      reinterpret_cast<char*>(output.begin()), &state);
+
+  if (n < output.size()) {
+    // Fewer bytes were produced than reserved (padding, skipped characters or a trailing partial
+    // byte): return an array of exactly the decoded size.
+    auto copy = heapArray<byte>(n);
+    memcpy(copy.begin(), output.begin(), n);
+    output = kj::mv(copy);
+  }
+
+  return output;
+}
+
+} // namespace kj
--- a/c++/src/kj/encoding.h
+++ b/c++/src/kj/encoding.h
+// Copyright (c) 2017 Cloudflare, Inc. and contributors
+// Licensed under the MIT License:
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef KJ_ENCODING_H_
+#define KJ_ENCODING_H_
+// Functions for encoding/decoding bytes and text in common formats, including:
+// - UTF-{8,16,32}
+// - Hex
+// - URI encoding
+// - Base64
+
+#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
+#pragma GCC system_header
+#endif
+
+#include "string.h"
+
+namespace kj {
+
+template <typename ResultType>
+struct EncodingResult: public ResultType {
+  // Equivalent to ResultType (a String, byte array or wide-char array) for all intents and
+  // purposes, except that the bool `hadErrors` can be inspected to see if any errors were
+  // encountered in the input.
+  // Each encoding/decoding function that returns this type will "work around" errors in some way,
+  // so an application doesn't strictly have to check for errors. E.g. the Unicode functions
+  // replace errors with U+FFFD in the output.
+  //
+  // Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult<T>
+  // exactly as if it were a Maybe<T> that is null in case of errors.
+
+  inline EncodingResult(ResultType&& result, bool hadErrors)
+      : ResultType(kj::mv(result)), hadErrors(hadErrors) {}
+  // Takes over `result` by move and records whether errors were seen while producing it.
+
+  const bool hadErrors;
+  // True if errors were encountered in the input. Fixed at construction.
+};
+
+EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
+// Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32.
+//
+// If `nulTerminate` is true, an extra NUL character will be added to the end of the output.
+//
+// Invalid input does not make these functions fail: the returned data contains the Unicode
+// replacement character (U+FFFD) in its place, and `hadErrors` on the result reports that this
+// happened.
+//
+// The returned arrays are in platform-native endianness (otherwise they wouldn't really be
+// char16_t / char32_t).
+
+EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
+EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
+// Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use).
+//
+// The input should NOT include a NUL terminator; any NUL characters in the input array will be
+// preserved in the output.
+//
+// Invalid input does not make these functions fail: the returned data contains the Unicode
+// replacement character (U+FFFD) in its place, and `hadErrors` on the result reports that this
+// happened.
+//
+// The input must be in platform-native endianness. BOMs are NOT recognized by these functions.
+
+String encodeHex(ArrayPtr<const byte> bytes);
+EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
+// Encode/decode bytes as hex strings.
+//
+// encodeHex() writes two lowercase digits per byte. decodeHex() accepts digits of either case;
+// `hadErrors` is set on its result if the text has odd length (the unpaired last character is
+// ignored) or contains a character that is not a hex digit (decoded as zero bits).
+
+String encodeUriComponent(ArrayPtr<const byte> bytes);
+String encodeUriComponent(ArrayPtr<const char> bytes);
+EncodingResult<Array<byte>> decodeBinaryUriComponent(
+    ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
+// Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().
+//
+// decodeBinaryUriComponent() returns the decoded raw bytes, with a zero byte appended if
+// `nulTerminate` is true; decodeUriComponent() returns the same bytes as a String. A '%' that is
+// not followed by two hex digits sets `hadErrors` on the result.
+
+String encodeCEscape(ArrayPtr<const byte> bytes);
+String encodeCEscape(ArrayPtr<const char> bytes);
+EncodingResult<Array<byte>> decodeBinaryCEscape(
+    ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<String> decodeCEscape(ArrayPtr<const char> text);
+// Encode/decode bytes using C-style backslash escapes.
+//
+// The decoders understand the single-character escapes (\a \b \f \n \r \t \v \' \" \\), octal
+// escapes of up to three digits, \x hex escapes, and \uXXXX / \UXXXXXXXX escapes (emitted as
+// UTF-8). decodeBinaryCEscape() returns raw bytes, with a zero byte appended if `nulTerminate` is
+// true; decodeCEscape() returns the same bytes as a String. Malformed escapes set `hadErrors` on
+// the result.
+
+String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
+// Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
+// into the output every 72 characters (e.g. for encoding e-mail bodies).
+
+Array<byte> decodeBase64(ArrayPtr<const char> text);
+// Decode base64 text. Non-base64 characters are ignored and padding characters are not required;
+// as such, this function never fails.
+
+// =======================================================================================
+// inline implementation details
+
+namespace _ {  // private
+
+// readMaybe() overloads for EncodingResult<T>: each yields null when the result had errors and
+// the wrapped value otherwise.
+
+template <typename T>
+NullableValue<T> readMaybe(EncodingResult<T>&& value) {
+  // Rvalue form: moves the value out.
+  if (!value.hadErrors) {
+    return kj::mv(value);
+  }
+  return nullptr;
+}
+
+template <typename T>
+T* readMaybe(EncodingResult<T>& value) {
+  // Mutable lvalue form: points at the value in place.
+  return value.hadErrors ? nullptr : &value;
+}
+
+template <typename T>
+const T* readMaybe(const EncodingResult<T>& value) {
+  // Const lvalue form: points at the value in place.
+  return value.hadErrors ? nullptr : &value;
+}
+
+}  // namespace _ (private)
+
+inline String encodeUriComponent(ArrayPtr<const char> text) {
+  // Text overload: percent-encodes the characters of `text` as raw bytes.
+  ArrayPtr<const byte> raw = text.asBytes();
+  return encodeUriComponent(raw);
+}
+inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
+  // Decodes to NUL-terminated bytes, then hands the buffer over to a String, preserving the
+  // error flag.
+  auto decoded = decodeBinaryUriComponent(text, true);
+  const bool failed = decoded.hadErrors;
+  return { String(decoded.releaseAsChars()), failed };
+}
+
+inline String encodeCEscape(ArrayPtr<const char> text) {
+  // Text overload: escapes the characters of `text` as raw bytes.
+  ArrayPtr<const byte> raw = text.asBytes();
+  return encodeCEscape(raw);
+}
+inline EncodingResult<String> decodeCEscape(ArrayPtr<const char> text) {
+  // Decodes to NUL-terminated bytes, then hands the buffer over to a String, preserving the
+  // error flag.
+  auto decoded = decodeBinaryCEscape(text, true);
+  const bool failed = decoded.hadErrors;
+  return { String(decoded.releaseAsChars()), failed };
+}
+
+// If you pass a string literal to a function taking ArrayPtr<const char>, it'll include the NUL
+// terminator, which is surprising. Let's add overloads that avoid that. In practice this probably
+// only matters for encoding-test.c++.
+
+template <size_t s>
+inline EncodingResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate=false) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return encodeUtf16(chars, nulTerminate);
+}
+template <size_t s>
+inline EncodingResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate=false) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return encodeUtf32(chars, nulTerminate);
+}
+template <size_t s>
+inline EncodingResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
+  // Array overload meant for u"" literals: the final element (the literal's NUL terminator) is
+  // not treated as input.
+  auto units = arrayPtr(utf16, s - 1);
+  return decodeUtf16(units);
+}
+template <size_t s>
+inline EncodingResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
+  // Array overload meant for U"" literals: the final element (the literal's NUL terminator) is
+  // not treated as input.
+  auto units = arrayPtr(utf32, s - 1);
+  return decodeUtf32(units);
+}
+template <size_t s>
+inline EncodingResult<Array<byte>> decodeHex(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return decodeHex(chars);
+}
+template <size_t s>
+inline String encodeUriComponent(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return encodeUriComponent(chars);
+}
+template <size_t s>
+inline EncodingResult<Array<byte>> decodeBinaryUriComponent(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  //
+  // Returns the full EncodingResult, like the other literal overloads, so that `hadErrors` is
+  // not sliced away. The result still converts to a plain Array<byte> where one is expected.
+  return decodeBinaryUriComponent(arrayPtr(text, s - 1));
+}
+template <size_t s>
+inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return decodeUriComponent(chars);
+}
+template <size_t s>
+inline String encodeCEscape(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return encodeCEscape(chars);
+}
+template <size_t s>
+inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return decodeBinaryCEscape(chars);
+}
+template <size_t s>
+inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return decodeCEscape(chars);
+}
+template <size_t s>
+Array<byte> decodeBase64(const char (&text)[s]) {
+  // Char-array overload meant for string literals: the final element (the literal's NUL
+  // terminator) is not treated as input.
+  auto chars = arrayPtr(text, s - 1);
+  return decodeBase64(chars);
+}
+
+} // namespace kj
+
+#endif // KJ_ENCODING_H_