Improve KJ encoding lib error handling:

- Rename UtfResult -> EncodingResult
- Make it usable like a Maybe, so that we don't need separate "try" functions.
- Check errors in hex decoding and URI decoding.

Improve KJ encoding lib error handling:
- Rename UtfResult -> EncodingResult
- Make it usable like a Maybe, so that we don't need separate "try" functions.
- Check errors in hex decoding and URI decoding.
df52bf86 · Kenton Varda · 03800dfa · df52bf86 · df52bf86 · df52bf86
Commit df52bf86 authored May 23, 2017 by Kenton Varda
Showing with 131 additions and 139 deletions

common.h c++/src/kj/common.h +0 -1

encoding-test.c++ c++/src/kj/encoding-test.c++ +0 -0

encoding.c++ c++/src/kj/encoding.c++ +63 -86

encoding.h c++/src/kj/encoding.h +68 -52

No files found.
--- a/c++/src/kj/common.h
+++ b/c++/src/kj/common.h
@@ -915,7 +915,6 @@ public:
    return value;
  }

-private:  // internal interface used by friends only
  inline NullableValue() noexcept: isSet(false) {}
  inline NullableValue(T&& t) noexcept(noexcept(T(instance<T&&>())))
      : isSet(true) {

--- a/c++/src/kj/encoding-test.c++
+++ b/c++/src/kj/encoding-test.c++
--- a/c++/src/kj/encoding.c++
+++ b/c++/src/kj/encoding.c++
@@ -41,7 +41,7 @@ inline void addChar32(Vector<char32_t>& vec, char32_t u) {
 }

 template <typename T>
-UtfResult<Array<T>> encodeUtf(ArrayPtr<const char> text, bool nulTerminate) {
+EncodingResult<Array<T>> encodeUtf(ArrayPtr<const char> text, bool nulTerminate) {
  Vector<T> result(text.size() + nulTerminate);
  bool hadErrors = false;

@@ -125,33 +125,15 @@ UtfResult<Array<T>> encodeUtf(ArrayPtr<const char> text, bool nulTerminate) {

 }  // namespace

-UtfResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate) {
+EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate) {
  return encodeUtf<char16_t>(text, nulTerminate);
 }

-UtfResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate) {
+EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate) {
  return encodeUtf<char32_t>(text, nulTerminate);
 }

-Maybe<Array<char16_t>> tryEncodeUtf16(ArrayPtr<const char> text, bool nulTerminate) {
-  auto result = encodeUtf16(text, nulTerminate);
-  if (result.hadErrors) {
-    return nullptr;
-  } else {
-    return kj::mv(result);
-  }
-}
-
-Maybe<Array<char32_t>> tryEncodeUtf32(ArrayPtr<const char> text, bool nulTerminate) {
-  auto result = encodeUtf32(text, nulTerminate);
-  if (result.hadErrors) {
-    return nullptr;
-  } else {
-    return kj::mv(result);
-  }
-}
-
-UtfResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16) {
+EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16) {
  Vector<char> result(utf16.size() + 1);
  bool hadErrors = false;

@@ -202,7 +184,7 @@ UtfResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16) {
  return { String(result.releaseAsArray()), hadErrors };
 }

-UtfResult<String> decodeUtf32(ArrayPtr<const char32_t> utf16) {
+EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf16) {
  Vector<char> result(utf16.size() + 1);
  bool hadErrors = false;

@@ -247,28 +229,33 @@ UtfResult<String> decodeUtf32(ArrayPtr<const char32_t> utf16) {
  return { String(result.releaseAsArray()), hadErrors };
 }

-Maybe<String> tryDecodeUtf16(ArrayPtr<const char16_t> utf16) {
-  auto result = decodeUtf16(utf16);
-  if (result.hadErrors) {
-    return nullptr;
+// =======================================================================================
+
+namespace {
+
+const char HEX_DIGITS[] = "0123456789abcdef";
+
+static Maybe<uint> tryFromHexDigit(char c) {
+  if ('0' <= c && c <= '9') {
+    return c - '0';
+  } else if ('a' <= c && c <= 'f') {
+    return c - ('a' - 10);
+  } else if ('A' <= c && c <= 'F') {
+    return c - ('A' - 10);
  } else {
-    return kj::mv(result);
+    return nullptr;
  }
 }
-Maybe<String> tryDecodeUtf32(ArrayPtr<const char32_t> utf32) {
-  auto result = decodeUtf32(utf32);
-  if (result.hadErrors) {
-    return nullptr;
+
+static Maybe<uint> tryFromOctDigit(char c) {
+  if ('0' <= c && c <= '7') {
+    return c - '0';
  } else {
-    return kj::mv(result);
+    return nullptr;
  }
 }

-// =======================================================================================
-
-namespace {
-  const char HEX_DIGITS[] = "0123456789abcdef";
-}
+}  // namespace

 String encodeHex(ArrayPtr<const byte> input) {
  return strArray(KJ_MAP(b, input) {
@@ -276,27 +263,26 @@ String encodeHex(ArrayPtr<const byte> input) {
  }, "");
 }

-static uint fromDigit(char c) {
-  if ('0' <= c && c <= '9') {
-    return c - '0';
-  } else if ('a' <= c && c <= 'z') {
-    return c - ('a' - 10);
-  } else if ('A' <= c && c <= 'Z') {
-    return c - ('A' - 10);
-  } else {
-    return 0;
-  }
-}
-
-Array<byte> decodeHex(ArrayPtr<const char> text) {
+EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text) {
  auto result = heapArray<byte>(text.size() / 2);
+  bool hadErrors = text.size() % 2;

  for (auto i: kj::indices(result)) {
-    result[i] = (fromDigit(text[i*2]) << 4)
-              | (fromDigit(text[i*2+1]));
+    byte b = 0;
+    KJ_IF_MAYBE(d1, tryFromHexDigit(text[i*2])) {
+      b = *d1 << 4;
+    } else {
+      hadErrors = true;
+    }
+    KJ_IF_MAYBE(d2, tryFromHexDigit(text[i*2+1])) {
+      b |= *d2;
+    } else {
+      hadErrors = true;
+    }
+    result[i] = b;
  }

-  return result;
+  return { kj::mv(result), hadErrors };
 }

 String encodeUriComponent(ArrayPtr<const byte> bytes) {
@@ -316,26 +302,41 @@ String encodeUriComponent(ArrayPtr<const byte> bytes) {
  return String(result.releaseAsArray());
 }

-Array<byte> decodeBinaryUriComponent(ArrayPtr<const char> text, bool nulTerminate) {
+EncodingResult<Array<byte>> decodeBinaryUriComponent(
+    ArrayPtr<const char> text, bool nulTerminate) {
  Vector<byte> result(text.size() + nulTerminate);
+  bool hadErrors = false;

  const char* ptr = text.begin();
  const char* end = text.end();
  while (ptr < end) {
    if (*ptr == '%') {
      ++ptr;
-      if (ptr == end) break;
-      byte b = fromDigit(*ptr++) << 4;
-      if (ptr == end) break;
-      b |= fromDigit(*ptr++);
-      result.add(b);
+
+      if (ptr == end) {
+        hadErrors = true;
+      } else KJ_IF_MAYBE(d1, tryFromHexDigit(*ptr)) {
+        byte b = *d1;
+        ++ptr;
+        if (ptr == end) {
+          hadErrors = true;
+        } else KJ_IF_MAYBE(d2, tryFromHexDigit(*ptr)) {
+          b = (b << 4) | *d2;
+          ++ptr;
+        } else {
+          hadErrors = true;
+        }
+        result.add(b);
+      } else {
+        hadErrors = true;
+      }
    } else {
      result.add(*ptr++);
    }
  }

  if (nulTerminate) result.add(0);
-  return result.releaseAsArray();
+  return { result.releaseAsArray(), hadErrors };
 }

 // =======================================================================================
@@ -374,31 +375,7 @@ String encodeCEscape(ArrayPtr<const byte> bytes) {
  return String(escaped.releaseAsArray());
 }

-namespace {
-
-static Maybe<uint> tryFromHexDigit(char c) {
-  if ('0' <= c && c <= '9') {
-    return c - '0';
-  } else if ('a' <= c && c <= 'f') {
-    return c - ('a' - 10);
-  } else if ('A' <= c && c <= 'F') {
-    return c - ('A' - 10);
-  } else {
-    return nullptr;
-  }
-}
-
-static Maybe<uint> tryFromOctDigit(char c) {
-  if ('0' <= c && c <= '7') {
-    return c - '0';
-  } else {
-    return nullptr;
-  }
-}
-
-}  // namespace
-
-UtfResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate) {
+EncodingResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate) {
  Vector<byte> result(text.size() + nulTerminate);
  bool hadErrors = false;


--- a/c++/src/kj/encoding.h
+++ b/c++/src/kj/encoding.h
@@ -36,24 +36,24 @@
 namespace kj {

 template <typename ResultType>
-struct UtfResult: public ResultType {
+struct EncodingResult: public ResultType {
  // Equivalent to ResultType (a String or wide-char array) for all intents and purposes, except
-  // that the bool `hadErrors` can be inspected to see if any errors were encountered in the input,
-  // resulting in instances of the replacement character (U+FFFD) in the output.
-
-  inline UtfResult(ResultType&& result, bool hadErrors)
+  // that the bool `hadErrors` can be inspected to see if any errors were encountered in the input.
+  // Each encoding/decoding function that returns this type will "work around" errors in some way,
+  // so an application doesn't strictly have to check for errors. E.g. the Unicode functions
+  // replace errors with U+FFFD in the output.
+  //
+  // Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult<T>
+  // exactly as if it were a Maybe<T> that is null in case of errors.
+
+  inline EncodingResult(ResultType&& result, bool hadErrors)
      : ResultType(kj::mv(result)), hadErrors(hadErrors) {}

  const bool hadErrors;
-  // If true, then invalid sequences were detected in the input and were replaced with the Unicode
-  // replacement character (U+FFFD) in the output. Many applications will chose to ignore this
-  // boolean and continue on with the damaged data.
 };

-UtfResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
-UtfResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
-Maybe<Array<char16_t>> tryEncodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
-Maybe<Array<char32_t>> tryEncodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
 // Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32.
 //
 // If `nulTerminate` is true, an extra NUL character will be added to the end of the output.
@@ -64,10 +64,8 @@ Maybe<Array<char32_t>> tryEncodeUtf32(ArrayPtr<const char> text, bool nulTermina
 // The returned arrays are in platform-native endianness (otherwise they wouldn't really be
 // char16_t / char32_t).

-UtfResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
-UtfResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
-Maybe<String> tryDecodeUtf16(ArrayPtr<const char16_t> utf16);
-Maybe<String> tryDecodeUtf32(ArrayPtr<const char32_t> utf32);
+EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
+EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
 // Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use).
 //
 // The input should NOT include a NUL terminator; any NUL characters in the input array will be
@@ -79,41 +77,76 @@ Maybe<String> tryDecodeUtf32(ArrayPtr<const char32_t> utf32);
 // The input must be in platform-native endianness. BOMs are NOT recognized by these functions.

 String encodeHex(ArrayPtr<const byte> bytes);
-Array<byte> decodeHex(ArrayPtr<const char> text);
+EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
 // Encode/decode bytes as hex strings.

 String encodeUriComponent(ArrayPtr<const byte> bytes);
 String encodeUriComponent(ArrayPtr<const char> bytes);
-Array<byte> decodeBinaryUriComponent(ArrayPtr<const char> text, bool nulTerminate = false);
-String decodeUriComponent(ArrayPtr<const char> text);
+EncodingResult<Array<byte>> decodeBinaryUriComponent(
+    ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
 // Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().

 String encodeCEscape(ArrayPtr<const byte> bytes);
 String encodeCEscape(ArrayPtr<const char> bytes);
-UtfResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate = false);
-UtfResult<String> decodeCEscape(ArrayPtr<const char> text);
+EncodingResult<Array<byte>> decodeBinaryCEscape(
+    ArrayPtr<const char> text, bool nulTerminate = false);
+EncodingResult<String> decodeCEscape(ArrayPtr<const char> text);

 String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
 // Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
 // into the output every 72 characters (e.g. for encoding e-mail bodies).

 Array<byte> decodeBase64(ArrayPtr<const char> text);
-// Decode base64 text. Non-base64 characters are ignored.
+// Decode base64 text. Non-base64 characters are ignored and padding characters are not required;
+// as such, this function never fails.

 // =======================================================================================
 // inline implementation details

+namespace _ {  // private
+
+template <typename T>
+NullableValue<T> readMaybe(EncodingResult<T>&& value) {
+  if (value.hadErrors) {
+    return nullptr;
+  } else {
+    return kj::mv(value);
+  }
+}
+
+template <typename T>
+T* readMaybe(EncodingResult<T>& value) {
+  if (value.hadErrors) {
+    return nullptr;
+  } else {
+    return &value;
+  }
+}
+
+template <typename T>
+const T* readMaybe(const EncodingResult<T>& value) {
+  if (value.hadErrors) {
+    return nullptr;
+  } else {
+    return &value;
+  }
+}
+
+}  // namespace _ (private)
+
 inline String encodeUriComponent(ArrayPtr<const char> text) {
  return encodeUriComponent(text.asBytes());
 }
-inline String decodeUriComponent(ArrayPtr<const char> text) {
-  return String(decodeBinaryUriComponent(text, true).releaseAsChars());
+inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
+  auto result = decodeBinaryUriComponent(text, true);
+  return { String(result.releaseAsChars()), result.hadErrors };
 }

 inline String encodeCEscape(ArrayPtr<const char> text) {
  return encodeCEscape(text.asBytes());
 }
-inline UtfResult<String> decodeCEscape(ArrayPtr<const char> text) {
+inline EncodingResult<String> decodeCEscape(ArrayPtr<const char> text) {
  auto result = decodeBinaryCEscape(text, true);
  return { String(result.releaseAsChars()), result.hadErrors };
 }
@@ -123,39 +156,23 @@ inline UtfResult<String> decodeCEscape(ArrayPtr<const char> text) {
 // only even matters for encoding-test.c++.

 template <size_t s>
-inline UtfResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate = false) {
+inline EncodingResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate=false) {
  return encodeUtf16(arrayPtr(text, s - 1), nulTerminate);
 }
 template <size_t s>
-inline UtfResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate = false) {
+inline EncodingResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate=false) {
  return encodeUtf32(arrayPtr(text, s - 1), nulTerminate);
 }
 template <size_t s>
-inline Maybe<Array<char16_t>> tryEncodeUtf16(const char (&text)[s], bool nulTerminate = false) {
-  return tryEncodeUtf16(arrayPtr(text, s - 1), nulTerminate);
-}
-template <size_t s>
-inline Maybe<Array<char32_t>> tryEncodeUtf32(const char (&text)[s], bool nulTerminate = false) {
-  return tryEncodeUtf32(arrayPtr(text, s - 1), nulTerminate);
-}
-template <size_t s>
-inline UtfResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
+inline EncodingResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
  return decodeUtf16(arrayPtr(utf16, s - 1));
 }
 template <size_t s>
-inline UtfResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
+inline EncodingResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
  return decodeUtf32(arrayPtr(utf32, s - 1));
 }
 template <size_t s>
-inline Maybe<String> tryDecodeUtf16(const char16_t (&utf16)[s]) {
-  return tryDecodeUtf16(arrayPtr(utf16, s - 1));
-}
-template <size_t s>
-inline Maybe<String> tryDecodeUtf32(const char32_t (&utf32)[s]) {
-  return tryDecodeUtf32(arrayPtr(utf32, s - 1));
-}
-template <size_t s>
-inline Array<byte> decodeHex(const char (&text)[s]) {
+inline EncodingResult<Array<byte>> decodeHex(const char (&text)[s]) {
  return decodeHex(arrayPtr(text, s - 1));
 }
 template <size_t s>
@@ -167,21 +184,20 @@ inline Array<byte> decodeBinaryUriComponent(const char (&text)[s]) {
  return decodeBinaryUriComponent(arrayPtr(text, s - 1));
 }
 template <size_t s>
-inline String decodeUriComponent(const char (&text)[s]) {
-  return String(decodeBinaryUriComponent(arrayPtr(text, s - 1), true).releaseAsChars());
+inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
+  return decodeUriComponent(arrayPtr(text, s-1));
 }
 template <size_t s>
 inline String encodeCEscape(const char (&text)[s]) {
  return encodeCEscape(arrayPtr(text, s - 1));
 }
 template <size_t s>
-inline UtfResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
+inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
  return decodeBinaryCEscape(arrayPtr(text, s - 1));
 }
 template <size_t s>
-inline UtfResult<String> decodeCEscape(const char (&text)[s]) {
-  auto result = decodeBinaryCEscape(arrayPtr(text, s - 1), true);
-  return { String(result.releaseAsChars()), result.hadErrors };
+inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
+  return decodeCEscape(arrayPtr(text, s-1));
 }
 template <size_t s>
 Array<byte> decodeBase64(const char (&text)[s]) {