Commit c45bd150 authored by Kenton Varda's avatar Kenton Varda Committed by GitHub

Merge pull request #483 from sandstorm-io/kj-encoding

Add KJ utility functions to encode/decode blobs in common formats.
parents 35ffb4cd 745049be
...@@ -128,6 +128,7 @@ includekj_HEADERS = \ ...@@ -128,6 +128,7 @@ includekj_HEADERS = \
src/kj/vector.h \ src/kj/vector.h \
src/kj/string.h \ src/kj/string.h \
src/kj/string-tree.h \ src/kj/string-tree.h \
src/kj/encoding.h \
src/kj/exception.h \ src/kj/exception.h \
src/kj/debug.h \ src/kj/debug.h \
src/kj/arena.h \ src/kj/arena.h \
...@@ -218,6 +219,7 @@ libkj_la_SOURCES= \ ...@@ -218,6 +219,7 @@ libkj_la_SOURCES= \
src/kj/array.c++ \ src/kj/array.c++ \
src/kj/string.c++ \ src/kj/string.c++ \
src/kj/string-tree.c++ \ src/kj/string-tree.c++ \
src/kj/encoding.c++ \
src/kj/exception.c++ \ src/kj/exception.c++ \
src/kj/debug.c++ \ src/kj/debug.c++ \
src/kj/arena.c++ \ src/kj/arena.c++ \
...@@ -451,6 +453,7 @@ capnp_test_SOURCES = \ ...@@ -451,6 +453,7 @@ capnp_test_SOURCES = \
src/kj/array-test.c++ \ src/kj/array-test.c++ \
src/kj/string-test.c++ \ src/kj/string-test.c++ \
src/kj/string-tree-test.c++ \ src/kj/string-tree-test.c++ \
src/kj/encoding-test.c++ \
src/kj/exception-test.c++ \ src/kj/exception-test.c++ \
src/kj/debug-test.c++ \ src/kj/debug-test.c++ \
src/kj/arena-test.c++ \ src/kj/arena-test.c++ \
......
...@@ -193,6 +193,20 @@ public: ...@@ -193,6 +193,20 @@ public:
void addFieldHandler(StructSchema::Field field, Handler<T>& handler); void addFieldHandler(StructSchema::Field field, Handler<T>& handler);
// Matches only the specific field. T can be a dynamic type. T must match the field's type. // Matches only the specific field. T can be a dynamic type. T must match the field's type.
// ---------------------------------------------------------------------------
// Hack to support string literal parameters
// String-literal convenience overload of decode(). `input` is bound as a reference to a char
// array of `size` elements, and the call is forwarded with an ArrayPtr covering only the first
// `size - 1` of them, i.e. without the array's final element (presumably the literal's NUL
// terminator). Any additional arguments are passed along through kj::fwd.
template <size_t size, typename... Params>
auto decode(const char (&input)[size], Params&&... params) const
-> decltype(decode(kj::arrayPtr(input, size), kj::fwd<Params>(params)...)) {
// Note: the decltype above passes `size` rather than `size - 1`. It only names the return type
// of the forwarded call, so that length is never evaluated.
return decode(kj::arrayPtr(input, size - 1), kj::fwd<Params>(params)...);
}
// String-literal convenience overload of decodeRaw(). `input` is bound as a reference to a char
// array of `size` elements, and the call is forwarded with an ArrayPtr covering only the first
// `size - 1` of them, i.e. without the array's final element (presumably the literal's NUL
// terminator). Any additional arguments are passed along through kj::fwd.
template <size_t size, typename... Params>
auto decodeRaw(const char (&input)[size], Params&&... params) const
-> decltype(decodeRaw(kj::arrayPtr(input, size), kj::fwd<Params>(params)...)) {
// Note: the decltype above passes `size` rather than `size - 1`. It only names the return type
// of the forwarded call, so that length is never evaluated.
return decodeRaw(kj::arrayPtr(input, size - 1), kj::fwd<Params>(params)...);
}
private: private:
class HandlerBase; class HandlerBase;
struct Impl; struct Impl;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <capnp/serialize.h> #include <capnp/serialize.h>
#include <kj/debug.h> #include <kj/debug.h>
#include <kj/arena.h> #include <kj/arena.h>
#include <kj/encoding.h>
#include <set> #include <set>
#include <map> #include <map>
#include <stdlib.h> #include <stdlib.h>
...@@ -2408,36 +2409,7 @@ uint64_t NodeTranslator::compileParamList( ...@@ -2408,36 +2409,7 @@ uint64_t NodeTranslator::compileParamList(
static const char HEXDIGITS[] = "0123456789abcdef"; static const char HEXDIGITS[] = "0123456789abcdef";
static kj::StringTree stringLiteral(kj::StringPtr chars) { static kj::StringTree stringLiteral(kj::StringPtr chars) {
// TODO(cleanup): This code keeps coming up. Put somewhere common? return kj::strTree('"', kj::encodeCEscape(chars), '"');
kj::Vector<char> escaped(chars.size());
for (char c: chars) {
switch (c) {
case '\a': escaped.addAll(kj::StringPtr("\\a")); break;
case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
case '\v': escaped.addAll(kj::StringPtr("\\v")); break;
case '\'': escaped.addAll(kj::StringPtr("\\\'")); break;
case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
default:
if (c < 0x20) {
escaped.add('\\');
escaped.add('x');
uint8_t c2 = c;
escaped.add(HEXDIGITS[c2 / 16]);
escaped.add(HEXDIGITS[c2 % 16]);
} else {
escaped.add(c);
}
break;
}
}
return kj::strTree('"', escaped, '"');
} }
static kj::StringTree binaryLiteral(Data::Reader data) { static kj::StringTree binaryLiteral(Data::Reader data) {
......
...@@ -22,13 +22,12 @@ ...@@ -22,13 +22,12 @@
#include "dynamic.h" #include "dynamic.h"
#include <kj/debug.h> #include <kj/debug.h>
#include <kj/vector.h> #include <kj/vector.h>
#include <kj/encoding.h>
namespace capnp { namespace capnp {
namespace { namespace {
static const char HEXDIGITS[] = "0123456789abcdef";
enum PrintMode { enum PrintMode {
BARE, BARE,
// The value is planned to be printed on its own line, unless it is very short and contains // The value is planned to be printed on its own line, unless it is very short and contains
...@@ -150,34 +149,7 @@ static kj::StringTree print(const DynamicValue::Reader& value, ...@@ -150,34 +149,7 @@ static kj::StringTree print(const DynamicValue::Reader& value,
chars = value.as<Text>(); chars = value.as<Text>();
} }
kj::Vector<char> escaped(chars.size()); return kj::strTree('"', kj::encodeCEscape(chars), '"');
for (char c: chars) {
switch (c) {
case '\a': escaped.addAll(kj::StringPtr("\\a")); break;
case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
case '\v': escaped.addAll(kj::StringPtr("\\v")); break;
case '\'': escaped.addAll(kj::StringPtr("\\\'")); break;
case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
default:
if (c < 0x20) {
escaped.add('\\');
escaped.add('x');
uint8_t c2 = c;
escaped.add(HEXDIGITS[c2 / 16]);
escaped.add(HEXDIGITS[c2 % 16]);
} else {
escaped.add(c);
}
break;
}
}
return kj::strTree('"', escaped, '"');
} }
case DynamicValue::LIST: { case DynamicValue::LIST: {
auto listValue = value.as<DynamicList>(); auto listValue = value.as<DynamicList>();
......
...@@ -19,6 +19,7 @@ set(kj_sources_heavy ...@@ -19,6 +19,7 @@ set(kj_sources_heavy
units.c++ units.c++
refcount.c++ refcount.c++
string-tree.c++ string-tree.c++
encoding.c++
parse/char.c++ parse/char.c++
) )
if(NOT CAPNP_LITE) if(NOT CAPNP_LITE)
...@@ -36,6 +37,7 @@ set(kj_headers ...@@ -36,6 +37,7 @@ set(kj_headers
vector.h vector.h
string.h string.h
string-tree.h string-tree.h
encoding.h
exception.h exception.h
debug.h debug.h
arena.h arena.h
...@@ -170,6 +172,7 @@ if(BUILD_TESTING) ...@@ -170,6 +172,7 @@ if(BUILD_TESTING)
async-io-test.c++ async-io-test.c++
refcount-test.c++ refcount-test.c++
string-tree-test.c++ string-tree-test.c++
encoding-test.c++
arena-test.c++ arena-test.c++
units-test.c++ units-test.c++
tuple-test.c++ tuple-test.c++
......
...@@ -455,6 +455,10 @@ T refIfLvalue(T&&); ...@@ -455,6 +455,10 @@ T refIfLvalue(T&&);
// KJ_DECLTYPE_REF(i) i3(i); // i3 has type int&. // KJ_DECLTYPE_REF(i) i3(i); // i3 has type int&.
// KJ_DECLTYPE_REF(kj::mv(i)) i4(kj::mv(i)); // i4 has type int. // KJ_DECLTYPE_REF(kj::mv(i)) i4(kj::mv(i)); // i4 has type int.
// Compile-time type-equality trait. The primary template handles two different types; the
// partial specialization below is selected when both arguments are the same type.
template <typename T, typename U>
struct IsSameType_ {
  static constexpr bool value = false;
};

template <typename T>
struct IsSameType_<T, T> {
  static constexpr bool value = true;
};

template <typename T, typename U>
constexpr bool isSameType() {
  // Function-style accessor for IsSameType_<T, U>::value, usable in constant expressions.
  return IsSameType_<T, U>::value;
}
template <typename T> template <typename T>
struct CanConvert_ { struct CanConvert_ {
static int sfinae(T); static int sfinae(T);
...@@ -911,7 +915,6 @@ public: ...@@ -911,7 +915,6 @@ public:
return value; return value;
} }
private: // internal interface used by friends only
inline NullableValue() noexcept: isSet(false) {} inline NullableValue() noexcept: isSet(false) {}
inline NullableValue(T&& t) noexcept(noexcept(T(instance<T&&>()))) inline NullableValue(T&& t) noexcept(noexcept(T(instance<T&&>())))
: isSet(true) { : isSet(true) {
...@@ -1244,8 +1247,31 @@ public: ...@@ -1244,8 +1247,31 @@ public:
: ptr(init.begin()), size_(init.size()) {} : ptr(init.begin()), size_(init.size()) {}
template <size_t size> template <size_t size>
inline constexpr ArrayPtr(T (&native)[size]): ptr(native), size_(size) {} inline constexpr ArrayPtr(T (&native)[size]): ptr(native), size_(size) {
// Construct an ArrayPtr from a native C-style array. // Construct an ArrayPtr from a native C-style array.
//
// We disable this constructor for const char arrays because otherwise you would be able to
// implicitly convert a character literal to ArrayPtr<const char>, which sounds really great,
// except that the NUL terminator would be included, which probably isn't what you intended.
//
// TODO(someday): Maybe we should support character literals but explicitly chop off the NUL
// terminator. This could do the wrong thing if someone tries to construct an
// ArrayPtr<const char> from a non-NUL-terminated char array, but evidence suggests that all
// real use cases are in fact intending to remove the NUL terminator. It's convenient to be
// able to specify ArrayPtr<const char> as a parameter type and be able to accept strings
// as input in addition to arrays. Currently, you'll need overloading to support string
// literals in this case, but if you overload StringPtr, then you'll find that several
// conversions (e.g. from String and from a literal char array) become ambiguous! You end up
// having to overload for literal char arrays specifically which is cumbersome.
static_assert(!isSameType<T, const char>(),
"Can't implicitly convert literal char array to ArrayPtr because we don't know if "
"you meant to include the NUL terminator. We may change this in the future to "
"automatically drop the NUL terminator. For now, try explicitly converting to StringPtr, "
"which can in turn implicitly convert to ArrayPtr<const char>.");
static_assert(!isSameType<T, const char16_t>(), "see above");
static_assert(!isSameType<T, const char32_t>(), "see above");
}
inline operator ArrayPtr<const T>() const { inline operator ArrayPtr<const T>() const {
return ArrayPtr<const T>(ptr, size_); return ArrayPtr<const T>(ptr, size_);
......
This diff is collapsed.
This diff is collapsed.
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef KJ_ENCODING_H_
#define KJ_ENCODING_H_
// Functions for encoding/decoding bytes and text in common formats, including:
// - UTF-{8,16,32}
// - Hex
// - URI encoding
// - Base64
#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
#pragma GCC system_header
#endif
#include "string.h"
namespace kj {
template <typename ResultType>
struct EncodingResult: public ResultType {
  // A ResultType (a String or wide-char array) that additionally carries the flag `hadErrors`,
  // which reports whether any errors were encountered in the input. It can be used wherever a
  // plain ResultType is expected.
  //
  // Every encoding/decoding function returning this type "works around" errors in some way, so
  // an application doesn't strictly have to check the flag. E.g. the Unicode functions replace
  // errors with U+FFFD in the output.
  //
  // Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult<T>
  // exactly as if it were a Maybe<T> that is null in case of errors.

  const bool hadErrors;

  EncodingResult(ResultType&& value, bool errorsSeen)
      : ResultType(kj::mv(value)), hadErrors(errorsSeen) {}
  // Takes over `value` as the base-class part and records the error flag alongside it.
};
EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
// Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32.
//
// If `nulTerminate` is true, an extra NUL character will be added to the end of the output.
//
// Invalid input is replaced with the Unicode replacement character (U+FFFD) in the output, and
// `hadErrors` is set on the returned EncodingResult.
//
// The returned arrays are in platform-native endianness (otherwise they wouldn't really be
// char16_t / char32_t).
EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
// Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use).
//
// The input should NOT include a NUL terminator; any NUL characters in the input array will be
// preserved in the output.
//
// Invalid input is replaced with the Unicode replacement character (U+FFFD) in the output, and
// `hadErrors` is set on the returned EncodingResult.
//
// The input must be in platform-native endianness. BOMs are NOT recognized by these functions.
String encodeHex(ArrayPtr<const byte> bytes);
EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
// Encode/decode bytes as hex strings.
String encodeUriComponent(ArrayPtr<const byte> bytes);
String encodeUriComponent(ArrayPtr<const char> bytes);
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, bool nulTerminate = false);
EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().
String encodeCEscape(ArrayPtr<const byte> bytes);
String encodeCEscape(ArrayPtr<const char> bytes);
EncodingResult<Array<byte>> decodeBinaryCEscape(
ArrayPtr<const char> text, bool nulTerminate = false);
EncodingResult<String> decodeCEscape(ArrayPtr<const char> text);
String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
// Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
// into the output every 72 characters (e.g. for encoding e-mail bodies).
Array<byte> decodeBase64(ArrayPtr<const char> text);
// Decode base64 text. Non-base64 characters are ignored and padding characters are not required;
// as such, this function never fails.
// =======================================================================================
// inline implementation details
namespace _ { // private
template <typename T>
NullableValue<T> readMaybe(EncodingResult<T>&& value) {
  // Rvalue form: yields an empty NullableValue when the result had errors, otherwise moves the
  // result into the returned NullableValue. Presumably this is one of the hooks that lets
  // KJ_IF_MAYBE() and friends treat an EncodingResult like a Maybe.
  if (!value.hadErrors) {
    return kj::mv(value);
  }
  return nullptr;
}
template <typename T>
T* readMaybe(EncodingResult<T>& value) {
  // Mutable-reference form: null when the result had errors, otherwise a pointer to the result
  // viewed as its base type T.
  return value.hadErrors ? nullptr : &value;
}
template <typename T>
const T* readMaybe(const EncodingResult<T>& value) {
  // Const-reference form: null when the result had errors, otherwise a const pointer to the
  // result viewed as its base type T.
  if (value.hadErrors) return nullptr;
  return &value;
}
} // namespace _ (private)
inline String encodeUriComponent(ArrayPtr<const char> text) {
  // Text flavor: view the characters as bytes and hand them to the encodeUriComponent() overload
  // that accepts that byte view.
  auto raw = text.asBytes();
  return encodeUriComponent(raw);
}
inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
  // Decode as binary with nulTerminate = true, then re-wrap the bytes as a String while
  // carrying the error flag over. (The terminator is presumably requested because String
  // expects a NUL-terminated buffer.)
  auto decoded = decodeBinaryUriComponent(text, /* nulTerminate = */ true);
  return { String(decoded.releaseAsChars()), decoded.hadErrors };
}
inline String encodeCEscape(ArrayPtr<const char> text) {
  // Text flavor: view the characters as bytes and hand them to the encodeCEscape() overload
  // that accepts that byte view.
  auto raw = text.asBytes();
  return encodeCEscape(raw);
}
inline EncodingResult<String> decodeCEscape(ArrayPtr<const char> text) {
  // Decode as binary with nulTerminate = true, then re-wrap the bytes as a String while
  // carrying the error flag over. (The terminator is presumably requested because String
  // expects a NUL-terminated buffer.)
  auto decoded = decodeBinaryCEscape(text, /* nulTerminate = */ true);
  return { String(decoded.releaseAsChars()), decoded.hadErrors };
}
// If you pass a string literal to a function taking ArrayPtr<const char>, it'll include the NUL
// terminator, which is surprising. Let's add overloads that avoid that. In practice this probably
// only even matters for encoding-test.c++.
template <size_t s>
inline EncodingResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate=false) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based encodeUtf16().
  auto withoutNul = arrayPtr(text, s - 1);
  return encodeUtf16(withoutNul, nulTerminate);
}
template <size_t s>
inline EncodingResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate=false) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based encodeUtf32().
  auto withoutNul = arrayPtr(text, s - 1);
  return encodeUtf32(withoutNul, nulTerminate);
}
template <size_t s>
inline EncodingResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
  // char16_t-literal overload: forward everything except the array's last element (the
  // literal's NUL terminator) to the ArrayPtr-based decodeUtf16().
  auto withoutNul = arrayPtr(utf16, s - 1);
  return decodeUtf16(withoutNul);
}
template <size_t s>
inline EncodingResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
  // char32_t-literal overload: forward everything except the array's last element (the
  // literal's NUL terminator) to the ArrayPtr-based decodeUtf32().
  auto withoutNul = arrayPtr(utf32, s - 1);
  return decodeUtf32(withoutNul);
}
template <size_t s>
inline EncodingResult<Array<byte>> decodeHex(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based decodeHex().
  auto withoutNul = arrayPtr(text, s - 1);
  return decodeHex(withoutNul);
}
template <size_t s>
inline String encodeUriComponent(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based encodeUriComponent().
  auto withoutNul = arrayPtr(text, s - 1);
  return encodeUriComponent(withoutNul);
}
template <size_t s>
inline Array<byte> decodeBinaryUriComponent(const char (&text)[s]) {
return decodeBinaryUriComponent(arrayPtr(text, s - 1));
}
template <size_t s>
inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based decodeUriComponent().
  auto withoutNul = arrayPtr(text, s - 1);
  return decodeUriComponent(withoutNul);
}
template <size_t s>
inline String encodeCEscape(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based encodeCEscape().
  auto withoutNul = arrayPtr(text, s - 1);
  return encodeCEscape(withoutNul);
}
template <size_t s>
inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based decodeBinaryCEscape().
  auto withoutNul = arrayPtr(text, s - 1);
  return decodeBinaryCEscape(withoutNul);
}
template <size_t s>
inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based decodeCEscape().
  auto withoutNul = arrayPtr(text, s - 1);
  return decodeCEscape(withoutNul);
}
template <size_t s>
inline Array<byte> decodeBase64(const char (&text)[s]) {
  // String-literal overload: forward everything except the array's last element (the literal's
  // NUL terminator) to the ArrayPtr-based decodeBase64().
  //
  // Declared `inline` to match the other string-literal overloads in this header.
  return decodeBase64(arrayPtr(text, s - 1));
}
} // namespace kj
#endif // KJ_ENCODING_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment