Commit 03800dfa authored by Kenton Varda's avatar Kenton Varda

Add CEscape to encodings.

parent f74555b4
......@@ -24,6 +24,7 @@
#include <capnp/serialize.h>
#include <kj/debug.h>
#include <kj/arena.h>
#include <kj/encoding.h>
#include <set>
#include <map>
#include <stdlib.h>
......@@ -2408,36 +2409,7 @@ uint64_t NodeTranslator::compileParamList(
// Lowercase hexadecimal digit characters, indexed by digit value (0-15).
static const char HEXDIGITS[] = "0123456789abcdef";
static kj::StringTree stringLiteral(kj::StringPtr chars) {
  // Renders `chars` as a double-quoted string literal.
  //
  // Escaping is delegated to kj::encodeCEscape() from <kj/encoding.h> instead of keeping a private
  // copy of the escape switch here. The private copy compared a (possibly signed) `char` against
  // 0x20, which also classified every byte >= 0x80 as a control character on signed-char
  // platforms, and it was followed by an unreachable second `return`.
  return kj::strTree('"', kj::encodeCEscape(chars), '"');
}
static kj::StringTree binaryLiteral(Data::Reader data) {
......
......@@ -22,6 +22,7 @@
#include "dynamic.h"
#include <kj/debug.h>
#include <kj/vector.h>
#include <kj/encoding.h>
namespace capnp {
......@@ -150,34 +151,7 @@ static kj::StringTree print(const DynamicValue::Reader& value,
chars = value.as<Text>();
}
kj::Vector<char> escaped(chars.size());
for (char c: chars) {
switch (c) {
case '\a': escaped.addAll(kj::StringPtr("\\a")); break;
case '\b': escaped.addAll(kj::StringPtr("\\b")); break;
case '\f': escaped.addAll(kj::StringPtr("\\f")); break;
case '\n': escaped.addAll(kj::StringPtr("\\n")); break;
case '\r': escaped.addAll(kj::StringPtr("\\r")); break;
case '\t': escaped.addAll(kj::StringPtr("\\t")); break;
case '\v': escaped.addAll(kj::StringPtr("\\v")); break;
case '\'': escaped.addAll(kj::StringPtr("\\\'")); break;
case '\"': escaped.addAll(kj::StringPtr("\\\"")); break;
case '\\': escaped.addAll(kj::StringPtr("\\\\")); break;
default:
if (c < 0x20) {
escaped.add('\\');
escaped.add('x');
uint8_t c2 = c;
escaped.add(HEXDIGITS[c2 / 16]);
escaped.add(HEXDIGITS[c2 % 16]);
} else {
escaped.add(c);
}
break;
}
}
return kj::strTree('"', escaped, '"');
return kj::strTree('"', kj::encodeCEscape(chars), '"');
}
case DynamicValue::LIST: {
auto listValue = value.as<DynamicList>();
......
......@@ -215,6 +215,31 @@ KJ_TEST("URI encoding/decoding") {
KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes);
}
// Exercises encodeCEscape() and decodeCEscape(): named escapes, numeric escapes, Unicode escapes,
// and malformed input.
KJ_TEST("C escape encoding/decoding") {
// Characters that have a dedicated two-character escape are encoded with it.
KJ_EXPECT(encodeCEscape("fooo\a\b\f\n\r\t\v\'\"\\bar") ==
"fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar");
// Other control characters (0x01 and 0x7f here) are expected as three-digit octal escapes.
KJ_EXPECT(encodeCEscape("foo\x01\x7fxxx") ==
"foo\\001\\177xxx");
// Decoding maps the named escapes back to the original characters.
expectUtf(decodeCEscape("fooo\\a\\b\\f\\n\\r\\t\\v\\\'\\\"\\\\bar"),
"fooo\a\b\f\n\r\t\v\'\"\\bar");
// Hex escapes; the 'x' that follows "7f" is not a hex digit, so it ends the escape.
expectUtf(decodeCEscape("foo\\x01\\x7fxxx"), "foo\x01\x7fxxx");
// Octal escapes take at most three digits, so the "234" after "177" stays literal text.
expectUtf(decodeCEscape("foo\\001\\177234"), "foo\001\177234");
// Short hex and octal escapes terminated by end of input.
expectUtf(decodeCEscape("foo\\x1"), "foo\x1");
expectUtf(decodeCEscape("foo\\1"), "foo\1");
// Four-digit and eight-digit Unicode escapes decode to the UTF-8 form of the code point.
expectUtf(decodeCEscape("foo\\u1234bar"), u8"foo\u1234bar");
expectUtf(decodeCEscape("foo\\U00045678bar"), u8"foo\U00045678bar");
// Error cases.
// NOTE(review): expectUtf is defined outside this view; the trailing `true` presumably means
// "the result must have hadErrors set" -- confirm against the helper.
// Backslash as the final character.
expectUtf(decodeCEscape("foo\\"), "foo", true);
// Hex escape whose value (0x123) does not fit in one byte; only the low byte 0x23 is expected.
expectUtf(decodeCEscape("foo\\x123x"), u8"foo\x23x", true);
// Unicode escapes with fewer hex digits than required, cut off by end of input or by a
// non-hex character.
expectUtf(decodeCEscape("foo\\u12"), u8"foo\u0012", true);
expectUtf(decodeCEscape("foo\\u12xxx"), u8"foo\u0012xxx", true);
expectUtf(decodeCEscape("foo\\U12"), u8"foo\u0012", true);
expectUtf(decodeCEscape("foo\\U12xxxxxxxx"), u8"foo\u0012xxxxxxxx", true);
}
KJ_TEST("base64 encoding/decoding") {
{
auto encoded = encodeBase64(StringPtr("foo").asBytes(), false);
......
......@@ -338,6 +338,180 @@ Array<byte> decodeBinaryUriComponent(ArrayPtr<const char> text, bool nulTerminat
return result.releaseAsArray();
}
// =======================================================================================
String encodeCEscape(ArrayPtr<const byte> bytes) {
  // Produces a NUL-terminated String in which every byte of `bytes` that needs escaping inside a
  // C string literal has been replaced by a backslash escape sequence.
  Vector<char> out(bytes.size());

  // Appends a two-character escape: a backslash followed by `letter`.
  auto addEscape = [&out](char letter) {
    out.add('\\');
    out.add(letter);
  };

  for (byte b: bytes) {
    switch (b) {
      case '\a': addEscape('a'); break;
      case '\b': addEscape('b'); break;
      case '\f': addEscape('f'); break;
      case '\n': addEscape('n'); break;
      case '\r': addEscape('r'); break;
      case '\t': addEscape('t'); break;
      case '\v': addEscape('v'); break;
      case '\'': addEscape('\''); break;
      case '\"': addEscape('\"'); break;
      case '\\': addEscape('\\'); break;
      default: {
        bool needsNumericEscape = b < 0x20 || b == 0x7f;
        if (!needsNumericEscape) {
          out.add(b);
          break;
        }

        // Always emit exactly three octal digits. Octal is preferred over hex because a hex
        // escape has no maximum length and could therefore swallow the characters that follow.
        out.add('\\');
        out.add(HEX_DIGITS[b >> 6]);
        out.add(HEX_DIGITS[(b >> 3) & 7]);
        out.add(HEX_DIGITS[b & 7]);
        break;
      }
    }
  }

  // String expects its backing array to include the terminating NUL.
  out.add(0);
  return String(out.releaseAsArray());
}
namespace {

static Maybe<uint> tryFromHexDigit(char c) {
  // Numeric value (0-15) of the hexadecimal digit `c`, in either letter case, or nullptr when `c`
  // is not a hexadecimal digit.
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return nullptr;
}

static Maybe<uint> tryFromOctDigit(char c) {
  // Numeric value (0-7) of the octal digit `c`, or nullptr when `c` is not an octal digit.
  if (c < '0' || c > '7') return nullptr;
  return c - '0';
}

}  // namespace
UtfResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate) {
  // Decodes C-style backslash escapes in `text` into raw bytes.
  //
  // Recognized escapes:
  //   \a \b \f \n \r \t \v \' \" \\   single characters
  //   \N, \NN, \NNN                   one to three octal digits
  //   \xH...                          any number of hex digits
  //   \uHHHH                          four hex digits, emitted as UTF-8
  //   \UHHHHHHHH                      eight hex digits, emitted as UTF-8
  // A backslash followed by any other character yields that character unchanged.
  //
  // If `nulTerminate` is true, a zero byte is appended to the output.
  //
  // Decoding never aborts. On malformed input the best-effort bytes are still produced and
  // `hadErrors` is set on the result. This covers: a backslash at the very end of the input, an
  // octal or hex escape whose value does not fit in one byte, a `\x` with no hex digits after it,
  // a `\u`/`\U` escape with too few hex digits, or a `\u`/`\U` value rejected by the UTF decoder.
  Vector<byte> result(text.size() + nulTerminate);
  bool hadErrors = false;

  size_t i = 0;
  while (i < text.size()) {
    char c = text[i++];
    if (c != '\\') {
      result.add(c);
      continue;
    }

    if (i == text.size()) {
      // Dangling backslash: nothing to emit for it.
      hadErrors = true;
      continue;
    }

    char c2 = text[i++];
    switch (c2) {
      case 'a' : result.add('\a'); break;
      case 'b' : result.add('\b'); break;
      case 'f' : result.add('\f'); break;
      case 'n' : result.add('\n'); break;
      case 'r' : result.add('\r'); break;
      case 't' : result.add('\t'); break;
      case 'v' : result.add('\v'); break;
      case '\'': result.add('\''); break;
      case '\"': result.add('\"'); break;
      case '\\': result.add('\\'); break;

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7': {
        // The first digit is already consumed; take up to two more.
        uint value = c2 - '0';
        for (uint j = 0; j < 2 && i < text.size(); j++) {
          KJ_IF_MAYBE(d, tryFromOctDigit(text[i])) {
            ++i;
            value = (value << 3) | *d;
          } else {
            break;
          }
        }
        // Three octal digits can encode up to 0777, which does not fit in a byte.
        if (value >= 0x100) hadErrors = true;
        result.add(value);
        break;
      }

      case 'x': {
        // A hex escape has no length limit, so consume every hex digit that follows. Only the low
        // eight bits are kept in `value`; overflow is tracked in a separate flag so that a long
        // digit run cannot wrap the accumulator back into range and hide the error.
        uint value = 0;
        bool sawDigit = false;
        bool outOfRange = false;
        while (i < text.size()) {
          KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
            ++i;
            sawDigit = true;
            value = (value << 4) | *d;
            if (value >= 0x100) {
              outOfRange = true;
              value &= 0xff;
            }
          } else {
            break;
          }
        }
        // `\x` with no digits is malformed, just like a truncated `\u`. It still produces a zero
        // byte, but the error is now reported.
        if (!sawDigit || outOfRange) hadErrors = true;
        result.add(value);
        break;
      }

      case 'u': {
        // Exactly four hex digits are required; stopping early is an error, but whatever value
        // was accumulated is still emitted.
        char16_t value = 0;
        for (uint j = 0; j < 4; j++) {
          if (i == text.size()) {
            hadErrors = true;
            break;
          } else KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
            ++i;
            value = (value << 4) | *d;
          } else {
            hadErrors = true;
            break;
          }
        }
        auto utf = decodeUtf16(arrayPtr(&value, 1));
        if (utf.hadErrors) hadErrors = true;
        result.addAll(utf.asBytes());
        break;
      }

      case 'U': {
        // Same as `\u`, but with exactly eight hex digits forming a 32-bit value.
        char32_t value = 0;
        for (uint j = 0; j < 8; j++) {
          if (i == text.size()) {
            hadErrors = true;
            break;
          } else KJ_IF_MAYBE(d, tryFromHexDigit(text[i])) {
            ++i;
            value = (value << 4) | *d;
          } else {
            hadErrors = true;
            break;
          }
        }
        auto utf = decodeUtf32(arrayPtr(&value, 1));
        if (utf.hadErrors) hadErrors = true;
        result.addAll(utf.asBytes());
        break;
      }

      default:
        // Unrecognized escape: pass the escaped character through unchanged.
        result.add(c2);
        break;
    }
  }

  if (nulTerminate) result.add(0);
  return { result.releaseAsArray(), hadErrors };
}
// =======================================================================================
// This code is derived from libb64 which has been placed in the public domain.
// For details, see http://sourceforge.net/projects/libb64
......
......@@ -88,6 +88,11 @@ Array<byte> decodeBinaryUriComponent(ArrayPtr<const char> text, bool nulTerminat
String decodeUriComponent(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().
String encodeCEscape(ArrayPtr<const byte> bytes);
String encodeCEscape(ArrayPtr<const char> bytes);
UtfResult<Array<byte>> decodeBinaryCEscape(ArrayPtr<const char> text, bool nulTerminate = false);
UtfResult<String> decodeCEscape(ArrayPtr<const char> text);
// Encode/decode using C-style backslash escape sequences.
//
// Encoding replaces the characters that have a dedicated escape with that escape: \a \b \f \n \r
// \t \v, single quote, double quote and backslash. Any other byte below 0x20, and 0x7f, becomes a
// three-digit octal escape. All remaining bytes are copied through unchanged.
//
// Decoding accepts the same named escapes, octal escapes of up to three digits, hex escapes
// introduced by \x, and the Unicode escapes \u (four hex digits) and \U (eight hex digits), which
// are written to the output as UTF-8. Decoding does not stop at malformed input; instead the
// result's `hadErrors` flag is set, for example for a trailing backslash, a numeric escape too
// large for one byte, or a truncated \u / \U escape.
//
// decodeBinaryCEscape() returns raw bytes and appends a zero byte when `nulTerminate` is true.
// decodeCEscape() returns the decoded bytes as a String.
String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
// Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
// into the output every 72 characters (e.g. for encoding e-mail bodies).
......@@ -105,6 +110,14 @@ inline String decodeUriComponent(ArrayPtr<const char> text) {
return String(decodeBinaryUriComponent(text, true).releaseAsChars());
}
inline String encodeCEscape(ArrayPtr<const char> text) {
  // Character-array convenience overload: reinterpret the characters as bytes and forward to the
  // byte-based encoder.
  ArrayPtr<const byte> rawBytes = text.asBytes();
  return encodeCEscape(rawBytes);
}
inline UtfResult<String> decodeCEscape(ArrayPtr<const char> text) {
  // Decode with a trailing NUL so the resulting byte array can be adopted directly as a String,
  // carrying the error flag over to the returned result.
  auto decoded = decodeBinaryCEscape(text, true);
  bool sawErrors = decoded.hadErrors;
  return { String(decoded.releaseAsChars()), sawErrors };
}
// If you pass a string literal to a function taking ArrayPtr<const char>, it'll include the NUL
// terminator, which is surprising. Let's add overloads that avoid that. In practice this probably
// only even matters for encoding-test.c++.
......@@ -158,6 +171,19 @@ inline String decodeUriComponent(const char (&text)[s]) {
return String(decodeBinaryUriComponent(arrayPtr(text, s - 1), true).releaseAsChars());
}
template <size_t s>
inline String encodeCEscape(const char (&text)[s]) {
  // String-literal overload: drop the literal's trailing NUL (the last of the `s` elements) before
  // encoding.
  ArrayPtr<const char> withoutNul = arrayPtr(text, s - 1);
  return encodeCEscape(withoutNul);
}
template <size_t s>
inline UtfResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
  // String-literal overload: decode everything except the literal's trailing NUL. The output is
  // not NUL-terminated (the callee's default).
  ArrayPtr<const char> withoutNul = arrayPtr(text, s - 1);
  return decodeBinaryCEscape(withoutNul);
}
template <size_t s>
inline UtfResult<String> decodeCEscape(const char (&text)[s]) {
  // String-literal overload: decode everything except the literal's trailing NUL, asking for a
  // NUL-terminated result so that it can be adopted as a String.
  auto decoded = decodeBinaryCEscape(arrayPtr(text, s - 1), true);
  bool sawErrors = decoded.hadErrors;
  return { String(decoded.releaseAsChars()), sawErrors };
}
template <size_t s>
Array<byte> decodeBase64(const char (&text)[s]) {
  // String-literal overload: exclude the literal's trailing NUL from the base64 input.
  ArrayPtr<const char> withoutNul = arrayPtr(text, s - 1);
  return decodeBase64(withoutNul);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment