Commit 4982c9e8 authored by Harris Hancock

Implement application/x-www-form-urlencoded encode/decode functions

These are almost the same as {encode,decode}UriComponent, differing only in the set of characters they consider reserved, and their treatment of spaces.

I wasn't sure what to name them -- encodeWwwForm() seemed least bad.

For the encode side, I added a completely separate function -- it seemed like more trouble than it was worth trying to integrate the changes into encodeUriComponent(). For the decode side, I integrated the change (plus-to-space) into decodeBinaryUriComponent(), since that function is a bit longer, and the change was trivial.
parent 3f928587
......@@ -279,6 +279,9 @@ KJ_TEST("URI encoding/decoding") {
KJ_EXPECT(encodeUriComponent("\xab\xba") == "%AB%BA");
KJ_EXPECT(encodeUriComponent(StringPtr("foo\0bar", 7)) == "foo%00bar");
// Characters reserved by application/x-www-form-urlencoded, but not by RFC 2396, are left
// unescaped by encodeUriComponent(); only the space is percent-encoded here.
KJ_EXPECT(encodeUriComponent("'foo'! (~)") == "'foo'!%20(~)");
expectRes(decodeUriComponent("foo%20bar"), "foo bar");
expectRes(decodeUriComponent("%ab%BA"), "\xab\xba");
......@@ -287,8 +290,43 @@ KJ_TEST("URI encoding/decoding") {
expectRes(decodeUriComponent("foo%xxx"), "fooxxx", true);
expectRes(decodeUriComponent("foo%"), "foo", true);
{
byte bytes[] = {12, 34, 56};
KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes);
// decodeBinaryUriComponent() takes a DecodeUriOptions struct as its second parameter, but it
// once took a single `bool nulTerminate`. Verify that the old behavior still compiles and
// works.
auto bytesWithNul = decodeBinaryUriComponent(encodeUriComponent(bytes), true);
KJ_ASSERT(bytesWithNul.size() == 4);
KJ_EXPECT(bytesWithNul[3] == '\0');
KJ_EXPECT(bytesWithNul.slice(0, 3) == bytes);
}
}
// Exercises encodeWwwForm() / decodeWwwForm() and the `plusToSpace` option of
// decodeBinaryUriComponent().
KJ_TEST("application/x-www-form-urlencoded encoding/decoding") {
// Encoding: alphanumerics pass through, a space becomes '+', and other bytes (high bytes and an
// embedded NUL included) become uppercase %XX escapes.
KJ_EXPECT(encodeWwwForm("foo") == "foo");
KJ_EXPECT(encodeWwwForm("foo bar") == "foo+bar");
KJ_EXPECT(encodeWwwForm("\xab\xba") == "%AB%BA");
KJ_EXPECT(encodeWwwForm(StringPtr("foo\0bar", 7)) == "foo%00bar");
// Encode characters reserved by application/x-www-form-urlencoded, but not by RFC 2396.
KJ_EXPECT(encodeWwwForm("'foo'! (~)") == "%27foo%27%21+%28%7E%29");
// Decoding: both "%20" and '+' yield a space, and hex digits are accepted in either case.
expectRes(decodeWwwForm("foo%20bar"), "foo bar");
expectRes(decodeWwwForm("foo+bar"), "foo bar");
expectRes(decodeWwwForm("%ab%BA"), "\xab\xba");
// Truncated or non-hex escapes. NOTE(review): the trailing `true` presumably tells expectRes()
// that the result should be flagged as having errors -- confirm against its definition.
expectRes(decodeWwwForm("foo%1xxx"), "foo\1xxx", true);
expectRes(decodeWwwForm("foo%1"), "foo\1", true);
expectRes(decodeWwwForm("foo%xxx"), "fooxxx", true);
expectRes(decodeWwwForm("foo%"), "foo", true);
{
// Binary round trip through the lower-level decoder, with plus-to-space decoding switched on and
// no NUL terminator appended.
byte bytes[] = {12, 34, 56};
DecodeUriOptions options { /*.nulTerminate=*/false, /*.plusToSpace=*/true };
KJ_EXPECT(decodeBinaryUriComponent(encodeWwwForm(bytes), options) == bytes);
}
}
KJ_TEST("C escape encoding/decoding") {
......
......@@ -404,9 +404,27 @@ String encodeUriComponent(ArrayPtr<const byte> bytes) {
return String(result.releaseAsArray());
}
String encodeWwwForm(ArrayPtr<const byte> bytes) {
  // Serializes `bytes` as application/x-www-form-urlencoded text: ASCII letters, digits and the
  // punctuation `*-._` are copied verbatim, a space is written as '+', and every other byte is
  // written as a three-character %XX escape using HEX_DIGITS_URI.

  // Initial capacity covers the escape-free case plus the terminating NUL.
  Vector<char> encoded(bytes.size() + 1);

  for (byte ch: bytes) {
    if (ch == ' ') {
      encoded.add('+');
      continue;
    }

    bool isUpper = 'A' <= ch && ch <= 'Z';
    bool isLower = 'a' <= ch && ch <= 'z';
    bool isDigit = '0' <= ch && ch <= '9';
    bool isSafePunctuation = ch == '-' || ch == '_' || ch == '.' || ch == '*';

    if (isUpper || isLower || isDigit || isSafePunctuation) {
      encoded.add(ch);
    } else {
      // High nibble first, then low nibble.
      encoded.add('%');
      encoded.add(HEX_DIGITS_URI[ch / 16]);
      encoded.add(HEX_DIGITS_URI[ch % 16]);
    }
  }

  // The character array handed to String includes its NUL terminator.
  encoded.add('\0');
  return String(encoded.releaseAsArray());
}
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, bool nulTerminate) {
Vector<byte> result(text.size() + nulTerminate);
ArrayPtr<const char> text, DecodeUriOptions options) {
Vector<byte> result(text.size() + options.nulTerminate);
bool hadErrors = false;
const char* ptr = text.begin();
......@@ -432,12 +450,15 @@ EncodingResult<Array<byte>> decodeBinaryUriComponent(
} else {
hadErrors = true;
}
} else if (options.plusToSpace && *ptr == '+') {
++ptr;
result.add(' ');
} else {
result.add(*ptr++);
}
}
if (nulTerminate) result.add(0);
if (options.nulTerminate) result.add(0);
return { result.releaseAsArray(), hadErrors };
}
......
......@@ -124,10 +124,40 @@ EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
String encodeUriComponent(ArrayPtr<const byte> bytes);
String encodeUriComponent(ArrayPtr<const char> bytes);
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, bool nulTerminate = false);
EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().
// Encode/decode URI components using % escapes for characters listed as "reserved" in RFC 2396.
// This is the same behavior as JavaScript's `encodeURIComponent()`.
//
// See https://tools.ietf.org/html/rfc2396#section-2.3
String encodeWwwForm(ArrayPtr<const byte> bytes);
String encodeWwwForm(ArrayPtr<const char> bytes);
EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes and '+' (for spaces) according to the
// application/x-www-form-urlencoded format defined by the WHATWG URL specification.
//
// See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer
struct DecodeUriOptions {
  // Options accepted by `decodeBinaryUriComponent()`.
  //
  // The constructor is deliberately non-explicit so that a lone bool converts to this struct:
  // callers written when `decodeBinaryUriComponent()` took a boolean second parameter keep
  // compiling, with that bool landing in `nulTerminate`.

  bool nulTerminate;
  // Append a terminal NUL byte to the decoded output.

  bool plusToSpace;
  // Convert '+' characters to ' ' before percent decoding. Used to decode
  // application/x-www-form-urlencoded text, such as query strings.

  DecodeUriOptions(bool nulTerminate = false, bool plusToSpace = false)
      : nulTerminate{nulTerminate}, plusToSpace{plusToSpace} {}
};
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, DecodeUriOptions options = DecodeUriOptions());
// Decode URI components using % escapes. This is a lower-level interface used to implement both
// `decodeUriComponent()` and `decodeWwwForm()`.
String encodeCEscape(ArrayPtr<const byte> bytes);
String encodeCEscape(ArrayPtr<const char> bytes);
......@@ -181,7 +211,16 @@ inline String encodeUriComponent(ArrayPtr<const char> text) {
return encodeUriComponent(text.asBytes());
}
inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
// Decodes `text` with decodeBinaryUriComponent(), requesting a trailing NUL byte, then repackages
// the decoded bytes as a String while passing the decoder's `hadErrors` flag through unchanged.
auto result = decodeBinaryUriComponent(text, true);
auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /*.nulTerminate=*/true });
return { String(result.releaseAsChars()), result.hadErrors };
}
inline String encodeWwwForm(ArrayPtr<const char> text) {
  // Character-array overload: view the text as bytes and defer to the byte-based encoder.
  auto rawBytes = text.asBytes();
  return encodeWwwForm(rawBytes);
}
inline EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text) {
  // Decodes application/x-www-form-urlencoded text: the shared binary decoder is asked both to
  // append a NUL byte and to turn '+' into ' ', and its output is then repackaged as a String
  // with the decoder's error flag passed through.
  DecodeUriOptions wwwFormOptions(/*nulTerminate=*/true, /*plusToSpace=*/true);
  auto decoded = decodeBinaryUriComponent(text, wwwFormOptions);
  return { String(decoded.releaseAsChars()), decoded.hadErrors };
}
......@@ -239,6 +278,14 @@ inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
return decodeUriComponent(arrayPtr(text, s-1));
}
template <size_t s>
inline String encodeWwwForm(const char (&text)[s]) {
  // Char-array overload: the final array element (presumably the NUL terminator of a string
  // literal) is excluded from the encoded range.
  const size_t lengthWithoutLast = s - 1;
  return encodeWwwForm(arrayPtr(text, lengthWithoutLast));
}
template <size_t s>
inline EncodingResult<String> decodeWwwForm(const char (&text)[s]) {
  // Char-array overload: the final array element (presumably the NUL terminator of a string
  // literal) is excluded from the decoded range.
  const size_t lengthWithoutLast = s - 1;
  return decodeWwwForm(arrayPtr(text, lengthWithoutLast));
}
template <size_t s>
inline String encodeCEscape(const char (&text)[s]) {
  // Char-array overload: the final array element (presumably the NUL terminator of a string
  // literal) is excluded from the escaped range.
  const size_t lengthWithoutLast = s - 1;
  return encodeCEscape(arrayPtr(text, lengthWithoutLast));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment