Commit c047c831 (unverified), authored by Kenton Varda, committed by GitHub

Merge pull request #627 from capnproto/url-conformance

Use application/x-www-form-urlencoded for URL query strings
parents 3f928587 7a28452e
......@@ -132,6 +132,14 @@ KJ_TEST("parse / stringify URL") {
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
{
// Query strings use application/x-www-form-urlencoded: both "%20" and "+" in the input decode
// to a space in the parsed parameter name and value.
// NOTE(review): the second argument is presumably the expected re-serialized URL, in which
// spaces are written back out as "+" -- confirm against parseAndCheck().
auto url = parseAndCheck("https://capnproto.org/foo?bar%20baz=qux+quux",
"https://capnproto.org/foo?bar+baz=qux+quux");
// Exactly one name/value pair is expected; the remaining checks index into it.
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "bar baz");
KJ_EXPECT(url.query[0].value == "qux quux");
}
{
auto url = parseAndCheck("https://capnproto.org/foo/bar#garply");
KJ_EXPECT(url.scheme == "https");
......@@ -232,7 +240,7 @@ KJ_TEST("URL percent encoding") {
parseAndCheck(
"https://b b: bcd@capnproto.org/f o?b r=b z#q x",
"https://b%20b:%20bcd@capnproto.org/f%20o?b%20r=b%20z#q%20x");
"https://b%20b:%20bcd@capnproto.org/f%20o?b+r=b+z#q%20x");
}
KJ_TEST("URL relative paths") {
......@@ -340,6 +348,9 @@ KJ_TEST("parse relative URL") {
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"?grault",
"https://capnproto.org/foo/bar?grault");
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"?grault+garply=waldo",
"https://capnproto.org/foo/bar?grault+garply=waldo");
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"grault",
"https://capnproto.org/foo/grault");
......
......@@ -88,6 +88,12 @@ String percentDecode(ArrayPtr<const char> text, bool& hadErrors) {
return kj::mv(result);
}
String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors) {
  // Decodes one query-string component with decodeWwwForm(), i.e. using the
  // application/x-www-form-urlencoded rules rather than plain percent-decoding.
  //
  // `hadErrors` is a sticky flag: it is set to true if decoding reported a problem and is
  // otherwise left exactly as the caller passed it in.
  auto decoded = decodeWwwForm(text);
  if (decoded.hadErrors) {
    hadErrors = true;
  }
  return kj::mv(decoded);
}
} // namespace
Url::~Url() noexcept(false) {}
......@@ -195,9 +201,10 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecode(*key, err), percentDecode(part, err) });
result.query.add(QueryParam { percentDecodeQuery(*key, err),
percentDecodeQuery(part, err) });
} else {
result.query.add(QueryParam { percentDecode(part, err), nullptr });
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr });
}
}
} while (text.startsWith("&"));
......@@ -331,9 +338,10 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecode(*key, err), percentDecode(part, err) });
result.query.add(QueryParam { percentDecodeQuery(*key, err),
percentDecodeQuery(part, err) });
} else {
result.query.add(QueryParam { percentDecode(part, err), nullptr });
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr });
}
}
} while (text.startsWith("&"));
......@@ -407,10 +415,10 @@ String Url::toString(Context context) const {
for (auto& param: query) {
chars.add(first ? '?' : '&');
first = false;
chars.addAll(encodeUriComponent(param.name));
chars.addAll(encodeWwwForm(param.name));
if (param.value.size() > 0) {
chars.add('=');
chars.addAll(encodeUriComponent(param.value));
chars.addAll(encodeWwwForm(param.value));
}
}
......
......@@ -279,6 +279,9 @@ KJ_TEST("URI encoding/decoding") {
KJ_EXPECT(encodeUriComponent("\xab\xba") == "%AB%BA");
KJ_EXPECT(encodeUriComponent(StringPtr("foo\0bar", 7)) == "foo%00bar");
// Characters that application/x-www-form-urlencoded escapes but that RFC 2396 lists as unreserved
// are left unescaped by encodeUriComponent(); only the space is escaped here.
KJ_EXPECT(encodeUriComponent("'foo'! (~)") == "'foo'!%20(~)");
expectRes(decodeUriComponent("foo%20bar"), "foo bar");
expectRes(decodeUriComponent("%ab%BA"), "\xab\xba");
......@@ -287,8 +290,43 @@ KJ_TEST("URI encoding/decoding") {
expectRes(decodeUriComponent("foo%xxx"), "fooxxx", true);
expectRes(decodeUriComponent("foo%"), "foo", true);
byte bytes[] = {12, 34, 56};
KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes);
{
byte bytes[] = {12, 34, 56};
KJ_EXPECT(decodeBinaryUriComponent(encodeUriComponent(bytes)).asPtr() == bytes);
// decodeBinaryUriComponent() takes a DecodeUriOptions struct as its second parameter, but it
// once took a single `bool nulTerminate`. Verify that the old behavior still compiles and
// works.
auto bytesWithNul = decodeBinaryUriComponent(encodeUriComponent(bytes), true);
KJ_ASSERT(bytesWithNul.size() == 4);
KJ_EXPECT(bytesWithNul[3] == '\0');
KJ_EXPECT(bytesWithNul.slice(0, 3) == bytes);
}
}
KJ_TEST("application/x-www-form-urlencoded encoding/decoding") {
  // --- Encoding ---------------------------------------------------------------------------
  // Plain alphanumerics pass through, a space becomes '+', and bytes outside the safe set
  // (including high bytes and an embedded NUL) are percent-escaped with uppercase hex.
  KJ_EXPECT(encodeWwwForm("foo") == "foo");
  KJ_EXPECT(encodeWwwForm("foo bar") == "foo+bar");
  KJ_EXPECT(encodeWwwForm("\xab\xba") == "%AB%BA");
  KJ_EXPECT(encodeWwwForm(StringPtr("foo\0bar", 7)) == "foo%00bar");

  // Encode characters reserved by application/x-www-form-urlencoded, but not by RFC 2396.
  KJ_EXPECT(encodeWwwForm("'foo'! (~)") == "%27foo%27%21+%28%7E%29");

  // --- Decoding ---------------------------------------------------------------------------
  // Both "%20" and '+' decode to a space; hex digits are accepted in either case.
  expectRes(decodeWwwForm("foo%20bar"), "foo bar");
  expectRes(decodeWwwForm("foo+bar"), "foo bar");
  expectRes(decodeWwwForm("%ab%BA"), "\xab\xba");

  // Malformed escapes: the third argument to expectRes() is `true` for each of these.
  // NOTE(review): presumably that argument is the expected `hadErrors` value -- confirm against
  // expectRes().
  expectRes(decodeWwwForm("foo%1xxx"), "foo\1xxx", true);
  expectRes(decodeWwwForm("foo%1"), "foo\1", true);
  expectRes(decodeWwwForm("foo%xxx"), "fooxxx", true);
  expectRes(decodeWwwForm("foo%"), "foo", true);

  // --- Binary round trip ------------------------------------------------------------------
  // Encoding raw bytes and decoding them through the lower-level binary decoder, with
  // plusToSpace enabled, must give back the original bytes.
  {
    byte rawBytes[] = {12, 34, 56};
    DecodeUriOptions wwwFormOptions { /*.nulTerminate=*/false, /*.plusToSpace=*/true };
    KJ_EXPECT(decodeBinaryUriComponent(encodeWwwForm(rawBytes), wwwFormOptions) == rawBytes);
  }
}
KJ_TEST("C escape encoding/decoding") {
......
......@@ -404,9 +404,27 @@ String encodeUriComponent(ArrayPtr<const byte> bytes) {
return String(result.releaseAsArray());
}
String encodeWwwForm(ArrayPtr<const byte> bytes) {
  // Serializes `bytes` in application/x-www-form-urlencoded form:
  //   - ASCII letters, digits, and '-', '_', '.', '*' are copied through unchanged;
  //   - a space is written as '+';
  //   - every other byte is written as '%' followed by two hex digits from HEX_DIGITS_URI.
  // The returned String is NUL-terminated.

  // Initial capacity covers the case where nothing needs escaping, plus the terminating NUL.
  Vector<char> encoded(bytes.size() + 1);

  for (byte b: bytes) {
    switch (b) {
      case ' ':
        encoded.add('+');
        break;

      case '-':
      case '_':
      case '.':
      case '*':
        encoded.add(b);
        break;

      default: {
        bool isUpper = 'A' <= b && b <= 'Z';
        bool isLower = 'a' <= b && b <= 'z';
        bool isDigit = '0' <= b && b <= '9';
        if (isUpper || isLower || isDigit) {
          encoded.add(b);
        } else {
          // High nibble first, then low nibble.
          encoded.add('%');
          encoded.add(HEX_DIGITS_URI[b/16]);
          encoded.add(HEX_DIGITS_URI[b%16]);
        }
        break;
      }
    }
  }

  encoded.add('\0');
  return String(encoded.releaseAsArray());
}
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, bool nulTerminate) {
Vector<byte> result(text.size() + nulTerminate);
ArrayPtr<const char> text, DecodeUriOptions options) {
Vector<byte> result(text.size() + options.nulTerminate);
bool hadErrors = false;
const char* ptr = text.begin();
......@@ -432,12 +450,15 @@ EncodingResult<Array<byte>> decodeBinaryUriComponent(
} else {
hadErrors = true;
}
} else if (options.plusToSpace && *ptr == '+') {
++ptr;
result.add(' ');
} else {
result.add(*ptr++);
}
}
if (nulTerminate) result.add(0);
if (options.nulTerminate) result.add(0);
return { result.releaseAsArray(), hadErrors };
}
......
......@@ -124,10 +124,40 @@ EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
String encodeUriComponent(ArrayPtr<const byte> bytes);
String encodeUriComponent(ArrayPtr<const char> bytes);
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, bool nulTerminate = false);
EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes. See Javascript's encodeURIComponent().
// Encode/decode URI components using % escapes for every character other than those listed as
// "unreserved" in RFC 2396. This is the same behavior as JavaScript's `encodeURIComponent()`.
//
// See https://tools.ietf.org/html/rfc2396#section-2.3
String encodeWwwForm(ArrayPtr<const byte> bytes);
String encodeWwwForm(ArrayPtr<const char> bytes);
EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes and '+' (for spaces) according to the
// application/x-www-form-urlencoded format defined by the WHATWG URL specification.
//
// See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer
struct DecodeUriOptions {
  // Options accepted by `decodeBinaryUriComponent()`.
  //
  // The constructor is deliberately left non-explicit so that a plain `bool` converts to this
  // struct: code written back when `decodeBinaryUriComponent()` took a single boolean second
  // parameter keeps compiling, with that boolean landing in `nulTerminate`.

  bool nulTerminate;
  // Append a terminal NUL byte to the decoded output.

  bool plusToSpace;
  // Treat each '+' in the input as an encoded ' '. Used to decode
  // application/x-www-form-urlencoded text, such as query strings.

  DecodeUriOptions(bool nulTerminate = false, bool plusToSpace = false)
      : nulTerminate(nulTerminate),
        plusToSpace(plusToSpace) {}
  // Both options default to off.
};
EncodingResult<Array<byte>> decodeBinaryUriComponent(
ArrayPtr<const char> text, DecodeUriOptions options = DecodeUriOptions());
// Decode URI components using % escapes. This is a lower-level interface used to implement both
// `decodeUriComponent()` and `decodeWwwForm()`.
String encodeCEscape(ArrayPtr<const byte> bytes);
String encodeCEscape(ArrayPtr<const char> bytes);
......@@ -181,7 +211,16 @@ inline String encodeUriComponent(ArrayPtr<const char> text) {
return encodeUriComponent(text.asBytes());
}
inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
auto result = decodeBinaryUriComponent(text, true);
auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /*.nulTerminate=*/true });
return { String(result.releaseAsChars()), result.hadErrors };
}
inline String encodeWwwForm(ArrayPtr<const char> text) {
  // Text overload: obtain the byte view of `text` via asBytes() and hand it to the byte-based
  // encoder.
  auto rawBytes = text.asBytes();
  return encodeWwwForm(rawBytes);
}
inline EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text) {
  // Decodes application/x-www-form-urlencoded text by calling the binary decoder with both
  // options switched on: NUL termination (the bytes are handed to a String) and '+'-to-space.
  DecodeUriOptions wwwFormOptions(/*nulTerminate=*/true, /*plusToSpace=*/true);
  auto decoded = decodeBinaryUriComponent(text, wwwFormOptions);

  // Carry the decoder's error flag through alongside the text.
  return { String(decoded.releaseAsChars()), decoded.hadErrors };
}
......@@ -239,6 +278,14 @@ inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
return decodeUriComponent(arrayPtr(text, s-1));
}
template <size_t s>
inline String encodeWwwForm(const char (&text)[s]) {
  // Character-array overload (e.g. string literals): the final array element is excluded, so
  // only the first s-1 characters are encoded.
  auto contents = arrayPtr(text, s - 1);
  return encodeWwwForm(contents);
}
template <size_t s>
inline EncodingResult<String> decodeWwwForm(const char (&text)[s]) {
  // Character-array overload (e.g. string literals): the final array element is excluded, so
  // only the first s-1 characters are decoded.
  auto contents = arrayPtr(text, s - 1);
  return decodeWwwForm(contents);
}
template <size_t s>
inline String encodeCEscape(const char (&text)[s]) {
  // Character-array overload (e.g. string literals): the final array element is excluded, so
  // only the first s-1 characters are escaped.
  auto contents = arrayPtr(text, s - 1);
  return encodeCEscape(contents);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment