Commit 25df9749 authored by Harris Hancock

Ensure '%' signs get round-tripped in URL path, fragment, userinfo

Our query string encoding function (encodeWwwForm()) was already doing the right thing.

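I changed the comment on encodeUriPath() to clarify that the function is intended for implementing a URL class that stores its path components in percent-decoded form only, not in either percent-encoded or percent-decoded form as the comment previously claimed. I was wrong before.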
parent 513ef481
......@@ -269,6 +269,13 @@ KJ_TEST("parse / stringify URL") {
// URLs with underscores in their hostnames are allowed, but you probably shouldn't use them. They
// are not valid domain names.
parseAndCheck("https://bad_domain.capnproto.org/");
// Make sure URLs with %-encoded '%' signs in their userinfo, path, query, and fragment components
// get correctly re-encoded.
parseAndCheck("https://foo%25bar:baz%25qux@capnproto.org/");
parseAndCheck("https://capnproto.org/foo%25bar");
parseAndCheck("https://capnproto.org/?foo%25bar=baz%25qux");
parseAndCheck("https://capnproto.org/#foo%25bar");
}
KJ_TEST("URL percent encoding") {
......
......@@ -411,8 +411,8 @@ String encodeUriFragment(ArrayPtr<const byte> bytes) {
for (byte b: bytes) {
if (('?' <= b && b <= '_') || // covers A-Z
('a' <= b && b <= '~') || // covers a-z
('#' <= b && b <= ';') || // covers 0-9
b == '!' || b == '=') {
('&' <= b && b <= ';') || // covers 0-9
b == '!' || b == '=' || b == '#' || b == '$') {
result.add(b);
} else {
result.add('%');
......@@ -430,8 +430,9 @@ String encodeUriPath(ArrayPtr<const byte> bytes) {
if (('@' <= b && b <= '[') || // covers A-Z
('a' <= b && b <= 'z') ||
('0' <= b && b <= ';') || // covers 0-9
('$' <= b && b <= '.') ||
b == '_' || b == '!' || b == '=' || b == ']' || b == '^' || b == '|' || b == '~') {
('&' <= b && b <= '.') ||
b == '_' || b == '!' || b == '=' || b == ']' ||
b == '^' || b == '|' || b == '~' || b == '$') {
result.add(b);
} else {
result.add('%');
......@@ -449,8 +450,8 @@ String encodeUriUserInfo(ArrayPtr<const byte> bytes) {
if (('A' <= b && b <= 'Z') ||
('a' <= b && b <= 'z') ||
('0' <= b && b <= '9') ||
('$' <= b && b <= '.') ||
b == '_' || b == '!' || b == '~') {
('&' <= b && b <= '.') ||
b == '_' || b == '!' || b == '~' || b == '$') {
result.add(b);
} else {
result.add('%');
......
......@@ -135,6 +135,9 @@ String encodeUriFragment(ArrayPtr<const char> bytes);
// Encode URL fragment components using the fragment percent encode set defined by the WHATWG URL
// specification. Use decodeUriComponent() to decode.
//
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// See https://url.spec.whatwg.org/#fragment-percent-encode-set
String encodeUriPath(ArrayPtr<const byte> bytes);
......@@ -142,11 +145,14 @@ String encodeUriPath(ArrayPtr<const char> bytes);
// Encode URL path components (not entire paths!) using the path percent encode set defined by the
// WHATWG URL specification. Use decodeUriComponent() to decode.
//
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// Quirk: This percent-encodes '/' and '\' characters as well, which are not actually in the set
// defined by the WHATWG URL spec. Since a conforming URL implementation will only ever call this
// function on individual path components, and never entire paths, augmenting the character set to
// include these separators allows this function to be used to implement a URL class that stores
// its path components in either percent-encoded OR percent-decoded form.
// its path components in percent-decoded form.
//
// See https://url.spec.whatwg.org/#path-percent-encode-set
......@@ -155,6 +161,9 @@ String encodeUriUserInfo(ArrayPtr<const char> bytes);
// Encode URL userinfo components using the userinfo percent encode set defined by the WHATWG URL
// specification. Use decodeUriComponent() to decode.
//
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// See https://url.spec.whatwg.org/#userinfo-percent-encode-set
String encodeWwwForm(ArrayPtr<const byte> bytes);
......@@ -163,6 +172,10 @@ EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes and '+' (for spaces) according to the
// application/x-www-form-urlencoded format defined by the WHATWG URL specification.
//
// Note: Like the fragment, path, and userinfo percent-encoding functions above, this function is
// not idempotent: we percent-encode '%' signs. However, in this particular case the spec happens
// to agree with us!
//
// See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer
struct DecodeUriOptions {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment