Commit 25df9749 authored by Harris Hancock

Ensure '%' signs get round-tripped in URL path, fragment, userinfo

Our query string encoding function (encodeWwwForm()) was already doing the right thing.

I changed the comment on encodeUriPath() to clarify that it is intended for implementing a URL class which stores its path components in percent-decoded form only, not in either percent-encoded or percent-decoded form as the comment previously claimed. I was wrong before.
parent 513ef481
...@@ -269,6 +269,13 @@ KJ_TEST("parse / stringify URL") { ...@@ -269,6 +269,13 @@ KJ_TEST("parse / stringify URL") {
// URLs with underscores in their hostnames are allowed, but you probably shouldn't use them. They // URLs with underscores in their hostnames are allowed, but you probably shouldn't use them. They
// are not valid domain names. // are not valid domain names.
parseAndCheck("https://bad_domain.capnproto.org/"); parseAndCheck("https://bad_domain.capnproto.org/");
// Make sure URLs with %-encoded '%' signs in their userinfo, path, query, and fragment components
// get correctly re-encoded.
parseAndCheck("https://foo%25bar:baz%25qux@capnproto.org/");
parseAndCheck("https://capnproto.org/foo%25bar");
parseAndCheck("https://capnproto.org/?foo%25bar=baz%25qux");
parseAndCheck("https://capnproto.org/#foo%25bar");
} }
KJ_TEST("URL percent encoding") { KJ_TEST("URL percent encoding") {
......
...@@ -411,8 +411,8 @@ String encodeUriFragment(ArrayPtr<const byte> bytes) { ...@@ -411,8 +411,8 @@ String encodeUriFragment(ArrayPtr<const byte> bytes) {
for (byte b: bytes) { for (byte b: bytes) {
if (('?' <= b && b <= '_') || // covers A-Z if (('?' <= b && b <= '_') || // covers A-Z
('a' <= b && b <= '~') || // covers a-z ('a' <= b && b <= '~') || // covers a-z
('#' <= b && b <= ';') || // covers 0-9 ('&' <= b && b <= ';') || // covers 0-9
b == '!' || b == '=') { b == '!' || b == '=' || b == '#' || b == '$') {
result.add(b); result.add(b);
} else { } else {
result.add('%'); result.add('%');
...@@ -430,8 +430,9 @@ String encodeUriPath(ArrayPtr<const byte> bytes) { ...@@ -430,8 +430,9 @@ String encodeUriPath(ArrayPtr<const byte> bytes) {
if (('@' <= b && b <= '[') || // covers A-Z if (('@' <= b && b <= '[') || // covers A-Z
('a' <= b && b <= 'z') || ('a' <= b && b <= 'z') ||
('0' <= b && b <= ';') || // covers 0-9 ('0' <= b && b <= ';') || // covers 0-9
('$' <= b && b <= '.') || ('&' <= b && b <= '.') ||
b == '_' || b == '!' || b == '=' || b == ']' || b == '^' || b == '|' || b == '~') { b == '_' || b == '!' || b == '=' || b == ']' ||
b == '^' || b == '|' || b == '~' || b == '$') {
result.add(b); result.add(b);
} else { } else {
result.add('%'); result.add('%');
...@@ -449,8 +450,8 @@ String encodeUriUserInfo(ArrayPtr<const byte> bytes) { ...@@ -449,8 +450,8 @@ String encodeUriUserInfo(ArrayPtr<const byte> bytes) {
if (('A' <= b && b <= 'Z') || if (('A' <= b && b <= 'Z') ||
('a' <= b && b <= 'z') || ('a' <= b && b <= 'z') ||
('0' <= b && b <= '9') || ('0' <= b && b <= '9') ||
('$' <= b && b <= '.') || ('&' <= b && b <= '.') ||
b == '_' || b == '!' || b == '~') { b == '_' || b == '!' || b == '~' || b == '$') {
result.add(b); result.add(b);
} else { } else {
result.add('%'); result.add('%');
......
...@@ -135,6 +135,9 @@ String encodeUriFragment(ArrayPtr<const char> bytes); ...@@ -135,6 +135,9 @@ String encodeUriFragment(ArrayPtr<const char> bytes);
// Encode URL fragment components using the fragment percent encode set defined by the WHATWG URL // Encode URL fragment components using the fragment percent encode set defined by the WHATWG URL
// specification. Use decodeUriComponent() to decode. // specification. Use decodeUriComponent() to decode.
// //
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// See https://url.spec.whatwg.org/#fragment-percent-encode-set // See https://url.spec.whatwg.org/#fragment-percent-encode-set
String encodeUriPath(ArrayPtr<const byte> bytes); String encodeUriPath(ArrayPtr<const byte> bytes);
...@@ -142,11 +145,14 @@ String encodeUriPath(ArrayPtr<const char> bytes); ...@@ -142,11 +145,14 @@ String encodeUriPath(ArrayPtr<const char> bytes);
// Encode URL path components (not entire paths!) using the path percent encode set defined by the // Encode URL path components (not entire paths!) using the path percent encode set defined by the
// WHATWG URL specification. Use decodeUriComponent() to decode. // WHATWG URL specification. Use decodeUriComponent() to decode.
// //
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// Quirk: This percent-encodes '/' and '\' characters as well, which are not actually in the set // Quirk: This percent-encodes '/' and '\' characters as well, which are not actually in the set
// defined by the WHATWG URL spec. Since a conforming URL implementation will only ever call this // defined by the WHATWG URL spec. Since a conforming URL implementation will only ever call this
// function on individual path components, and never entire paths, augmenting the character set to // function on individual path components, and never entire paths, augmenting the character set to
// include these separators allows this function to be used to implement a URL class that stores // include these separators allows this function to be used to implement a URL class that stores
// its path components in either percent-encoded OR percent-decoded form. // its path components in percent-decoded form.
// //
// See https://url.spec.whatwg.org/#path-percent-encode-set // See https://url.spec.whatwg.org/#path-percent-encode-set
...@@ -155,6 +161,9 @@ String encodeUriUserInfo(ArrayPtr<const char> bytes); ...@@ -155,6 +161,9 @@ String encodeUriUserInfo(ArrayPtr<const char> bytes);
// Encode URL userinfo components using the userinfo percent encode set defined by the WHATWG URL // Encode URL userinfo components using the userinfo percent encode set defined by the WHATWG URL
// specification. Use decodeUriComponent() to decode. // specification. Use decodeUriComponent() to decode.
// //
// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
//
// See https://url.spec.whatwg.org/#userinfo-percent-encode-set // See https://url.spec.whatwg.org/#userinfo-percent-encode-set
String encodeWwwForm(ArrayPtr<const byte> bytes); String encodeWwwForm(ArrayPtr<const byte> bytes);
...@@ -163,6 +172,10 @@ EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text); ...@@ -163,6 +172,10 @@ EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
// Encode/decode URI components using % escapes and '+' (for spaces) according to the // Encode/decode URI components using % escapes and '+' (for spaces) according to the
// application/x-www-form-urlencoded format defined by the WHATWG URL specification. // application/x-www-form-urlencoded format defined by the WHATWG URL specification.
// //
// Note: Like the fragment, path, and userinfo percent-encoding functions above, this function is
// not idempotent: we percent-encode '%' signs. However, in this particular case the spec happens
// to agree with us!
//
// See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer // See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer
struct DecodeUriOptions { struct DecodeUriOptions {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment