Commit a358282e authored by Kenton Varda's avatar Kenton Varda

Extend kj::Url with option to not collapse empty path/query components.

This is to fix a complaint about Cloudflare Workers removing these components as requests pass through, which apparently breaks someone's funky URLs.

Arguably "." and ".." processing presents a similar problems. But, allowing ".." to pass through is much more likely to lead to security problems. Browsers will generally process "." and ".." before sending a request (whereas they won't collapse double-slashes), so we're following their lead here.
parent 12d346b2
...@@ -39,6 +39,12 @@ Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringifie ...@@ -39,6 +39,12 @@ Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringifie
static constexpr Url::Options NO_DECODE { static constexpr Url::Options NO_DECODE {
false, // percentDecode false, // percentDecode
false, // allowEmpty
};
static constexpr Url::Options ALLOW_EMPTY {
true, // percentDecode
true, // allowEmpty
}; };
KJ_TEST("parse / stringify URL") { KJ_TEST("parse / stringify URL") {
...@@ -281,6 +287,13 @@ KJ_TEST("parse / stringify URL") { ...@@ -281,6 +287,13 @@ KJ_TEST("parse / stringify URL") {
parseAndCheck("https://capnproto.org/foo%25bar"); parseAndCheck("https://capnproto.org/foo%25bar");
parseAndCheck("https://capnproto.org/?foo%25bar=baz%25qux"); parseAndCheck("https://capnproto.org/?foo%25bar=baz%25qux");
parseAndCheck("https://capnproto.org/#foo%25bar"); parseAndCheck("https://capnproto.org/#foo%25bar");
// Make sure redundant /'s and &'s aren't collapsed when options.removeEmpty is false.
parseAndCheck("https://capnproto.org/foo//bar///test//?foo=bar&&baz=qux&", nullptr, ALLOW_EMPTY);
// "." and ".." are still processed, though.
parseAndCheck("https://capnproto.org/foo//../bar/.",
"https://capnproto.org/foo/bar/", ALLOW_EMPTY);
} }
KJ_TEST("URL percent encoding") { KJ_TEST("URL percent encoding") {
......
...@@ -233,7 +233,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) { ...@@ -233,7 +233,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
result.path.removeLast(); result.path.removeLast();
} }
result.hasTrailingSlash = true; result.hasTrailingSlash = true;
} else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) { } else if ((part.size() == 0 && !options.allowEmpty) || (part.size() == 1 && part[0] == '.')) {
// Collapse consecutive slashes and "/./". // Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true; result.hasTrailingSlash = true;
} else { } else {
...@@ -247,7 +247,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) { ...@@ -247,7 +247,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
text = text.slice(1); text = text.slice(1);
auto part = split(text, END_QUERY_PART); auto part = split(text, END_QUERY_PART);
if (part.size() > 0) { if (part.size() > 0 || options.allowEmpty) {
KJ_IF_MAYBE(key, trySplit(part, '=')) { KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecodeQuery(*key, err, options), result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
percentDecodeQuery(part, err, options) }); percentDecodeQuery(part, err, options) });
...@@ -458,8 +458,8 @@ String Url::toString(Context context) const { ...@@ -458,8 +458,8 @@ String Url::toString(Context context) const {
for (auto& pathPart: path) { for (auto& pathPart: path) {
// Protect against path injection. // Protect against path injection.
KJ_REQUIRE(pathPart != "" && pathPart != "." && pathPart != "..", KJ_REQUIRE((pathPart != "" || options.allowEmpty) && pathPart != "." && pathPart != "..",
"invalid name in URL path", *this) { "invalid name in URL path", path) {
continue; continue;
} }
chars.add('/'); chars.add('/');
......
...@@ -37,6 +37,12 @@ struct UrlOptions { ...@@ -37,6 +37,12 @@ struct UrlOptions {
bool percentDecode = true; bool percentDecode = true;
// True if URL components should be automatically percent-decoded during parsing, and // True if URL components should be automatically percent-decoded during parsing, and
// percent-encoded during serialization. // percent-encoded during serialization.
bool allowEmpty = false;
// Whether or not to allow empty path and query components when parsing; otherwise, they are
// silently removed. In other words, setting this false causes consecutive slashes in the path or
// consecutive ampersands in the query to be collapsed into one, whereas if true then they
// produce empty components.
}; };
struct Url { struct Url {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment