Commit 74916c4b authored by Harris Hancock

kj::Url request contexts allow hash sign in path/query components

Occasionally, servers processing HTTP requests see URLs in the wild that contain unescaped hash signs ('#') in their path and query components. This change allows kj::Url to parse such URLs in the Url::HTTP_REQUEST and Url::HTTP_PROXY_REQUEST contexts.
parent 5c8e496e
......@@ -400,6 +400,58 @@ KJ_TEST("URL for HTTP request") {
KJ_EXPECT(url.query[1].name == "corge");
KJ_EXPECT(url.query[1].value == nullptr);
}
{
// '#' is allowed in path components in the HTTP_REQUEST context.
Url url = Url::parse("/foo#bar", Url::HTTP_REQUEST);
KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/foo%23bar");
KJ_EXPECT(url.scheme == nullptr);
KJ_EXPECT(url.host == nullptr);
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>{"foo#bar"});
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
{
// '#' is allowed in path components in the HTTP_PROXY_REQUEST context.
Url url = Url::parse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST);
KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) == "https://capnproto.org/foo%23bar");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>{"foo#bar"});
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
{
// '#' is allowed in query components in the HTTP_REQUEST context.
Url url = Url::parse("/?foo=bar#123", Url::HTTP_REQUEST);
KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/?foo=bar%23123");
KJ_EXPECT(url.scheme == nullptr);
KJ_EXPECT(url.host == nullptr);
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "foo");
KJ_EXPECT(url.query[0].value == "bar#123");
KJ_EXPECT(url.fragment == nullptr);
}
{
// '#' is allowed in query components in the HTTP_PROXY_REQUEST context.
Url url = Url::parse("https://capnproto.org/?foo=bar#123", Url::HTTP_PROXY_REQUEST);
KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) == "https://capnproto.org/?foo=bar%23123");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "foo");
KJ_EXPECT(url.query[0].value == "bar#123");
KJ_EXPECT(url.fragment == nullptr);
}
}
KJ_TEST("parse URL failure") {
......@@ -413,9 +465,8 @@ KJ_TEST("parse URL failure") {
// components not valid in context
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo", Url::HTTP_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("/foo#bar", Url::HTTP_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("https://bob:123@capnproto.org/foo", Url::HTTP_PROXY_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("https://capnproto.org#/foo", Url::HTTP_PROXY_REQUEST) == nullptr);
}
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected,
......
......@@ -31,9 +31,40 @@ namespace {
constexpr auto ALPHAS = parse::charRange('a', 'z').orRange('A', 'Z');
constexpr auto DIGITS = parse::charRange('0', '9');
constexpr auto END_AUTHORITY = parse::anyOfChars("/?#");
constexpr auto END_PATH_PART = parse::anyOfChars("/?#");
constexpr auto END_QUERY_PART = parse::anyOfChars("&#");
// Authority, path, and query components can typically be terminated by the start of a fragment.
// However, fragments are disallowed in HTTP_REQUEST and HTTP_PROXY_REQUEST contexts. As a quirk, we
// allow the fragment start character ('#') to live unescaped in path and query components. We do
// not currently allow it in the authority component, because our parser would reject it as a host
// character anyway.
// Selects the set of characters that terminate a path component for the given parsing context.
//
// REMOTE_HREF stops a path component at '/', '?', or '#'. The two HTTP request contexts stop only
// at '/' or '?', so an unescaped '#' is consumed as part of the path component.
const parse::CharGroup_& getEndPathPart(Url::Context context) {
  static constexpr auto TERMINATORS_WITH_HASH = parse::anyOfChars("/?#");
  static constexpr auto TERMINATORS_WITHOUT_HASH = parse::anyOfChars("/?");

  switch (context) {
    case Url::REMOTE_HREF:
      return TERMINATORS_WITH_HASH;

    // Both request contexts share the same terminator set.
    case Url::HTTP_PROXY_REQUEST:
    case Url::HTTP_REQUEST:
      return TERMINATORS_WITHOUT_HASH;
  }

  KJ_UNREACHABLE;
}
// Selects the set of characters that terminate a query component for the given parsing context.
//
// REMOTE_HREF stops a query component at '&' or '#'. The two HTTP request contexts stop only at
// '&', so an unescaped '#' is consumed as part of the query component.
const parse::CharGroup_& getEndQueryPart(Url::Context context) {
  static constexpr auto TERMINATORS_WITH_HASH = parse::anyOfChars("&#");
  static constexpr auto TERMINATORS_WITHOUT_HASH = parse::anyOfChars("&");

  switch (context) {
    case Url::REMOTE_HREF:
      return TERMINATORS_WITH_HASH;

    // Both request contexts share the same terminator set.
    case Url::HTTP_PROXY_REQUEST:
    case Url::HTTP_REQUEST:
      return TERMINATORS_WITHOUT_HASH;
  }

  KJ_UNREACHABLE;
}
constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();
......@@ -139,6 +170,9 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
result.options = options;
bool err = false; // tracks percent-decoding errors
auto& END_PATH_PART = getEndPathPart(context);
auto& END_QUERY_PART = getEndQueryPart(context);
if (context == HTTP_REQUEST) {
if (!text.startsWith("/")) {
return nullptr;
......@@ -251,6 +285,9 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
result.options = options;
bool err = false; // tracks percent-decoding errors
auto& END_PATH_PART = getEndPathPart(Url::REMOTE_HREF);
auto& END_QUERY_PART = getEndQueryPart(Url::REMOTE_HREF);
// scheme
{
bool gotScheme = false;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment