Commit 52562bf5 authored by Kenton Varda

Add URL parsing library to libkj-http.

parent 0623cedb
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "url.h"
#include <kj/debug.h>
#include <kj/test.h>
namespace kj {
namespace {
Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr) {
// Test helper: parses `originalText` with Url::parse(), expects that kj::str() of the parsed URL
// equals `expectedRestringified`, and returns the parsed Url so the caller can inspect its
// individual fields.
//
// When `expectedRestringified` is omitted (nullptr), the URL is expected to round-trip to exactly
// the original text.
if (expectedRestringified == nullptr) expectedRestringified = originalText;
auto url = Url::parse(originalText);
// The extra arguments are included in the failure message to aid debugging.
KJ_EXPECT(kj::str(url) == expectedRestringified, url, originalText, expectedRestringified);
return url;
}
KJ_TEST("parse / stringify URL") {
// Each block below parses one URL, checks (via parseAndCheck) that it stringifies back to the
// expected text, and then verifies every parsed component individually.

// Scheme and host only.
{
auto url = parseAndCheck("https://capnproto.org");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Host with a port: the port stays part of `host`.
{
auto url = parseAndCheck("https://capnproto.org:80");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org:80");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Root path: empty `path`, but the trailing slash is recorded.
{
auto url = parseAndCheck("https://capnproto.org/");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Two path components, no trailing slash.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Two path components with a trailing slash.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar/");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Path, query and fragment; the second query parameter has no '=' and so has an empty value.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar?baz=qux&corge#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
// KJ_ASSERT (not KJ_EXPECT) so that we do not index past the end of `query` below.
KJ_ASSERT(url.query.size() == 2);
KJ_EXPECT(url.query[0].name == "baz");
KJ_EXPECT(url.query[0].value == "qux");
KJ_EXPECT(url.query[1].name == "corge");
KJ_EXPECT(url.query[1].value == nullptr);
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Trailing slash followed by a query in which both parameters have values, plus a fragment.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar/?baz=qux&corge=grault#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 2);
KJ_EXPECT(url.query[0].name == "baz");
KJ_EXPECT(url.query[0].value == "qux");
KJ_EXPECT(url.query[1].name == "corge");
KJ_EXPECT(url.query[1].value == "grault");
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Single query parameter plus a fragment.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar?baz=qux#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "baz");
KJ_EXPECT(url.query[0].value == "qux");
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Fragment directly after the path, no query.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Fragment after a path with a trailing slash.
{
auto url = parseAndCheck("https://capnproto.org/foo/bar/#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Fragment directly after the host.
{
auto url = parseAndCheck("https://capnproto.org#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// Fragment after the root path.
{
auto url = parseAndCheck("https://capnproto.org/#garply");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
}
// User info with a username only.
{
auto url = parseAndCheck("https://foo@capnproto.org");
KJ_EXPECT(url.scheme == "https");
auto& user = KJ_ASSERT_NONNULL(url.userInfo);
KJ_EXPECT(user.username == "foo");
KJ_EXPECT(user.password == nullptr);
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// User info with both username and password.
{
auto url = parseAndCheck("https://foo:1234@capnproto.org");
KJ_EXPECT(url.scheme == "https");
auto& user = KJ_ASSERT_NONNULL(url.userInfo);
KJ_EXPECT(user.username == "foo");
KJ_EXPECT(KJ_ASSERT_NONNULL(user.password) == "1234");
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path == nullptr);
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// Bracketed IPv6 literal with a port: brackets and port are kept verbatim in `host`.
{
auto url = parseAndCheck("https://[2001:db8::1234]:80/foo");
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.userInfo == nullptr);
KJ_EXPECT(url.host == "[2001:db8::1234]:80");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_EXPECT(url.query == nullptr);
KJ_EXPECT(url.fragment == nullptr);
}
// An empty query ("?" with nothing after it) is dropped when stringifying, whereas an empty
// fragment ("#" with nothing after it) is preserved.
parseAndCheck("https://capnproto.org/foo/bar?", "https://capnproto.org/foo/bar");
parseAndCheck("https://capnproto.org/foo/bar?#", "https://capnproto.org/foo/bar#");
parseAndCheck("https://capnproto.org/foo/bar#");
// Scheme and host are forced to lower-case.
parseAndCheck("hTtP://capNprotO.org/fOo/bAr", "http://capnproto.org/fOo/bAr");
}
KJ_TEST("URL percent encoding") {
// Percent-escapes of ordinary letters are decoded on parse and are not re-escaped on output.
parseAndCheck(
"https://b%6fb:%61bcd@capnpr%6fto.org/f%6fo?b%61r=b%61z#q%75x",
"https://bob:abcd@capnproto.org/foo?bar=baz#qux");
// A raw control character (0x01) in the input is accepted and comes out percent-escaped.
parseAndCheck(
"https://b\001b:\001bcd@capnproto.org/f\001o?b\001r=b\001z#q\001x",
"https://b%01b:%01bcd@capnproto.org/f%01o?b%01r=b%01z#q%01x");
// Raw spaces in the input are accepted and come out as "%20".
parseAndCheck(
"https://b b: bcd@capnproto.org/f o?b r=b z#q x",
"https://b%20b:%20bcd@capnproto.org/f%20o?b%20r=b%20z#q%20x");
}
KJ_TEST("URL relative paths") {
// Verifies that empty, "." and ".." path components are normalized away while parsing.

// Consecutive slashes collapse into one.
parseAndCheck(
"https://capnproto.org/foo//bar",
"https://capnproto.org/foo/bar");
// A "." component is dropped.
parseAndCheck(
"https://capnproto.org/foo/./bar",
"https://capnproto.org/foo/bar");
// Multiple trailing slashes collapse into a single trailing slash.
parseAndCheck(
"https://capnproto.org/foo/bar//",
"https://capnproto.org/foo/bar/");
// A final "." leaves a trailing slash.
parseAndCheck(
"https://capnproto.org/foo/bar/.",
"https://capnproto.org/foo/bar/");
// ".." removes the preceding component.
parseAndCheck(
"https://capnproto.org/foo/baz/../bar",
"https://capnproto.org/foo/bar");
// A final ".." removes the preceding component and leaves a trailing slash.
parseAndCheck(
"https://capnproto.org/foo/bar/baz/..",
"https://capnproto.org/foo/bar/");
// ".." at the root has nothing to remove and is ignored.
parseAndCheck(
"https://capnproto.org/..",
"https://capnproto.org/");
// More ".." components than path components: stops at the root.
parseAndCheck(
"https://capnproto.org/foo/../..",
"https://capnproto.org/");
}
KJ_TEST("URL for HTTP request") {
// Stringifying a full URL in each context: GENERAL keeps everything, HTTP_PROXY_REQUEST drops
// the user info and fragment, HTTP_REQUEST keeps only the path and query.
{
Url url = Url::parse("https://bob:1234@capnproto.org/foo/bar?baz=qux#corge");
KJ_EXPECT(url.toString(Url::GENERAL) == "https://bob:1234@capnproto.org/foo/bar?baz=qux#corge");
KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) == "https://capnproto.org/foo/bar?baz=qux");
KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/foo/bar?baz=qux");
}
// With an empty path, the HTTP_REQUEST form is still "/" rather than an empty string.
{
Url url = Url::parse("https://capnproto.org");
KJ_EXPECT(url.toString(Url::GENERAL) == "https://capnproto.org");
KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) == "https://capnproto.org");
KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/");
}
// Parsing in HTTP_REQUEST context: only path and query are present; scheme and host are empty.
{
Url url = Url::parse("/foo/bar?baz=qux&corge", Url::HTTP_REQUEST);
KJ_EXPECT(url.scheme == nullptr);
KJ_EXPECT(url.host == nullptr);
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 2);
KJ_EXPECT(url.query[0].name == "baz");
KJ_EXPECT(url.query[0].value == "qux");
KJ_EXPECT(url.query[1].name == "corge");
KJ_EXPECT(url.query[1].value == nullptr);
}
// Parsing in HTTP_PROXY_REQUEST context: scheme, host, path and query are all parsed.
{
Url url = Url::parse("https://capnproto.org/foo/bar?baz=qux&corge", Url::HTTP_PROXY_REQUEST);
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.host == "capnproto.org");
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
KJ_EXPECT(!url.hasTrailingSlash);
KJ_ASSERT(url.query.size() == 2);
KJ_EXPECT(url.query[0].name == "baz");
KJ_EXPECT(url.query[0].value == "qux");
KJ_EXPECT(url.query[1].name == "corge");
KJ_EXPECT(url.query[1].value == nullptr);
}
}
KJ_TEST("parse URL failure") {
// Each input below must be rejected by Url::tryParse(), i.e. it must return nullptr.

// malformed or missing scheme / authority: invalid character in the scheme, no scheme at all,
// and a scheme that is not followed by "//"
KJ_EXPECT(Url::tryParse("ht/tps://capnproto.org") == nullptr);
KJ_EXPECT(Url::tryParse("capnproto.org") == nullptr);
KJ_EXPECT(Url::tryParse("https:foo") == nullptr);
// percent-decode errors
KJ_EXPECT(Url::tryParse("https://capnproto.org/f%nno") == nullptr);
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo?b%nnr=baz") == nullptr);
// components not valid in context
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo", Url::HTTP_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("/foo#bar", Url::HTTP_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("https://bob:123@capnproto.org/foo", Url::HTTP_PROXY_REQUEST) == nullptr);
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST) == nullptr);
}
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected) {
// Test helper: parses `base` as an absolute URL, resolves `relative` against it with
// parseRelative(), and expects the stringified result to equal `expected`.
auto parsed = Url::parse(base).parseRelative(relative);
KJ_EXPECT(kj::str(parsed) == expected, parsed, expected);
}
KJ_TEST("parse relative URL") {
// All cases resolve against the same base URL; each exercises a different kind of reference.

// Fragment only: path and query are kept, the fragment is replaced.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"#grault",
"https://capnproto.org/foo/bar?baz=qux#grault");
// Query only: the path is kept, the query is replaced and the fragment is dropped.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"?grault",
"https://capnproto.org/foo/bar?grault");
// Relative path: replaces the last component of the base path.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"grault",
"https://capnproto.org/foo/grault");
// Absolute path: replaces the whole base path.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"/grault",
"https://capnproto.org/grault");
// New authority: only the scheme is inherited from the base.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"//grault",
"https://grault");
// New authority followed by a path.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"//grault/garply",
"https://grault/garply");
// New scheme without an authority: the host is inherited from the base.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"http:/grault",
"http://capnproto.org/grault");
// A colon after a leading slash is not a scheme separator; it is part of the path and is
// percent-escaped on output.
parseAndCheckRelative("https://capnproto.org/foo/bar?baz=qux#corge",
"/http:/grault",
"https://capnproto.org/http%3A/grault");
}
} // namespace
} // namespace kj
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "url.h"
#include <kj/encoding.h>
#include <kj/parse/char.h>
#include <kj/debug.h>
#include <stdlib.h>
namespace kj {
namespace {
// Character groups used by the URL parser and stringifier in this file.
constexpr auto ALPHAS = parse::charRange('a', 'z').orRange('A', 'Z');
// ASCII letters of either case.
constexpr auto DIGITS = parse::charRange('0', '9');
// ASCII decimal digits.
constexpr auto END_AUTHORITY = parse::anyOfChars("/?#");
// Characters at which the authority (userinfo + host) component stops.
constexpr auto END_PATH_PART = parse::anyOfChars("/?#");
// Characters at which a single path component stops.
constexpr auto END_QUERY_PART = parse::anyOfChars("&#");
// Characters at which a single query parameter stops.
constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
// Characters accepted inside a scheme.
constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();
// Complement of SCHEME_CHARS; used to detect that a relative reference has no scheme prefix.
constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]"); // [] is for ipv6 literals
// Characters accepted in `host` (which may include a ":port" suffix).
void toLower(String& text) {
  // Converts every ASCII upper-case letter in `text` to lower-case, in place. All other bytes
  // are left untouched.
  for (char& ch: text) {
    const bool isUpper = ch >= 'A' && ch <= 'Z';
    if (isUpper) {
      ch = ch - 'A' + 'a';
    }
  }
}
Maybe<ArrayPtr<const char>> trySplit(StringPtr& text, char c) {
  // If `c` occurs in `text`, returns the portion before its first occurrence and advances `text`
  // to just past that occurrence. Otherwise returns nullptr and leaves `text` unchanged.
  KJ_IF_MAYBE(found, text.findFirst(c)) {
    const size_t at = *found;
    ArrayPtr<const char> head = text.slice(0, at);
    text = text.slice(at + 1);
    return head;
  }
  return nullptr;
}
Maybe<ArrayPtr<const char>> trySplit(ArrayPtr<const char>& text, char c) {
  // Same as the StringPtr overload, but for a character array that is not NUL-terminated: on the
  // first occurrence of `c`, returns everything before it and advances `text` past it. Returns
  // nullptr (leaving `text` unchanged) if `c` does not occur.
  for (size_t idx = 0; idx < text.size(); ++idx) {
    if (text[idx] != c) continue;

    ArrayPtr<const char> head = text.slice(0, idx);
    text = text.slice(idx + 1, text.size());
    return head;
  }
  return nullptr;
}
ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
  // Returns the prefix of `text` up to (not including) the first character that is a member of
  // `chars`, and advances `text` so that it starts AT that character. If no such character
  // exists, the whole text is returned and `text` becomes empty.
  size_t end = 0;
  while (end < text.size() && !chars.contains(text[end])) {
    ++end;
  }

  if (end == text.size()) {
    // No delimiter found: consume everything.
    auto everything = text.asArray();
    text = "";
    return everything;
  }

  ArrayPtr<const char> head = text.slice(0, end);
  text = text.slice(end);
  return head;
}
String percentDecode(ArrayPtr<const char> text, bool& hadErrors) {
  // Decodes `text` with decodeUriComponent() and returns the decoded string. `hadErrors` is set
  // to true if the decoder reported errors; it is never reset to false, so one flag can
  // accumulate errors across many calls.
  auto decoded = decodeUriComponent(text);
  hadErrors = hadErrors || decoded.hadErrors;
  return kj::mv(decoded);
}
} // namespace
// Out-of-line definition of the destructor declared in url.h. It has no work of its own; it is
// declared noexcept(false) so that it is not implicitly noexcept.
Url::~Url() noexcept(false) {}
Url Url::clone() const {
  // Produces a deep copy of this URL: every string component is duplicated with kj::str(), so
  // the returned Url shares no storage with the original.
  Url copy;

  copy.scheme = kj::str(this->scheme);
  copy.userInfo = this->userInfo.map([](const UserInfo& source) -> UserInfo {
    return {
      kj::str(source.username),
      source.password.map([](const String& pw) { return kj::str(pw); })
    };
  });
  copy.host = kj::str(this->host);

  copy.path = KJ_MAP(segment, this->path) { return kj::str(segment); };
  copy.hasTrailingSlash = this->hasTrailingSlash;

  copy.query = KJ_MAP(entry, this->query) -> QueryParam {
    return { kj::str(entry.name), kj::str(entry.value) };
  };
  copy.fragment = this->fragment.map([](const String& frag) { return kj::str(frag); });

  return copy;
}
Url Url::parse(StringPtr url, Context context) {
// Throwing variant of tryParse(): returns the parsed URL, or fails the KJ_REQUIRE_NONNULL check
// (with message "invalid URL" and the offending text) if tryParse() returns nullptr.
return KJ_REQUIRE_NONNULL(tryParse(url, context), "invalid URL", url);
}
Maybe<Url> Url::tryParse(StringPtr text, Context context) {
// Parses `text` as an absolute URL -- or, when `context` is HTTP_REQUEST, as a path-and-query
// beginning with '/'. Returns nullptr if the text is rejected: missing or malformed scheme, no
// "//" authority, invalid host characters, a component that is not allowed in `context`, or any
// percent-decoding error.
//
// `text` is used as a cursor: each step below consumes the component it parses from the front,
// so the order of the steps matters.
Url result;
bool err = false; // tracks percent-decoding errors
if (context == HTTP_REQUEST) {
// Only a path (and optional query) is allowed, so the text must begin with the path.
if (!text.startsWith("/")) {
return nullptr;
}
} else {
// scheme: everything before the first ':'
KJ_IF_MAYBE(scheme, trySplit(text, ':')) {
result.scheme = kj::str(*scheme);
} else {
// missing scheme
return nullptr;
}
toLower(result.scheme);
// The scheme must be non-empty, start with a letter, and contain only scheme characters.
if (result.scheme.size() == 0 ||
!ALPHAS.contains(result.scheme[0]) ||
!SCHEME_CHARS.containsAll(result.scheme.slice(1))) {
// bad scheme
return nullptr;
}
if (!text.startsWith("//")) {
// We require an authority (hostname) part.
return nullptr;
}
text = text.slice(2);
{
// authority: optional "user[:password]@" followed by the host
auto authority = split(text, END_AUTHORITY);
KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
if (context != GENERAL) {
// No user/pass allowed here.
return nullptr;
}
// After a successful split on ':', `*userpass` holds only the password part.
KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
result.userInfo = UserInfo {
percentDecode(*username, err),
percentDecode(*userpass, err)
};
} else {
// No ':' present, so the whole user info is the username.
result.userInfo = UserInfo {
percentDecode(*userpass, err),
nullptr
};
}
}
// Whatever remains of the authority is the host; validate it after decoding, then normalize
// its case.
result.host = percentDecode(authority, err);
if (!HOST_CHARS.containsAll(result.host)) return nullptr;
toLower(result.host);
}
}
{
// path: zero or more "/component" parts, normalized as they are read
Vector<String> path;
while (text.startsWith("/")) {
text = text.slice(1);
auto part = split(text, END_PATH_PART);
if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
// "..": drop the previous component, if there is one.
if (path.size() != 0) {
path.removeLast();
}
result.hasTrailingSlash = true;
} else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) {
// Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true;
} else {
path.add(percentDecode(part, err));
result.hasTrailingSlash = false;
}
}
result.path = path.releaseAsArray();
}
if (text.startsWith("?")) {
// query: '&'-separated parameters, each optionally split into name and value at the first '='
Vector<QueryParam> params;
do {
// Skip the leading '?' on the first iteration and the '&' separator on later ones.
text = text.slice(1);
auto part = split(text, END_QUERY_PART);
// Empty parameters (e.g. from "?" alone or "&&") are skipped.
if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) {
params.add(QueryParam { percentDecode(*key, err), percentDecode(part, err) });
} else {
params.add(QueryParam { percentDecode(part, err), nullptr });
}
}
} while (text.startsWith("&"));
result.query = params.releaseAsArray();
}
if (text.startsWith("#")) {
if (context != GENERAL) {
// No fragment allowed here.
return nullptr;
}
// fragment: everything after the '#'
result.fragment = percentDecode(text.slice(1), err);
} else {
// We should have consumed everything.
KJ_ASSERT(text.size() == 0);
}
// Percent-decoding errors are only acted upon here, once the whole URL has been processed.
if (err) return nullptr;
return kj::mv(result);
}
Url Url::parseRelative(StringPtr url) const {
// Throwing variant of tryParseRelative(): returns the resolved URL, or fails the
// KJ_REQUIRE_NONNULL check (with message "invalid relative URL" and the offending text) if
// tryParseRelative() returns nullptr.
return KJ_REQUIRE_NONNULL(tryParseRelative(url), "invalid relative URL", url);
}
Maybe<Url> Url::tryParseRelative(StringPtr text) const {
  // Resolves the URL reference `text` against this URL (the base) and returns the resulting URL.
  // Components that the reference does not specify are copied from the base:
  //
  // - No scheme prefix: the base scheme is used.
  // - No "//authority": the base host and user info are used.
  // - No path (and no new authority): the base path is used.
  // - No query (and no new authority or path): the base query is used.
  // - The fragment is never inherited.
  //
  // An empty reference yields a clone of the base. Returns nullptr if any component fails
  // percent-decoding or if a newly supplied host contains characters outside HOST_CHARS.
  //
  // As in tryParse(), a newly supplied scheme and host are normalized to lower-case, and the host
  // is validated. Otherwise a URL could be produced here which toString() would later refuse to
  // stringify ("invalid hostname") or which would differ in case from the same URL parsed in
  // absolute form.
  //
  // `text` is used as a cursor: each step consumes the component it parses from the front.

  if (text.size() == 0) return clone();

  Url result;
  bool err = false;  // tracks percent-decoding errors

  // scheme
  {
    bool gotScheme = false;
    for (auto i: kj::indices(text)) {
      if (text[i] == ':') {
        // Everything before the ':' consisted of scheme characters, so this is a scheme.
        result.scheme = kj::str(text.slice(0, i));
        toLower(result.scheme);
        text = text.slice(i + 1);
        gotScheme = true;
        break;
      } else if (NOT_SCHEME_CHARS.contains(text[i])) {
        // Hit a non-scheme character before any ':', so there is no scheme.
        break;
      }
    }

    if (!gotScheme) {
      // copy scheme
      result.scheme = kj::str(this->scheme);
    }
  }

  // authority
  bool hadNewAuthority = text.startsWith("//");
  if (hadNewAuthority) {
    text = text.slice(2);

    auto authority = split(text, END_AUTHORITY);

    KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
      // After a successful split on ':', `*userpass` holds only the password part.
      KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
        result.userInfo = UserInfo {
          percentDecode(*username, err),
          percentDecode(*userpass, err)
        };
      } else {
        result.userInfo = UserInfo {
          percentDecode(*userpass, err),
          nullptr
        };
      }
    }

    // Validate and normalize the host exactly as tryParse() does.
    result.host = percentDecode(authority, err);
    if (!HOST_CHARS.containsAll(result.host)) return nullptr;
    toLower(result.host);
  } else {
    // copy authority
    result.host = kj::str(this->host);
    result.userInfo = this->userInfo.map([](const UserInfo& userInfo) {
      return UserInfo {
        kj::str(userInfo.username),
        userInfo.password.map([](const String& password) { return kj::str(password); }),
      };
    });
  }

  // path
  bool hadNewPath = text.size() > 0 && text[0] != '?' && text[0] != '#';
  if (hadNewPath) {
    // There's a new path.
    Vector<String> path(this->path.size());

    if (text[0] == '/') {
      // New path is absolute, so don't copy the old path.
      text = text.slice(1);
      result.hasTrailingSlash = true;
    } else if (this->path.size() > 0) {
      // New path is relative, so start from the old path, dropping everything after the last
      // slash.
      auto slice = this->path.slice(0, this->path.size() - (this->hasTrailingSlash ? 0 : 1));
      for (auto& part: slice) {
        path.add(kj::str(part));
      }
      result.hasTrailingSlash = true;
    }

    for (;;) {
      auto part = split(text, END_PATH_PART);
      if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
        // "..": drop the previous component, if there is one.
        if (path.size() != 0) {
          path.removeLast();
        }
        result.hasTrailingSlash = true;
      } else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) {
        // Collapse consecutive slashes and "/./".
        result.hasTrailingSlash = true;
      } else {
        path.add(percentDecode(part, err));
        result.hasTrailingSlash = false;
      }

      if (!text.startsWith("/")) break;
      text = text.slice(1);
    }

    result.path = path.releaseAsArray();
  } else if (!hadNewAuthority) {
    // copy path
    result.path = KJ_MAP(part, this->path) { return kj::str(part); };
    result.hasTrailingSlash = this->hasTrailingSlash;
  }

  // query
  if (text.startsWith("?")) {
    Vector<QueryParam> params;

    do {
      // Skip the leading '?' on the first iteration and the '&' separator on later ones.
      text = text.slice(1);
      auto part = split(text, END_QUERY_PART);

      // Empty parameters are skipped.
      if (part.size() > 0) {
        KJ_IF_MAYBE(key, trySplit(part, '=')) {
          params.add(QueryParam { percentDecode(*key, err), percentDecode(part, err) });
        } else {
          params.add(QueryParam { percentDecode(part, err), nullptr });
        }
      }
    } while (text.startsWith("&"));

    result.query = params.releaseAsArray();
  } else if (!hadNewAuthority && !hadNewPath) {
    // copy query
    result.query = KJ_MAP(param, this->query) {
      return QueryParam { kj::str(param.name), kj::str(param.value) };
    };
  }

  // fragment
  if (text.startsWith("#")) {
    result.fragment = percentDecode(text.slice(1), err);
  } else {
    // We should have consumed everything.
    KJ_ASSERT(text.size() == 0);
  }

  // Percent-decoding errors are only acted upon here, once the whole reference has been
  // processed.
  if (err) return nullptr;

  return kj::mv(result);
}
String Url::toString(Context context) const {
  // Serializes this URL to text. Which components are written depends on `context`:
  // HTTP_REQUEST omits scheme, user info and host; only GENERAL includes user info and fragment.
  // Path components, query names/values, user info and fragment are escaped with
  // encodeUriComponent(); the host is written verbatim after validation.
  Vector<char> out(128);

  const bool includeOrigin = context != HTTP_REQUEST;
  const bool isGeneral = context == GENERAL;

  if (includeOrigin) {
    out.addAll(scheme);
    out.addAll(StringPtr("://"));

    if (isGeneral) {
      KJ_IF_MAYBE(credentials, userInfo) {
        out.addAll(encodeUriComponent(credentials->username));
        KJ_IF_MAYBE(secret, credentials->password) {
          out.add(':');
          out.addAll(encodeUriComponent(*secret));
        }
        out.add('@');
      }
    }

    // RFC3986 specifies that hosts can contain percent-encoding escapes while suggesting that
    // they should only be used for UTF-8 sequences. However, the DNS standard specifies a
    // different way to encode Unicode into domain names and doesn't permit any characters which
    // would need to be escaped. Meanwhile, encodeUriComponent() here would incorrectly try to
    // escape colons and brackets (e.g. around ipv6 literal addresses). So, instead, we throw if
    // the host is invalid.
    if (!HOST_CHARS.containsAll(host)) {
      KJ_FAIL_REQUIRE("invalid hostname when stringifying URL", host) {
        // Recovery path: substitute a placeholder host.
        out.addAll(StringPtr("invalid-host"));
        break;
      }
    } else {
      out.addAll(host);
    }
  }

  for (auto& segment: path) {
    out.add('/');
    out.addAll(encodeUriComponent(segment));
  }

  // An HTTP request line needs at least "/" even when the path is empty.
  const bool needsRootSlash = path.size() == 0 && !includeOrigin;
  if (hasTrailingSlash || needsRootSlash) {
    out.add('/');
  }

  // The first parameter is introduced by '?', every later one by '&'.
  char separator = '?';
  for (auto& param: query) {
    out.add(separator);
    separator = '&';

    out.addAll(encodeUriComponent(param.name));
    // Parameters with an empty value are written as a bare name, without '='.
    if (param.value.size() > 0) {
      out.add('=');
      out.addAll(encodeUriComponent(param.value));
    }
  }

  if (isGeneral) {
    KJ_IF_MAYBE(frag, fragment) {
      out.add('#');
      out.addAll(encodeUriComponent(*frag));
    }
  }

  // String takes ownership of the buffer and expects it to include the NUL terminator.
  out.add('\0');
  return String(out.releaseAsArray());
}
} // namespace kj
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef KJ_COMPAT_URL_H_
#define KJ_COMPAT_URL_H_
#include <kj/string.h>
#include <inttypes.h>
namespace kj {
struct Url {
  // Represents a URL (or, more accurately, a URI, but whatever) broken down into its components.
  //
  // All string members hold percent-DECODED text; escaping is applied when the URL is converted
  // back to a string. The struct is move-only; use clone() to make a deep copy.

  String scheme;
  // E.g. "http", "https".

  struct UserInfo {
    String username;
    Maybe<String> password;
  };

  Maybe<UserInfo> userInfo;
  // Username / password.

  String host;
  // Hostname, including port if specified. We choose not to parse out the port because KJ's
  // network address parsing functions already accept addresses containing port numbers, and
  // because most web standards don't actually want to separate host and port.

  Array<String> path;
  bool hasTrailingSlash = false;
  // Path, split on '/' characters. Note that the individual components of `path` could contain
  // '/' characters if they were percent-encoded in the original URL.

  struct QueryParam {
    String name;
    String value;
  };
  Array<QueryParam> query;
  // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign,
  // it will be parsed as a key with an empty value.

  Maybe<String> fragment;
  // The stuff after the '#' character (not including the '#' character itself), if present.

  // ---------------------------------------------------------------------------

  Url() = default;
  Url(Url&&) = default;
  ~Url() noexcept(false);

  Url& operator=(Url&&) = default;
  // Move assignment. Declaring the move constructor and destructor suppresses the implicit
  // move-assignment operator (and deletes copy assignment), so without this declaration a Url
  // could be move-constructed but never assigned to.

  Url clone() const;
  // Returns a deep copy of this URL.

  enum Context {
    GENERAL,
    // The full URL.

    HTTP_PROXY_REQUEST,
    // The URL to place in the first line of an HTTP proxy request. This includes scheme, host,
    // path, and query, but omits userInfo (which should be used to construct the Authorization
    // header) and fragment (which should not be transmitted).

    HTTP_REQUEST
    // The path to place in the first line of a regular HTTP request. This includes only the path
    // and query. Scheme, user, host, and fragment are omitted.
  };

  kj::String toString(Context context = GENERAL) const;
  // Convert the URL to a string.

  static Url parse(StringPtr text, Context context = GENERAL);
  static Maybe<Url> tryParse(StringPtr text, Context context = GENERAL);
  // Parse an absolute URL. parse() throws if the text is not a valid URL for the given context;
  // tryParse() returns nullptr instead.

  Url parseRelative(StringPtr relative) const;
  Maybe<Url> tryParseRelative(StringPtr relative) const;
  // Parse a relative URL string with this URL as the base. parseRelative() throws on invalid
  // input; tryParseRelative() returns nullptr instead.
};
} // namespace kj
#endif // KJ_COMPAT_URL_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment