Commit 88a11ff8 authored by Harris Hancock

Add options bag to kj::Url::parse() to prevent automatic decoding of URL components

parent 9c8ba23d
...@@ -26,9 +26,10 @@ ...@@ -26,9 +26,10 @@
namespace kj { namespace kj {
namespace { namespace {
Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr) { Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr,
Url::Options options = {}) {
if (expectedRestringified == nullptr) expectedRestringified = originalText; if (expectedRestringified == nullptr) expectedRestringified = originalText;
auto url = Url::parse(originalText); auto url = Url::parse(originalText, Url::REMOTE_HREF, options);
KJ_EXPECT(kj::str(url) == expectedRestringified, url, originalText, expectedRestringified); KJ_EXPECT(kj::str(url) == expectedRestringified, url, originalText, expectedRestringified);
// Make sure clones also restringify to the expected string. // Make sure clones also restringify to the expected string.
auto clone = url.clone(); auto clone = url.clone();
...@@ -36,6 +37,10 @@ Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringifie ...@@ -36,6 +37,10 @@ Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringifie
return url; return url;
} }
// Options used by the "w/o decoding" tests: percentDecode is switched off, so parsed components
// are expected to keep their '%' sequences untouched.
static constexpr Url::Options NO_DECODE {
false, // percentDecode
};
KJ_TEST("parse / stringify URL") { KJ_TEST("parse / stringify URL") {
{ {
auto url = parseAndCheck("https://capnproto.org"); auto url = parseAndCheck("https://capnproto.org");
...@@ -301,6 +306,25 @@ KJ_TEST("URL percent encoding") { ...@@ -301,6 +306,25 @@ KJ_TEST("URL percent encoding") {
"https://capnproto.org/!$&'()*+,-.:;=@[]^_|~"); "https://capnproto.org/!$&'()*+,-.:;=@[]^_|~");
} }
KJ_TEST("parse / stringify URL w/o decoding") {
// Parses with NO_DECODE and checks that path and query components come back un-decoded.
// Both calls pass nullptr as the expected string, so parseAndCheck() also requires the URL to
// stringify back to exactly the original text.
{
// "%2F" is an escaped '/'. With decoding off, the first path component must keep the literal
// "%2F" instead of containing a slash.
auto url = parseAndCheck("https://capnproto.org/foo%2Fbar/baz", nullptr, NO_DECODE);
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo%2Fbar", "baz"}));
}
{
// This case would throw an exception without NO_DECODE.
// ("%fo" and "%QQ" in the query are not '%' followed by two hex digits.)
Url url = parseAndCheck("https://capnproto.org/R%20%26%20S?%foo=%QQ", nullptr, NO_DECODE);
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.host == "capnproto.org");
// Well-formed escapes are left alone too: "%20" and "%26" stay in the path component.
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"R%20%26%20S"}));
KJ_EXPECT(!url.hasTrailingSlash);
// The size is checked with KJ_ASSERT before query[0] is accessed below.
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "%foo");
KJ_EXPECT(url.query[0].value == "%QQ");
}
}
KJ_TEST("URL relative paths") { KJ_TEST("URL relative paths") {
parseAndCheck( parseAndCheck(
"https://capnproto.org/foo//bar", "https://capnproto.org/foo//bar",
...@@ -394,8 +418,9 @@ KJ_TEST("parse URL failure") { ...@@ -394,8 +418,9 @@ KJ_TEST("parse URL failure") {
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST) == nullptr); KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST) == nullptr);
} }
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected) { void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected,
auto parsed = Url::parse(base).parseRelative(relative); Url::Options options = {}) {
auto parsed = Url::parse(base, Url::REMOTE_HREF, options).parseRelative(relative);
KJ_EXPECT(kj::str(parsed) == expected, parsed, expected); KJ_EXPECT(kj::str(parsed) == expected, parsed, expected);
auto clone = parsed.clone(); auto clone = parsed.clone();
KJ_EXPECT(kj::str(clone) == expected, clone, expected); KJ_EXPECT(kj::str(clone) == expected, clone, expected);
...@@ -443,6 +468,13 @@ KJ_TEST("parse relative URL") { ...@@ -443,6 +468,13 @@ KJ_TEST("parse relative URL") {
"https://capnproto.org/bar"); "https://capnproto.org/bar");
} }
KJ_TEST("parse relative URL w/o decoding") {
// This case would throw an exception without NO_DECODE.
//
// parseAndCheckRelative() hands the options only to Url::parse() for the base; parseRelative()
// receives no options of its own. The relative reference below is not valid percent-encoding, so
// it can only appear verbatim in the result if the relative parse uses the base URL's options.
// In the expected string the relative path replaces the base's last path component and the
// base's query is absent.
parseAndCheckRelative("https://capnproto.org/R%20%26%20S?%foo=%QQ",
"%ANOTH%ERBAD%URL",
"https://capnproto.org/%ANOTH%ERBAD%URL", NO_DECODE);
}
KJ_TEST("parse relative URL failure") { KJ_TEST("parse relative URL failure") {
auto base = Url::parse("https://example.com/"); auto base = Url::parse("https://example.com/");
KJ_EXPECT(base.tryParseRelative("https://[not a host]") == nullptr); KJ_EXPECT(base.tryParseRelative("https://[not a host]") == nullptr);
......
...@@ -86,16 +86,22 @@ ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) { ...@@ -86,16 +86,22 @@ ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
return result; return result;
} }
String percentDecode(ArrayPtr<const char> text, bool& hadErrors) { String percentDecode(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
if (options.percentDecode) {
auto result = decodeUriComponent(text); auto result = decodeUriComponent(text);
if (result.hadErrors) hadErrors = true; if (result.hadErrors) hadErrors = true;
return kj::mv(result); return kj::mv(result);
}
return kj::str(text);
} }
String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors) { String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
if (options.percentDecode) {
auto result = decodeWwwForm(text); auto result = decodeWwwForm(text);
if (result.hadErrors) hadErrors = true; if (result.hadErrors) hadErrors = true;
return kj::mv(result); return kj::mv(result);
}
return kj::str(text);
} }
} // namespace } // namespace
...@@ -119,16 +125,18 @@ Url Url::clone() const { ...@@ -119,16 +125,18 @@ Url Url::clone() const {
return { kj::str(param.name), param.value.begin() == nullptr ? kj::String() return { kj::str(param.name), param.value.begin() == nullptr ? kj::String()
: kj::str(param.value) }; : kj::str(param.value) };
}, },
fragment.map([](const String& s) { return kj::str(s); }) fragment.map([](const String& s) { return kj::str(s); }),
options
}; };
} }
Url Url::parse(StringPtr url, Context context) { Url Url::parse(StringPtr url, Context context, Options options) {
return KJ_REQUIRE_NONNULL(tryParse(url, context), "invalid URL", url); return KJ_REQUIRE_NONNULL(tryParse(url, context, options), "invalid URL", url);
} }
Maybe<Url> Url::tryParse(StringPtr text, Context context) { Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
Url result; Url result;
result.options = options;
bool err = false; // tracks percent-decoding errors bool err = false; // tracks percent-decoding errors
if (context == HTTP_REQUEST) { if (context == HTTP_REQUEST) {
...@@ -166,18 +174,18 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) { ...@@ -166,18 +174,18 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
} }
KJ_IF_MAYBE(username, trySplit(*userpass, ':')) { KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
result.userInfo = UserInfo { result.userInfo = UserInfo {
percentDecode(*username, err), percentDecode(*username, err, options),
percentDecode(*userpass, err) percentDecode(*userpass, err, options)
}; };
} else { } else {
result.userInfo = UserInfo { result.userInfo = UserInfo {
percentDecode(*userpass, err), percentDecode(*userpass, err, options),
nullptr nullptr
}; };
} }
} }
result.host = percentDecode(authority, err); result.host = percentDecode(authority, err, options);
if (!HOST_CHARS.containsAll(result.host)) return nullptr; if (!HOST_CHARS.containsAll(result.host)) return nullptr;
toLower(result.host); toLower(result.host);
} }
...@@ -195,7 +203,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) { ...@@ -195,7 +203,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
// Collapse consecutive slashes and "/./". // Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true; result.hasTrailingSlash = true;
} else { } else {
result.path.add(percentDecode(part, err)); result.path.add(percentDecode(part, err, options));
result.hasTrailingSlash = false; result.hasTrailingSlash = false;
} }
} }
...@@ -207,10 +215,10 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) { ...@@ -207,10 +215,10 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
if (part.size() > 0) { if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) { KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecodeQuery(*key, err), result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
percentDecodeQuery(part, err) }); percentDecodeQuery(part, err, options) });
} else { } else {
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr }); result.query.add(QueryParam { percentDecodeQuery(part, err, options), nullptr });
} }
} }
} while (text.startsWith("&")); } while (text.startsWith("&"));
...@@ -221,7 +229,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) { ...@@ -221,7 +229,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
// No fragment allowed here. // No fragment allowed here.
return nullptr; return nullptr;
} }
result.fragment = percentDecode(text.slice(1), err); result.fragment = percentDecode(text.slice(1), err, options);
} else { } else {
// We should have consumed everything. // We should have consumed everything.
KJ_ASSERT(text.size() == 0); KJ_ASSERT(text.size() == 0);
...@@ -240,6 +248,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const { ...@@ -240,6 +248,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
if (text.size() == 0) return clone(); if (text.size() == 0) return clone();
Url result; Url result;
result.options = options;
bool err = false; // tracks percent-decoding errors bool err = false; // tracks percent-decoding errors
// scheme // scheme
...@@ -273,18 +282,18 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const { ...@@ -273,18 +282,18 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
KJ_IF_MAYBE(userpass, trySplit(authority, '@')) { KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
KJ_IF_MAYBE(username, trySplit(*userpass, ':')) { KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
result.userInfo = UserInfo { result.userInfo = UserInfo {
percentDecode(*username, err), percentDecode(*username, err, options),
percentDecode(*userpass, err) percentDecode(*userpass, err, options)
}; };
} else { } else {
result.userInfo = UserInfo { result.userInfo = UserInfo {
percentDecode(*userpass, err), percentDecode(*userpass, err, options),
nullptr nullptr
}; };
} }
} }
result.host = percentDecode(authority, err); result.host = percentDecode(authority, err, options);
if (!HOST_CHARS.containsAll(result.host)) return nullptr; if (!HOST_CHARS.containsAll(result.host)) return nullptr;
toLower(result.host); toLower(result.host);
} else { } else {
...@@ -326,7 +335,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const { ...@@ -326,7 +335,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
// Collapse consecutive slashes and "/./". // Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true; result.hasTrailingSlash = true;
} else { } else {
result.path.add(percentDecode(part, err)); result.path.add(percentDecode(part, err, options));
result.hasTrailingSlash = false; result.hasTrailingSlash = false;
} }
...@@ -346,10 +355,11 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const { ...@@ -346,10 +355,11 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
if (part.size() > 0) { if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) { KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecodeQuery(*key, err), result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
percentDecodeQuery(part, err) }); percentDecodeQuery(part, err, options) });
} else { } else {
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr }); result.query.add(QueryParam { percentDecodeQuery(part, err, options),
nullptr });
} }
} }
} while (text.startsWith("&")); } while (text.startsWith("&"));
...@@ -363,7 +373,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const { ...@@ -363,7 +373,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
} }
if (text.startsWith("#")) { if (text.startsWith("#")) {
result.fragment = percentDecode(text.slice(1), err); result.fragment = percentDecode(text.slice(1), err, options);
} else { } else {
// We should have consumed everything. // We should have consumed everything.
KJ_ASSERT(text.size() == 0); KJ_ASSERT(text.size() == 0);
...@@ -383,10 +393,11 @@ String Url::toString(Context context) const { ...@@ -383,10 +393,11 @@ String Url::toString(Context context) const {
if (context == REMOTE_HREF) { if (context == REMOTE_HREF) {
KJ_IF_MAYBE(user, userInfo) { KJ_IF_MAYBE(user, userInfo) {
chars.addAll(encodeUriUserInfo(user->username)); chars.addAll(options.percentDecode ? encodeUriUserInfo(user->username)
: kj::str(user->username));
KJ_IF_MAYBE(pass, user->password) { KJ_IF_MAYBE(pass, user->password) {
chars.add(':'); chars.add(':');
chars.addAll(encodeUriUserInfo(*pass)); chars.addAll(options.percentDecode ? encodeUriUserInfo(*pass) : kj::str(*pass));
} }
chars.add('@'); chars.add('@');
} }
...@@ -415,7 +426,7 @@ String Url::toString(Context context) const { ...@@ -415,7 +426,7 @@ String Url::toString(Context context) const {
continue; continue;
} }
chars.add('/'); chars.add('/');
chars.addAll(encodeUriPath(pathPart)); chars.addAll(options.percentDecode ? encodeUriPath(pathPart) : kj::str(pathPart));
} }
if (hasTrailingSlash || (path.size() == 0 && context == HTTP_REQUEST)) { if (hasTrailingSlash || (path.size() == 0 && context == HTTP_REQUEST)) {
chars.add('/'); chars.add('/');
...@@ -425,17 +436,17 @@ String Url::toString(Context context) const { ...@@ -425,17 +436,17 @@ String Url::toString(Context context) const {
for (auto& param: query) { for (auto& param: query) {
chars.add(first ? '?' : '&'); chars.add(first ? '?' : '&');
first = false; first = false;
chars.addAll(encodeWwwForm(param.name)); chars.addAll(options.percentDecode ? encodeWwwForm(param.name) : kj::str(param.name));
if (param.value.begin() != nullptr) { if (param.value.begin() != nullptr) {
chars.add('='); chars.add('=');
chars.addAll(encodeWwwForm(param.value)); chars.addAll(options.percentDecode ? encodeWwwForm(param.value) : kj::str(param.value));
} }
} }
if (context == REMOTE_HREF) { if (context == REMOTE_HREF) {
KJ_IF_MAYBE(f, fragment) { KJ_IF_MAYBE(f, fragment) {
chars.add('#'); chars.add('#');
chars.addAll(encodeUriFragment(*f)); chars.addAll(options.percentDecode ? encodeUriFragment(*f) : kj::str(*f));
} }
} }
......
...@@ -27,6 +27,24 @@ ...@@ -27,6 +27,24 @@
namespace kj { namespace kj {
struct UrlOptions {
  // Settings that can be handed to Url::parse() / Url::tryParse() to adjust how the parser
  // behaves.
  //
  // The parsed Url object keeps its own copy of these settings, and Url::toString() consults that
  // copy when serializing.

  bool percentDecode = true;
  // When true, URL components are percent-decoded automatically while parsing and percent-encoded
  // again while serializing. When false, neither transformation is applied.

#if __cplusplus < 201402L
  constexpr UrlOptions(bool decode = true): percentDecode(decode) {}
  // TODO(cleanup): Exists only so that brace initialization keeps working under C++11. Remove
  //   once we upgrade to C++14.
#endif
};
struct Url { struct Url {
// Represents a URL (or, more accurately, a URI, but whatever). // Represents a URL (or, more accurately, a URI, but whatever).
// //
...@@ -75,6 +93,9 @@ struct Url { ...@@ -75,6 +93,9 @@ struct Url {
Maybe<String> fragment; Maybe<String> fragment;
// The stuff after the '#' character (not including the '#' character itself), if present. // The stuff after the '#' character (not including the '#' character itself), if present.
using Options = UrlOptions;
Options options;
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
Url() = default; Url() = default;
...@@ -84,9 +105,11 @@ struct Url { ...@@ -84,9 +105,11 @@ struct Url {
#if __cplusplus < 201402L #if __cplusplus < 201402L
inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path, inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path,
bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment) bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment,
UrlOptions options)
: scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)), : scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)),
hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)) {} hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)),
options(options) {}
// TODO(cleanup): This constructor is only here to support brace initialization in C++11. It // TODO(cleanup): This constructor is only here to support brace initialization in C++11. It
// should be removed once we upgrade to C++14. // should be removed once we upgrade to C++14.
#endif #endif
...@@ -114,8 +137,8 @@ struct Url { ...@@ -114,8 +137,8 @@ struct Url {
kj::String toString(Context context = REMOTE_HREF) const; kj::String toString(Context context = REMOTE_HREF) const;
// Convert the URL to a string. // Convert the URL to a string.
static Url parse(StringPtr text, Context context = REMOTE_HREF); static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF); static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
// Parse an absolute URL. // Parse an absolute URL.
Url parseRelative(StringPtr relative) const; Url parseRelative(StringPtr relative) const;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment