Commit 88a11ff8 authored by Harris Hancock

Add options bag to kj::Url::parse() to prevent automatic decoding of URL components

parent 9c8ba23d
......@@ -26,9 +26,10 @@
namespace kj {
namespace {
Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr) {
Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr,
Url::Options options = {}) {
if (expectedRestringified == nullptr) expectedRestringified = originalText;
auto url = Url::parse(originalText);
auto url = Url::parse(originalText, Url::REMOTE_HREF, options);
KJ_EXPECT(kj::str(url) == expectedRestringified, url, originalText, expectedRestringified);
// Make sure clones also restringify to the expected string.
auto clone = url.clone();
......@@ -36,6 +37,10 @@ Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringifie
return url;
}
static constexpr Url::Options NO_DECODE {
false, // percentDecode
};
// Parser options shared by the "w/o decoding" tests in this file: percentDecode is turned off,
// so URL components are not percent-decoded when parsed.
KJ_TEST("parse / stringify URL") {
{
auto url = parseAndCheck("https://capnproto.org");
......@@ -301,6 +306,25 @@ KJ_TEST("URL percent encoding") {
"https://capnproto.org/!$&'()*+,-.:;=@[]^_|~");
}
KJ_TEST("parse / stringify URL w/o decoding") {
// Checks that, with NO_DECODE, percent-escapes are left untouched by the parser and that the
// parsed URL restringifies to the original text. parseAndCheck() is given nullptr as the
// expected string, which makes it compare against the input text itself.
{
// An escaped slash (%2F) must remain inside its path component instead of being decoded.
auto url = parseAndCheck("https://capnproto.org/foo%2Fbar/baz", nullptr, NO_DECODE);
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo%2Fbar", "baz"}));
}
{
// This case would throw an exception without NO_DECODE.
// ("%foo" and "%QQ" are not well-formed percent-escapes.)
Url url = parseAndCheck("https://capnproto.org/R%20%26%20S?%foo=%QQ", nullptr, NO_DECODE);
KJ_EXPECT(url.scheme == "https");
KJ_EXPECT(url.host == "capnproto.org");
// The escapes in the path component are expected to be stored as-is.
KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"R%20%26%20S"}));
KJ_EXPECT(!url.hasTrailingSlash);
// NOTE(review): presumably KJ_ASSERT (rather than KJ_EXPECT) is used here so the test stops
// before indexing query[0] if the parameter count is wrong -- confirm.
KJ_ASSERT(url.query.size() == 1);
KJ_EXPECT(url.query[0].name == "%foo");
KJ_EXPECT(url.query[0].value == "%QQ");
}
}
KJ_TEST("URL relative paths") {
parseAndCheck(
"https://capnproto.org/foo//bar",
......@@ -394,8 +418,9 @@ KJ_TEST("parse URL failure") {
KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar", Url::HTTP_PROXY_REQUEST) == nullptr);
}
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected) {
auto parsed = Url::parse(base).parseRelative(relative);
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected,
Url::Options options = {}) {
auto parsed = Url::parse(base, Url::REMOTE_HREF, options).parseRelative(relative);
KJ_EXPECT(kj::str(parsed) == expected, parsed, expected);
auto clone = parsed.clone();
KJ_EXPECT(kj::str(clone) == expected, clone, expected);
......@@ -443,6 +468,13 @@ KJ_TEST("parse relative URL") {
"https://capnproto.org/bar");
}
KJ_TEST("parse relative URL w/o decoding") {
// The base URL is parsed with NO_DECODE and the relative reference is then resolved against it
// by parseAndCheckRelative(). The malformed escapes in the relative part are expected to appear
// unchanged in the resulting URL string.
// This case would throw an exception without NO_DECODE.
parseAndCheckRelative("https://capnproto.org/R%20%26%20S?%foo=%QQ",
"%ANOTH%ERBAD%URL",
"https://capnproto.org/%ANOTH%ERBAD%URL", NO_DECODE);
}
KJ_TEST("parse relative URL failure") {
auto base = Url::parse("https://example.com/");
KJ_EXPECT(base.tryParseRelative("https://[not a host]") == nullptr);
......
......@@ -86,16 +86,22 @@ ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
return result;
}
String percentDecode(ArrayPtr<const char> text, bool& hadErrors) {
String percentDecode(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
// Returns `text` as a String. When options.percentDecode is true the text is decoded with
// decodeUriComponent(); otherwise it is copied verbatim.
//
// `hadErrors` is set to true if the decoder reports errors. It is never reset to false here, so
// a caller can pass the same flag to several calls and check it once afterwards. In the
// non-decoding branch the flag is not touched.
if (options.percentDecode) {
auto result = decodeUriComponent(text);
if (result.hadErrors) hadErrors = true;
return kj::mv(result);
}
return kj::str(text);
}
String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors) {
String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
// Query-string counterpart of percentDecode(): when options.percentDecode is true the text is
// decoded with decodeWwwForm(); otherwise it is copied verbatim.
//
// `hadErrors` is set to true if the decoder reports errors and is otherwise left unchanged, so
// the same flag can be shared across several calls.
if (options.percentDecode) {
auto result = decodeWwwForm(text);
if (result.hadErrors) hadErrors = true;
return kj::mv(result);
}
return kj::str(text);
}
} // namespace
......@@ -119,16 +125,18 @@ Url Url::clone() const {
return { kj::str(param.name), param.value.begin() == nullptr ? kj::String()
: kj::str(param.value) };
},
fragment.map([](const String& s) { return kj::str(s); })
fragment.map([](const String& s) { return kj::str(s); }),
options
};
}
Url Url::parse(StringPtr url, Context context) {
return KJ_REQUIRE_NONNULL(tryParse(url, context), "invalid URL", url);
Url Url::parse(StringPtr url, Context context, Options options) {
// Parses an absolute URL by delegating to tryParse() with the same context and options. If
// tryParse() yields no value, the KJ_REQUIRE_NONNULL requirement fails, reporting "invalid URL"
// together with the input text.
return KJ_REQUIRE_NONNULL(tryParse(url, context, options), "invalid URL", url);
}
Maybe<Url> Url::tryParse(StringPtr text, Context context) {
Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
Url result;
result.options = options;
bool err = false; // tracks percent-decoding errors
if (context == HTTP_REQUEST) {
......@@ -166,18 +174,18 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
}
KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
result.userInfo = UserInfo {
percentDecode(*username, err),
percentDecode(*userpass, err)
percentDecode(*username, err, options),
percentDecode(*userpass, err, options)
};
} else {
result.userInfo = UserInfo {
percentDecode(*userpass, err),
percentDecode(*userpass, err, options),
nullptr
};
}
}
result.host = percentDecode(authority, err);
result.host = percentDecode(authority, err, options);
if (!HOST_CHARS.containsAll(result.host)) return nullptr;
toLower(result.host);
}
......@@ -195,7 +203,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
// Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true;
} else {
result.path.add(percentDecode(part, err));
result.path.add(percentDecode(part, err, options));
result.hasTrailingSlash = false;
}
}
......@@ -207,10 +215,10 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecodeQuery(*key, err),
percentDecodeQuery(part, err) });
result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
percentDecodeQuery(part, err, options) });
} else {
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr });
result.query.add(QueryParam { percentDecodeQuery(part, err, options), nullptr });
}
}
} while (text.startsWith("&"));
......@@ -221,7 +229,7 @@ Maybe<Url> Url::tryParse(StringPtr text, Context context) {
// No fragment allowed here.
return nullptr;
}
result.fragment = percentDecode(text.slice(1), err);
result.fragment = percentDecode(text.slice(1), err, options);
} else {
// We should have consumed everything.
KJ_ASSERT(text.size() == 0);
......@@ -240,6 +248,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
if (text.size() == 0) return clone();
Url result;
result.options = options;
bool err = false; // tracks percent-decoding errors
// scheme
......@@ -273,18 +282,18 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
result.userInfo = UserInfo {
percentDecode(*username, err),
percentDecode(*userpass, err)
percentDecode(*username, err, options),
percentDecode(*userpass, err, options)
};
} else {
result.userInfo = UserInfo {
percentDecode(*userpass, err),
percentDecode(*userpass, err, options),
nullptr
};
}
}
result.host = percentDecode(authority, err);
result.host = percentDecode(authority, err, options);
if (!HOST_CHARS.containsAll(result.host)) return nullptr;
toLower(result.host);
} else {
......@@ -326,7 +335,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
// Collapse consecutive slashes and "/./".
result.hasTrailingSlash = true;
} else {
result.path.add(percentDecode(part, err));
result.path.add(percentDecode(part, err, options));
result.hasTrailingSlash = false;
}
......@@ -346,10 +355,11 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
if (part.size() > 0) {
KJ_IF_MAYBE(key, trySplit(part, '=')) {
result.query.add(QueryParam { percentDecodeQuery(*key, err),
percentDecodeQuery(part, err) });
result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
percentDecodeQuery(part, err, options) });
} else {
result.query.add(QueryParam { percentDecodeQuery(part, err), nullptr });
result.query.add(QueryParam { percentDecodeQuery(part, err, options),
nullptr });
}
}
} while (text.startsWith("&"));
......@@ -363,7 +373,7 @@ Maybe<Url> Url::tryParseRelative(StringPtr text) const {
}
if (text.startsWith("#")) {
result.fragment = percentDecode(text.slice(1), err);
result.fragment = percentDecode(text.slice(1), err, options);
} else {
// We should have consumed everything.
KJ_ASSERT(text.size() == 0);
......@@ -383,10 +393,11 @@ String Url::toString(Context context) const {
if (context == REMOTE_HREF) {
KJ_IF_MAYBE(user, userInfo) {
chars.addAll(encodeUriUserInfo(user->username));
chars.addAll(options.percentDecode ? encodeUriUserInfo(user->username)
: kj::str(user->username));
KJ_IF_MAYBE(pass, user->password) {
chars.add(':');
chars.addAll(encodeUriUserInfo(*pass));
chars.addAll(options.percentDecode ? encodeUriUserInfo(*pass) : kj::str(*pass));
}
chars.add('@');
}
......@@ -415,7 +426,7 @@ String Url::toString(Context context) const {
continue;
}
chars.add('/');
chars.addAll(encodeUriPath(pathPart));
chars.addAll(options.percentDecode ? encodeUriPath(pathPart) : kj::str(pathPart));
}
if (hasTrailingSlash || (path.size() == 0 && context == HTTP_REQUEST)) {
chars.add('/');
......@@ -425,17 +436,17 @@ String Url::toString(Context context) const {
for (auto& param: query) {
chars.add(first ? '?' : '&');
first = false;
chars.addAll(encodeWwwForm(param.name));
chars.addAll(options.percentDecode ? encodeWwwForm(param.name) : kj::str(param.name));
if (param.value.begin() != nullptr) {
chars.add('=');
chars.addAll(encodeWwwForm(param.value));
chars.addAll(options.percentDecode ? encodeWwwForm(param.value) : kj::str(param.value));
}
}
if (context == REMOTE_HREF) {
KJ_IF_MAYBE(f, fragment) {
chars.add('#');
chars.addAll(encodeUriFragment(*f));
chars.addAll(options.percentDecode ? encodeUriFragment(*f) : kj::str(*f));
}
}
......
......@@ -27,6 +27,24 @@
namespace kj {
struct UrlOptions {
// A bag of options that you can pass to Url::parse()/tryParse() to customize the parser's
// behavior.
//
// A copy of this options struct will be stored in the parsed Url object, at which point it
// controls the behavior of the serializer in Url::toString().

bool percentDecode = true;
// True if URL components should be automatically percent-decoded during parsing, and
// percent-encoded during serialization.
//
// When false, the parser stores components without percent-decoding them (so malformed escapes
// are not reported as decoding errors), and Url::toString() writes the stored components back
// out without percent-encoding them.

#if __cplusplus < 201402L
inline constexpr UrlOptions(bool percentDecode = true): percentDecode(percentDecode) {}
// TODO(cleanup): This constructor is only here to support brace initialization in C++11. It
// should be removed once we upgrade to C++14.
//
// Only compiled when building as pre-C++14; the default argument keeps `UrlOptions {}`
// equivalent to the in-class default (percentDecode = true).
#endif
};
struct Url {
// Represents a URL (or, more accurately, a URI, but whatever).
//
......@@ -75,6 +93,9 @@ struct Url {
Maybe<String> fragment;
// The stuff after the '#' character (not including the '#' character itself), if present.
using Options = UrlOptions;
Options options;
// ---------------------------------------------------------------------------
Url() = default;
......@@ -84,9 +105,11 @@ struct Url {
#if __cplusplus < 201402L
inline Url(String&& scheme, Maybe<UserInfo>&& userInfo, String&& host, Vector<String>&& path,
bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment)
bool hasTrailingSlash, Vector<QueryParam>&& query, Maybe<String>&& fragment,
UrlOptions options)
: scheme(kj::mv(scheme)), userInfo(kj::mv(userInfo)), host(kj::mv(host)), path(kj::mv(path)),
hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)) {}
hasTrailingSlash(hasTrailingSlash), query(kj::mv(query)), fragment(kj::mv(fragment)),
options(options) {}
// TODO(cleanup): This constructor is only here to support brace initialization in C++11. It
// should be removed once we upgrade to C++14.
#endif
......@@ -114,8 +137,8 @@ struct Url {
kj::String toString(Context context = REMOTE_HREF) const;
// Convert the URL to a string.
static Url parse(StringPtr text, Context context = REMOTE_HREF);
static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF);
static Url parse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
static Maybe<Url> tryParse(StringPtr text, Context context = REMOTE_HREF, Options options = {});
// Parse an absolute URL.
Url parseRelative(StringPtr relative) const;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment