Commit 23e13c1d authored by Harris Hancock

Allow underscores in URL hostnames

We'll need more hostname-related code changes in the future, but for now this will solve a Cloudflare customer's existing bug report.
parent 2ae725f3
...@@ -265,6 +265,10 @@ KJ_TEST("parse / stringify URL") { ...@@ -265,6 +265,10 @@ KJ_TEST("parse / stringify URL") {
// Scheme and host are forced to lower-case. // Scheme and host are forced to lower-case.
parseAndCheck("hTtP://capNprotO.org/fOo/bAr", "http://capnproto.org/fOo/bAr"); parseAndCheck("hTtP://capNprotO.org/fOo/bAr", "http://capnproto.org/fOo/bAr");
// URLs with underscores in their hostnames are allowed, but you probably shouldn't use them. They
// are not valid domain names.
parseAndCheck("https://bad_domain.capnproto.org/");
} }
KJ_TEST("URL percent encoding") { KJ_TEST("URL percent encoding") {
......
...@@ -38,7 +38,11 @@ constexpr auto END_QUERY_PART = parse::anyOfChars("&#"); ...@@ -38,7 +38,11 @@ constexpr auto END_QUERY_PART = parse::anyOfChars("&#");
constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-."); constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert(); constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();
constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]"); // [] is for ipv6 literals constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]_");
// [] is for ipv6 literals.
// _ is not allowed in domain names, but the WHATWG URL spec allows it in hostnames, so we do, too.
// TODO(soon): The URL spec actually allows a lot more than just '_', and requires nameprepping to
// Punycode. We'll have to decide how we want to deal with all that.
void toLower(String& text) { void toLower(String& text) {
for (char& c: text) { for (char& c: text) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment