Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
capnproto
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
capnproto
Commits
52562bf5
Commit
52562bf5
authored
Jun 01, 2017
by
Kenton Varda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add URL parsing library to libkj-http.
parent
0623cedb
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
898 additions
and
0 deletions
+898
-0
url-test.c++
c++/src/kj/compat/url-test.c++
+363
-0
url.c++
c++/src/kj/compat/url.c++
+436
-0
url.h
c++/src/kj/compat/url.h
+99
-0
No files found.
c++/src/kj/compat/url-test.c++
0 → 100644
View file @
52562bf5
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "url.h"
#include <kj/debug.h>
#include <kj/test.h>
namespace
kj
{
namespace
{
Url parseAndCheck(kj::StringPtr originalText, kj::StringPtr expectedRestringified = nullptr) {
  // Test helper: parses `originalText` with Url::parse() and expects that stringifying the result
  // produces `expectedRestringified`. If no expectation is supplied, the URL is expected to
  // round-trip to exactly `originalText`. Returns the parsed Url so the caller can inspect its
  // individual fields.

  if (expectedRestringified == nullptr) {
    expectedRestringified = originalText;
  }

  auto url = Url::parse(originalText);
  KJ_EXPECT(kj::str(url) == expectedRestringified, url, originalText, expectedRestringified);
  return url;
}
KJ_TEST("parse / stringify URL") {
  // Each scoped section parses one URL, checks that it round-trips through stringification (via
  // parseAndCheck()), and then verifies every parsed component.

  {
    // Bare host.
    auto url = parseAndCheck("https://capnproto.org");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Host with port: the port stays part of `host`.
    auto url = parseAndCheck("https://capnproto.org:80");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org:80");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Root path: empty path with a trailing slash.
    auto url = parseAndCheck("https://capnproto.org/");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Two path components, no trailing slash.
    auto url = parseAndCheck("https://capnproto.org/foo/bar");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Two path components with a trailing slash.
    auto url = parseAndCheck("https://capnproto.org/foo/bar/");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Query with a valueless parameter, plus a fragment.
    auto url = parseAndCheck("https://capnproto.org/foo/bar?baz=qux&corge#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_ASSERT(url.query.size() == 2);
    KJ_EXPECT(url.query[0].name == "baz");
    KJ_EXPECT(url.query[0].value == "qux");
    KJ_EXPECT(url.query[1].name == "corge");
    KJ_EXPECT(url.query[1].value == nullptr);
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Trailing slash followed by a two-parameter query and a fragment.
    auto url = parseAndCheck("https://capnproto.org/foo/bar/?baz=qux&corge=grault#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(url.hasTrailingSlash);
    KJ_ASSERT(url.query.size() == 2);
    KJ_EXPECT(url.query[0].name == "baz");
    KJ_EXPECT(url.query[0].value == "qux");
    KJ_EXPECT(url.query[1].name == "corge");
    KJ_EXPECT(url.query[1].value == "grault");
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Single-parameter query and a fragment.
    auto url = parseAndCheck("https://capnproto.org/foo/bar?baz=qux#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_ASSERT(url.query.size() == 1);
    KJ_EXPECT(url.query[0].name == "baz");
    KJ_EXPECT(url.query[0].value == "qux");
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Fragment directly after the path.
    auto url = parseAndCheck("https://capnproto.org/foo/bar#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Fragment after a trailing slash.
    auto url = parseAndCheck("https://capnproto.org/foo/bar/#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Fragment directly after the host.
    auto url = parseAndCheck("https://capnproto.org#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Fragment after the root path.
    auto url = parseAndCheck("https://capnproto.org/#garply");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(KJ_ASSERT_NONNULL(url.fragment) == "garply");
  }

  {
    // Username without a password.
    auto url = parseAndCheck("https://foo@capnproto.org");
    KJ_EXPECT(url.scheme == "https");
    auto& user = KJ_ASSERT_NONNULL(url.userInfo);
    KJ_EXPECT(user.username == "foo");
    KJ_EXPECT(user.password == nullptr);
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // Username with a password.
    auto url = parseAndCheck("https://foo:1234@capnproto.org");
    KJ_EXPECT(url.scheme == "https");
    auto& user = KJ_ASSERT_NONNULL(url.userInfo);
    KJ_EXPECT(user.username == "foo");
    KJ_EXPECT(KJ_ASSERT_NONNULL(user.password) == "1234");
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path == nullptr);
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  {
    // IPv6 literal with port.
    auto url = parseAndCheck("https://[2001:db8::1234]:80/foo");
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.userInfo == nullptr);
    KJ_EXPECT(url.host == "[2001:db8::1234]:80");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_EXPECT(url.query == nullptr);
    KJ_EXPECT(url.fragment == nullptr);
  }

  // An empty query is dropped on restringification; an empty fragment is kept.
  parseAndCheck("https://capnproto.org/foo/bar?", "https://capnproto.org/foo/bar");
  parseAndCheck("https://capnproto.org/foo/bar?#", "https://capnproto.org/foo/bar#");
  parseAndCheck("https://capnproto.org/foo/bar#");

  // Scheme and host are forced to lower-case.
  parseAndCheck("hTtP://capNprotO.org/fOo/bAr", "http://capnproto.org/fOo/bAr");
}
KJ_TEST("URL percent encoding") {
  // Needlessly-escaped letters are decoded and come back unescaped.
  parseAndCheck(
      "https://b%6fb:%61bcd@capnpr%6fto.org/f%6fo?b%61r=b%61z#q%75x",
      "https://bob:abcd@capnproto.org/foo?bar=baz#qux");

  // Raw control characters are escaped when the URL is stringified.
  parseAndCheck(
      "https://b\001b:\001bcd@capnproto.org/f\001o?b\001r=b\001z#q\001x",
      "https://b%01b:%01bcd@capnproto.org/f%01o?b%01r=b%01z#q%01x");

  // Raw spaces are escaped as %20 when the URL is stringified.
  parseAndCheck(
      "https://b b: bcd@capnproto.org/f o?b r=b z#q x",
      "https://b%20b:%20bcd@capnproto.org/f%20o?b%20r=b%20z#q%20x");
}
KJ_TEST("URL relative paths") {
  // Empty and "." components collapse.
  parseAndCheck("https://capnproto.org/foo//bar", "https://capnproto.org/foo/bar");
  parseAndCheck("https://capnproto.org/foo/./bar", "https://capnproto.org/foo/bar");
  parseAndCheck("https://capnproto.org/foo/bar//", "https://capnproto.org/foo/bar/");
  parseAndCheck("https://capnproto.org/foo/bar/.", "https://capnproto.org/foo/bar/");

  // ".." removes the preceding component.
  parseAndCheck("https://capnproto.org/foo/baz/../bar", "https://capnproto.org/foo/bar");
  parseAndCheck("https://capnproto.org/foo/bar/baz/..", "https://capnproto.org/foo/bar/");

  // ".." at the root has nothing to remove.
  parseAndCheck("https://capnproto.org/..", "https://capnproto.org/");
  parseAndCheck("https://capnproto.org/foo/../..", "https://capnproto.org/");
}
KJ_TEST("URL for HTTP request") {
  {
    // Stringifying a full URL in each context.
    Url url = Url::parse("https://bob:1234@capnproto.org/foo/bar?baz=qux#corge");
    KJ_EXPECT(url.toString(Url::GENERAL) ==
        "https://bob:1234@capnproto.org/foo/bar?baz=qux#corge");
    KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) ==
        "https://capnproto.org/foo/bar?baz=qux");
    KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/foo/bar?baz=qux");
  }

  {
    // A URL without a path still produces "/" as the HTTP request path.
    Url url = Url::parse("https://capnproto.org");
    KJ_EXPECT(url.toString(Url::GENERAL) == "https://capnproto.org");
    KJ_EXPECT(url.toString(Url::HTTP_PROXY_REQUEST) == "https://capnproto.org");
    KJ_EXPECT(url.toString(Url::HTTP_REQUEST) == "/");
  }

  {
    // Parsing in HTTP_REQUEST context: path and query only.
    Url url = Url::parse("/foo/bar?baz=qux&corge", Url::HTTP_REQUEST);
    KJ_EXPECT(url.scheme == nullptr);
    KJ_EXPECT(url.host == nullptr);
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_ASSERT(url.query.size() == 2);
    KJ_EXPECT(url.query[0].name == "baz");
    KJ_EXPECT(url.query[0].value == "qux");
    KJ_EXPECT(url.query[1].name == "corge");
    KJ_EXPECT(url.query[1].value == nullptr);
  }

  {
    // Parsing in HTTP_PROXY_REQUEST context: scheme, host, path and query.
    Url url = Url::parse("https://capnproto.org/foo/bar?baz=qux&corge", Url::HTTP_PROXY_REQUEST);
    KJ_EXPECT(url.scheme == "https");
    KJ_EXPECT(url.host == "capnproto.org");
    KJ_EXPECT(url.path.asPtr() == kj::ArrayPtr<const StringPtr>({"foo", "bar"}));
    KJ_EXPECT(!url.hasTrailingSlash);
    KJ_ASSERT(url.query.size() == 2);
    KJ_EXPECT(url.query[0].name == "baz");
    KJ_EXPECT(url.query[0].value == "qux");
    KJ_EXPECT(url.query[1].name == "corge");
    KJ_EXPECT(url.query[1].value == nullptr);
  }
}
KJ_TEST("parse URL failure") {
  // bad or missing scheme / authority
  KJ_EXPECT(Url::tryParse("ht/tps://capnproto.org") == nullptr);
  KJ_EXPECT(Url::tryParse("capnproto.org") == nullptr);
  KJ_EXPECT(Url::tryParse("https:foo") == nullptr);

  // percent-decode errors
  KJ_EXPECT(Url::tryParse("https://capnproto.org/f%nno") == nullptr);
  KJ_EXPECT(Url::tryParse("https://capnproto.org/foo?b%nnr=baz") == nullptr);

  // components not valid in context
  KJ_EXPECT(Url::tryParse("https://capnproto.org/foo", Url::HTTP_REQUEST) == nullptr);
  KJ_EXPECT(Url::tryParse("/foo#bar", Url::HTTP_REQUEST) == nullptr);
  KJ_EXPECT(Url::tryParse("https://bob:123@capnproto.org/foo",
                          Url::HTTP_PROXY_REQUEST) == nullptr);
  KJ_EXPECT(Url::tryParse("https://capnproto.org/foo#bar",
                          Url::HTTP_PROXY_REQUEST) == nullptr);
}
void parseAndCheckRelative(kj::StringPtr base, kj::StringPtr relative, kj::StringPtr expected) {
  // Test helper: parses `base` as an absolute URL, resolves `relative` against it, and expects
  // the stringified result to equal `expected`.

  auto parsed = Url::parse(base).parseRelative(relative);
  KJ_EXPECT(kj::str(parsed) == expected, parsed, expected);
}
KJ_TEST("parse relative URL") {
  // Every case resolves a relative reference against the same base URL.
  kj::StringPtr base = "https://capnproto.org/foo/bar?baz=qux#corge";

  // Fragment only: path and query are kept.
  parseAndCheckRelative(base, "#grault", "https://capnproto.org/foo/bar?baz=qux#grault");

  // Query only: path is kept, fragment is dropped.
  parseAndCheckRelative(base, "?grault", "https://capnproto.org/foo/bar?grault");

  // Relative path: replaces the last path component.
  parseAndCheckRelative(base, "grault", "https://capnproto.org/foo/grault");

  // Absolute path: replaces the whole path.
  parseAndCheckRelative(base, "/grault", "https://capnproto.org/grault");

  // New authority, with and without a path.
  parseAndCheckRelative(base, "//grault", "https://grault");
  parseAndCheckRelative(base, "//grault/garply", "https://grault/garply");

  // New scheme without an authority keeps the base host.
  parseAndCheckRelative(base, "http:/grault", "http://capnproto.org/grault");

  // A colon after a slash is not a scheme separator; it is escaped in the path component.
  parseAndCheckRelative(base, "/http:/grault", "https://capnproto.org/http%3A/grault");
}
}
// namespace
}
// namespace kj
c++/src/kj/compat/url.c++
0 → 100644
View file @
52562bf5
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "url.h"
#include <kj/encoding.h>
#include <kj/parse/char.h>
#include <kj/debug.h>
#include <stdlib.h>
namespace
kj
{
namespace
{
// Character classes used by the URL parser and stringifier below.

constexpr auto ALPHAS = parse::charRange('a', 'z').orRange('A', 'Z');
constexpr auto DIGITS = parse::charRange('0', '9');

// Delimiters that terminate, respectively, the authority, one path component, and one query
// parameter.
constexpr auto END_AUTHORITY = parse::anyOfChars("/?#");
constexpr auto END_PATH_PART = parse::anyOfChars("/?#");
constexpr auto END_QUERY_PART = parse::anyOfChars("&#");

// Characters accepted in a scheme, and the complement of that set.
constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();

// Characters accepted in the host part.
constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]");
// [] is for ipv6 literals
void toLower(String& text) {
  // Lower-cases the ASCII letters 'A'..'Z' in `text`, in place. Every other character is left
  // unchanged.

  for (char& ch: text) {
    bool isUpperCase = 'A' <= ch && ch <= 'Z';
    if (isUpperCase) {
      ch += 'a' - 'A';
    }
  }
}
Maybe<ArrayPtr<const char>> trySplit(StringPtr& text, char c) {
  // If `c` occurs in `text`, returns everything before its first occurrence and advances `text`
  // to just past that occurrence. Otherwise returns nullptr and leaves `text` untouched.

  KJ_IF_MAYBE(splitAt, text.findFirst(c)) {
    auto index = *splitAt;
    ArrayPtr<const char> head = text.slice(0, index);
    text = text.slice(index + 1);
    return head;
  }

  return nullptr;
}
Maybe<ArrayPtr<const char>> trySplit(ArrayPtr<const char>& text, char c) {
  // Same as the StringPtr overload, but for a character range that is not NUL-terminated: if `c`
  // occurs in `text`, returns the prefix before its first occurrence and advances `text` past
  // it; otherwise returns nullptr and leaves `text` untouched.

  size_t length = text.size();
  for (size_t pos = 0; pos < length; ++pos) {
    if (text[pos] != c) continue;

    ArrayPtr<const char> head = text.slice(0, pos);
    text = text.slice(pos + 1, length);
    return head;
  }

  return nullptr;
}
ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
  // Returns the prefix of `text` up to (not including) the first character that belongs to
  // `chars`, and advances `text` so that it begins at that delimiter. If no character of `text`
  // is in `chars`, returns all of `text` and sets `text` to the empty string.

  // Locate the first delimiter, if any.
  size_t stop = 0;
  while (stop < text.size() && !chars.contains(text[stop])) {
    ++stop;
  }

  if (stop == text.size()) {
    // No delimiter: consume everything.
    auto whole = text.asArray();
    text = "";
    return whole;
  }

  // The delimiter itself stays at the front of `text`.
  ArrayPtr<const char> head = text.slice(0, stop);
  text = text.slice(stop);
  return head;
}
String percentDecode(ArrayPtr<const char> text, bool& hadErrors) {
  // Decodes `text` with decodeUriComponent() and returns the decoded string. If the decoder
  // reports errors, `hadErrors` is set to true; it is never reset to false, so one flag can
  // accumulate errors across many calls.

  auto decoded = decodeUriComponent(text);
  hadErrors = hadErrors || decoded.hadErrors;
  return kj::mv(decoded);
}
}
// namespace
// Defined out-of-line; declared noexcept(false) to match the declaration in url.h.
Url::~Url() noexcept(false) {}
Url Url::clone() const {
  // Returns a deep copy of this Url: every string component is duplicated with kj::str().

  auto copyString = [](const String& s) { return kj::str(s); };

  return {
    kj::str(scheme),
    userInfo.map([&copyString](const UserInfo& ui) -> UserInfo {
      return {
        kj::str(ui.username),
        ui.password.map(copyString)
      };
    }),
    kj::str(host),
    KJ_MAP(part, path) { return kj::str(part); },
    hasTrailingSlash,
    KJ_MAP(param, query) -> QueryParam {
      return { kj::str(param.name), kj::str(param.value) };
    },
    fragment.map(copyString)
  };
}
Url Url::parse(StringPtr url, Context context) {
  // Throwing variant of tryParse(): fails the KJ_REQUIRE with "invalid URL" when tryParse()
  // returns nullptr.
  return KJ_REQUIRE_NONNULL(tryParse(url, context), "invalid URL", url);
}
Maybe<Url> Url::tryParse(StringPtr text, Context context) {
  // Parses `text` as an absolute URL in the given context, returning nullptr if it is invalid.
  //
  // - HTTP_REQUEST: the text must start with '/'; only path and query are parsed.
  // - Otherwise: a scheme followed by "//" and an authority is required. User info is only
  //   accepted in the GENERAL context.
  // - A fragment is only accepted in the GENERAL context.
  //
  // Scheme and host are lower-cased. Any percent-decoding error makes the whole parse fail.

  Url result;
  bool err = false;  // tracks percent-decoding errors

  if (context == HTTP_REQUEST) {
    if (!text.startsWith("/")) {
      return nullptr;
    }
  } else {
    // --- scheme ---
    KJ_IF_MAYBE(schemeText, trySplit(text, ':')) {
      result.scheme = kj::str(*schemeText);
    } else {
      // missing scheme
      return nullptr;
    }
    toLower(result.scheme);
    if (result.scheme.size() == 0 ||
        !ALPHAS.contains(result.scheme[0]) ||
        !SCHEME_CHARS.containsAll(result.scheme.slice(1))) {
      // bad scheme
      return nullptr;
    }

    // --- authority ---
    if (!text.startsWith("//")) {
      // We require an authority (hostname) part.
      return nullptr;
    }
    text = text.slice(2);

    {
      auto authority = split(text, END_AUTHORITY);

      KJ_IF_MAYBE(credentials, trySplit(authority, '@')) {
        if (context != GENERAL) {
          // No user/pass allowed here.
          return nullptr;
        }
        KJ_IF_MAYBE(name, trySplit(*credentials, ':')) {
          // trySplit() advanced `*credentials` past the ':', so it now holds the password.
          result.userInfo = UserInfo {
            percentDecode(*name, err),
            percentDecode(*credentials, err)
          };
        } else {
          result.userInfo = UserInfo {
            percentDecode(*credentials, err),
            nullptr
          };
        }
      }

      result.host = percentDecode(authority, err);
      if (!HOST_CHARS.containsAll(result.host)) return nullptr;
      toLower(result.host);
    }
  }

  // --- path ---
  {
    Vector<String> segments;
    while (text.startsWith("/")) {
      text = text.slice(1);
      auto segment = split(text, END_PATH_PART);

      bool isDotDot = segment.size() == 2 && segment[0] == '.' && segment[1] == '.';
      bool isEmptyOrDot = segment.size() == 0 || (segment.size() == 1 && segment[0] == '.');

      if (isDotDot) {
        // ".." removes the previous component, if there is one.
        if (segments.size() != 0) {
          segments.removeLast();
        }
        result.hasTrailingSlash = true;
      } else if (isEmptyOrDot) {
        // Collapse consecutive slashes and "/./".
        result.hasTrailingSlash = true;
      } else {
        segments.add(percentDecode(segment, err));
        result.hasTrailingSlash = false;
      }
    }
    result.path = segments.releaseAsArray();
  }

  // --- query ---
  if (text.startsWith("?")) {
    Vector<QueryParam> parsedParams;
    do {
      // Skip the leading '?' or '&'.
      text = text.slice(1);
      auto item = split(text, END_QUERY_PART);

      // Empty items (e.g. from "?" or "&&") are ignored.
      if (item.size() > 0) {
        KJ_IF_MAYBE(name, trySplit(item, '=')) {
          // trySplit() advanced `item` past the '=', so it now holds the value.
          parsedParams.add(QueryParam { percentDecode(*name, err), percentDecode(item, err) });
        } else {
          parsedParams.add(QueryParam { percentDecode(item, err), nullptr });
        }
      }
    } while (text.startsWith("&"));
    result.query = parsedParams.releaseAsArray();
  }

  // --- fragment ---
  if (text.startsWith("#")) {
    if (context != GENERAL) {
      // No fragment allowed here.
      return nullptr;
    }
    result.fragment = percentDecode(text.slice(1), err);
  } else {
    // We should have consumed everything.
    KJ_ASSERT(text.size() == 0);
  }

  if (err) return nullptr;

  return kj::mv(result);
}
Url Url::parseRelative(StringPtr url) const {
  // Throwing variant of tryParseRelative(): fails the KJ_REQUIRE with "invalid relative URL"
  // when tryParseRelative() returns nullptr.
  return KJ_REQUIRE_NONNULL(tryParseRelative(url), "invalid relative URL", url);
}
Maybe<Url> Url::tryParseRelative(StringPtr text) const {
  // Parses `text` as a URL reference relative to this URL, returning the resolved URL, or nullptr
  // if `text` is invalid (bad percent-encoding or an invalid host).
  //
  // Components are taken from `text` where present and inherited from this URL otherwise:
  // - empty text: a clone of this URL.
  // - scheme: taken from `text` if it starts with scheme characters followed by ':'.
  // - authority: taken from `text` if it (after any scheme) starts with "//".
  // - path: an absolute path replaces this URL's path; a relative path replaces its last
  //   component. If `text` has no path, this URL's path is kept unless a new authority was given.
  // - query: kept from this URL only if `text` supplied no authority, path, or query.
  // - fragment: never inherited.
  //
  // As in tryParse(), the scheme and host are lower-cased and the host must consist solely of
  // HOST_CHARS, so that the result can always be stringified by toString().

  if (text.size() == 0) return clone();

  Url result;
  bool err = false;  // tracks percent-decoding errors

  // scheme
  {
    bool gotScheme = false;
    for (auto i: kj::indices(text)) {
      if (text[i] == ':') {
        // found valid scheme
        result.scheme = kj::str(text.slice(0, i));
        // Schemes are case-insensitive; normalize the same way tryParse() does.
        toLower(result.scheme);
        text = text.slice(i + 1);
        gotScheme = true;
        break;
      } else if (NOT_SCHEME_CHARS.contains(text[i])) {
        // no scheme
        break;
      }
    }
    if (!gotScheme) {
      // copy scheme
      result.scheme = kj::str(this->scheme);
    }
  }

  // authority
  bool hadNewAuthority = text.startsWith("//");
  if (hadNewAuthority) {
    text = text.slice(2);

    auto authority = split(text, END_AUTHORITY);

    KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
      KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
        // trySplit() advanced `*userpass` past the ':', so it now holds the password.
        result.userInfo = UserInfo {
          percentDecode(*username, err),
          percentDecode(*userpass, err)
        };
      } else {
        result.userInfo = UserInfo {
          percentDecode(*userpass, err),
          nullptr
        };
      }
    }

    result.host = percentDecode(authority, err);
    // Reject hosts that toString() would refuse to stringify, and normalize case, exactly as
    // tryParse() does for absolute URLs.
    if (!HOST_CHARS.containsAll(result.host)) return nullptr;
    toLower(result.host);
  } else {
    // copy authority
    result.host = kj::str(this->host);
    result.userInfo = this->userInfo.map([](const UserInfo& userInfo) {
      return UserInfo {
        kj::str(userInfo.username),
        userInfo.password.map([](const String& password) { return kj::str(password); }),
      };
    });
  }

  // path
  bool hadNewPath = text.size() > 0 && text[0] != '?' && text[0] != '#';
  if (hadNewPath) {
    // There's a new path.

    Vector<String> path(this->path.size());

    if (text[0] == '/') {
      // New path is absolute, so don't copy the old path.
      text = text.slice(1);
      result.hasTrailingSlash = true;
    } else if (this->path.size() > 0) {
      // New path is relative, so start from the old path, dropping everything after the last
      // slash.
      auto slice = this->path.slice(0, this->path.size() - (this->hasTrailingSlash ? 0 : 1));
      for (auto& part: slice) {
        path.add(kj::str(part));
      }
      result.hasTrailingSlash = true;
    }

    for (;;) {
      auto part = split(text, END_PATH_PART);
      if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
        // ".." removes the previous component, if there is one.
        if (path.size() != 0) {
          path.removeLast();
        }
        result.hasTrailingSlash = true;
      } else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) {
        // Collapse consecutive slashes and "/./".
        result.hasTrailingSlash = true;
      } else {
        path.add(percentDecode(part, err));
        result.hasTrailingSlash = false;
      }

      if (!text.startsWith("/")) break;
      text = text.slice(1);
    }

    result.path = path.releaseAsArray();
  } else if (!hadNewAuthority) {
    // copy path
    result.path = KJ_MAP(part, this->path) { return kj::str(part); };
    result.hasTrailingSlash = this->hasTrailingSlash;
  }

  // query
  if (text.startsWith("?")) {
    Vector<QueryParam> params;

    do {
      // Skip the leading '?' or '&'.
      text = text.slice(1);
      auto part = split(text, END_QUERY_PART);

      // Empty parameters (e.g. from "?" or "&&") are ignored.
      if (part.size() > 0) {
        KJ_IF_MAYBE(key, trySplit(part, '=')) {
          // trySplit() advanced `part` past the '=', so it now holds the value.
          params.add(QueryParam { percentDecode(*key, err), percentDecode(part, err) });
        } else {
          params.add(QueryParam { percentDecode(part, err), nullptr });
        }
      }
    } while (text.startsWith("&"));

    result.query = params.releaseAsArray();
  } else if (!hadNewAuthority && !hadNewPath) {
    // copy query
    result.query = KJ_MAP(param, this->query) {
      return QueryParam { kj::str(param.name), kj::str(param.value) };
    };
  }

  // fragment
  if (text.startsWith("#")) {
    result.fragment = percentDecode(text.slice(1), err);
  } else {
    // We should have consumed everything.
    KJ_ASSERT(text.size() == 0);
  }

  if (err) return nullptr;

  return kj::mv(result);
}
String Url::toString(Context context) const {
  // Builds the string form of this URL for the given context:
  // - GENERAL: scheme, user info, host, path, query, fragment.
  // - HTTP_PROXY_REQUEST: scheme, host, path, query.
  // - HTTP_REQUEST: path and query only; an empty path is written as "/".
  //
  // User info, path components, query names/values and the fragment are escaped with
  // encodeUriComponent(). The host is written verbatim.

  Vector<char> out(128);

  if (context != HTTP_REQUEST) {
    out.addAll(scheme);
    out.addAll(StringPtr("://"));

    if (context == GENERAL) {
      KJ_IF_MAYBE(user, userInfo) {
        out.addAll(encodeUriComponent(user->username));
        KJ_IF_MAYBE(pass, user->password) {
          out.add(':');
          out.addAll(encodeUriComponent(*pass));
        }
        out.add('@');
      }
    }

    // RFC3986 specifies that hosts can contain percent-encoding escapes while suggesting that
    // they should only be used for UTF-8 sequences. However, the DNS standard specifies a
    // different way to encode Unicode into domain names and doesn't permit any characters which
    // would need to be escaped. Meanwhile, encodeUriComponent() here would incorrectly try to
    // escape colons and brackets (e.g. around ipv6 literal addresses). So, instead, we throw if
    // the host is invalid.
    if (!HOST_CHARS.containsAll(host)) {
      KJ_FAIL_REQUIRE("invalid hostname when stringifying URL", host) {
        // Recovery path for when the failure does not throw: emit a placeholder host.
        out.addAll(StringPtr("invalid-host"));
        break;
      }
    } else {
      out.addAll(host);
    }
  }

  for (auto& component: path) {
    out.add('/');
    out.addAll(encodeUriComponent(component));
  }
  bool needsBareSlash = path.size() == 0 && context == HTTP_REQUEST;
  if (hasTrailingSlash || needsBareSlash) {
    out.add('/');
  }

  // The first parameter is introduced by '?', every later one by '&'.
  char separator = '?';
  for (auto& param: query) {
    out.add(separator);
    separator = '&';
    out.addAll(encodeUriComponent(param.name));
    // A parameter with an empty value is written as just its name, without '='.
    if (param.value.size() > 0) {
      out.add('=');
      out.addAll(encodeUriComponent(param.value));
    }
  }

  if (context == GENERAL) {
    KJ_IF_MAYBE(f, fragment) {
      out.add('#');
      out.addAll(encodeUriComponent(*f));
    }
  }

  // The terminating NUL is added before the buffer is handed to String.
  out.add('\0');
  return String(out.releaseAsArray());
}
}
// namespace kj
c++/src/kj/compat/url.h
0 → 100644
View file @
52562bf5
// Copyright (c) 2017 Cloudflare, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef KJ_COMPAT_URL_H_
#define KJ_COMPAT_URL_H_
#include <kj/string.h>
#include <inttypes.h>
namespace
kj
{
struct Url {
  // A URL broken into its components. Use parse() / tryParse() to build one from text,
  // parseRelative() / tryParseRelative() to resolve a relative reference against it, and
  // toString() to turn it back into text. The type is move-only; use clone() for a deep copy.

  String scheme;
  // E.g. "http", "https".

  struct UserInfo {
    String username;
    Maybe<String> password;
  };

  Maybe<UserInfo> userInfo;
  // Username / password.

  String host;
  // Hostname, including port if specified. We choose not to parse out the port because KJ's
  // network address parsing functions already accept addresses containing port numbers, and
  // because most web standards don't actually want to separate host and port.

  Array<String> path;
  bool hasTrailingSlash = false;
  // Path, split on '/' characters. Note that the individual components of `path` could contain
  // '/' characters if they were percent-encoded in the original URL.

  struct QueryParam {
    String name;
    String value;
  };
  Array<QueryParam> query;
  // Query, e.g. from "?key=value&key2=value2". If a component of the query contains no '=' sign,
  // it will be parsed as a key with an empty value.

  Maybe<String> fragment;
  // The stuff after the '#' character (not including the '#' character itself), if present.

  // ---------------------------------------------------------------------------

  Url() = default;
  Url(Url&&) = default;
  ~Url() noexcept(false);

  Url& operator=(Url&&) = default;
  // Move assignment has to be declared explicitly: the user-declared move constructor suppresses
  // the implicit move assignment operator and deletes copy assignment, so without this
  // declaration a Url could not be assigned to at all.

  Url clone() const;
  // Returns a deep copy of this URL.

  enum Context {
    GENERAL,
    // The full URL.

    HTTP_PROXY_REQUEST,
    // The URL to place in the first line of an HTTP proxy request. This includes scheme, host,
    // path, and query, but omits userInfo (which should be used to construct the Authorization
    // header) and fragment (which should not be transmitted).

    HTTP_REQUEST
    // The path to place in the first line of a regular HTTP request. This includes only the path
    // and query. Scheme, user, host, and fragment are omitted.
  };

  kj::String toString(Context context = GENERAL) const;
  // Convert the URL to a string.

  static Url parse(StringPtr text, Context context = GENERAL);
  static Maybe<Url> tryParse(StringPtr text, Context context = GENERAL);
  // Parse an absolute URL.

  Url parseRelative(StringPtr relative) const;
  Maybe<Url> tryParseRelative(StringPtr relative) const;
  // Parse a relative URL string with this URL as the base.
};
}
// namespace kj
#endif // KJ_COMPAT_URL_H_
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment