Commit 64bc025e authored by gejun

patch svn r34965

Change-Id: I348aba53d44b0941151ddb189ffbd66529da062d
parent 03018d4c
......@@ -89,8 +89,43 @@ static bool is_all_spaces(const char* p) {
for (; *p == ' '; ++p) {}
return !*p;
}
// This implementation is much faster than http_parser_parse_url().
// Per-character actions used while scanning the authority part of a URL.
// The scanning loops look up the action for the current character and:
//   URI_PARSE_CONTINUE - skip the character without further inspection;
//   URI_PARSE_CHECK    - fall through to the explicit per-character checks
//                        (used for ' ', ':' and '@');
//   URI_PARSE_BREAK    - stop scanning (used for '\0', '#', '/' and '?').
const char URI_PARSE_CONTINUE = 0;
const char URI_PARSE_CHECK = 1;
const char URI_PARSE_BREAK = 2;
// Action table with 256 entries laid out for character values -128..127.
// The inline /*N*/ comment at the start of each row is the character value
// of that row's first entry; every row except the last holds 10 entries.
// Do not access this array directly: use g_url_parsing_fast_action_map,
// which is biased by +128 so that it can be indexed by a signed char value.
static const char g_url_parsing_fast_action_map_raw[] = {
0/*-128*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-118*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-108*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-98*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-88*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-78*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-68*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-58*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-48*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-38*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-28*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-18*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*-8*/, 0, 0, 0, 0, 0, 0, 0, URI_PARSE_BREAK/*\0*/, 0,
0/*2*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*12*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*22*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
URI_PARSE_CHECK/* */, 0, 0, URI_PARSE_BREAK/*#*/, 0, 0, 0, 0, 0, 0,
0/*42*/, 0, 0, 0, 0, URI_PARSE_BREAK/*/*/, 0, 0, 0, 0,
0/*52*/, 0, 0, 0, 0, 0, URI_PARSE_CHECK/*:*/, 0, 0, 0,
0/*62*/, URI_PARSE_BREAK/*?*/, URI_PARSE_CHECK/*@*/, 0, 0, 0, 0, 0, 0, 0,
0/*72*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*82*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*92*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*102*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*112*/, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0/*122*/, 0, 0, 0, 0, 0
};
// View of the table whose valid indices are -128..127.
// NOTE(review): the lookups index this with `(int)*p` where p is a
// `const char*`, which stays in range only when plain `char` is signed.
// If plain `char` is unsigned, bytes >= 0x80 give indices 128..255 and read
// past the end of the raw array -- confirm the supported targets or cast
// through `signed char` at the lookup sites.
static const char* const g_url_parsing_fast_action_map =
g_url_parsing_fast_action_map_raw + 128;
// This implementation is faster than http_parser_parse_url() and allows
// the schema (e.g. "http://") to be omitted from the URL.
int URI::SetHttpURL(const char* url) {
Clear();
......@@ -103,7 +138,14 @@ int URI::SetHttpURL(const char* url) {
// Find end of host, locate schema and user_info during the searching
bool need_schema = true;
bool need_user_info = true;
for (; *p && *p != '/'; ++p) {
for (; true; ++p) {
const char action = g_url_parsing_fast_action_map[(int)*p];
if (action == URI_PARSE_CONTINUE) {
continue;
}
if (action == URI_PARSE_BREAK) {
break;
}
if (*p == ':') {
if (p[1] == '/' && p[2] == '/' && need_schema) {
need_schema = false;
......@@ -127,25 +169,20 @@ int URI::SetHttpURL(const char* url) {
}
const char* host_end = SplitHostAndPort(start, p, &_port);
_host.assign(start, host_end - start);
if (*p != '/') {
if (host_end == start) {
_st.set_error(EINVAL, "Empty host and path");
return -1;
}
return 0;
}
start = p; //slash pointed by p is counted into _path
++p;
for (; *p && *p != '?' && *p != '#'; ++p) {
if (*p == ' ') {
if (!is_all_spaces(p + 1)) {
_st.set_error(EINVAL, "Invalid space in path");
return -1;
if (*p == '/') {
start = p; //slash pointed by p is counted into _path
++p;
for (; *p && *p != '?' && *p != '#'; ++p) {
if (*p == ' ') {
if (!is_all_spaces(p + 1)) {
_st.set_error(EINVAL, "Invalid space in path");
return -1;
}
break;
}
break;
}
_path.assign(start, p - start);
}
_path.assign(start, p - start);
if (*p == '?') {
start = ++p;
for (; *p && *p != '#'; ++p) {
......@@ -187,7 +224,14 @@ int ParseHostAndPortFromURL(const char* url, std::string* host_out,
bool need_schema = true;
bool need_user_info = true;
base::StringPiece schema;
for (; *p && *p != '/'; ++p) {
for (; true; ++p) {
const char action = g_url_parsing_fast_action_map[(int)*p];
if (action == URI_PARSE_CONTINUE) {
continue;
}
if (action == URI_PARSE_BREAK) {
break;
}
if (*p == ':') {
if (p[1] == '/' && p[2] == '/' && need_schema) {
need_schema = false;
......
......@@ -30,25 +30,31 @@ TEST(URITest, everything) {
TEST(URITest, only_host) {
brpc::URI uri;
ASSERT_EQ(0, uri.SetHttpURL(" foo1://www.baidu1.com "));
ASSERT_EQ(0, uri.SetHttpURL(" foo1://www.baidu1.com?wd=uri2&nonkey=22 "));
ASSERT_EQ("foo1", uri.schema());
ASSERT_EQ(-1, uri.port());
ASSERT_EQ("www.baidu1.com", uri.host());
ASSERT_EQ("", uri.path());
ASSERT_EQ("", uri.user_info());
ASSERT_EQ("", uri.fragment());
ASSERT_EQ(0, uri.QueryCount());
ASSERT_FALSE(uri.GetQuery("wd"));
ASSERT_FALSE(uri.GetQuery("nonkey"));
ASSERT_EQ(2, uri.QueryCount());
ASSERT_TRUE(uri.GetQuery("wd"));
ASSERT_EQ(*uri.GetQuery("wd"), "uri2");
ASSERT_TRUE(uri.GetQuery("nonkey"));
ASSERT_EQ(*uri.GetQuery("nonkey"), "22");
ASSERT_EQ(0, uri.SetHttpURL("foo2://www.baidu2.com:1234"));
ASSERT_EQ(0, uri.SetHttpURL("foo2://www.baidu2.com:1234?wd=uri2&nonkey=22 "));
ASSERT_EQ("foo2", uri.schema());
ASSERT_EQ(1234, uri.port());
ASSERT_EQ("www.baidu2.com", uri.host());
ASSERT_EQ("", uri.path());
ASSERT_EQ("", uri.user_info());
ASSERT_EQ("", uri.fragment());
ASSERT_EQ(0, uri.QueryCount());
ASSERT_EQ(2, uri.QueryCount());
ASSERT_TRUE(uri.GetQuery("wd"));
ASSERT_EQ(*uri.GetQuery("wd"), "uri2");
ASSERT_TRUE(uri.GetQuery("nonkey"));
ASSERT_EQ(*uri.GetQuery("nonkey"), "22");
ASSERT_EQ(0, uri.SetHttpURL(" www.baidu3.com:4321 "));
ASSERT_EQ("", uri.schema());
......@@ -246,7 +252,9 @@ TEST(URITest, only_one_key) {
TEST(URITest, empty_host) {
brpc::URI uri;
ASSERT_EQ(-1, uri.SetHttpURL("http://"));
ASSERT_EQ(0, uri.SetHttpURL("http://"));
ASSERT_EQ("", uri.host());
ASSERT_EQ("", uri.path());
}
TEST(URITest, invalid_spaces) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment