Unverified Commit b08edd65 authored by Ge Jun's avatar Ge Jun Committed by GitHub

Merge pull request #457 from brpc/redis_escaping_issues

Make escaping and quoting in redis more reasonable
parents 1ed2db97 34b2eb4c
......@@ -23,6 +23,8 @@ void* fast_memcpy(void *__restrict dest, const void *__restrict src, size_t n);
namespace brpc {
const size_t CTX_WIDTH = 5;
// Much faster than snprintf(..., "%lu", d);
inline size_t AppendDecimal(char* outbuf, unsigned long d) {
char buf[24]; // enough for decimal 64-bit integers
......@@ -57,6 +59,14 @@ inline void AppendHeader(butil::IOBuf& buf, char fc, unsigned long value) {
buf.append(header, len + 3);
}
static void FlushComponent(std::string* out, std::string* compbuf, int* ncomp) {
AppendHeader(*out, '$', compbuf->size());
out->append(*compbuf);
out->append("\r\n", 2);
compbuf->clear();
++*ncomp;
}
// Support hiredis-style format, namely everything is same with printf except
// that %b corresponds to binary-data + length. Notice that we can't use
// %.*s (printf built-in) which ends scaning at \0 and is not binary-safe.
......@@ -78,27 +88,33 @@ RedisCommandFormatV(butil::IOBuf* outbuf, const char* fmt, va_list ap) {
char quote_char = 0;
const char* quote_pos = fmt;
int nargs = 0;
bool is_empty_component = false;
for (; *c; ++c) {
if (*c != '%' || c[1] == '\0') {
if (*c == ' ') {
if (quote_char) {
compbuf.push_back(*c);
} else if (!compbuf.empty() || is_empty_component) {
is_empty_component = false;
AppendHeader(nocount_buf, '$', compbuf.size());
compbuf.append("\r\n", 2);
nocount_buf.append(compbuf);
compbuf.clear();
++ncomponent;
} else if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else if (*c == '"' || *c == '\'') { // Check quotation.
if (!quote_char) { // begin quote
quote_char = *c;
quote_pos = c;
} else if (quote_char == *c) { // end quote
is_empty_component = (c - quote_pos == 1) ? true : false; // for empty string
quote_char = 0;
if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else if (quote_char == *c) {
const char last_char = (compbuf.empty() ? 0 : compbuf.back());
if (last_char == '\\') {
// Even if the preceding chars are two consecutive backslashes
// (\\), still do the escaping, which is the behavior of
// official redis-cli.
compbuf.pop_back();
compbuf.push_back(*c);
} else { // end quote
quote_char = 0;
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else {
compbuf.push_back(*c);
}
......@@ -233,17 +249,16 @@ RedisCommandFormatV(butil::IOBuf* outbuf, const char* fmt, va_list ap) {
}
}
if (quote_char) {
return butil::Status(EINVAL, "Unmatched quote: ... %.*s ... (offset=%lu)",
(int)(fmt + fmt_len - quote_pos),
quote_pos, quote_pos - fmt);
const char* ctx_begin =
quote_pos - std::min((size_t)(quote_pos - fmt), CTX_WIDTH);
size_t ctx_size =
std::min((size_t)(fmt + fmt_len - ctx_begin), CTX_WIDTH * 2 + 1);
return butil::Status(EINVAL, "Unmatched quote: ...%.*s... (offset=%lu)",
(int)ctx_size, ctx_begin, quote_pos - fmt);
}
if (!compbuf.empty() || is_empty_component) {
AppendHeader(nocount_buf, '$', compbuf.size());
compbuf.append("\r\n", 2);
nocount_buf.append(compbuf);
compbuf.clear();
++ncomponent;
if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
LOG_IF(ERROR, nargs == 0) << "You must call RedisCommandNoFormat() "
......@@ -276,26 +291,32 @@ RedisCommandNoFormat(butil::IOBuf* outbuf, const butil::StringPiece& cmd) {
int ncomponent = 0;
char quote_char = 0;
const char* quote_pos = cmd.data();
bool is_empty_component = false;
for (const char* c = cmd.data(); c != cmd.data() + cmd.size(); ++c) {
if (*c == ' ') {
if (quote_char) {
compbuf.push_back(*c);
} else if (!compbuf.empty() || is_empty_component) {
is_empty_component = false;
AppendHeader(nocount_buf, '$', compbuf.size());
compbuf.append("\r\n", 2);
nocount_buf.append(compbuf);
compbuf.clear();
++ncomponent;
} else if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else if (*c == '"' || *c == '\'') { // Check quotation.
if (!quote_char) { // begin quote
quote_char = *c;
quote_pos = c;
} else if (quote_char == *c) { // end quote
is_empty_component = (c - quote_pos == 1) ? true : false; // for empty string
quote_char = 0;
if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else if (quote_char == *c) {
const char last_char = (compbuf.empty() ? 0 : compbuf.back());
if (last_char == '\\') {
// Even if the preceding chars are two consecutive backslashes
// (\\), still do the escaping, which is the behavior of
// official redis-cli.
compbuf.pop_back();
compbuf.push_back(*c);
} else { // end quote
quote_char = 0;
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
} else {
compbuf.push_back(*c);
}
......@@ -304,17 +325,16 @@ RedisCommandNoFormat(butil::IOBuf* outbuf, const butil::StringPiece& cmd) {
}
}
if (quote_char) {
return butil::Status(EINVAL, "Unmatched quote: ... %.*s ... (offset=%lu)",
(int)(cmd.data() + cmd.size() - quote_pos),
quote_pos, quote_pos - cmd.data());
const char* ctx_begin =
quote_pos - std::min((size_t)(quote_pos - cmd.data()), CTX_WIDTH);
size_t ctx_size =
std::min((size_t)(cmd.data() + cmd.size() - ctx_begin), CTX_WIDTH * 2 + 1);
return butil::Status(EINVAL, "Unmatched quote: ...%.*s... (offset=%lu)",
(int)ctx_size, ctx_begin, quote_pos - cmd.data());
}
if (!compbuf.empty() || is_empty_component) {
AppendHeader(nocount_buf, '$', compbuf.size());
compbuf.append("\r\n", 2);
nocount_buf.append(compbuf);
compbuf.clear();
++ncomponent;
if (!compbuf.empty()) {
FlushComponent(&nocount_buf, &compbuf, &ncomponent);
}
AppendHeader(*outbuf, '*', ncomponent);
......
......@@ -205,33 +205,47 @@ bool RedisReply::ConsumePartialIOBuf(butil::IOBuf& buf, butil::Arena* arena) {
return false;
}
static void PrintBinaryData(std::ostream& os, const butil::StringPiece& s) {
// Check for non-ascii characters first so that we can print ascii data
// (most cases) fast, rather than printing char-by-char as we do in the
// binary_data=true branch.
bool binary_data = false;
for (size_t i = 0; i < s.size(); ++i) {
if (s[i] <= 0) {
binary_data = true;
break;
}
}
if (!binary_data) {
os << s;
} else {
for (size_t i = 0; i < s.size(); ++i) {
if (s[i] <= 0) {
char buf[8] = "\\u0000";
uint8_t d1 = ((uint8_t)s[i]) & 0xF;
uint8_t d2 = ((uint8_t)s[i]) >> 4;
buf[4] = (d1 < 10 ? d1 + '0' : (d1 - 10) + 'A');
buf[5] = (d2 < 10 ? d2 + '0' : (d2 - 10) + 'A');
os << butil::StringPiece(buf, 6);
} else {
os << s[i];
class RedisStringPrinter {
public:
RedisStringPrinter(const char* str, size_t length)
: _str(str, length) {}
void Print(std::ostream& os) const;
private:
butil::StringPiece _str;
};
static std::ostream&
operator<<(std::ostream& os, const RedisStringPrinter& printer) {
printer.Print(os);
return os;
}
void RedisStringPrinter::Print(std::ostream& os) const {
size_t flush_start = 0;
for (size_t i = 0; i < _str.size(); ++i) {
const char c = _str[i];
if (c <= 0) { // unprintable chars
if (i != flush_start) {
os << butil::StringPiece(_str.data() + flush_start, i - flush_start);
}
char buf[8] = "\\u0000";
uint8_t d1 = ((uint8_t)c) & 0xF;
uint8_t d2 = ((uint8_t)c) >> 4;
buf[4] = (d1 < 10 ? d1 + '0' : (d1 - 10) + 'A');
buf[5] = (d2 < 10 ? d2 + '0' : (d2 - 10) + 'A');
os << butil::StringPiece(buf, 6);
flush_start = i + 1;
} else if (c == '"' || c == '\\') { // need to escape
if (i != flush_start) {
os << butil::StringPiece(_str.data() + flush_start, i - flush_start);
}
os << '\\' << c;
flush_start = i + 1;
}
}
if (flush_start != _str.size()) {
os << butil::StringPiece(_str.data() + flush_start, _str.size() - flush_start);
}
}
// Mimic how official redis-cli prints.
......@@ -240,9 +254,9 @@ void RedisReply::Print(std::ostream& os) const {
case REDIS_REPLY_STRING:
os << '"';
if (_length < sizeof(_data.short_str)) {
os << _data.short_str;
os << RedisStringPrinter(_data.short_str, _length);
} else {
PrintBinaryData(os, butil::StringPiece(_data.long_str, _length));
os << RedisStringPrinter(_data.long_str, _length);
}
os << '"';
break;
......@@ -267,9 +281,9 @@ void RedisReply::Print(std::ostream& os) const {
// fall through
case REDIS_REPLY_STATUS:
if (_length < sizeof(_data.short_str)) {
os << _data.short_str;
os << RedisStringPrinter(_data.short_str, _length);
} else {
PrintBinaryData(os, butil::StringPiece(_data.long_str, _length));
os << RedisStringPrinter(_data.long_str, _length);
}
break;
default:
......
......@@ -474,20 +474,63 @@ TEST_F(RedisTest, cmd_format) {
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("get ''key value"); // == get key value
ASSERT_STREQ("*3\r\n$3\r\nget\r\n$3\r\nkey\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.AddCommand("get ''key value"); // == get <empty> key value
ASSERT_STREQ("*4\r\n$3\r\nget\r\n$0\r\n\r\n$3\r\nkey\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.Clear();
request.AddCommand("get key'' value"); // == get key value
ASSERT_STREQ("*3\r\n$3\r\nget\r\n$3\r\nkey\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.AddCommand("get key'' value"); // == get key <empty> value
ASSERT_STREQ("*4\r\n$3\r\nget\r\n$3\r\nkey\r\n$0\r\n\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.Clear();
request.AddCommand("get 'ext'key value "); // == get extkey value
ASSERT_STREQ("*3\r\n$3\r\nget\r\n$6\r\nextkey\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.AddCommand("get 'ext'key value "); // == get ext key value
ASSERT_STREQ("*4\r\n$3\r\nget\r\n$3\r\next\r\n$3\r\nkey\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.Clear();
request.AddCommand(" get key'ext' value "); // == get keyext value
ASSERT_STREQ("*3\r\n$3\r\nget\r\n$6\r\nkeyext\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.AddCommand(" get key'ext' value "); // == get key ext value
ASSERT_STREQ("*4\r\n$3\r\nget\r\n$3\r\nkey\r\n$3\r\next\r\n$5\r\nvalue\r\n", request._buf.to_string().c_str());
request.Clear();
}
TEST_F(RedisTest, quote_and_escape) {
if (g_redis_pid < 0) {
puts("Skipped due to absence of redis-server");
return;
}
brpc::RedisRequest request;
request.AddCommand("set a 'foo bar'");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$7\r\nfoo bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a 'foo \\'bar'");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$8\r\nfoo 'bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a 'foo \"bar'");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$8\r\nfoo \"bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a 'foo \\\"bar'");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$9\r\nfoo \\\"bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a \"foo 'bar\"");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$8\r\nfoo 'bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a \"foo \\'bar\"");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$9\r\nfoo \\'bar\r\n",
request._buf.to_string().c_str());
request.Clear();
request.AddCommand("set a \"foo \\\"bar\"");
ASSERT_STREQ("*3\r\n$3\r\nset\r\n$1\r\na\r\n$8\r\nfoo \"bar\r\n",
request._buf.to_string().c_str());
request.Clear();
}
} //namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment