Commit 607dbbf5 authored by Kenton Varda's avatar Kenton Varda

Merge pull request #258 from kamalmarhubi/json-decodeRaw

Implement most of JsonCodec::decodeRaw
parents e93c9aca f60fe3c2
......@@ -11,6 +11,7 @@ Bryan Borham <bjboreham@gmail.com>: Initial MSVC support
Philip Quinn <p@partylemon.com>: cmake build and other assorted bits
Brian Taylor <el.wubo@gmail.com>: emacs syntax highlighting
Ben Laurie <ben@links.org>: discovered and responsibly disclosed security bugs
Kamal Marhubi <kamal@marhubi.com>: JSON parser
This file does not list people who maintain their own Cap'n Proto
implementations as separate projects. Those people are awesome too! :)
......@@ -21,7 +21,9 @@
#include "json.h"
#include <capnp/test-util.h>
#include <capnp/compat/json.capnp.h>
#include <kj/debug.h>
#include <kj/string.h>
#include <kj/test.h>
namespace capnp {
......@@ -181,6 +183,293 @@ KJ_TEST("encode union") {
KJ_EXPECT(json.encode(root) == "{\"before\":\"a\",\"middle\":44,\"bar\":321,\"after\":\"c\"}");
}
KJ_TEST("basic json decoding") {
// Exercises JsonCodec::decodeRaw() on each JSON value kind, then on a range of malformed
// inputs. Every case uses a fresh MallocMessageBuilder so decoded values never leak between
// cases; the error cases only check for a substring of the expected exception message.
// TODO(cleanup): this test is a mess!
JsonCodec json;
// --- Literals: null / false / true ---
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("null", root);
KJ_EXPECT(root.which() == JsonValue::NULL_);
KJ_EXPECT(root.getNull() == VOID);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("false", root);
KJ_EXPECT(root.which() == JsonValue::BOOLEAN);
KJ_EXPECT(root.getBoolean() == false);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("true", root);
KJ_EXPECT(root.which() == JsonValue::BOOLEAN);
KJ_EXPECT(root.getBoolean() == true);
}
// --- Strings: plain, escaped quote, escaped backslashes, and every supported escape ---
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("\"foo\"", root);
KJ_EXPECT(root.which() == JsonValue::STRING);
KJ_EXPECT(kj::str("foo") == root.getString());
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(R"("\"")", root);
KJ_EXPECT(root.which() == JsonValue::STRING);
KJ_EXPECT(kj::str("\"") == root.getString());
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(R"("\\abc\"d\\e")", root);
KJ_EXPECT(root.which() == JsonValue::STRING);
KJ_EXPECT(kj::str("\\abc\"d\\e") == root.getString());
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// The \u escapes used here contain only decimal hex digits (0003, 0064, 0065); hex letters
// a-f / A-F are not covered by this case.
json.decodeRaw(R"("\"\\\/\b\f\n\r\t\u0003abc\u0064\u0065f")", root);
KJ_EXPECT(root.which() == JsonValue::STRING);
KJ_EXPECT(kj::str("\"\\/\b\f\n\r\t\x03""abcdef") == root.getString(), root.getString());
}
// --- Arrays: empty, single element, and multiple elements with irregular whitespace ---
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("[]", root);
KJ_EXPECT(root.which() == JsonValue::ARRAY, root.which());
KJ_EXPECT(root.getArray().size() == 0);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("[true]", root);
KJ_EXPECT(root.which() == JsonValue::ARRAY);
auto array = root.getArray();
KJ_EXPECT(array.size() == 1, array.size());
KJ_EXPECT(root.getArray()[0].which() == JsonValue::BOOLEAN);
KJ_EXPECT(root.getArray()[0].getBoolean() == true);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(" [ true , false\t\n , null]", root);
KJ_EXPECT(root.which() == JsonValue::ARRAY);
auto array = root.getArray();
KJ_EXPECT(array.size() == 3);
KJ_EXPECT(array[0].which() == JsonValue::BOOLEAN);
KJ_EXPECT(array[0].getBoolean() == true);
KJ_EXPECT(array[1].which() == JsonValue::BOOLEAN);
KJ_EXPECT(array[1].getBoolean() == false);
KJ_EXPECT(array[2].which() == JsonValue::NULL_);
KJ_EXPECT(array[2].getNull() == VOID);
}
// --- Objects: empty, single field, and a nested mix of strings, numbers, arrays, objects ---
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("{}", root);
KJ_EXPECT(root.which() == JsonValue::OBJECT, root.which());
KJ_EXPECT(root.getObject().size() == 0);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(R"({"some": null})", root);
KJ_EXPECT(root.which() == JsonValue::OBJECT, root.which());
auto object = root.getObject();
KJ_EXPECT(object.size() == 1);
KJ_EXPECT(kj::str("some") == object[0].getName());
KJ_EXPECT(object[0].getValue().which() == JsonValue::NULL_);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// Field names go through the same escape handling as string values ("foo\n\tbaz"), and
// fields are expected back in input order.
json.decodeRaw(R"({"foo\n\tbaz": "a val", "bar": ["a", -5.5e11, { "z": {}}]})", root);
KJ_EXPECT(root.which() == JsonValue::OBJECT, root.which());
auto object = root.getObject();
KJ_EXPECT(object.size() == 2);
KJ_EXPECT(kj::str("foo\n\tbaz") == object[0].getName());
KJ_EXPECT(object[0].getValue().which() == JsonValue::STRING);
KJ_EXPECT(kj::str("a val") == object[0].getValue().getString());
KJ_EXPECT(kj::str("bar") == object[1].getName());
KJ_EXPECT(object[1].getValue().which() == JsonValue::ARRAY);
auto array = object[1].getValue().getArray();
KJ_EXPECT(array.size() == 3, array.size());
KJ_EXPECT(array[0].which() == JsonValue::STRING);
KJ_EXPECT(kj::str("a") == array[0].getString());
KJ_EXPECT(array[1].which() == JsonValue::NUMBER);
KJ_EXPECT(array[1].getNumber() == -5.5e11);
KJ_EXPECT(array[2].which() == JsonValue::OBJECT);
KJ_EXPECT(array[2].getObject().size() == 1);
KJ_EXPECT(array[2].getObject()[0].getValue().which() == JsonValue::OBJECT);
KJ_EXPECT(array[2].getObject()[0].getValue().getObject().size() == 0);
}
// --- Numbers: valid forms and rejected forms ---
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("123", root);
KJ_EXPECT(root.which() == JsonValue::NUMBER);
KJ_EXPECT(root.getNumber() == 123);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// A character that cannot start any JSON value.
KJ_EXPECT_THROW_MESSAGE("input", json.decodeRaw("z", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// Leading + not allowed in numbers.
KJ_EXPECT_THROW_MESSAGE("Unexpected", json.decodeRaw("+123", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// Exponent too large for a double: expected to be reported as overflow.
KJ_EXPECT_THROW_MESSAGE("Overflow", json.decodeRaw("1e1024", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// Exponent too small for a double: expected to be reported as underflow.
KJ_EXPECT_THROW_MESSAGE("Underflow", json.decodeRaw("1e-1023", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// Leading zeros are not allowed: after a lone "0" the next digit is unexpected input.
KJ_EXPECT_THROW_MESSAGE("Unexpected", json.decodeRaw("[00]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected", json.decodeRaw("[01]", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
// A minus sign with no digits following it.
KJ_EXPECT_THROW_MESSAGE("ends prematurely", json.decodeRaw("-", root));
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("-5", root);
KJ_EXPECT(root.which() == JsonValue::NUMBER);
KJ_EXPECT(root.getNumber() == -5);
}
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw("-5.5", root);
KJ_EXPECT(root.which() == JsonValue::NUMBER);
KJ_EXPECT(root.getNumber() == -5.5);
}
// --- Malformed input: each case must throw with the given message fragment ---
// NOTE(review): the "\z" case appears twice and "-" repeats an earlier block; the duplicates
// look unintentional but are harmless.
{
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("a", root));
KJ_EXPECT_THROW_MESSAGE("ends prematurely", json.decodeRaw("[", root));
KJ_EXPECT_THROW_MESSAGE("ends prematurely", json.decodeRaw("{", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[}", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("{]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[}]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[1, , ]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[,]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[true,]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[, 1]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[1\"\"]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("[1,, \"\"]", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("{\"a\"1: 0}", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw(R"({"some": null,})", root));
KJ_EXPECT_THROW_MESSAGE("Input remains", json.decodeRaw("11a", root));
KJ_EXPECT_THROW_MESSAGE("Invalid escape", json.decodeRaw(R"("\z")", root));
KJ_EXPECT_THROW_MESSAGE("Invalid escape", json.decodeRaw(R"("\z")", root));
KJ_EXPECT_THROW_MESSAGE("ends prematurely", json.decodeRaw(R"(["\n\", 3])", root));
KJ_EXPECT_THROW_MESSAGE("Invalid hex", json.decodeRaw(R"("\u12zz")", root));
KJ_EXPECT_THROW_MESSAGE("ends prematurely", json.decodeRaw("-", root));
KJ_EXPECT_THROW_MESSAGE("Unexpected input", json.decodeRaw("--", root));
}
}
KJ_TEST("maximum nesting depth") {
// Checks that JsonCodec::setMaxNestingDepth() bounds how deeply arrays/objects may nest when
// decoding: limits below the input's depth must throw, a limit equal to it must succeed.
JsonCodec json;
auto input = kj::str(R"({"foo": "a", "bar": ["b", { "baz": [-5.5e11] }, [ [ 1 ], { "z": 2 }]]})");
// `input` has a maximum nesting depth of 4, reached 3 times.
{
// Default limit: the input decodes without error.
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(input, root);
}
{
// A limit of 0 rejects even the outermost object.
json.setMaxNestingDepth(0);
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
KJ_EXPECT_THROW_MESSAGE("nest",
json.decodeRaw(input, root));
}
{
// One less than the input's depth: still rejected.
json.setMaxNestingDepth(3);
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
KJ_EXPECT_THROW_MESSAGE("nest",
json.decodeRaw(input, root));
}
{
// Exactly the input's depth: accepted, i.e. the limit is inclusive.
json.setMaxNestingDepth(4);
MallocMessageBuilder message;
auto root = message.initRoot<JsonValue>();
json.decodeRaw(input, root);
}
}
class TestHandler: public JsonCodec::Handler<Text> {
public:
void encode(const JsonCodec& codec, Text::Reader input,
......
......@@ -20,8 +20,14 @@
// THE SOFTWARE.
#include "json.h"
#include <math.h> // for HUGEVAL to check for overflow in std::strtod
#include <stdlib.h> // std::strtod
#include <errno.h> // for std::strtod errors
#include <unordered_map>
#include <capnp/orphan.h>
#include <kj/debug.h>
#include <kj/function.h>
#include <kj/vector.h>
namespace capnp {
......@@ -43,6 +49,7 @@ struct FieldHash {
struct JsonCodec::Impl {
bool prettyPrint = false;
size_t maxNestingDepth = 64;
std::unordered_map<Type, HandlerBase*, TypeHash> typeHandlers;
std::unordered_map<StructSchema::Field, HandlerBase*, FieldHash> fieldHandlers;
......@@ -184,6 +191,10 @@ JsonCodec::~JsonCodec() noexcept(false) {}
void JsonCodec::setPrettyPrint(bool enabled) { impl->prettyPrint = enabled; }
void JsonCodec::setMaxNestingDepth(size_t maxNestingDepth) {
  // Store the new limit in the codec's private state; it takes effect for decodes started
  // after this call.
  auto& settings = *impl;
  settings.maxNestingDepth = maxNestingDepth;
}
kj::String JsonCodec::encode(DynamicValue::Reader value, Type type) const {
MallocMessageBuilder message;
auto json = message.getRoot<JsonValue>();
......@@ -211,10 +222,6 @@ kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const {
return impl->encodeRaw(value, 0, multiline, false).flatten();
}
void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
// Stub: every call fails the assertion below without reading `input` or writing `output`.
KJ_FAIL_ASSERT("JSON decode not implement yet. :(");
}
void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const {
// TODO(soon): For interfaces, check for handlers on superclasses, per documentation...
// TODO(soon): For branded types, should we check for handlers on the generic?
......@@ -383,6 +390,320 @@ Orphan<DynamicValue> JsonCodec::decode(
// -----------------------------------------------------------------------------
namespace {
class Parser {
public:
Parser(size_t maxNestingDepth, kj::ArrayPtr<const char> input) :
maxNestingDepth(maxNestingDepth), input(input), remaining(input), nestingDepth(0) {}
void parseValue(JsonValue::Builder& output) {
consumeWhitespace();
KJ_DEFER(consumeWhitespace());
KJ_REQUIRE(!inputExhausted(), "JSON message ends prematurely.");
switch (nextChar()) {
case 'n': consume(kj::StringPtr("null")); output.setNull(); break;
case 'f': consume(kj::StringPtr("false")); output.setBoolean(false); break;
case 't': consume(kj::StringPtr("true")); output.setBoolean(true); break;
case '"': parseString(output); break;
case '[': parseArray(output); break;
case '{': parseObject(output); break;
case '-': case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7': case '8':
case '9': parseNumber(output); break;
default: KJ_FAIL_REQUIRE("Unexpected input in JSON message.");
}
}
void parseNumber(JsonValue::Builder& output) {
auto numberStr = consumeNumber();
char *endPtr;
errno = 0;
double value = std::strtod(numberStr.begin(), &endPtr);
KJ_ASSERT(endPtr != numberStr.begin(), "strtod should not fail! Is consumeNumber wrong?");
KJ_REQUIRE((value != HUGE_VAL && value != -HUGE_VAL) || errno != ERANGE,
"Overflow in JSON number.");
KJ_REQUIRE(value != 0.0 || errno != ERANGE,
"Underflow in JSON number.");
output.setNumber(value);
}
void parseString(JsonValue::Builder& output) {
output.setString(consumeQuotedString());
}
void parseArray(JsonValue::Builder& output) {
// TODO(perf): Using orphans leaves holes in the message. It's expected
// that a JsonValue is used for interop, and won't be sent or written as a
// Cap'n Proto message. This also applies to parseObject below.
kj::Vector<Orphan<JsonValue>> values;
auto orphanage = Orphanage::getForMessageContaining(output);
bool expectComma = false;
consume('[');
KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
KJ_DEFER(--nestingDepth);
while (consumeWhitespace(), nextChar() != ']') {
auto orphan = orphanage.newOrphan<JsonValue>();
auto builder = orphan.get();
if (expectComma) {
consumeWhitespace();
consume(',');
consumeWhitespace();
}
parseValue(builder);
values.add(kj::mv(orphan));
expectComma = true;
}
output.initArray(values.size());
auto array = output.getArray();
for (auto i : kj::indices(values)) {
array.adoptWithCaveats(i, kj::mv(values[i]));
}
consume(']');
}
void parseObject(JsonValue::Builder& output) {
kj::Vector<Orphan<JsonValue::Field>> fields;
auto orphanage = Orphanage::getForMessageContaining(output);
bool expectComma = false;
consume('{');
KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
KJ_DEFER(--nestingDepth);
while (consumeWhitespace(), nextChar() != '}') {
auto orphan = orphanage.newOrphan<JsonValue::Field>();
auto builder = orphan.get();
if (expectComma) {
consumeWhitespace();
consume(',');
consumeWhitespace();
}
builder.setName(consumeQuotedString());
consumeWhitespace();
consume(':');
consumeWhitespace();
auto valueBuilder = builder.getValue();
parseValue(valueBuilder);
fields.add(kj::mv(orphan));
expectComma = true;
}
output.initObject(fields.size());
auto object = output.getObject();
for (auto i : kj::indices(fields)) {
object.adoptWithCaveats(i, kj::mv(fields[i]));
}
consume('}');
}
bool inputExhausted() {
return remaining.size() == 0 || remaining.front() == '\0';
}
char nextChar() {
KJ_REQUIRE(!inputExhausted(), "JSON message ends prematurely.");
return remaining.front();
}
void advance(size_t numBytes = 1) {
KJ_REQUIRE(numBytes <= remaining.size(), "JSON message ends prematurely.");
remaining = kj::arrayPtr(remaining.begin() + numBytes, remaining.end());
}
void advanceTo(const char *newPos) {
KJ_REQUIRE(remaining.begin() <= newPos && newPos < remaining.end(),
"JSON message ends prematurely.");
remaining = kj::arrayPtr(newPos, remaining.end());
}
void consume(char expected) {
char current = nextChar();
KJ_REQUIRE(current == expected, "Unexpected input in JSON message.");
advance();
}
void consume(kj::ArrayPtr<const char> expected) {
KJ_REQUIRE(remaining.size() >= expected.size());
auto prefix = remaining.slice(0, expected.size());
KJ_REQUIRE(prefix == expected, "Unexpected input in JSON message.");
advance(expected.size());
}
bool tryConsume(char expected) {
bool found = !inputExhausted() && nextChar() == expected;
if (found) { advance(); }
return found;
}
template <typename Predicate>
void consumeOne(Predicate&& predicate) {
char current = nextChar();
KJ_REQUIRE(predicate(current), "Unexpected input in JSON message.");
advance();
}
template <typename Predicate>
kj::ArrayPtr<const char> consumeWhile(Predicate&& predicate) {
auto originalPos = remaining.begin();
while (!inputExhausted() && predicate(nextChar())) { advance(); }
return kj::arrayPtr(originalPos, remaining.begin());
}
void consumeWhitespace() {
consumeWhile([](char chr) {
return (
chr == ' ' ||
chr == '\f' ||
chr == '\n' ||
chr == '\r' ||
chr == '\t' ||
chr == '\v'
);
});
}
kj::String consumeQuotedString() {
consume('"');
// TODO(perf): Avoid copy / alloc if no escapes encoutered.
// TODO(perf): Get statistics on string size and preallocate?
kj::Vector<char> decoded;
do {
auto stringValue = consumeWhile([](const char chr) {
return chr != '"' && chr != '\\';
});
decoded.addAll(stringValue);
if (nextChar() == '\\') { // handle escapes.
advance();
switch(nextChar()) {
case '"' : decoded.add('"' ); advance(); break;
case '\\': decoded.add('\\'); advance(); break;
case '/' : decoded.add('/' ); advance(); break;
case 'b' : decoded.add('\b'); advance(); break;
case 'f' : decoded.add('\f'); advance(); break;
case 'n' : decoded.add('\n'); advance(); break;
case 'r' : decoded.add('\r'); advance(); break;
case 't' : decoded.add('\t'); advance(); break;
case 'u' :
advance(); // consume 'u'
unescapeAndAppend(kj::arrayPtr(remaining.begin(), 4), decoded);
advance(4);
break;
default: KJ_FAIL_REQUIRE("Invalid escape in JSON string."); break;
}
}
} while(nextChar() != '"');
consume('"');
decoded.add('\0');
// TODO(perf): This copy can be eliminated, but I can't find the kj::wayToDoIt();
return kj::String(decoded.releaseAsArray());
}
kj::String consumeNumber() {
auto originalPos = remaining.begin();
tryConsume('-');
if (!tryConsume('0')) {
consumeOne([](char c) { return '1' <= c && c <= '9'; });
consumeWhile([](char c) { return '0' <= c && c <= '9'; });
}
if (tryConsume('.')) {
consumeWhile([](char c) { return '0' <= c && c <= '9'; });
}
if (tryConsume('e') || tryConsume('E')) {
tryConsume('+') || tryConsume('-');
consumeWhile([](char c) { return '0' <= c && c <= '9'; });
}
KJ_REQUIRE(remaining.begin() != originalPos, "Expected number in JSON input.");
kj::Vector<char> number;
number.addAll(originalPos, remaining.begin());
number.add('\0');
return kj::String(number.releaseAsArray());
}
// TODO(someday): This "interface" is ugly, and won't work if/when surrogates are handled.
void unescapeAndAppend(kj::ArrayPtr<const char> hex, kj::Vector<char>& target) {
KJ_REQUIRE(hex.size() == 4);
int codePoint = 0;
for (int i = 0; i < 4; ++i) {
char c = hex[i];
codePoint <<= 4;
if ('0' <= c && c <= '9') {
codePoint |= c - '0';
} else if ('a' <= c && c <= 'f') {
codePoint |= c - 'a';
} else if ('A' <= c && c <= 'F') {
codePoint |= c - 'A';
} else {
KJ_FAIL_REQUIRE("Invalid hex digit in unicode escape.", c);
}
}
// TODO(soon): Support at least basic multi-lingual plane, ie ignore surrogates.
KJ_REQUIRE(codePoint < 128, "non-ASCII unicode escapes are not supported (yet!)");
target.add(0x7f & static_cast<char>(codePoint));
}
private:
const size_t maxNestingDepth;
const kj::ArrayPtr<const char> input;
kj::ArrayPtr<const char> remaining;
size_t nestingDepth;
}; // class Parser
} // namespace
void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
// Parses exactly one JSON value from `input` into `output`, enforcing the codec's configured
// maximum nesting depth. Malformed input is reported by the parser's KJ_REQUIRE failures.
Parser parser(impl->maxNestingDepth, input);
parser.parseValue(output);
// parseValue() skips trailing whitespace, so anything still unread here is extra content
// following the value.
KJ_REQUIRE(parser.inputExhausted(), "Input remains after parsing JSON.");
}
// -----------------------------------------------------------------------------
Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase(
const JsonCodec& codec, JsonValue::Reader input, Orphanage orphanage) const {
KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
......
......@@ -71,6 +71,10 @@ public:
// Enable to insert newlines, indentation, and other extra spacing into the output. The default
// is to use minimal whitespace.
void setMaxNestingDepth(size_t maxNestingDepth);
// Set maximum nesting depth when decoding JSON to prevent highly nested input from overflowing
// the call stack. The default is 64. Each enclosing array or object counts as one level; input
// nested more deeply than the limit fails to decode with a "nested too deeply" error, so a
// limit of 0 rejects every array and object.
template <typename T>
kj::String encode(T&& value);
// Encode any Cap'n Proto value to JSON, including primitives and
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment