Commit b17faeec authored by Kamal Marhubi's avatar Kamal Marhubi

Implement most of JsonCodec::decodeRaw

This is the first step towards JSON decoding, implementing the basic
functionality of JsonCodec::decodeRaw. The main outstanding issues are:

- it allows trailing commas in arrays and objects
- it is too liberal in number syntax, eg allowing a leading +
- it rejects non-ASCII characters in \u escapes

Refs https://github.com/sandstorm-io/capnproto/issues/255
parent cb17739b
...@@ -21,7 +21,9 @@ ...@@ -21,7 +21,9 @@
#include "json.h" #include "json.h"
#include <capnp/test-util.h> #include <capnp/test-util.h>
#include <capnp/compat/json.capnp.h>
#include <kj/debug.h> #include <kj/debug.h>
#include <kj/string.h>
#include <kj/test.h> #include <kj/test.h>
namespace capnp { namespace capnp {
...@@ -181,6 +183,194 @@ KJ_TEST("encode union") { ...@@ -181,6 +183,194 @@ KJ_TEST("encode union") {
KJ_EXPECT(json.encode(root) == "{\"before\":\"a\",\"middle\":44,\"bar\":321,\"after\":\"c\"}"); KJ_EXPECT(json.encode(root) == "{\"before\":\"a\",\"middle\":44,\"bar\":321,\"after\":\"c\"}");
} }
KJ_TEST("basic json decoding") {
  // Feeds a series of JSON documents through JsonCodec::decodeRaw and inspects
  // the resulting JsonValue tree. Every case gets its own message so that no
  // state leaks from one case into the next.
  //
  // TODO(cleanup): this test is a mess!
  // TODO(soon): add expected failing cases.

  JsonCodec json;

  // --- Literals: null / false / true ---------------------------------------

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("null", value);

    KJ_EXPECT(value.which() == JsonValue::NULL_);
    KJ_EXPECT(value.getNull() == VOID);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("false", value);

    KJ_EXPECT(value.which() == JsonValue::BOOLEAN);
    KJ_EXPECT(value.getBoolean() == false);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("true", value);

    KJ_EXPECT(value.which() == JsonValue::BOOLEAN);
    KJ_EXPECT(value.getBoolean() == true);
  }

  // --- Strings, with and without escapes -----------------------------------

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("\"foo\"", value);

    KJ_EXPECT(value.which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("foo") == value.getString());
  }

  {
    // A string consisting of a single escaped quote.
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(R"("\"")", value);

    KJ_EXPECT(value.which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("\"") == value.getString());
  }

  {
    // Escaped backslashes and quotes mixed with ordinary characters.
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(R"("\\abc\"d\\e")", value);

    KJ_EXPECT(value.which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("\\abc\"d\\e") == value.getString());
  }

  {
    // Every single-character escape plus a few ASCII-range \u escapes.
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(R"("\"\\\/\b\f\n\r\t\u0003abc\u0064\u0065f")", value);

    KJ_EXPECT(value.which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("\"\\/\b\f\n\r\t\x03""abcdef") == value.getString(), value.getString());
  }

  // --- Arrays --------------------------------------------------------------

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("[]", value);

    KJ_EXPECT(value.which() == JsonValue::ARRAY, value.which());
    KJ_EXPECT(value.getArray().size() == 0);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("[true]", value);

    KJ_EXPECT(value.which() == JsonValue::ARRAY);
    auto items = value.getArray();
    KJ_EXPECT(items.size() == 1, items.size());
    KJ_EXPECT(value.getArray()[0].which() == JsonValue::BOOLEAN);
    KJ_EXPECT(value.getArray()[0].getBoolean() == true);
  }

  {
    // Whitespace is allowed before the array and around its elements.
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(" [ true , false\t\n , null]", value);

    KJ_EXPECT(value.which() == JsonValue::ARRAY);
    auto items = value.getArray();
    KJ_EXPECT(items.size() == 3);
    KJ_EXPECT(items[0].which() == JsonValue::BOOLEAN);
    KJ_EXPECT(items[0].getBoolean() == true);
    KJ_EXPECT(items[1].which() == JsonValue::BOOLEAN);
    KJ_EXPECT(items[1].getBoolean() == false);
    KJ_EXPECT(items[2].which() == JsonValue::NULL_);
    KJ_EXPECT(items[2].getNull() == VOID);
  }

  // --- Objects -------------------------------------------------------------

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("{}", value);

    KJ_EXPECT(value.which() == JsonValue::OBJECT, value.which());
    KJ_EXPECT(value.getObject().size() == 0);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(R"({"some": null})", value);

    KJ_EXPECT(value.which() == JsonValue::OBJECT, value.which());
    auto fields = value.getObject();
    KJ_EXPECT(fields.size() == 1);
    KJ_EXPECT(kj::str("some") == fields[0].getName());
    KJ_EXPECT(fields[0].getValue().which() == JsonValue::NULL_);
  }

  {
    // Nested document: escaped field name, array holding a string, a number
    // and an object that itself holds an empty object.
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw(R"({"foo\n\tbaz": "a val", "bar": ["a", -5.5e11, { "z": {}}]})", value);

    KJ_EXPECT(value.which() == JsonValue::OBJECT, value.which());
    auto fields = value.getObject();
    KJ_EXPECT(fields.size() == 2);

    KJ_EXPECT(kj::str("foo\n\tbaz") == fields[0].getName());
    KJ_EXPECT(fields[0].getValue().which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("a val") == fields[0].getValue().getString());

    KJ_EXPECT(kj::str("bar") == fields[1].getName());
    KJ_EXPECT(fields[1].getValue().which() == JsonValue::ARRAY);
    auto items = fields[1].getValue().getArray();
    KJ_EXPECT(items.size() == 3, items.size());
    KJ_EXPECT(items[0].which() == JsonValue::STRING);
    KJ_EXPECT(kj::str("a") == items[0].getString());
    KJ_EXPECT(items[1].which() == JsonValue::NUMBER);
    KJ_EXPECT(items[1].getNumber() == -5.5e11);
    KJ_EXPECT(items[2].which() == JsonValue::OBJECT);
    KJ_EXPECT(items[2].getObject().size() == 1);
    KJ_EXPECT(items[2].getObject()[0].getValue().which() == JsonValue::OBJECT);
    KJ_EXPECT(items[2].getObject()[0].getValue().getObject().size() == 0);
  }

  // --- Numbers -------------------------------------------------------------

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("123", value);

    KJ_EXPECT(value.which() == JsonValue::NUMBER);
    KJ_EXPECT(value.getNumber() == 123);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    // TODO(soon): this should fail, JSON doesn't allow leading +
    json.decodeRaw("+123", value);

    KJ_EXPECT(value.which() == JsonValue::NUMBER);
    KJ_EXPECT(value.getNumber() == 123);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("-5", value);

    KJ_EXPECT(value.which() == JsonValue::NUMBER);
    KJ_EXPECT(value.getNumber() == -5);
  }

  {
    MallocMessageBuilder message;
    auto value = message.initRoot<JsonValue>();

    json.decodeRaw("-5.5", value);

    KJ_EXPECT(value.which() == JsonValue::NUMBER);
    KJ_EXPECT(value.getNumber() == -5.5);
  }
}
class TestHandler: public JsonCodec::Handler<Text> { class TestHandler: public JsonCodec::Handler<Text> {
public: public:
void encode(const JsonCodec& codec, Text::Reader input, void encode(const JsonCodec& codec, Text::Reader input,
......
...@@ -20,8 +20,12 @@ ...@@ -20,8 +20,12 @@
// THE SOFTWARE. // THE SOFTWARE.
#include "json.h" #include "json.h"
#include <cstdlib> // std::strtod
#include <unordered_map> #include <unordered_map>
#include <capnp/orphan.h>
#include <kj/debug.h> #include <kj/debug.h>
#include <kj/function.h>
#include <kj/vector.h>
namespace capnp { namespace capnp {
...@@ -41,6 +45,12 @@ struct FieldHash { ...@@ -41,6 +45,12 @@ struct FieldHash {
} // namespace } // namespace
namespace _ { // private
// Forward declaration so JsonCodec::decodeRaw can call the parser entry point,
// whose definition appears later in this file. Parses the JSON text in `input`
// and writes the result into `output`.
void parseJsonValue(kj::ArrayPtr<const char> input, JsonValue::Builder output);
} // namespace _ (private)
struct JsonCodec::Impl { struct JsonCodec::Impl {
bool prettyPrint = false; bool prettyPrint = false;
...@@ -212,7 +222,7 @@ kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const { ...@@ -212,7 +222,7 @@ kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const {
} }
void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
  // Decodes the raw JSON text in `input` into the JsonValue tree rooted at
  // `output`. All of the work is delegated to the private parser entry point;
  // the old "not implemented" assertion is gone so the call is reachable.
  _::parseJsonValue(input, output);
}
void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const { void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const {
...@@ -372,6 +382,251 @@ Orphan<DynamicValue> JsonCodec::decode( ...@@ -372,6 +382,251 @@ Orphan<DynamicValue> JsonCodec::decode(
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
namespace _ { // private
class Parser {
public:
Parser(kj::ArrayPtr<const char> input) : input_(input), remaining_(input_) {}
void parseValue(JsonValue::Builder &output) {
consumeWhitespace();
KJ_REQUIRE(remaining_.size() > 0, "JSON message ends prematurely.");
switch (nextChar()) {
case 'n': consume(NULL_); output.setNull(); break;
case 'f': consume(FALSE); output.setBoolean(false); break;
case 't': consume(TRUE); output.setBoolean(true); break;
case '"': parseString(output); break;
case '[': parseArray(output); break;
case '{': parseObject(output); break;
// TODO(security): We could check for numbers more carefully instead of
// relying on strtod.
default: parseNumber(output); break;
}
}
void parseNumber(JsonValue::Builder &output) {
// TODO(someday): strtod allows leading +, while JSON grammar does not.
// strtod consumes leading whitespace, so we don't have to.
char *numEnd;
output.setNumber(std::strtod(remaining_.begin(), &numEnd));
advanceTo(numEnd);
}
void parseString(JsonValue::Builder &output) {
output.setString(consumeQuotedString());
}
void parseArray(JsonValue::Builder &output) {
// TODO(perf): Using orphans leaves holes in the message. It's expected
// that a JsonValue is used for interop, and won't be sent or written as a
// Cap'n Proto message.
kj::Vector<Orphan<JsonValue>> values;
auto orphanage = Orphanage::getForMessageContaining(output);
consume('[');
while (consumeWhitespace(), nextChar() != ']') {
auto orphan = orphanage.newOrphan<JsonValue>();
auto builder = orphan.get();
parseValue(builder);
values.add(kj::mv(orphan));
if (consumeWhitespace(), nextChar() != ']') {
// TODO(soon): This incorrectly allows a trailing comma.
consume(',');
}
}
output.initArray(values.size());
auto array = output.getArray();
for (size_t i = 0; i < values.size(); ++i) {
array.adoptWithCaveats(i, kj::mv(values[i]));
}
consume(']');
}
void parseObject(JsonValue::Builder &output) {
kj::Vector<Orphan<JsonValue::Field>> fields;
auto orphanage = Orphanage::getForMessageContaining(output);
consume('{');
while (consumeWhitespace(), nextChar() != '}') {
auto orphan = orphanage.newOrphan<JsonValue::Field>();
auto builder = orphan.get();
builder.setName(consumeQuotedString());
consumeWhitespace();
consume(':');
consumeWhitespace();
auto valueBuilder = builder.getValue();
parseValue(valueBuilder);
fields.add(kj::mv(orphan));
if (consumeWhitespace(), nextChar() != '}') {
// TODO(soon): This incorrectly allows a trailing comma.
consume(',');
}
}
output.initObject(fields.size());
auto object = output.getObject();
for (size_t i = 0; i < fields.size(); ++i) {
object.adoptWithCaveats(i, kj::mv(fields[i]));
}
consume('}');
}
char nextChar() {
return remaining_.front();
}
void advance(size_t numBytes = 1) {
KJ_REQUIRE(numBytes < remaining_.size(), "JSON message ends prematurely.");
remaining_ = kj::arrayPtr(remaining_.begin() + numBytes, remaining_.end());
}
void advanceTo(const char *newPos) {
KJ_REQUIRE(remaining_.begin() <= newPos && newPos < remaining_.end(),
"JSON message ends prematurely.");
remaining_ = kj::arrayPtr(newPos, remaining_.end());
}
void consume(char expected) {
char current = nextChar();
KJ_REQUIRE(current == expected, "Unexpected character in JSON message.");
advance();
}
void consume(kj::ArrayPtr<const char> expected) {
KJ_REQUIRE(remaining_.size() >= expected.size());
auto prefix = remaining_.slice(0, expected.size());
KJ_REQUIRE(prefix == expected, "Unexpected input in JSON message.");
advance(expected.size());
}
kj::ArrayPtr<const char> consumeWhile(kj::Function<bool(char)> predicate) {
auto originalPos = remaining_.begin();
while (predicate(nextChar())) { advance(); }
return kj::arrayPtr(originalPos, remaining_.begin());
}
void consumeWhitespace() {
consumeWhile([](char chr) {
return (
chr == ' ' ||
chr == '\f' ||
chr == '\n' ||
chr == '\r' ||
chr == '\t' ||
chr == '\v'
);
});
}
kj::String consumeQuotedString() {
consume('"');
// TODO(perf): Avoid copy / alloc if no escapes encoutered.
// TODO(perf): Get statistics on string size and preallocate?
kj::Vector<char> decoded;
do {
auto stringValue = consumeWhile([](const char chr) {
return chr != '"' && chr != '\\';
});
decoded.addAll(stringValue);
if (nextChar() == '\\') { // handle escapes.
advance();
switch(nextChar()) {
case '"' : decoded.add('"' ); advance(); break;
case '\\': decoded.add('\\'); advance(); break;
case '/' : decoded.add('/' ); advance(); break;
case 'b' : decoded.add('\b'); advance(); break;
case 'f' : decoded.add('\f'); advance(); break;
case 'n' : decoded.add('\n'); advance(); break;
case 'r' : decoded.add('\r'); advance(); break;
case 't' : decoded.add('\t'); advance(); break;
case 'u' :
advance(); // consume 'u'
unescapeAndAppend(kj::arrayPtr(remaining_.begin(), 4), decoded);
advance(4);
break;
default: KJ_FAIL_REQUIRE("invalid escape", nextChar()); break;
}
}
} while(nextChar() != '"');
consume('"');
decoded.add('\0');
// TODO(perf): This copy can be eliminated, but I can't find the kj::wayToDoIt();
return kj::String(decoded.releaseAsArray());
}
// TODO(someday): This "interface" is ugly, and won't work if/when surrogates are handled.
void unescapeAndAppend(kj::ArrayPtr<const char> hex, kj::Vector<char>& target) {
KJ_REQUIRE(hex.size() == 4);
int codePoint = 0;
for (int i = 0; i < 4; ++i) {
char c = hex[i];
codePoint <<= 4;
if ('0' <= c && c <= '9') {
codePoint |= c - '0';
} else if ('a' <= c && c <= 'f') {
codePoint |= c - 'a';
} else if ('A' <= c && c <= 'F') {
codePoint |= c - 'A';
} else {
KJ_FAIL_REQUIRE("invalid hex digit in unicode escape", c);
}
}
// TODO(soon): Support at least basic multi-lingual plane, ie ignore surrogates.
KJ_REQUIRE(codePoint < 128, "non-ASCII unicode escapes are not supported (yet!)");
target.add(0x7f & static_cast<char>(codePoint));
}
private:
static const kj::ArrayPtr<const char> NULL_;
static const kj::ArrayPtr<const char> FALSE;
static const kj::ArrayPtr<const char> TRUE;
const kj::ArrayPtr<const char> input_;
kj::ArrayPtr<const char> remaining_;
}; // class Parser
// Array literal used instead of string literal to avoid null terminator.
const kj::ArrayPtr<const char> Parser::NULL_ = kj::ArrayPtr<const char>({'n','u','l','l'});
const kj::ArrayPtr<const char> Parser::FALSE = kj::ArrayPtr<const char>({'f','a','l','s','e'});
const kj::ArrayPtr<const char> Parser::TRUE = kj::ArrayPtr<const char>({'t','r','u','e'});
void parseJsonValue(kj::ArrayPtr<const char> input, JsonValue::Builder output) {
  // Entry point used by JsonCodec::decodeRaw: parses a single JSON value from
  // the start of `input` and stores it in `output`. Whatever follows that value
  // is currently ignored.
  //
  // TODO(security): should we check there are no non-whitespace characters left in input?
  Parser(input).parseValue(output);
}
} // namespace _ (private)
// -----------------------------------------------------------------------------
Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase( Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase(
const JsonCodec& codec, JsonValue::Reader input, Orphanage orphanage) const { const JsonCodec& codec, JsonValue::Reader input, Orphanage orphanage) const {
KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch"); KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment