json.c++ 29.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
// Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "json.h"
23 24 25
#include <math.h>    // for HUGE_VAL to check for overflow in strtod
#include <stdlib.h>  // strtod
#include <errno.h>   // for strtod errors
26
#include <unordered_map>
27
#include <capnp/orphan.h>
28
#include <kj/debug.h>
29 30
#include <kj/function.h>
#include <kj/vector.h>
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

namespace capnp {

namespace {

struct TypeHash {
  size_t operator()(const Type& type) const {
    return type.hashCode();
  }
};

struct FieldHash {
  size_t operator()(const StructSchema::Field& field) const {
    return field.getIndex() ^ field.getContainingStruct().getProto().getId();
  }
};

}  // namespace

struct JsonCodec::Impl {
  // Private state of JsonCodec plus the helpers that render a JsonValue tree as JSON text.

  bool prettyPrint = false;
  size_t maxNestingDepth = 64;

  // Handlers registered per type and per struct field. Only the pointers are stored here;
  // nothing in this struct deletes them.
  std::unordered_map<Type, HandlerBase*, TypeHash> typeHandlers;
  std::unordered_map<StructSchema::Field, HandlerBase*, FieldHash> fieldHandlers;

  kj::StringTree encodeRaw(JsonValue::Reader value, uint indent, bool& multiline,
                           bool hasPrefix) const {
    // Renders `value` as JSON text.
    //
    // `indent` is the nesting level used for pretty-printed line breaks. `multiline` is set to
    // true (never reset) when the produced text spans several lines. `hasPrefix` tells
    // encodeList() that other text precedes this value on the current line.
    switch (value.which()) {
      case JsonValue::NULL_:
        return kj::strTree("null");

      case JsonValue::BOOLEAN:
        return kj::strTree(value.getBoolean());

      case JsonValue::NUMBER:
        return kj::strTree(value.getNumber());

      case JsonValue::STRING:
        return kj::strTree(encodeString(value.getString()));

      case JsonValue::ARRAY: {
        auto items = value.getArray();
        bool anyChildMultiline = false;
        // Children get one extra indent level only when there is more than one of them.
        uint childIndent = items.size() > 1 ? indent + 1 : indent;
        auto pieces = KJ_MAP(item, items) {
          return encodeRaw(item, childIndent, anyChildMultiline, false);
        };

        auto body = encodeList(kj::mv(pieces), anyChildMultiline, indent, multiline, hasPrefix);
        return kj::strTree('[', kj::mv(body), ']');
      }

      case JsonValue::OBJECT: {
        auto members = value.getObject();
        bool anyChildMultiline = false;
        uint childIndent = members.size() > 1 ? indent + 1 : indent;
        kj::StringPtr separator = prettyPrint ? ": " : ":";
        auto pieces = KJ_MAP(member, members) {
          // Member values follow `"name":` on the same line, hence hasPrefix = true.
          return kj::strTree(
              encodeString(member.getName()), separator,
              encodeRaw(member.getValue(), childIndent, anyChildMultiline, true));
        };

        auto body = encodeList(kj::mv(pieces), anyChildMultiline, indent, multiline, hasPrefix);
        return kj::strTree('{', kj::mv(body), '}');
      }

      case JsonValue::CALL: {
        auto call = value.getCall();
        auto arguments = call.getParams();
        bool anyChildMultiline = false;
        uint childIndent = arguments.size() > 1 ? indent + 1 : indent;
        auto pieces = KJ_MAP(argument, arguments) {
          return encodeRaw(argument, childIndent, anyChildMultiline, false);
        };

        // The function name always precedes the argument list, so hasPrefix is forced to true.
        auto body = encodeList(kj::mv(pieces), anyChildMultiline, indent, multiline, true);
        return kj::strTree(call.getFunction(), '(', kj::mv(body), ')');
      }
    }

    KJ_FAIL_ASSERT("unknown JsonValue type", static_cast<uint>(value.which()));
  }

  kj::String encodeString(kj::StringPtr chars) const {
    // Produces a double-quoted JSON string literal for `chars`. Quote, backslash, slash and the
    // common control characters use their two-character escapes; any other byte below 0x20 is
    // written as \u00XX. All remaining bytes are copied through unchanged.
    static const char HEXDIGITS[] = "0123456789abcdef";
    kj::Vector<char> quoted(chars.size() + 3);
    auto emit = [&quoted](kj::StringPtr text) { quoted.addAll(text); };

    quoted.add('"');
    for (char ch: chars) {
      if (ch == '\"') {
        emit("\\\"");
      } else if (ch == '\\') {
        emit("\\\\");
      } else if (ch == '/') {
        emit("\\/");
      } else if (ch == '\b') {
        emit("\\b");
      } else if (ch == '\f') {
        emit("\\f");
      } else if (ch == '\n') {
        emit("\\n");
      } else if (ch == '\r') {
        emit("\\r");
      } else if (ch == '\t') {
        emit("\\t");
      } else if (static_cast<unsigned char>(ch) < 0x20) {
        // Remaining control characters have no short escape.
        uint8_t raw = ch;
        emit("\\u00");
        quoted.add(HEXDIGITS[raw >> 4]);
        quoted.add(HEXDIGITS[raw & 0x0f]);
      } else {
        quoted.add(ch);
      }
    }
    quoted.add('"');
    quoted.add('\0');  // kj::String expects the terminating NUL to be part of the array.

    return kj::String(quoted.releaseAsArray());
  }

  kj::StringTree encodeList(kj::Array<kj::StringTree> elements,
                            bool hasMultilineElement, uint indent, bool& multiline,
                            bool hasPrefix) const {
    // Joins already-encoded elements with the delimiter and padding appropriate for the current
    // print mode. The surrounding brackets are added by the caller. Sets `multiline` when the
    // elements end up on separate lines.
    size_t longestChild = 0;
    for (auto& element: elements) {
      longestChild = kj::max(longestChild, element.size());
    }

    // Compact defaults: no whitespace anywhere.
    kj::StringPtr prefix = "";
    kj::StringPtr suffix = "";
    kj::StringPtr delim = ",";
    // Backing storage for the dynamically built prefix/delimiter; must outlive the StringPtrs.
    kj::String ownedPrefix;
    kj::String ownedDelim;

    if (prettyPrint) {
      bool onePerLine = (elements.size() > 1) && (hasMultilineElement || longestChild > 50);
      if (onePerLine) {
        // Some element is itself multi-line, or some element is long: give each element its own
        // line.
        auto padding = kj::repeat(' ', (indent + 1) * 2);
        ownedDelim = kj::str(",\n", padding);
        delim = ownedDelim;
        multiline = true;
        if (hasPrefix) {
          // Text already precedes the list on this line, so push the first element down to the
          // next line.
          ownedPrefix = kj::str("\n", padding);
          prefix = ownedPrefix;
        } else {
          prefix = " ";
        }
        suffix = " ";
      } else {
        // Single line, with a space after each comma for legibility.
        delim = ", ";
      }
    }

    return kj::strTree(prefix, kj::StringTree(kj::mv(elements), delim), suffix);
  }
};

// Creates a codec whose settings are the defaults declared in Impl.
JsonCodec::JsonCodec(): impl(kj::heap<Impl>()) {
}
// Nothing to do explicitly; declared noexcept(false), so an exception may leave the destructor.
JsonCodec::~JsonCodec() noexcept(false) {
}

void JsonCodec::setPrettyPrint(bool enabled) {
  // Switches between compact output and output with added spacing / line breaks.
  impl->prettyPrint = enabled;
}

194 195 196 197
void JsonCodec::setMaxNestingDepth(size_t maxNestingDepth) {
  // Stores the nesting limit that decodeRaw() hands to the parser.
  auto& settings = *impl;
  settings.maxNestingDepth = maxNestingDepth;
}

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
kj::String JsonCodec::encode(DynamicValue::Reader value, Type type) const {
  // Two-step encode: build a JsonValue tree in a scratch message, then render that tree as text.
  MallocMessageBuilder scratch;
  auto tree = scratch.getRoot<JsonValue>();
  encode(value, type, tree);
  return encodeRaw(tree);
}

void JsonCodec::decode(kj::ArrayPtr<const char> input, DynamicStruct::Builder output) const {
  // Two-step decode: parse the text into a JsonValue tree held in a scratch message, then copy
  // that tree into the struct builder.
  MallocMessageBuilder scratch;
  auto tree = scratch.getRoot<JsonValue>();
  decodeRaw(input, tree);
  decode(tree, output);
}

Orphan<DynamicValue> JsonCodec::decode(
    kj::ArrayPtr<const char> input, Type type, Orphanage orphanage) const {
  // Parses the text into a scratch JsonValue tree and delegates to the JsonValue-based overload
  // to produce the orphan.
  MallocMessageBuilder scratch;
  auto tree = scratch.getRoot<JsonValue>();
  decodeRaw(input, tree);
  return decode(tree, type, orphanage);
}

kj::String JsonCodec::encodeRaw(JsonValue::Reader value) const {
  // Renders a JsonValue tree as text, starting at indent level zero with no preceding text.
  bool spansLines = false;  // Output parameter of Impl::encodeRaw(); not needed at top level.
  auto tree = impl->encodeRaw(value, 0, spansLines, false);
  return tree.flatten();
}

void JsonCodec::encode(DynamicValue::Reader input, Type type, JsonValue::Builder output) const {
  // Converts a Cap'n Proto value of the given type into a JsonValue tree. A handler registered
  // for exactly `type` takes precedence over the built-in mapping below.
  //
  // TODO(soon): For interfaces, check for handlers on superclasses, per documentation...
  // TODO(soon): For branded types, should we check for handlers on the generic?
  // TODO(someday): Allow registering handlers for "all structs", "all lists", etc?
  auto handler = impl->typeHandlers.find(type);
  if (handler != impl->typeHandlers.end()) {
    handler->second->encodeBase(*this, input, output);
    return;
  }

  switch (type.which()) {
    case schema::Type::VOID:
      output.setNull();
      break;

    case schema::Type::BOOL:
      output.setBoolean(input.as<bool>());
      break;

    case schema::Type::INT8:
    case schema::Type::INT16:
    case schema::Type::INT32:
    case schema::Type::UINT8:
    case schema::Type::UINT16:
    case schema::Type::UINT32:
      output.setNumber(input.as<double>());
      break;

    case schema::Type::FLOAT32:
    case schema::Type::FLOAT64: {
      // Inf, -inf and NaN are not allowed in the JSON spec, so they are stored as strings.
      double number = input.as<double>();
      if (kj::isNaN(number)) {
        output.setString("NaN");
      } else if (number == kj::inf()) {
        output.setString("Infinity");
      } else if (number == -kj::inf()) {
        output.setString("-Infinity");
      } else {
        output.setNumber(number);
      }
      break;
    }

    case schema::Type::INT64:
      // 64-bit integers are written as decimal strings rather than JSON numbers.
      output.setString(kj::str(input.as<int64_t>()));
      break;

    case schema::Type::UINT64:
      output.setString(kj::str(input.as<uint64_t>()));
      break;

    case schema::Type::TEXT:
      output.setString(kj::str(input.as<Text>()));
      break;

    case schema::Type::DATA: {
      // Turn into array of byte values. Yep, this is pretty ugly. People really need to override
      // this with a handler.
      auto bytes = input.as<Data>();
      auto numbers = output.initArray(bytes.size());
      for (size_t i = 0; i < bytes.size(); ++i) {
        numbers[i].setNumber(bytes[i]);
      }
      break;
    }

    case schema::Type::LIST: {
      auto elements = input.as<DynamicList>();
      auto elementType = type.asList().getElementType();
      auto encoded = output.initArray(elements.size());
      for (size_t i = 0; i < elements.size(); ++i) {
        encode(elements[i], elementType, encoded[i]);
      }
      break;
    }

    case schema::Type::ENUM: {
      // Named enumerants are written by name; values without an enumerant are written as the
      // raw number.
      auto enumValue = input.as<DynamicEnum>();
      KJ_IF_MAYBE(enumerant, enumValue.getEnumerant()) {
        output.setString(enumerant->getProto().getName());
      } else {
        output.setNumber(enumValue.getRaw());
      }
      break;
    }

    case schema::Type::STRUCT: {
      auto reader = input.as<capnp::DynamicStruct>();
      auto plainFields = reader.getSchema().getNonUnionFields();

      // First pass: note which non-union fields are set and count them, so the output object can
      // be allocated with its final size.
      KJ_STACK_ARRAY(bool, present, plainFields.size(), 32, 128);

      uint fieldCount = 0;
      for (size_t i = 0; i < plainFields.size(); ++i) {
        present[i] = reader.has(plainFields[i]);
        if (present[i]) {
          ++fieldCount;
        }
      }

      // We try to write the union field, if any, in proper order with the rest.
      auto active = reader.which();
      bool activeIsNull = false;

      KJ_IF_MAYBE(member, active) {
        // Even if the union field is null, if it is not the default field of the union then we
        // have to print it anyway.
        activeIsNull = !reader.has(*member);
        bool isDefaultMember = member->getProto().getDiscriminantValue() == 0;
        if (isDefaultMember && activeIsNull) {
          active = nullptr;
        } else {
          ++fieldCount;
        }
      }

      auto object = output.initObject(fieldCount);
      size_t pos = 0;

      // Writes the active union member into the next free slot of `object`.
      auto emitUnionMember = [&](StructSchema::Field unionField) {
        auto slot = object[pos++];
        slot.setName(unionField.getProto().getName());
        if (activeIsNull) {
          slot.initValue().setNull();
        } else {
          encodeField(unionField, reader.get(unionField), slot.initValue());
        }
      };

      for (size_t i = 0; i < plainFields.size(); ++i) {
        auto field = plainFields[i];

        // Emit the union member just before the first plain field with a larger index.
        KJ_IF_MAYBE(unionField, active) {
          if (unionField->getIndex() < field.getIndex()) {
            emitUnionMember(*unionField);
            active = nullptr;
          }
        }

        if (present[i]) {
          auto slot = object[pos++];
          slot.setName(field.getProto().getName());
          encodeField(field, reader.get(field), slot.initValue());
        }
      }

      KJ_IF_MAYBE(unionField, active) {
        // Union field not printed yet; must be last.
        emitUnionMember(*unionField);
      }

      KJ_ASSERT(pos == fieldCount);
      break;
    }

    case schema::Type::INTERFACE:
      KJ_FAIL_REQUIRE("don't know how to JSON-encode capabilities; "
                      "please register a JsonCodec::Handler for this");

    case schema::Type::ANY_POINTER:
      KJ_FAIL_REQUIRE("don't know how to JSON-encode AnyPointer; "
                      "please register a JsonCodec::Handler for this");
  }
}

void JsonCodec::encodeField(StructSchema::Field field, DynamicValue::Reader input,
                            JsonValue::Builder output) const {
  // Encodes one struct field. A handler registered for this specific field wins; otherwise the
  // value is encoded according to the field's declared type.
  auto found = impl->fieldHandlers.find(field);
  if (found == impl->fieldHandlers.end()) {
    encode(input, field.getType(), output);
  } else {
    found->second->encodeBase(*this, input, output);
  }
}

386
namespace {
387 388 389 390

template <typename SetFn, typename DecodeArrayFn, typename DecodeObjectFn>
void decodeField(Type type, JsonValue::Reader value, SetFn setFn, DecodeArrayFn decodeArrayFn,
    DecodeObjectFn decodeObjectFn) {
  // Decodes one JSON value destined for a slot of Cap'n Proto type `type`.
  //
  // Primitive, text, data and enum values are handed to `setFn`. JSON arrays destined for a list
  // go to `decodeArrayFn`, and JSON objects destined for a struct go to `decodeObjectFn`. A JSON
  // value whose kind does not fit `type` fails a KJ_REQUIRE.
  //
  // This code relies on conversions in DynamicValue::Reader::as<T>.
  switch(type.which()) {
    case schema::Type::VOID:
      // Nothing to store; whatever the JSON value is, it is ignored.
      break;
    case schema::Type::BOOL:
      switch (value.which()) {
        case JsonValue::BOOLEAN:
          setFn(value.getBoolean());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected boolean value");
      }
      break;
    case schema::Type::INT8:
    case schema::Type::INT16:
    case schema::Type::INT32:
    case schema::Type::INT64:
      // Relies on range check in DynamicValue::Reader::as<IntType>
      switch (value.which()) {
        case JsonValue::NUMBER:
          setFn(value.getNumber());
          break;
        case JsonValue::STRING:
          // Integers may also arrive as decimal strings.
          setFn(value.getString().parseAs<int64_t>());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected integer value");
      }
      break;
    case schema::Type::UINT8:
    case schema::Type::UINT16:
    case schema::Type::UINT32:
    case schema::Type::UINT64:
      // Relies on range check in DynamicValue::Reader::as<IntType>
      switch (value.which()) {
        case JsonValue::NUMBER:
          setFn(value.getNumber());
          break;
        case JsonValue::STRING:
          setFn(value.getString().parseAs<uint64_t>());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected integer value");
      }
      break;
    case schema::Type::FLOAT32:
    case schema::Type::FLOAT64:
      switch (value.which()) {
        case JsonValue::NULL_:
          // JSON null is decoded as NaN.
          setFn(kj::nan());
          break;
        case JsonValue::NUMBER:
          setFn(value.getNumber());
          break;
        case JsonValue::STRING:
          setFn(value.getString().parseAs<double>());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected float value");
      }
      break;
    case schema::Type::TEXT:
      switch (value.which()) {
        case JsonValue::STRING:
          setFn(value.getString());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected text value");
      }
      break;
    case schema::Type::DATA:
      switch (value.which()) {
        case JsonValue::ARRAY: {
          // Data is represented as a JSON array of byte values.
          auto array = value.getArray();
          kj::Vector<byte> data(array.size());
          for (auto arrayObject : array) {
            // Reading a union member other than the active one is not valid, so check the tag
            // before calling getNumber().
            KJ_REQUIRE(arrayObject.which() == JsonValue::NUMBER,
                "Expected number in byte array");
            auto x = arrayObject.getNumber();
            // Range-check before converting: casting an out-of-range or NaN double to an integer
            // type is undefined behavior, so `byte(x) == x` alone is not a safe test.
            KJ_REQUIRE(0 <= x && x <= 255 && byte(x) == x,
                "Number in byte array is not an integer in [0, 255]");
            data.add(byte(x));
          }
          setFn(Data::Reader(data.asPtr()));
          break;
        }
        default:
          KJ_FAIL_REQUIRE("Expected data value");
      }
      break;
    case schema::Type::LIST:
      switch (value.which()) {
        case JsonValue::NULL_:
          // nothing to do
          break;
        case JsonValue::ARRAY:
          decodeArrayFn(value.getArray());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected list value");
      }
      break;
    case schema::Type::ENUM:
      switch (value.which()) {
        case JsonValue::STRING:
          setFn(value.getString());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected enum value");
      }
      break;
    case schema::Type::STRUCT:
      switch (value.which()) {
        case JsonValue::NULL_:
          // nothing to do
          break;
        case JsonValue::OBJECT:
          decodeObjectFn(value.getObject());
          break;
        default:
          KJ_FAIL_REQUIRE("Expected object value");
      }
      break;
    case schema::Type::INTERFACE:
      KJ_FAIL_REQUIRE("don't know how to JSON-decode capabilities; "
                      "JsonCodec::Handler not implemented yet :(");
    case schema::Type::ANY_POINTER:
      KJ_FAIL_REQUIRE("don't know how to JSON-decode AnyPointer; "
                      "JsonCodec::Handler not implemented yet :(");
  }
}
519
} // namespace
520 521

void JsonCodec::decodeArray(List<JsonValue>::Reader input, DynamicList::Builder output) const {
  // Decodes each element of a JSON array into the corresponding slot of `output`, which the
  // caller must already have initialized to the same length as `input`.
  KJ_ASSERT(input.size() == output.size(), "Builder was not initialized to input size");
  auto type = output.getSchema().getElementType();
  // The index is unsigned so that it compares cleanly against size(); the callbacks below
  // capture it by reference and run within the same iteration.
  for (uint i = 0; i < input.size(); i++) {
    decodeField(type, input[i],
        [&](DynamicValue::Reader value) { output.set(i, value); },
        [&](List<JsonValue>::Reader array) {
          decodeArray(array, output.init(i, array.size()).as<DynamicList>());
        },
        [&](List<JsonValue::Field>::Reader object) {
          decodeObject(object, output[i].as<DynamicStruct>());
        });
  }
}

void JsonCodec::decodeObject(List<JsonValue::Field>::Reader input, DynamicStruct::Builder output)
    const {
  // Copies every JSON member whose name matches a field of the target struct into `output`.
  // Unknown json fields are ignored to allow schema evolution.
  auto structSchema = output.getSchema();
  for (auto member: input) {
    KJ_IF_MAYBE(fieldSchema, structSchema.findFieldByName(member.getName())) {
      auto target = *fieldSchema;
      decodeField(target.getType(), member.getValue(),
          [&](DynamicValue::Reader value) { output.set(target, value); },
          [&](List<JsonValue>::Reader array) {
            decodeArray(array, output.init(target, array.size()).as<DynamicList>());
          },
          [&](List<JsonValue::Field>::Reader object) {
            decodeObject(object, output.init(target).as<DynamicStruct>());
          });
    }
  }
}

554
void JsonCodec::decode(JsonValue::Reader input, DynamicStruct::Builder output) const {
  // Fills `output` from a JsonValue tree. Only a JSON object is accepted at the top level.
  //
  // TODO(soon): type and field handlers
  if (input.which() == JsonValue::OBJECT) {
    decodeObject(input.getObject(), output);
  } else {
    KJ_FAIL_REQUIRE("Top level json value must be object");
  }
}

Orphan<DynamicValue> JsonCodec::decode(
    JsonValue::Reader input, Type type, Orphanage orphanage) const {
  // Placeholder: decoding into an orphan is not written yet, so every call fails an assertion.
  //
  // TODO(soon)
  KJ_FAIL_ASSERT("JSON decode into orphanage not implement yet. :(");
}

// -----------------------------------------------------------------------------

573
namespace {
574

575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
class Input {
public:
  Input(kj::ArrayPtr<const char> input) : wrapped(input) {}

  bool exhausted() {
    return wrapped.size() == 0 || wrapped.front() == '\0';
  }

  char nextChar() {
    KJ_REQUIRE(!exhausted(), "JSON message ends prematurely.");
    return wrapped.front();
  }

  void advance(size_t numBytes = 1) {
    KJ_REQUIRE(numBytes <= wrapped.size(), "JSON message ends prematurely.");
    wrapped = kj::arrayPtr(wrapped.begin() + numBytes, wrapped.end());
  }

  void advanceTo(const char *newPos) {
    KJ_REQUIRE(wrapped.begin() <= newPos && newPos < wrapped.end(),
        "JSON message ends prematurely.");
    wrapped = kj::arrayPtr(newPos, wrapped.end());
  }

  kj::ArrayPtr<const char> consume(size_t numBytes = 1) {
    auto originalPos = wrapped.begin();
    advance(numBytes);

    return kj::arrayPtr(originalPos, wrapped.begin());
  }

  void consume(char expected) {
    char current = nextChar();
    KJ_REQUIRE(current == expected, "Unexpected input in JSON message.");

    advance();
  }

  void consume(kj::ArrayPtr<const char> expected) {
    KJ_REQUIRE(wrapped.size() >= expected.size());

    auto prefix = wrapped.slice(0, expected.size());
    KJ_REQUIRE(prefix == expected, "Unexpected input in JSON message.");

    advance(expected.size());
  }

  bool tryConsume(char expected) {
    bool found = !exhausted() && nextChar() == expected;
    if (found) { advance(); }

    return found;
  }

  template <typename Predicate>
  void consumeOne(Predicate&& predicate) {
    char current = nextChar();
    KJ_REQUIRE(predicate(current), "Unexpected input in JSON message.");

    advance();
  }

  template <typename Predicate>
  kj::ArrayPtr<const char> consumeWhile(Predicate&& predicate) {
    auto originalPos = wrapped.begin();
    while (!exhausted() && predicate(nextChar())) { advance(); }

    return kj::arrayPtr(originalPos, wrapped.begin());
  }

  template <typename F>  // Function<void(Input&)>
  kj::ArrayPtr<const char> consumeCustom(F&& f) {
    // Allows consuming in a custom manner without exposing the wrapped ArrayPtr.
    auto originalPos = wrapped.begin();
    f(*this);

    return kj::arrayPtr(originalPos, wrapped.begin());
  }
653

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
  void consumeWhitespace() {
    consumeWhile([](char chr) {
      return (
        chr == ' '  ||
        chr == '\n' ||
        chr == '\r' ||
        chr == '\t'
      );
    });
  }


private:
  kj::ArrayPtr<const char> wrapped;

};  // class Input

671 672
class Parser {
public:
673
  Parser(size_t maxNestingDepth, kj::ArrayPtr<const char> input) :
674
    maxNestingDepth(maxNestingDepth), input(input), nestingDepth(0) {}
675

676
  void parseValue(JsonValue::Builder& output) {
677 678
    input.consumeWhitespace();
    KJ_DEFER(input.consumeWhitespace());
679

680
    KJ_REQUIRE(!input.exhausted(), "JSON message ends prematurely.");
681

682 683 684 685
    switch (input.nextChar()) {
      case 'n': input.consume(kj::StringPtr("null"));  output.setNull();         break;
      case 'f': input.consume(kj::StringPtr("false")); output.setBoolean(false); break;
      case 't': input.consume(kj::StringPtr("true"));  output.setBoolean(true);  break;
686 687 688
      case '"': parseString(output); break;
      case '[': parseArray(output);  break;
      case '{': parseObject(output); break;
689 690 691 692
      case '-': case '0': case '1': case '2': case '3':
      case '4': case '5': case '6': case '7': case '8':
      case '9': parseNumber(output); break;
      default: KJ_FAIL_REQUIRE("Unexpected input in JSON message.");
693 694 695
    }
  }

696
  void parseNumber(JsonValue::Builder& output) {
697 698
    auto numberStr = consumeNumber();
    char *endPtr;
699

700
    errno = 0;
701
    double value = strtod(numberStr.begin(), &endPtr);
702 703 704 705 706 707 708 709

    KJ_ASSERT(endPtr != numberStr.begin(), "strtod should not fail! Is consumeNumber wrong?");
    KJ_REQUIRE((value != HUGE_VAL && value != -HUGE_VAL) || errno != ERANGE,
        "Overflow in JSON number.");
    KJ_REQUIRE(value != 0.0 || errno != ERANGE,
        "Underflow in JSON number.");

    output.setNumber(value);
710 711
  }

712
  void parseString(JsonValue::Builder& output) {
713 714 715
    output.setString(consumeQuotedString());
  }

716
  void parseArray(JsonValue::Builder& output) {
717 718
    // TODO(perf): Using orphans leaves holes in the message. It's expected
    // that a JsonValue is used for interop, and won't be sent or written as a
719
    // Cap'n Proto message.  This also applies to parseObject below.
720 721
    kj::Vector<Orphan<JsonValue>> values;
    auto orphanage = Orphanage::getForMessageContaining(output);
722
    bool expectComma = false;
723

724
    input.consume('[');
725 726
    KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
    KJ_DEFER(--nestingDepth);
727

728
    while (input.consumeWhitespace(), input.nextChar() != ']') {
729 730 731
      auto orphan = orphanage.newOrphan<JsonValue>();
      auto builder = orphan.get();

732
      if (expectComma) {
733 734 735
        input.consumeWhitespace();
        input.consume(',');
        input.consumeWhitespace();
736
      }
737 738 739 740 741

      parseValue(builder);
      values.add(kj::mv(orphan));

      expectComma = true;
742 743 744 745 746
    }

    output.initArray(values.size());
    auto array = output.getArray();

747
    for (auto i : kj::indices(values)) {
748 749 750
      array.adoptWithCaveats(i, kj::mv(values[i]));
    }

751
    input.consume(']');
752 753
  }

754
  void parseObject(JsonValue::Builder& output) {
755 756
    kj::Vector<Orphan<JsonValue::Field>> fields;
    auto orphanage = Orphanage::getForMessageContaining(output);
757
    bool expectComma = false;
758

759
    input.consume('{');
760 761
    KJ_REQUIRE(++nestingDepth <= maxNestingDepth, "JSON message nested too deeply.");
    KJ_DEFER(--nestingDepth);
762

763
    while (input.consumeWhitespace(), input.nextChar() != '}') {
764 765 766
      auto orphan = orphanage.newOrphan<JsonValue::Field>();
      auto builder = orphan.get();

767
      if (expectComma) {
768 769 770
        input.consumeWhitespace();
        input.consume(',');
        input.consumeWhitespace();
771 772
      }

773 774
      builder.setName(consumeQuotedString());

775 776 777
      input.consumeWhitespace();
      input.consume(':');
      input.consumeWhitespace();
778 779 780 781 782 783

      auto valueBuilder = builder.getValue();
      parseValue(valueBuilder);

      fields.add(kj::mv(orphan));

784
      expectComma = true;
785 786 787 788 789
    }

    output.initObject(fields.size());
    auto object = output.getObject();

790
    for (auto i : kj::indices(fields)) {
791 792 793
      object.adoptWithCaveats(i, kj::mv(fields[i]));
    }

794
    input.consume('}');
795 796
  }

797
  bool inputExhausted() { return input.exhausted(); }
798

799
private:
800
  kj::String consumeQuotedString() {
801
    input.consume('"');
802 803 804 805 806
    // TODO(perf): Avoid copy / alloc if no escapes encoutered.
    // TODO(perf): Get statistics on string size and preallocate?
    kj::Vector<char> decoded;

    do {
807
      auto stringValue = input.consumeWhile([](const char chr) {
808 809 810 811 812
          return chr != '"' && chr != '\\';
      });

      decoded.addAll(stringValue);

813 814 815 816 817 818 819 820 821 822 823
      if (input.nextChar() == '\\') {  // handle escapes.
        input.advance();
        switch(input.nextChar()) {
          case '"' : decoded.add('"' ); input.advance(); break;
          case '\\': decoded.add('\\'); input.advance(); break;
          case '/' : decoded.add('/' ); input.advance(); break;
          case 'b' : decoded.add('\b'); input.advance(); break;
          case 'f' : decoded.add('\f'); input.advance(); break;
          case 'n' : decoded.add('\n'); input.advance(); break;
          case 'r' : decoded.add('\r'); input.advance(); break;
          case 't' : decoded.add('\t'); input.advance(); break;
824
          case 'u' :
825 826
            input.consume('u');
            unescapeAndAppend(input.consume(size_t(4)), decoded);
827
            break;
828
          default: KJ_FAIL_REQUIRE("Invalid escape in JSON string."); break;
829 830 831
        }
      }

832
    } while(input.nextChar() != '"');
833

834
    input.consume('"');
835 836 837 838 839 840
    decoded.add('\0');

    // TODO(perf): This copy can be eliminated, but I can't find the kj::wayToDoIt();
    return kj::String(decoded.releaseAsArray());
  }

841
  kj::String consumeNumber() {
842 843 844 845 846 847
    auto numArrayPtr = input.consumeCustom([](Input& input) {
      input.tryConsume('-');
      if (!input.tryConsume('0')) {
        input.consumeOne([](char c) { return '1' <= c && c <= '9'; });
        input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
      }
848

849 850 851
      if (input.tryConsume('.')) {
        input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
      }
852

853 854 855 856 857
      if (input.tryConsume('e') || input.tryConsume('E')) {
        input.tryConsume('+') || input.tryConsume('-');
        input.consumeWhile([](char c) { return '0' <= c && c <= '9'; });
      }
    });
858

859
    KJ_REQUIRE(numArrayPtr.size() > 0, "Expected number in JSON input.");
860 861

    kj::Vector<char> number;
862
    number.addAll(numArrayPtr);
863 864 865 866 867
    number.add('\0');

    return kj::String(number.releaseAsArray());
  }

868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
  // TODO(someday): This "interface" is ugly, and won't work if/when surrogates are handled.
  void unescapeAndAppend(kj::ArrayPtr<const char> hex, kj::Vector<char>& target) {
    KJ_REQUIRE(hex.size() == 4);
    int codePoint = 0;

    for (int i = 0; i < 4; ++i) {
      char c = hex[i];
      codePoint <<= 4;

      if ('0' <= c && c <= '9') {
        codePoint |= c - '0';
      } else if ('a' <= c && c <= 'f') {
        codePoint |= c - 'a';
      } else if ('A' <= c && c <= 'F') {
        codePoint |= c - 'A';
      } else {
884
        KJ_FAIL_REQUIRE("Invalid hex digit in unicode escape.", c);
885 886 887
      }
    }

888
    // TODO(soon): Support at least basic multi-lingual plane, ie ignore surrogates.
889 890 891 892
    KJ_REQUIRE(codePoint < 128, "non-ASCII unicode escapes are not supported (yet!)");
    target.add(0x7f & static_cast<char>(codePoint));
  }

893
  const size_t maxNestingDepth;
894
  Input input;
895
  size_t nestingDepth;
896

897

898 899
};  // class Parser

900 901
}  // namespace

902

903
void JsonCodec::decodeRaw(kj::ArrayPtr<const char> input, JsonValue::Builder output) const {
  // Parses JSON text into `output` under the configured nesting limit. Input left over after
  // the top-level value (and the whitespace following it) is an error.
  Parser parser(impl->maxNestingDepth, input);

  parser.parseValue(output);
  KJ_REQUIRE(parser.inputExhausted(), "Input remains after parsing JSON.");
}

// -----------------------------------------------------------------------------

912
Orphan<DynamicValue> JsonCodec::HandlerBase::decodeBase(
    const JsonCodec& codec, JsonValue::Reader input, Type type, Orphanage orphanage) const {
  // Base-class fallback: reaching this implementation always fails an assertion.
  KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
}
void JsonCodec::HandlerBase::decodeStructBase(
    const JsonCodec& codec, JsonValue::Reader input, DynamicStruct::Builder output) const {
  // Base-class fallback: reaching this implementation always fails an assertion.
  KJ_FAIL_ASSERT("JSON decoder handler type / value type mismatch");
}

void JsonCodec::addTypeHandlerImpl(Type type, HandlerBase& handler) {
  // Registers `handler` for `type`, replacing any handler stored for the same type before.
  // Only the handler's address is kept.
  auto& registry = impl->typeHandlers;
  registry[type] = &handler;
}

void JsonCodec::addFieldHandlerImpl(StructSchema::Field field, Type type, HandlerBase& handler) {
  // Registers `handler` for one specific struct field, after checking that the handler's type
  // matches the field's declared type. Only the handler's address is kept.
  KJ_REQUIRE(type == field.getType(),
      "handler type did not match field type for addFieldHandler()");

  auto& registry = impl->fieldHandlers;
  registry[field] = &handler;
}

} // namespace capnp