encode_decode.c 52 KB
Newer Older
Chris Fallin's avatar
Chris Fallin committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Protocol Buffers - Google's data interchange format
// Copyright 2014 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "protobuf.h"

33 34 35 36 37
// Appends `len` bytes starting at `str` to the Ruby string `rb_str` and
// returns `rb_str`. This is equivalent to rb_str_cat(), but unlike the real
// rb_str_cat(), it doesn't leak memory in some versions of Ruby.
// For more information, see:
//   https://bugs.ruby-lang.org/issues/11328
VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
  const size_t prev_len = RSTRING_LEN(rb_str);
  char *dest;

  // Make room first; the data pointer is only read after the expansion.
  rb_str_modify_expand(rb_str, len);
  dest = RSTRING_PTR(rb_str);
  dest += prev_len;

  memcpy(dest, str, len);
  rb_str_set_len(rb_str, prev_len + len);

  return rb_str;
}

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
// The code below also comes from upb's prototype Ruby binding, developed by
// haberman@.

/* stringsink *****************************************************************/

// Start-of-string callback for the sink: resets the accumulated length to
// zero and returns the sink itself as the closure. `hd` and `size_hint` are
// not used.
static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
  stringsink *self = _sink;

  (void)hd;
  (void)size_hint;

  self->len = 0;
  return self;
}

// String-data callback for the sink: appends `len` bytes from `ptr` to the
// sink's buffer, doubling the buffer capacity until the data fits.
//
// Returns `len` on success. Returns 0 (no bytes consumed) and leaves the sink
// unchanged when the required size would overflow size_t or when the buffer
// cannot be grown. `hd` and `handle` are not used.
static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
                                size_t len, const upb_bufhandle *handle) {
  stringsink *sink = _sink;
  size_t new_size = sink->size;
  size_t needed;

  UPB_UNUSED(hd);
  UPB_UNUSED(handle);

  // Refuse requests whose total length cannot be represented.
  if (len > SIZE_MAX - sink->len) {
    return 0;
  }
  needed = sink->len + len;

  if (needed > new_size) {
    void *grown;

    // Doubling from zero would never make progress.
    if (new_size == 0) {
      new_size = 1;
    }
    while (new_size < needed) {
      if (new_size > SIZE_MAX / 2) {
        // Doubling would wrap around; fall back to the exact size.
        new_size = needed;
        break;
      }
      new_size *= 2;
    }

    // Keep the old buffer reachable until realloc is known to have succeeded.
    grown = realloc(sink->ptr, new_size);
    if (grown == NULL) {
      return 0;
    }
    sink->ptr = grown;
    sink->size = new_size;
  }

  if (len > 0) {
    memcpy(sink->ptr + sink->len, ptr, len);
  }
  sink->len = needed;

  return len;
}

// Prepares `sink` for use: gives it an empty 32-byte buffer, installs
// stringsink_start / stringsink_string on its bytes handler, and resets its
// bytes sink to use that handler with `sink` as the closure.
void stringsink_init(stringsink *sink) {
  // Initial storage: 32 bytes of capacity, nothing written yet.
  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);

  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);

  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
}

// Releases the buffer owned by `sink`. The pointer is cleared afterwards so
// that a repeated call is harmless (free(NULL) is a no-op) and no dangling
// pointer is left behind.
void stringsink_uninit(stringsink *sink) {
  free(sink->ptr);
  sink->ptr = NULL;
}

Chris Fallin's avatar
Chris Fallin committed
97 98 99 100 101 102
// -----------------------------------------------------------------------------
// Parsing.
// -----------------------------------------------------------------------------

// Accesses, as an lvalue of type `type`, the object located `ofs` bytes past
// the address `msg`. Every argument and the whole expansion are parenthesized
// so that compound expressions (e.g. `p + 1`, `a + b`) are handled correctly.
#define DEREF(msg, ofs, type) (*(type *)(((uint8_t *)(msg)) + (ofs)))

103 104 105 106 107 108 109 110 111 112 113 114
// Handler data for a field: where its storage lives and which hasbit (if any)
// to set when the field is written.
typedef struct {
  // Byte offset of the field's storage, applied to the handler closure.
  size_t ofs;
  // Bit index handed to set_hasbit(); values <= 0 are ignored there.
  int32_t hasbit;
} field_handlerdata_t;

// Allocates a field_handlerdata_t carrying the offset and the hasbit for a
// field. The allocation is registered with `h` so that it is released with
// xfree when the handlers are cleaned up.
static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
  field_handlerdata_t *data = ALLOC(field_handlerdata_t);

  data->hasbit = hasbit;
  data->ofs = ofs;

  upb_handlers_addcleanup(h, data, xfree);
  return data;
}

typedef struct {
  size_t ofs;
119
  int32_t hasbit;
120
  VALUE subklass;
Chris Fallin's avatar
Chris Fallin committed
121 122 123
} submsg_handlerdata_t;

// Creates a handlerdata that contains offset and submessage type information.
124 125 126
static const void *newsubmsghandlerdata(upb_handlers* h,
                                        uint32_t ofs,
                                        int32_t hasbit,
127
                                        VALUE subklass) {
Chris Fallin's avatar
Chris Fallin committed
128 129
  submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
  hd->ofs = ofs;
130
  hd->hasbit = hasbit;
131
  hd->subklass = subklass;
132
  upb_handlers_addcleanup(h, hd, xfree);
Chris Fallin's avatar
Chris Fallin committed
133 134 135
  return hd;
}

136
typedef struct {
137 138 139
  size_t ofs;              // union data slot
  size_t case_ofs;         // oneof_case field
  uint32_t oneof_case_num; // oneof-case number to place in oneof_case field
140
  VALUE subklass;
141 142 143 144 145
} oneof_handlerdata_t;

static const void *newoneofhandlerdata(upb_handlers *h,
                                       uint32_t ofs,
                                       uint32_t case_ofs,
146 147
                                       const upb_fielddef *f,
                                       const Descriptor* desc) {
148 149 150
  oneof_handlerdata_t *hd = ALLOC(oneof_handlerdata_t);
  hd->ofs = ofs;
  hd->case_ofs = case_ofs;
151 152 153 154
  // We reuse the field tag number as a oneof union discriminant tag. Note that
  // we don't expose these numbers to the user, so the only requirement is that
  // we have some unique ID for each union case/possibility. The field tag
  // numbers are already present and are easy to use so there's no reason to
155 156
  // create a separate ID space. In addition, using the field tag number here
  // lets us easily look up the field in the oneof accessor.
157
  hd->oneof_case_num = upb_fielddef_number(f);
158 159 160
  if (is_value_field(f)) {
    hd->oneof_case_num |= ONEOF_CASE_MASK;
  }
161
  hd->subklass = field_type_class(desc->layout, f);
162
  upb_handlers_addcleanup(h, hd, xfree);
163 164 165
  return hd;
}

Chris Fallin's avatar
Chris Fallin committed
166 167 168 169 170
// A handler that starts a repeated field.  Gets the Repeated*Field instance for
// this field (such an instance always exists even in an empty message).
static void *startseq_handler(void* closure, const void* hd) {
  MessageHeader* msg = closure;
  const size_t *ofs = hd;
171
  return (void*)DEREF(msg, *ofs, VALUE);
Chris Fallin's avatar
Chris Fallin committed
172 173
}

174
// Handlers that append primitive values to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
#define DEFINE_APPEND_HANDLER(type, ctype)                 \
  static bool append##type##_handler(void *closure, const void *hd, \
                                     ctype val) {                   \
    VALUE ary = (VALUE)closure;                                     \
    RepeatedField_push_native(ary, &val);                           \
    return true;                                                    \
  }

DEFINE_APPEND_HANDLER(bool,   bool)
DEFINE_APPEND_HANDLER(int32,  int32_t)
DEFINE_APPEND_HANDLER(uint32, uint32_t)
DEFINE_APPEND_HANDLER(float,  float)
DEFINE_APPEND_HANDLER(int64,  int64_t)
DEFINE_APPEND_HANDLER(uint64, uint64_t)
DEFINE_APPEND_HANDLER(double, double)

191
// Appends a string to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
192 193 194 195 196 197
static void* appendstr_handler(void *closure,
                               const void *hd,
                               size_t size_hint) {
  VALUE ary = (VALUE)closure;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
198
  RepeatedField_push_native(ary, &str);
Chris Fallin's avatar
Chris Fallin committed
199 200 201
  return (void*)str;
}

202 203 204 205 206 207 208
// Sets bit number `hasbit` in the byte array that starts at `closure`
// (bit `hasbit % 8` of byte `hasbit / 8`). Zero and negative values of
// `hasbit` are ignored.
static void set_hasbit(void *closure, int32_t hasbit) {
  uint8_t *bytes;

  if (hasbit <= 0) {
    return;
  }

  bytes = closure;
  bytes[hasbit / 8] |= 1 << (hasbit % 8);
}

209
// Appends a 'bytes' string to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
210 211 212 213 214 215
static void* appendbytes_handler(void *closure,
                                 const void *hd,
                                 size_t size_hint) {
  VALUE ary = (VALUE)closure;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
216
  RepeatedField_push_native(ary, &str);
Chris Fallin's avatar
Chris Fallin committed
217 218 219 220 221 222 223 224
  return (void*)str;
}

// Sets a non-repeated string field in a message.
static void* str_handler(void *closure,
                         const void *hd,
                         size_t size_hint) {
  MessageHeader* msg = closure;
225 226
  const field_handlerdata_t *fieldhandler = hd;

Chris Fallin's avatar
Chris Fallin committed
227 228
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
229 230
  DEREF(msg, fieldhandler->ofs, VALUE) = str;
  set_hasbit(closure, fieldhandler->hasbit);
Chris Fallin's avatar
Chris Fallin committed
231 232 233 234 235 236 237 238
  return (void*)str;
}

// Sets a non-repeated 'bytes' field in a message.
static void* bytes_handler(void *closure,
                           const void *hd,
                           size_t size_hint) {
  MessageHeader* msg = closure;
239 240
  const field_handlerdata_t *fieldhandler = hd;

Chris Fallin's avatar
Chris Fallin committed
241 242
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
243 244
  DEREF(msg, fieldhandler->ofs, VALUE) = str;
  set_hasbit(closure, fieldhandler->hasbit);
Chris Fallin's avatar
Chris Fallin committed
245 246 247 248 249 250 251
  return (void*)str;
}

static size_t stringdata_handler(void* closure, const void* hd,
                                 const char* str, size_t len,
                                 const upb_bufhandle* handle) {
  VALUE rb_str = (VALUE)closure;
252
  noleak_rb_str_cat(rb_str, str, len);
Chris Fallin's avatar
Chris Fallin committed
253 254 255
  return len;
}

256
static bool stringdata_end_handler(void* closure, const void* hd) {
257
  VALUE rb_str = (VALUE)closure;
258 259 260 261 262
  rb_obj_freeze(rb_str);
  return true;
}

static bool appendstring_end_handler(void* closure, const void* hd) {
263
  VALUE rb_str = (VALUE)closure;
264 265 266 267
  rb_obj_freeze(rb_str);
  return true;
}

Chris Fallin's avatar
Chris Fallin committed
268 269 270 271
// Appends a submessage to a repeated field (a regular Ruby array for now).
static void *appendsubmsg_handler(void *closure, const void *hd) {
  VALUE ary = (VALUE)closure;
  const submsg_handlerdata_t *submsgdata = hd;
272
  MessageHeader* submsg;
Chris Fallin's avatar
Chris Fallin committed
273

274
  VALUE submsg_rb = rb_class_new_instance(0, NULL, submsgdata->subklass);
Chris Fallin's avatar
Chris Fallin committed
275 276 277 278 279 280 281 282 283 284
  RepeatedField_push(ary, submsg_rb);

  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
  return submsg;
}

// Sets a non-repeated submessage field in a message.
static void *submsg_handler(void *closure, const void *hd) {
  MessageHeader* msg = closure;
  const submsg_handlerdata_t* submsgdata = hd;
285 286
  VALUE submsg_rb;
  MessageHeader* submsg;
Chris Fallin's avatar
Chris Fallin committed
287

288 289
  if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
    DEREF(msg, submsgdata->ofs, VALUE) =
290
        rb_class_new_instance(0, NULL, submsgdata->subklass);
Chris Fallin's avatar
Chris Fallin committed
291 292
  }

293 294
  set_hasbit(closure, submsgdata->hasbit);

295
  submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
Chris Fallin's avatar
Chris Fallin committed
296
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
297

Chris Fallin's avatar
Chris Fallin committed
298 299 300
  return submsg;
}

301 302 303
// Handler data for startmap/endmap handlers.
typedef struct {
  size_t ofs;
304 305
  upb_fieldtype_t key_field_type;
  upb_fieldtype_t value_field_type;
306
  VALUE subklass;
307 308 309 310 311 312 313 314 315 316
} map_handlerdata_t;

// Temporary frame for map parsing: at the beginning of a map entry message, a
// submsg handler allocates a frame to hold (i) a reference to the Map object
// into which this message will be inserted and (ii) storage slots to
// temporarily hold the key and value for this map entry until the end of the
// submessage. When the submessage ends, another handler is called to insert the
// value into the map.
typedef struct {
  VALUE map;
317
  const map_handlerdata_t* handlerdata;
318 319 320 321
  char key_storage[NATIVE_SLOT_MAX_SIZE];
  char value_storage[NATIVE_SLOT_MAX_SIZE];
} map_parse_frame_t;

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
static void MapParseFrame_mark(void* _self) {
  map_parse_frame_t* frame = _self;

  // This shouldn't strictly be necessary since this should be rooted by the
  // message itself, but it can't hurt.
  rb_gc_mark(frame->map);

  native_slot_mark(frame->handlerdata->key_field_type, &frame->key_storage);
  native_slot_mark(frame->handlerdata->value_field_type, &frame->value_storage);
}

// Free callback for a map parse frame: releases the frame with xfree.
void MapParseFrame_free(void* self) {
  xfree(self);
}

// Typed-data descriptor used when wrapping a map_parse_frame_t in a Ruby
// object: named "MapParseFrame", with MapParseFrame_mark and
// MapParseFrame_free as its first two callbacks and NULL for the third.
rb_data_type_t MapParseFrame_type = {
  "MapParseFrame",
  { MapParseFrame_mark, MapParseFrame_free, NULL },
};

342 343 344 345 346
// Handler to begin a map entry: allocates a temporary frame. This is the
// 'startsubmsg' handler on the msgdef that contains the map field.
static void *startmap_handler(void *closure, const void *hd) {
  MessageHeader* msg = closure;
  const map_handlerdata_t* mapdata = hd;
347
  map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
348
  VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
349

350 351 352 353 354 355 356
  frame->handlerdata = mapdata;
  frame->map = map_rb;
  native_slot_init(mapdata->key_field_type, &frame->key_storage);
  native_slot_init(mapdata->value_field_type, &frame->value_storage);

  Map_set_frame(map_rb,
                TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame));
357 358 359 360

  return frame;
}

361
static bool endmap_handler(void *closure, const void *hd) {
362 363
  MessageHeader* msg = closure;
  const map_handlerdata_t* mapdata = hd;
364
  VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
365 366
  Map_set_frame(map_rb, Qnil);
  return true;
367 368 369 370
}

// Handler to end a map entry: inserts the value defined during the message into
// the map. This is the 'endmsg' handler on the map entry msgdef.
371
static bool endmapentry_handler(void* closure, const void* hd, upb_status* s) {
372 373 374 375
  map_parse_frame_t* frame = closure;
  const map_handlerdata_t* mapdata = hd;

  VALUE key = native_slot_get(
376
      mapdata->key_field_type, Qnil,
377
      &frame->key_storage);
378

379 380
  VALUE value = native_slot_get(
      mapdata->value_field_type, mapdata->subklass,
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
      &frame->value_storage);

  Map_index_set(frame->map, key, value);

  return true;
}

// Allocates a new map_handlerdata_t given the map entry message definition. If
// the offset of the field within the parent message is also given, that is
// added to the handler data as well. Note that this is called *twice* per map
// field: once in the parent message handler setup when setting the startsubmsg
// handler and once in the map entry message handler setup when setting the
// key/value and endmsg handlers. The reason is that there is no easy way to
// pass the handlerdata down to the sub-message handler setup.
static map_handlerdata_t* new_map_handlerdata(
    size_t ofs,
397
    const upb_msgdef* mapentry_def,
398
    const Descriptor* desc) {
399 400
  const upb_fielddef* key_field;
  const upb_fielddef* value_field;
401 402
  map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
  hd->ofs = ofs;
403
  key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
404 405
  assert(key_field != NULL);
  hd->key_field_type = upb_fielddef_type(key_field);
406
  value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
407 408
  assert(value_field != NULL);
  hd->value_field_type = upb_fielddef_type(value_field);
409
  hd->subklass = field_type_class(desc->layout, value_field);
410

411 412 413
  return hd;
}

414 415 416 417 418
// Handlers that set primitive values in oneofs.
#define DEFINE_ONEOF_HANDLER(type, ctype)                           \
  static bool oneof##type##_handler(void *closure, const void *hd,  \
                                     ctype val) {                   \
    const oneof_handlerdata_t *oneofdata = hd;                      \
419 420
    DEREF(closure, oneofdata->case_ofs, uint32_t) =                 \
        oneofdata->oneof_case_num;                                  \
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
    DEREF(closure, oneofdata->ofs, ctype) = val;                    \
    return true;                                                    \
  }

DEFINE_ONEOF_HANDLER(bool,   bool)
DEFINE_ONEOF_HANDLER(int32,  int32_t)
DEFINE_ONEOF_HANDLER(uint32, uint32_t)
DEFINE_ONEOF_HANDLER(float,  float)
DEFINE_ONEOF_HANDLER(int64,  int64_t)
DEFINE_ONEOF_HANDLER(uint64, uint64_t)
DEFINE_ONEOF_HANDLER(double, double)

#undef DEFINE_ONEOF_HANDLER

// Handlers for strings in a oneof.
static void *oneofstr_handler(void *closure,
                              const void *hd,
                              size_t size_hint) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
443 444
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
445 446 447 448 449 450 451 452 453 454 455
  DEREF(msg, oneofdata->ofs, VALUE) = str;
  return (void*)str;
}

static void *oneofbytes_handler(void *closure,
                                const void *hd,
                                size_t size_hint) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
456 457
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
458 459 460 461
  DEREF(msg, oneofdata->ofs, VALUE) = str;
  return (void*)str;
}

462
static bool oneofstring_end_handler(void* closure, const void* hd) {
463 464
  VALUE rb_str = rb_str_new2("");
  rb_obj_freeze(rb_str);
465 466 467
  return true;
}

468 469 470 471 472 473 474
// Handler for a submessage field in a oneof.
static void *oneofsubmsg_handler(void *closure,
                                 const void *hd) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t);

475 476
  VALUE submsg_rb;
  MessageHeader* submsg;
477

478
  if (oldcase != oneofdata->oneof_case_num ||
479 480
      DEREF(msg, oneofdata->ofs, VALUE) == Qnil) {
    DEREF(msg, oneofdata->ofs, VALUE) =
481
        rb_class_new_instance(0, NULL, oneofdata->subklass);
482
  }
483 484 485 486 487 488
  // Set the oneof case *after* allocating the new class instance -- otherwise,
  // if the Ruby GC is invoked as part of a call into the VM, it might invoke
  // our mark routines, and our mark routines might see the case value
  // indicating a VALUE is present and expect a valid VALUE. See comment in
  // layout_set() for more detail: basically, the change to the value and the
  // case must be atomic w.r.t. the Ruby VM.
489 490
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
491

492
  submsg_rb = DEREF(msg, oneofdata->ofs, VALUE);
493 494 495 496
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
  return submsg;
}

497 498
// Set up handlers for a repeated field.
static void add_handlers_for_repeated_field(upb_handlers *h,
499
                                            const Descriptor* desc,
500 501
                                            const upb_fielddef *f,
                                            size_t offset) {
502 503
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
  attr.handler_data = newhandlerdata(h, offset, -1);
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
  upb_handlers_setstartseq(h, f, startseq_handler, &attr);

  switch (upb_fielddef_type(f)) {

#define SET_HANDLER(utype, ltype)                                 \
  case utype:                                                     \
    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
    break;

    SET_HANDLER(UPB_TYPE_BOOL,   bool);
    SET_HANDLER(UPB_TYPE_INT32,  int32);
    SET_HANDLER(UPB_TYPE_UINT32, uint32);
    SET_HANDLER(UPB_TYPE_ENUM,   int32);
    SET_HANDLER(UPB_TYPE_FLOAT,  float);
    SET_HANDLER(UPB_TYPE_INT64,  int64);
    SET_HANDLER(UPB_TYPE_UINT64, uint64);
    SET_HANDLER(UPB_TYPE_DOUBLE, double);

#undef SET_HANDLER

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
      upb_handlers_setstartstr(h, f, is_bytes ?
                               appendbytes_handler : appendstr_handler,
                               NULL);
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
531
      upb_handlers_setendstr(h, f, appendstring_end_handler, NULL);
532
      break;
533 534
    }
    case UPB_TYPE_MESSAGE: {
535 536 537
      VALUE subklass = field_type_class(desc->layout, f);
      upb_handlerattr attr = UPB_HANDLERATTR_INIT;
      attr.handler_data = newsubmsghandlerdata(h, 0, -1, subklass);
538 539 540 541 542 543 544
      upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
      break;
    }
  }
}

// Set up handlers for a singular field.
545 546 547 548
static void add_handlers_for_singular_field(const Descriptor* desc,
                                            upb_handlers* h,
                                            const upb_fielddef* f,
                                            size_t offset, size_t hasbit_off) {
549 550 551 552 553 554 555
  // The offset we pass to UPB points to the start of the Message,
  // rather than the start of where our data is stored.
  int32_t hasbit = -1;
  if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) {
    hasbit = hasbit_off + sizeof(MessageHeader) * 8;
  }

556 557 558 559 560 561 562 563 564
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
565
      upb_msg_setscalarhandler(h, f, offset, hasbit);
566 567 568 569
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
570 571
      upb_handlerattr attr = UPB_HANDLERATTR_INIT;
      attr.handler_data = newhandlerdata(h, offset, hasbit);
572 573 574 575
      upb_handlers_setstartstr(h, f,
                               is_bytes ? bytes_handler : str_handler,
                               &attr);
      upb_handlers_setstring(h, f, stringdata_handler, &attr);
576
      upb_handlers_setendstr(h, f, stringdata_end_handler, &attr);
577 578 579
      break;
    }
    case UPB_TYPE_MESSAGE: {
580 581 582
      upb_handlerattr attr = UPB_HANDLERATTR_INIT;
      attr.handler_data = newsubmsghandlerdata(
          h, offset, hasbit, field_type_class(desc->layout, f));
583 584 585 586 587 588 589 590 591
      upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
      break;
    }
  }
}

// Adds handlers to a map field.
static void add_handlers_for_mapfield(upb_handlers* h,
                                      const upb_fielddef* fielddef,
592
                                      size_t offset,
593
                                      const Descriptor* desc) {
594
  const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
595
  map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
596
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
597

598
  upb_handlers_addcleanup(h, hd, xfree);
599
  attr.handler_data = hd;
600 601
  upb_handlers_setstartsubmsg(h, fielddef, startmap_handler, &attr);
  upb_handlers_setendsubmsg(h, fielddef, endmap_handler, &attr);
602 603 604
}

// Adds handlers to a map-entry msgdef.
605 606
static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h,
                                      const Descriptor* desc) {
607 608
  const upb_fielddef* key_field = map_entry_key(msgdef);
  const upb_fielddef* value_field = map_entry_value(msgdef);
609
  map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
610
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
611

612
  upb_handlers_addcleanup(h, hd, xfree);
613
  attr.handler_data = hd;
614
  upb_handlers_setendmsg(h, endmapentry_handler, &attr);
615 616

  add_handlers_for_singular_field(
617
      desc, h, key_field,
618 619
      offsetof(map_parse_frame_t, key_storage),
      MESSAGE_FIELD_NO_HASBIT);
620
  add_handlers_for_singular_field(
621
      desc, h, value_field,
622 623
      offsetof(map_parse_frame_t, value_storage),
      MESSAGE_FIELD_NO_HASBIT);
624 625
}

626 627 628 629
// Set up handlers for a oneof field. One oneof handler data record (data slot
// offset, case offset, case number, Ruby class) is created for the field and
// shared by the handlers chosen according to the field's type.
static void add_handlers_for_oneof_field(upb_handlers *h,
                                         const upb_fielddef *f,
                                         size_t offset,
                                         size_t oneof_case_offset,
                                         const Descriptor* desc) {
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
  attr.handler_data =
      newoneofhandlerdata(h, offset, oneof_case_offset, f, desc);

  switch (upb_fielddef_type(f)) {

#define SET_HANDLER(utype, ltype)                                 \
  case utype:                                                     \
    upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \
    break;

    SET_HANDLER(UPB_TYPE_BOOL,   bool);
    SET_HANDLER(UPB_TYPE_INT32,  int32);
    SET_HANDLER(UPB_TYPE_UINT32, uint32);
    SET_HANDLER(UPB_TYPE_ENUM,   int32);
    SET_HANDLER(UPB_TYPE_FLOAT,  float);
    SET_HANDLER(UPB_TYPE_INT64,  int64);
    SET_HANDLER(UPB_TYPE_UINT64, uint64);
    SET_HANDLER(UPB_TYPE_DOUBLE, double);

#undef SET_HANDLER

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      // Only the start handler differs between string and bytes members.
      if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, oneofbytes_handler, &attr);
      } else {
        upb_handlers_setstartstr(h, f, oneofstr_handler, &attr);
      }
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
      upb_handlers_setendstr(h, f, oneofstring_end_handler, &attr);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr);
      break;
    }
  }
}

671 672
static bool unknown_field_handler(void* closure, const void* hd,
                                  const char* buf, size_t size) {
673
  MessageHeader* msg = (MessageHeader*)closure;
674 675 676 677 678 679 680 681 682 683 684
  UPB_UNUSED(hd);

  if (msg->unknown_fields == NULL) {
    msg->unknown_fields = malloc(sizeof(stringsink));
    stringsink_init(msg->unknown_fields);
  }

  stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);

  return true;
}
685

686 687
// Installs the parse handlers for the message type that `h` was created for.
// `closure` is the descriptor pool (a Ruby VALUE) used to look up the
// message's Descriptor. Map entry messages get the dedicated map-entry
// handlers; every other message gets an unknown-field handler plus per-field
// handlers selected by field kind (oneof member, map, repeated, singular).
void add_handlers_for_message(const void *closure, upb_handlers *h) {
  const VALUE descriptor_pool = (VALUE)closure;
  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
  Descriptor* desc =
      ruby_to_Descriptor(get_msgdef_obj(descriptor_pool, msgdef));
  upb_msg_field_iter it;
  upb_handlerattr attr = UPB_HANDLERATTR_INIT;

  // Ensure layout exists. We may be invoked to create handlers for a given
  // message if we are included as a submsg of another message type before our
  // class is actually built, so to work around this, we just create the layout
  // (and handlers, in the class-building function) on-demand.
  if (desc->layout == NULL) {
    create_layout(desc);
  }

  // If this is a mapentry message type, set up a special set of handlers and
  // bail out of the normal (user-defined) message type handling.
  if (upb_msgdef_mapentry(msgdef)) {
    add_handlers_for_mapentry(msgdef, h, desc);
    return;
  }

  upb_handlers_setunknown(h, unknown_field_handler, &attr);

  for (upb_msg_field_begin(&it, desc->msgdef);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef *f = upb_msg_iter_field(&it);
    const upb_oneofdef *oneof = upb_fielddef_containingoneof(f);
    // Field offsets in the layout are relative to the data that follows the
    // MessageHeader, so the header size is added here.
    size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset +
        sizeof(MessageHeader);

    if (oneof) {
      size_t oneof_case_offset =
          desc->layout->oneofs[upb_oneofdef_index(oneof)].case_offset +
          sizeof(MessageHeader);
      add_handlers_for_oneof_field(h, f, offset, oneof_case_offset, desc);
      continue;
    }

    if (is_map_field(f)) {
      add_handlers_for_mapfield(h, f, offset, desc);
      continue;
    }

    if (upb_fielddef_isseq(f)) {
      add_handlers_for_repeated_field(h, desc, f, offset);
      continue;
    }

    add_handlers_for_singular_field(
        desc, h, f, offset,
        desc->layout->fields[upb_fielddef_index(f)].hasbit);
  }
}

// Returns the handlers for filling a message's data into an in-memory object,
// obtained from the fill handler cache of the descriptor's pool.
const upb_handlers* get_fill_handlers(Descriptor* desc) {
  DescriptorPool* owner = ruby_to_DescriptorPool(desc->descriptor_pool);
  const upb_handlers* handlers =
      upb_handlercache_get(owner->fill_handler_cache, desc->msgdef);

  return handlers;
}

743 744 745 746
// Returns the protobuf decoder method for the descriptor's message type,
// obtained from the fill method cache of the descriptor's pool.
static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) {
  DescriptorPool* owner = ruby_to_DescriptorPool(desc->descriptor_pool);
  const upb_pbdecodermethod *method =
      upb_pbcodecache_get(owner->fill_method_cache, desc->msgdef);

  return method;
}
Chris Fallin's avatar
Chris Fallin committed
747

748 749 750
// Returns the JSON parser method for the descriptor's message type, obtained
// from the JSON fill method cache of the descriptor's pool.
static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) {
  DescriptorPool* owner = ruby_to_DescriptorPool(desc->descriptor_pool);
  const upb_json_parsermethod *method =
      upb_json_codecache_get(owner->json_fill_method_cache, desc->msgdef);

  return method;
}

753 754 755
// Returns the protobuf serialization handlers for the descriptor's message
// type, obtained from the pb serialize handler cache of the descriptor's pool.
static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
  DescriptorPool* owner = ruby_to_DescriptorPool(desc->descriptor_pool);
  const upb_handlers* handlers =
      upb_handlercache_get(owner->pb_serialize_handler_cache, desc->msgdef);

  return handlers;
}

758 759 760 761 762 763 764 765 766
// Fetches the JSON serialization handlers for |desc|'s message definition.
// Two caches exist on the descriptor pool; |preserve_proto_fieldnames| picks
// json_serialize_handler_preserve_cache when true and
// json_serialize_handler_cache otherwise.
static const upb_handlers* msgdef_json_serialize_handlers(
    Descriptor* desc, bool preserve_proto_fieldnames) {
  DescriptorPool* owner = ruby_to_DescriptorPool(desc->descriptor_pool);

  return upb_handlercache_get(
      preserve_proto_fieldnames ? owner->json_serialize_handler_preserve_cache
                                : owner->json_serialize_handler_cache,
      desc->msgdef);
}

770 771 772 773 774 775 776

// Stack-allocated context during an encode/decode operation. Contains the upb
// environment and its stack-based allocator, an initial buffer for allocations
// to avoid malloc() when possible, and a template for Ruby exception messages
// if any error occurs.
#define STACK_ENV_STACKBYTES 4096
typedef struct {
777 778
  upb_arena *arena;
  upb_status status;
779 780 781 782 783 784 785 786 787
  const char* ruby_error_template;
  char allocbuf[STACK_ENV_STACKBYTES];
} stackenv;

static void stackenv_init(stackenv* se, const char* errmsg);
static void stackenv_uninit(stackenv* se);

// Prepares |se| for a single encode/decode operation: clears the status,
// creates the arena over the embedded buffer (passing upb_alloc_global to
// upb_arena_init), and remembers |errmsg| as the exception message template.
static void stackenv_init(stackenv* se, const char* errmsg) {
  upb_status_clear(&se->status);
  se->arena =
      upb_arena_init(se->allocbuf, sizeof(se->allocbuf), &upb_alloc_global);
  se->ruby_error_template = errmsg;
}

// Tears down |se|. The arena is always freed first; afterwards, if the status
// is not ok, a cParseError is raised whose message is the stored template
// formatted with the status error message.
static void stackenv_uninit(stackenv* se) {
  upb_arena_free(se->arena);

  if (upb_ok(&se->status)) {
    return;
  }

  // TODO(haberman): have a way to verify that this is actually a parse error,
  // instead of just throwing "parse error" unconditionally.
  rb_raise(cParseError, se->ruby_error_template,
           rb_str_new2(upb_status_errmsg(&se->status)));
}

Chris Fallin's avatar
Chris Fallin committed
804 805 806 807 808 809 810 811 812
/*
 * call-seq:
 *     MessageClass.decode(data) => message
 *
 * Decodes the given data (as a string containing bytes in protocol buffers wire
 * format) under the interpretation given by this message class's definition
 * and returns a message object with the corresponding field values.
 *
 * Raises ArgumentError if data is not a String. A failed parse surfaces as the
 * exception raised by stackenv_uninit().
 */
VALUE Message_decode(VALUE klass, VALUE data) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  VALUE msgklass = Descriptor_msgclass(descriptor);
  const upb_pbdecodermethod* method;
  const upb_handlers* fill_handlers;
  upb_pbdecoder* decoder;
  upb_sink sink;
  stackenv se;
  MessageHeader* msg;
  VALUE msg_rb;

  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for binary protobuf data.");
  }

  // Create the empty destination message and get at its native header.
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  method = msgdef_decodermethod(desc);
  fill_handlers = upb_pbdecodermethod_desthandlers(method);

  stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE);

  // Wire the decoder to a sink that writes into |msg|, then feed it the bytes.
  upb_sink_reset(&sink, fill_handlers, msg);
  decoder = upb_pbdecoder_create(se.arena, method, sink, &se.status);
  upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
                    upb_pbdecoder_input(decoder));

  // Frees the arena and raises if the status recorded an error.
  stackenv_uninit(&se);

  return msg_rb;
}

/*
 * call-seq:
847
 *     MessageClass.decode_json(data, options = {}) => message
Chris Fallin's avatar
Chris Fallin committed
848 849 850 851
 *
 * Decodes the given data (as a string containing bytes in protocol buffers wire
 * format) under the interpretration given by this message class's definition
 * and returns a message object with the corresponding field values.
852
 *
853 854 855
 *  @param options [Hash] options for the decoder
 *   ignore_unknown_fields: set true to ignore unknown fields (default is to
 *   raise an error)
Chris Fallin's avatar
Chris Fallin committed
856
 */
857
VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
858
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
Chris Fallin's avatar
Chris Fallin committed
859 860
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  VALUE msgklass = Descriptor_msgclass(descriptor);
861
  VALUE msg_rb;
862 863
  VALUE data = argv[0];
  VALUE ignore_unknown_fields = Qfalse;
864
  MessageHeader* msg;
Chris Fallin's avatar
Chris Fallin committed
865

866 867 868 869 870 871 872 873 874 875 876 877 878 879
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }

    ignore_unknown_fields = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse);
  }

Chris Fallin's avatar
Chris Fallin committed
880 881 882
  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for JSON data.");
  }
883

Chris Fallin's avatar
Chris Fallin committed
884 885 886 887
  // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to
  // convert, because string handlers pass data directly to message string
  // fields.

888
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
Chris Fallin's avatar
Chris Fallin committed
889 890
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

891
  {
892
    const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
893 894 895
    stackenv se;
    upb_sink sink;
    upb_json_parser* parser;
896
    DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
897
    stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE);
Chris Fallin's avatar
Chris Fallin committed
898

899
    upb_sink_reset(&sink, get_fill_handlers(desc), msg);
900 901
    parser = upb_json_parser_create(se.arena, method, pool->symtab, sink,
                                    &se.status, RTEST(ignore_unknown_fields));
902 903
    upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
                      upb_json_parser_input(parser));
Chris Fallin's avatar
Chris Fallin committed
904

905 906
    stackenv_uninit(&se);
  }
Chris Fallin's avatar
Chris Fallin committed
907 908 909 910 911 912 913 914 915 916

  return msg_rb;
}

// -----------------------------------------------------------------------------
// Serializing.
// -----------------------------------------------------------------------------

/* msgvisitor *****************************************************************/

917 918
static void putmsg(VALUE msg, const Descriptor* desc, upb_sink sink, int depth,
                   bool emit_defaults, bool is_json, bool open_msg);
Chris Fallin's avatar
Chris Fallin committed
919 920 921 922

// Looks up the selector for handler |type| on field |f| and returns it. The
// lookup is expected to succeed; this is checked with UPB_ASSERT.
static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
  upb_selector_t selector;
  bool found = upb_handlers_getselector(f, type, &selector);
  UPB_ASSERT(found);
  return selector;
}

927
// Emits the Ruby string |str| for field |f| into |sink| as a
// startstr / string / endstr sequence. A nil |str| emits nothing.
static void putstr(VALUE str, const upb_fielddef *f, upb_sink sink) {
  upb_sink str_sink;

  if (str == Qnil) {
    return;
  }

  assert(BUILTIN_TYPE(str) == RUBY_T_STRING);

  // We should be guaranteed that the string has the correct encoding because
  // we ensured this at assignment time and then froze the string.
  if (upb_fielddef_type(f) != UPB_TYPE_STRING) {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyString8bitEncoding);
  } else {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyStringUtf8Encoding);
  }

  // The string's length doubles as the size hint for startstr.
  upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str),
                    &str_sink);
  upb_sink_putstring(str_sink, getsel(f, UPB_HANDLER_STRING),
                     RSTRING_PTR(str), RSTRING_LEN(str), NULL);
  upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
}

949
// Emits the sub-message |submsg| for field |f| into |sink|, bracketed by
// startsubmsg / endsubmsg. The descriptor is read from |submsg| itself and the
// nested message is written with putmsg() at depth + 1. A nil |submsg| emits
// nothing.
static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink sink,
                      int depth, bool emit_defaults, bool is_json) {
  upb_sink child_sink;
  Descriptor* subdesc;

  if (submsg == Qnil) {
    return;
  }

  subdesc = ruby_to_Descriptor(
      rb_ivar_get(submsg, descriptor_instancevar_interned));

  upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &child_sink);
  putmsg(submsg, subdesc, child_sink, depth + 1, emit_defaults, is_json, true);
  upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
}

965 966
// Emits the repeated field |ary| for field |f| into |sink| as a
// startseq / elements / endseq sequence.
//
// Nothing is emitted when |ary| is nil, or when it is empty and
// |emit_defaults| is false. Primitive elements are written directly with the
// field's primitive selector; strings/bytes go through putstr() and messages
// through putsubmsg() (which increments the depth itself).
static void putary(VALUE ary, const upb_fielddef* f, upb_sink sink, int depth,
                   bool emit_defaults, bool is_json) {
  upb_sink subsink;
  upb_fieldtype_t type = upb_fielddef_type(f);
  upb_selector_t sel = 0;
  int size;
  int i;

  if (ary == Qnil) return;

  // Single emptiness check: the length is converted once and reused as the
  // loop bound below.
  size = NUM2INT(RepeatedField_length(ary));
  if (size == 0 && !emit_defaults) return;

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);

  // Only primitive fields have a value selector; it stays 0 otherwise and is
  // not used by the string/message branches.
  if (upb_fielddef_isprimitive(f)) {
    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  for (i = 0; i < size; i++) {
    void* memory = RepeatedField_index_native(ary, i);
    switch (type) {
#define T(upbtypeconst, upbtype, ctype)                     \
  case upbtypeconst:                                        \
    upb_sink_put##upbtype(subsink, sel, *((ctype*)memory)); \
    break;

      T(UPB_TYPE_FLOAT,  float,  float)
      T(UPB_TYPE_DOUBLE, double, double)
      T(UPB_TYPE_BOOL,   bool,   int8_t)
      // Enums share the int32 case that follows.
      case UPB_TYPE_ENUM:
      T(UPB_TYPE_INT32,  int32,  int32_t)
      T(UPB_TYPE_UINT32, uint32, uint32_t)
      T(UPB_TYPE_INT64,  int64,  int64_t)
      T(UPB_TYPE_UINT64, uint64, uint64_t)

      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        putstr(*((VALUE *)memory), f, subsink);
        break;
      case UPB_TYPE_MESSAGE:
        putsubmsg(*((VALUE*)memory), f, subsink, depth, emit_defaults, is_json);
        break;

#undef T

    }
  }
  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}

1017 1018
// Emits a single Ruby |value| for field |f| into |sink|, converting it to the
// native type selected by the field's upb type. |type_class| is only consulted
// for enum fields, to resolve a Symbol into its number. Raises RuntimeError
// when |depth| exceeds ENCODE_MAX_NESTING.
static void put_ruby_value(VALUE value, const upb_fielddef* f, VALUE type_class,
                           int depth, upb_sink sink, bool emit_defaults,
                           bool is_json) {
  upb_selector_t sel;

  if (depth > ENCODE_MAX_NESTING) {
    rb_raise(rb_eRuntimeError,
             "Maximum recursion depth exceeded during encoding.");
  }

  // Non-primitive fields (strings, bytes, messages) do not use |sel|.
  sel = upb_fielddef_isprimitive(f)
            ? getsel(f, upb_handlers_getprimitivehandlertype(f))
            : 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_ENUM:
      // A Symbol is first resolved to its numeric value; the number is then
      // written exactly like an int32.
      if (TYPE(value) == T_SYMBOL) {
        value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
      }
      /* fallthrough */
    case UPB_TYPE_INT32:
      upb_sink_putint32(sink, sel, NUM2INT(value));
      break;
    case UPB_TYPE_INT64:
      upb_sink_putint64(sink, sel, NUM2LL(value));
      break;
    case UPB_TYPE_UINT32:
      upb_sink_putuint32(sink, sel, NUM2UINT(value));
      break;
    case UPB_TYPE_UINT64:
      upb_sink_putuint64(sink, sel, NUM2ULL(value));
      break;
    case UPB_TYPE_FLOAT:
      upb_sink_putfloat(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_DOUBLE:
      upb_sink_putdouble(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_BOOL:
      upb_sink_putbool(sink, sel, value == Qtrue);
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      putstr(value, f, sink);
      break;
    case UPB_TYPE_MESSAGE:
      putsubmsg(value, f, sink, depth, emit_defaults, is_json);
      break;
  }
}

1069 1070
// Emits the map |map| for field |f| into |sink|. The map is written as a
// sequence in which every entry is a sub-message holding the key and then the
// value, each emitted through put_ruby_value() at depth + 1.
//
// A nil |map| emits nothing.
//
// NOTE(review): the emptiness test below compares Map_length()'s return value
// with 0 directly, whereas putary() unwraps RepeatedField_length() with
// NUM2INT first. Map_length()'s return type is not visible here; if it returns
// a Ruby VALUE this comparison would never be true -- confirm.
static void putmap(VALUE map, const upb_fielddef* f, upb_sink sink, int depth,
                   bool emit_defaults, bool is_json) {
  Map* self;
  upb_sink seq_sink;
  const upb_fielddef* key_field;
  const upb_fielddef* value_field;
  Map_iter it;

  if (map == Qnil) return;
  if (!emit_defaults && Map_length(map) == 0) return;

  self = ruby_to_Map(map);

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &seq_sink);

  assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
  key_field = map_field_key(f);
  value_field = map_field_value(f);

  Map_begin(map, &it);
  while (!Map_done(&it)) {
    VALUE key = Map_iter_key(&it);
    VALUE value = Map_iter_value(&it);
    upb_status status;
    upb_sink entry_sink;

    // Open one entry sub-message per key/value pair.
    upb_sink_startsubmsg(seq_sink, getsel(f, UPB_HANDLER_STARTSUBMSG),
                         &entry_sink);
    upb_sink_startmsg(entry_sink);

    // Keys are written without a type class; values use the map's
    // value_type_class.
    put_ruby_value(key, key_field, Qnil, depth + 1, entry_sink, emit_defaults,
                   is_json);
    put_ruby_value(value, value_field, self->value_type_class, depth + 1,
                   entry_sink, emit_defaults, is_json);

    upb_sink_endmsg(entry_sink, &status);
    upb_sink_endsubmsg(seq_sink, getsel(f, UPB_HANDLER_ENDSUBMSG));

    Map_next(&it);
  }

  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}

1110 1111 1112
static const upb_handlers* msgdef_json_serialize_handlers(
    Descriptor* desc, bool preserve_proto_fieldnames);

1113 1114
// JSON-only path for messages whose descriptor is the Any well-known type.
//
// Steps, in order:
//   1. Emit the type URL string field (only when it is non-empty).
//   2. Require the URL to be longer than, and start with, the 20-character
//      prefix "type.googleapis.com/"; otherwise raise RuntimeError.
//   3. Look up the remainder of the URL as a message name in generated_pool's
//      symbol table; raise RuntimeError if it is unknown.
//   4. If the value field is non-empty, decode it with Message_decode() as the
//      payload type and emit the decoded payload through putmsg() using the
//      payload's JSON serialize handlers and this sink's closure.
static void putjsonany(VALUE msg_rb, const Descriptor* desc, upb_sink sink,
                       int depth, bool emit_defaults) {
  upb_status status;
  MessageHeader* msg = NULL;
  const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE);
  const upb_fielddef* value_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE);
  const upb_msgdef *payload_type = NULL;
  DescriptorPool* pool;
  size_t type_url_offset;
  VALUE type_url_str_rb;
  const char* url;
  size_t url_len;
  uint32_t value_offset;
  VALUE value_str_rb;
  size_t value_len;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  /* Handle type url */
  type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset;
  type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE);
  if (RSTRING_LEN(type_url_str_rb) > 0) {
    putstr(type_url_str_rb, type_field, sink);
  }

  url = RSTRING_PTR(type_url_str_rb);
  url_len = RSTRING_LEN(type_url_str_rb);
  pool = ruby_to_DescriptorPool(generated_pool);

  if (url_len <= 20 ||
      strncmp(url, "type.googleapis.com/", 20) != 0) {
    rb_raise(rb_eRuntimeError, "Invalid type url: %s", url);
    return;
  }

  /* Resolve type url: skip the prefix, the rest is the message name */
  url += 20;
  url_len -= 20;

  payload_type = upb_symtab_lookupmsg2(pool->symtab, url, url_len);
  if (payload_type == NULL) {
    rb_raise(rb_eRuntimeError, "Unknown type: %s", url);
    return;
  }

  /* Handle the packed payload */
  value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset;
  value_str_rb = DEREF(Message_data(msg), value_offset, VALUE);
  value_len = RSTRING_LEN(value_str_rb);

  if (value_len > 0) {
    VALUE payload_desc_rb = get_msgdef_obj(generated_pool, payload_type);
    Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb);
    VALUE payload_class = Descriptor_msgclass(payload_desc_rb);
    upb_sink subsink;
    bool is_wellknown;

    VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb);

    is_wellknown =
        upb_msgdef_wellknowntype(payload_desc->msgdef) !=
            UPB_WELLKNOWN_UNSPECIFIED;
    if (is_wellknown) {
      upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0,
                        &subsink);
    }

    // Regardless of the branch above, the payload is written through a sink
    // that pairs the payload type's JSON handlers with this sink's closure.
    subsink.handlers =
        msgdef_json_serialize_handlers(payload_desc, true);
    subsink.closure = sink.closure;
    putmsg(payload_msg_rb, payload_desc, subsink, depth, emit_defaults, true,
           is_wellknown);
  }

  upb_sink_endmsg(sink, &status);
}

1195 1196
static void putjsonlistvalue(
    VALUE msg_rb, const Descriptor* desc,
1197
    upb_sink sink, int depth, bool emit_defaults) {
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
  upb_status status;
  upb_sink subsink;
  MessageHeader* msg = NULL;
  const upb_fielddef* f = upb_msgdef_itof(desc->msgdef, 1);
  uint32_t offset =
      desc->layout->fields[upb_fielddef_index(f)].offset +
      sizeof(MessageHeader);
  VALUE ary;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  ary = DEREF(msg, offset, VALUE);

  if (ary == Qnil || RepeatedField_size(ary) == 0) {
    upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
    upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
  } else {
    putary(ary, f, sink, depth, emit_defaults, true);
  }

  upb_sink_endmsg(sink, &status);
}

Chris Fallin's avatar
Chris Fallin committed
1223
static void putmsg(VALUE msg_rb, const Descriptor* desc,
1224
                   upb_sink sink, int depth, bool emit_defaults,
1225
                   bool is_json, bool open_msg) {
1226 1227 1228 1229
  MessageHeader* msg;
  upb_msg_field_iter i;
  upb_status status;

1230 1231
  if (is_json &&
      upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) {
1232 1233 1234 1235
    putjsonany(msg_rb, desc, sink, depth, emit_defaults);
    return;
  }

1236 1237 1238 1239 1240 1241
  if (is_json &&
      upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) {
    putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults);
    return;
  }

1242 1243 1244
  if (open_msg) {
    upb_sink_startmsg(sink);
  }
Chris Fallin's avatar
Chris Fallin committed
1245 1246 1247

  // Protect against cycles (possible because users may freely reassign message
  // and repeated fields) by imposing a maximum recursion depth.
1248
  if (depth > ENCODE_MAX_NESTING) {
Chris Fallin's avatar
Chris Fallin committed
1249 1250 1251 1252 1253 1254
    rb_raise(rb_eRuntimeError,
             "Maximum recursion depth exceeded during encoding.");
  }

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

1255
  if (desc != msg->descriptor) {
1256
    rb_raise(rb_eArgError,
1257 1258 1259 1260 1261
             "The type of given msg is '%s', expect '%s'.",
             upb_msgdef_fullname(msg->descriptor->msgdef),
             upb_msgdef_fullname(desc->msgdef));
  }

1262 1263 1264
  for (upb_msg_field_begin(&i, desc->msgdef);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
Chris Fallin's avatar
Chris Fallin committed
1265
    upb_fielddef *f = upb_msg_iter_field(&i);
1266
    const upb_oneofdef *oneof = upb_fielddef_containingoneof(f);
1267
    bool is_matching_oneof = false;
1268
    uint32_t offset =
1269 1270 1271
        desc->layout->fields[upb_fielddef_index(f)].offset +
        sizeof(MessageHeader);

1272 1273 1274
    if (oneof) {
      uint32_t oneof_case =
          slot_read_oneof_case(desc->layout, Message_data(msg), oneof);
1275 1276
      // For a oneof, check that this field is actually present -- skip all the
      // below if not.
1277
      if (oneof_case != upb_fielddef_number(f)) {
1278 1279 1280 1281
        continue;
      }
      // Otherwise, fall through to the appropriate singular-field handler
      // below.
1282
      is_matching_oneof = true;
1283
    }
Chris Fallin's avatar
Chris Fallin committed
1284

1285
    if (is_map_field(f)) {
1286
      VALUE map = DEREF(msg, offset, VALUE);
1287
      if (map != Qnil || emit_defaults) {
1288
        putmap(map, f, sink, depth, emit_defaults, is_json);
1289 1290
      }
    } else if (upb_fielddef_isseq(f)) {
1291
      VALUE ary = DEREF(msg, offset, VALUE);
Chris Fallin's avatar
Chris Fallin committed
1292
      if (ary != Qnil) {
1293
        putary(ary, f, sink, depth, emit_defaults, is_json);
Chris Fallin's avatar
Chris Fallin committed
1294 1295
      }
    } else if (upb_fielddef_isstring(f)) {
1296
      VALUE str = DEREF(msg, offset, VALUE);
1297 1298 1299 1300 1301 1302 1303 1304 1305
      bool is_default = false;

      if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
        is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
      } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
        is_default = RSTRING_LEN(str) == 0;
      }

      if (is_matching_oneof || emit_defaults || !is_default) {
Chris Fallin's avatar
Chris Fallin committed
1306 1307 1308
        putstr(str, f, sink);
      }
    } else if (upb_fielddef_issubmsg(f)) {
1309 1310
      putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth,
                emit_defaults, is_json);
Chris Fallin's avatar
Chris Fallin committed
1311 1312 1313
    } else {
      upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));

1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326
#define T(upbtypeconst, upbtype, ctype, default_value)                       \
  case upbtypeconst: {                                                       \
    ctype value = DEREF(msg, offset, ctype);                                 \
    bool is_default = false;                                                 \
    if (upb_fielddef_haspresence(f)) {                                       \
      is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; \
    } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {       \
      is_default = default_value == value;                                   \
    }                                                                        \
    if (is_matching_oneof || emit_defaults || !is_default) {                 \
      upb_sink_put##upbtype(sink, sel, value);                               \
    }                                                                        \
  } break;
Chris Fallin's avatar
Chris Fallin committed
1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347

      switch (upb_fielddef_type(f)) {
        T(UPB_TYPE_FLOAT,  float,  float, 0.0)
        T(UPB_TYPE_DOUBLE, double, double, 0.0)
        T(UPB_TYPE_BOOL,   bool,   uint8_t, 0)
        case UPB_TYPE_ENUM:
        T(UPB_TYPE_INT32,  int32,  int32_t, 0)
        T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
        T(UPB_TYPE_INT64,  int64,  int64_t, 0)
        T(UPB_TYPE_UINT64, uint64, uint64_t, 0)

        case UPB_TYPE_STRING:
        case UPB_TYPE_BYTES:
        case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error.");
      }

#undef T

    }
  }

1348 1349 1350 1351 1352
  {
    stringsink* unknown = msg->unknown_fields;
    if (unknown != NULL) {
      upb_sink_putunknown(sink, unknown->ptr, unknown->len);
    }
1353 1354
  }

1355 1356 1357
  if (open_msg) {
    upb_sink_endmsg(sink, &status);
  }
Chris Fallin's avatar
Chris Fallin committed
1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
}

/*
 * call-seq:
 *     MessageClass.encode(msg) => bytes
 *
 * Encodes the given message object to its serialized form in protocol buffers
 * wire format.
 */
VALUE Message_encode(VALUE klass, VALUE msg_rb) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  const upb_handlers* serialize_handlers;
  upb_pb_encoder* encoder;
  stringsink sink;
  stackenv se;
  VALUE ret;

  stringsink_init(&sink);

  serialize_handlers = msgdef_pb_serialize_handlers(desc);

  stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE);
  encoder = upb_pb_encoder_create(se.arena, serialize_handlers, sink.sink);

  // Top-level message: depth 0, defaults omitted, binary (non-JSON) output.
  putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true);

  // Copy the accumulated bytes into a Ruby string before releasing the sink.
  ret = rb_str_new(sink.ptr, sink.len);

  stackenv_uninit(&se);
  stringsink_uninit(&sink);

  return ret;
}

/*
 * call-seq:
1398
 *     MessageClass.encode_json(msg, options = {}) => json_string
Chris Fallin's avatar
Chris Fallin committed
1399 1400
 *
 * Encodes the given message object into its serialized JSON representation.
1401 1402 1403
 * @param options [Hash] options for the decoder
 *  preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase)
 *  emit_defaults: set true to emit 0/false values (default is to omit them)
Chris Fallin's avatar
Chris Fallin committed
1404
 */
1405
VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1406
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
Chris Fallin's avatar
Chris Fallin committed
1407
  Descriptor* desc = ruby_to_Descriptor(descriptor);
1408 1409
  VALUE msg_rb;
  VALUE preserve_proto_fieldnames = Qfalse;
1410
  VALUE emit_defaults = Qfalse;
Chris Fallin's avatar
Chris Fallin committed
1411
  stringsink sink;
1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425

  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  msg_rb = argv[0];

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }
    preserve_proto_fieldnames = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
1426 1427 1428

    emit_defaults = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
1429 1430
  }

Chris Fallin's avatar
Chris Fallin committed
1431 1432
  stringsink_init(&sink);

1433 1434
  {
    const upb_handlers* serialize_handlers =
1435
        msgdef_json_serialize_handlers(desc, RTEST(preserve_proto_fieldnames));
1436 1437 1438
    upb_json_printer* printer;
    stackenv se;
    VALUE ret;
Chris Fallin's avatar
Chris Fallin committed
1439

1440 1441
    stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE);
    printer = upb_json_printer_create(se.arena, serialize_handlers, sink.sink);
Chris Fallin's avatar
Chris Fallin committed
1442

1443 1444
    putmsg(msg_rb, desc, upb_json_printer_input(printer), 0,
           RTEST(emit_defaults), true, true);
Chris Fallin's avatar
Chris Fallin committed
1445

1446
    ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
Chris Fallin's avatar
Chris Fallin committed
1447

1448 1449
    stackenv_uninit(&se);
    stringsink_uninit(&sink);
Chris Fallin's avatar
Chris Fallin committed
1450

1451 1452
    return ret;
  }
Chris Fallin's avatar
Chris Fallin committed
1453 1454
}

1455 1456 1457 1458 1459 1460
// Forward declaration: discard_unknown() and the helper below call each other.
static void discard_unknown(VALUE msg_rb, const Descriptor* desc);

// Reads the descriptor stored in |submsg|'s descriptor instance variable and
// runs discard_unknown() on |submsg| with it.
static void discard_unknown_in_submsg(VALUE submsg) {
  VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
  discard_unknown(submsg, ruby_to_Descriptor(descriptor));
}

// Clears the unknown-field buffer attached to |msg_rb| and then walks every
// field described by |desc|, recursing into each message-typed value: map
// values, repeated-field elements and singular submessages.
//
//   msg_rb: Ruby object wrapping a MessageHeader (checked against
//           Message_type).
//   desc:   descriptor whose msgdef/layout describe |msg_rb|.
static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
  MessageHeader* header;
  upb_msg_field_iter field_it;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, header);

  // Release this message's own unknown-field data, if any was recorded.
  // NOTE(review): only stringsink_uninit() is called here; if unknown_fields
  // points at a separately heap-allocated struct, the struct itself may need
  // to be freed as well -- confirm against the allocation site.
  if (header->unknown_fields != NULL) {
    stringsink_uninit(header->unknown_fields);
    header->unknown_fields = NULL;
  }

  for (upb_msg_field_begin(&field_it, desc->msgdef);
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it)) {
    upb_fielddef* field = upb_msg_iter_field(&field_it);
    const upb_oneofdef* containing_oneof = upb_fielddef_containingoneof(field);
    // Field storage sits after the MessageHeader, at the layout's offset.
    uint32_t field_offset =
        sizeof(MessageHeader) +
        desc->layout->fields[upb_fielddef_index(field)].offset;

    if (containing_oneof != NULL) {
      // A oneof member is only worth visiting when it is the case that is
      // currently set.
      uint32_t active_case = slot_read_oneof_case(
          desc->layout, Message_data(header), containing_oneof);
      if (active_case != upb_fielddef_number(field)) {
        continue;
      }
    }

    // Only message-typed fields can carry nested unknown fields.
    if (!upb_fielddef_issubmsg(field)) {
      continue;
    }

    if (is_map_field(field)) {
      VALUE map;
      Map_iter entry;

      // Maps whose values are not messages have nothing to recurse into.
      if (!upb_fielddef_issubmsg(map_field_value(field))) continue;
      map = DEREF(header, field_offset, VALUE);
      if (map == Qnil) continue;
      for (Map_begin(map, &entry); !Map_done(&entry); Map_next(&entry)) {
        discard_unknown_in_submsg(Map_iter_value(&entry));
      }
      continue;
    }

    if (upb_fielddef_isseq(field)) {
      VALUE repeated = DEREF(header, field_offset, VALUE);
      int count;
      int idx;

      if (repeated == Qnil) continue;
      count = NUM2INT(RepeatedField_length(repeated));
      for (idx = 0; idx < count; idx++) {
        // The native slot of each element holds the submessage VALUE.
        VALUE* element = (VALUE*)RepeatedField_index_native(repeated, idx);
        discard_unknown_in_submsg(*element);
      }
      continue;
    }

    {
      // Singular submessage field; nil means it is unset.
      VALUE submsg = DEREF(header, field_offset, VALUE);
      if (submsg != Qnil) {
        discard_unknown_in_submsg(submsg);
      }
    }
  }
}

/*
 * call-seq:
 *     Google::Protobuf.discard_unknown(msg)
 *
 * Discard unknown fields in the given message object and recursively discard
 * unknown fields in submessages.
 *
 * Raises ArgumentError if msg is a RepeatedField or a Map rather than a
 * message. Always returns nil.
 */
VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
  VALUE klass = CLASS_OF(msg_rb);
  VALUE descriptor;

  // Reject containers before touching the descriptor: RepeatedField and Map
  // are not message classes, so looking up and unwrapping their descriptor
  // instance variable first may fail before this check is ever reached.
  if (klass == cRepeatedField || klass == cMap) {
    rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
  }

  descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  discard_unknown(msg_rb, ruby_to_Descriptor(descriptor));
  return Qnil;
}