encode_decode.c 54.6 KB
Newer Older
Chris Fallin's avatar
Chris Fallin committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Protocol Buffers - Google's data interchange format
// Copyright 2014 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "protobuf.h"

33 34 35 36 37
// Appends the `len` bytes at `str` to the Ruby string `rb_str` and returns
// `rb_str`. This stands in for the real rb_str_cat(), which leaks memory in
// some versions of Ruby. For more information, see:
//   https://bugs.ruby-lang.org/issues/11328
VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
  size_t prev_len = RSTRING_LEN(rb_str);
  char *dest;

  // Make room for `len` more bytes before fetching the data pointer.
  rb_str_modify_expand(rb_str, len);
  dest = RSTRING_PTR(rb_str);

  memcpy(dest + prev_len, str, len);
  rb_str_set_len(rb_str, prev_len + len);
  return rb_str;
}

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
// The code below also comes from upb's prototype Ruby binding, developed by
// haberman@.

/* stringsink *****************************************************************/

// Start-of-string callback installed by stringsink_init(): rewinds the sink's
// write position to zero and returns the sink. `hd` and `size_hint` are not
// used.
static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
  stringsink *self = _sink;

  (void)hd;
  (void)size_hint;

  self->len = 0;
  return self;
}

// Appends the `len` bytes at `ptr` to the sink's buffer, growing the buffer
// (by doubling its capacity) when the data does not fit.
//
// Returns `len` when the bytes were stored. Returns 0, leaving the sink
// unchanged, when the required size cannot be represented or the buffer
// cannot be grown.
// NOTE(review): confirm that the upb caller treats a short return count as a
// failure rather than retrying forever.
static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
                                size_t len, const upb_bufhandle *handle) {
  stringsink *sink = _sink;
  size_t new_size = sink->size;
  size_t needed;

  UPB_UNUSED(hd);
  UPB_UNUSED(handle);

  // Refuse a request whose end offset would wrap around size_t.
  if (len > (size_t)-1 - sink->len) {
    return 0;
  }
  needed = sink->len + len;

  while (new_size < needed) {
    if (new_size == 0 || new_size > (size_t)-1 / 2) {
      // Doubling would make no progress (zero capacity) or would wrap; fall
      // back to exactly the size that is required.
      new_size = needed;
    } else {
      new_size *= 2;
    }
  }

  if (new_size != sink->size) {
    // Keep the old pointer until realloc() succeeds so that a failure neither
    // leaks the existing buffer nor leaves the sink pointing at NULL.
    void *grown = realloc(sink->ptr, new_size);
    if (grown == NULL) {
      return 0;
    }
    sink->ptr = grown;
    sink->size = new_size;
  }

  // Skip the copy for empty input; `ptr` need not be valid in that case.
  if (len > 0) {
    memcpy(sink->ptr + sink->len, ptr, len);
    sink->len += len;
  }

  return len;
}

// Prepares `sink` for use: gives it an empty 32-byte heap buffer, registers
// stringsink_start and stringsink_string on its bytes handler, and resets
// `sink->sink` with that handler and the sink itself.
// The buffer is released by stringsink_uninit().
// NOTE(review): the malloc() result is not checked here.
void stringsink_init(stringsink *sink) {
  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);

  upb_byteshandler_init(&sink->handler);
  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
}

// Releases the buffer owned by `sink`. The pointer is cleared afterwards so
// that a stale pointer cannot be freed a second time; the sink struct itself
// is not freed.
void stringsink_uninit(stringsink *sink) {
  free(sink->ptr);
  sink->ptr = NULL;
}

Chris Fallin's avatar
Chris Fallin committed
97 98 99 100 101 102
// -----------------------------------------------------------------------------
// Parsing.
// -----------------------------------------------------------------------------

// Accesses the object of type `type` stored `ofs` bytes past `msg`. The result
// is an lvalue, so it can be read or assigned. Arguments and the expansion are
// parenthesized so that compound expressions (e.g. a conditional offset) bind
// as the caller intends.
#define DEREF(msg, ofs, type) (*(type *)(((uint8_t *)(msg)) + (ofs)))

103 104 105 106 107 108 109 110 111 112 113 114
// Handler data for a plain field: where its storage lives and which hasbit
// (if any) belongs to it.
typedef struct {
  // Byte offset of the field's storage. Keep this as the first member:
  // startseq_handler reads this handler data through a `const size_t *`.
  size_t ofs;
  // Hasbit index handed to set_hasbit(); callers pass -1 when there is none.
  int32_t hasbit;
} field_handlerdata_t;

// Allocates a field_handlerdata_t recording the field's offset and hasbit.
// The allocation is registered on `h` so that it is released with xfree.
static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
  field_handlerdata_t *data = ALLOC(field_handlerdata_t);

  data->hasbit = hasbit;
  data->ofs = ofs;
  upb_handlers_addcleanup(h, data, xfree);

  return data;
}

typedef struct {
  size_t ofs;
119
  int32_t hasbit;
Chris Fallin's avatar
Chris Fallin committed
120 121 122 123
  const upb_msgdef *md;
} submsg_handlerdata_t;

// Creates a handlerdata that contains offset and submessage type information.
124 125 126
static const void *newsubmsghandlerdata(upb_handlers* h,
                                        uint32_t ofs,
                                        int32_t hasbit,
Chris Fallin's avatar
Chris Fallin committed
127 128 129
                                        const upb_fielddef* f) {
  submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
  hd->ofs = ofs;
130
  hd->hasbit = hasbit;
Chris Fallin's avatar
Chris Fallin committed
131
  hd->md = upb_fielddef_msgsubdef(f);
132
  upb_handlers_addcleanup(h, hd, xfree);
Chris Fallin's avatar
Chris Fallin committed
133 134 135
  return hd;
}

136
typedef struct {
137 138 139 140
  size_t ofs;              // union data slot
  size_t case_ofs;         // oneof_case field
  uint32_t oneof_case_num; // oneof-case number to place in oneof_case field
  const upb_msgdef *md;    // msgdef, for oneof submessage handler
141 142 143 144 145 146 147 148 149
} oneof_handlerdata_t;

// Allocates an oneof_handlerdata_t for oneof member `f`, recording the data
// slot offset, the case-field offset, the case number and (for message-typed
// members) the submessage definition. The allocation is registered on `h` so
// that it is released with xfree.
static const void *newoneofhandlerdata(upb_handlers *h,
                                       uint32_t ofs,
                                       uint32_t case_ofs,
                                       const upb_fielddef *f) {
  oneof_handlerdata_t *data = ALLOC(oneof_handlerdata_t);

  data->ofs = ofs;
  data->case_ofs = case_ofs;

  // The field tag number doubles as the oneof union discriminant. These
  // numbers are never exposed to the user, so all that is required is a
  // unique ID per union case; the tag numbers already exist, and using them
  // also lets the oneof accessor look the field up easily.
  data->oneof_case_num = upb_fielddef_number(f);

  data->md = (upb_fielddef_type(f) == UPB_TYPE_MESSAGE)
                 ? upb_fielddef_msgsubdef(f)
                 : NULL;

  upb_handlers_addcleanup(h, data, xfree);
  return data;
}

Chris Fallin's avatar
Chris Fallin committed
166 167 168 169 170
// A handler that starts a repeated field.  Gets the Repeated*Field instance for
// this field (such an instance always exists even in an empty message).
static void *startseq_handler(void* closure, const void* hd) {
  MessageHeader* msg = closure;
  const size_t *ofs = hd;
171
  return (void*)DEREF(msg, *ofs, VALUE);
Chris Fallin's avatar
Chris Fallin committed
172 173
}

174
// Handlers that append primitive values to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
#define DEFINE_APPEND_HANDLER(type, ctype)                 \
  static bool append##type##_handler(void *closure, const void *hd, \
                                     ctype val) {                   \
    VALUE ary = (VALUE)closure;                                     \
    RepeatedField_push_native(ary, &val);                           \
    return true;                                                    \
  }

DEFINE_APPEND_HANDLER(bool,   bool)
DEFINE_APPEND_HANDLER(int32,  int32_t)
DEFINE_APPEND_HANDLER(uint32, uint32_t)
DEFINE_APPEND_HANDLER(float,  float)
DEFINE_APPEND_HANDLER(int64,  int64_t)
DEFINE_APPEND_HANDLER(uint64, uint64_t)
DEFINE_APPEND_HANDLER(double, double)

191
// Appends a string to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
192 193 194 195 196 197
static void* appendstr_handler(void *closure,
                               const void *hd,
                               size_t size_hint) {
  VALUE ary = (VALUE)closure;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
198
  RepeatedField_push_native(ary, &str);
Chris Fallin's avatar
Chris Fallin committed
199 200 201
  return (void*)str;
}

202 203 204 205 206 207 208
// Sets bit number `hasbit` in the byte array that starts at `closure`.
// Values of `hasbit` that are zero or negative are ignored; callers pass -1
// for fields that have no hasbit.
static void set_hasbit(void *closure, int32_t hasbit) {
  uint8_t *bytes;

  if (hasbit <= 0) {
    return;
  }

  bytes = closure;
  bytes[hasbit / 8] |= 1 << (hasbit % 8);
}

209
// Appends a 'bytes' string to a repeated field.
Chris Fallin's avatar
Chris Fallin committed
210 211 212 213 214 215
static void* appendbytes_handler(void *closure,
                                 const void *hd,
                                 size_t size_hint) {
  VALUE ary = (VALUE)closure;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
216
  RepeatedField_push_native(ary, &str);
Chris Fallin's avatar
Chris Fallin committed
217 218 219 220 221 222 223 224
  return (void*)str;
}

// Sets a non-repeated string field in a message.
static void* str_handler(void *closure,
                         const void *hd,
                         size_t size_hint) {
  MessageHeader* msg = closure;
225 226
  const field_handlerdata_t *fieldhandler = hd;

Chris Fallin's avatar
Chris Fallin committed
227 228
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
229 230
  DEREF(msg, fieldhandler->ofs, VALUE) = str;
  set_hasbit(closure, fieldhandler->hasbit);
Chris Fallin's avatar
Chris Fallin committed
231 232 233 234 235 236 237 238
  return (void*)str;
}

// Sets a non-repeated 'bytes' field in a message.
static void* bytes_handler(void *closure,
                           const void *hd,
                           size_t size_hint) {
  MessageHeader* msg = closure;
239 240
  const field_handlerdata_t *fieldhandler = hd;

Chris Fallin's avatar
Chris Fallin committed
241 242
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
243 244
  DEREF(msg, fieldhandler->ofs, VALUE) = str;
  set_hasbit(closure, fieldhandler->hasbit);
Chris Fallin's avatar
Chris Fallin committed
245 246 247 248 249 250 251
  return (void*)str;
}

static size_t stringdata_handler(void* closure, const void* hd,
                                 const char* str, size_t len,
                                 const upb_bufhandle* handle) {
  VALUE rb_str = (VALUE)closure;
252
  noleak_rb_str_cat(rb_str, str, len);
Chris Fallin's avatar
Chris Fallin committed
253 254 255
  return len;
}

256
static bool stringdata_end_handler(void* closure, const void* hd) {
257
  VALUE rb_str = closure;
258 259 260 261 262
  rb_obj_freeze(rb_str);
  return true;
}

static bool appendstring_end_handler(void* closure, const void* hd) {
263
  VALUE rb_str = closure;
264 265 266 267
  rb_obj_freeze(rb_str);
  return true;
}

Chris Fallin's avatar
Chris Fallin committed
268 269 270 271 272 273 274
// Appends a submessage to a repeated field (a regular Ruby array for now).
static void *appendsubmsg_handler(void *closure, const void *hd) {
  VALUE ary = (VALUE)closure;
  const submsg_handlerdata_t *submsgdata = hd;
  VALUE subdesc =
      get_def_obj((void*)submsgdata->md);
  VALUE subklass = Descriptor_msgclass(subdesc);
275
  MessageHeader* submsg;
Chris Fallin's avatar
Chris Fallin committed
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290

  VALUE submsg_rb = rb_class_new_instance(0, NULL, subklass);
  RepeatedField_push(ary, submsg_rb);

  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
  return submsg;
}

// Sets a non-repeated submessage field in a message.
static void *submsg_handler(void *closure, const void *hd) {
  MessageHeader* msg = closure;
  const submsg_handlerdata_t* submsgdata = hd;
  VALUE subdesc =
      get_def_obj((void*)submsgdata->md);
  VALUE subklass = Descriptor_msgclass(subdesc);
291 292
  VALUE submsg_rb;
  MessageHeader* submsg;
Chris Fallin's avatar
Chris Fallin committed
293

294 295
  if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
    DEREF(msg, submsgdata->ofs, VALUE) =
Chris Fallin's avatar
Chris Fallin committed
296 297 298
        rb_class_new_instance(0, NULL, subklass);
  }

299 300
  set_hasbit(closure, submsgdata->hasbit);

301
  submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
Chris Fallin's avatar
Chris Fallin committed
302
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
303

Chris Fallin's avatar
Chris Fallin committed
304 305 306
  return submsg;
}

307 308 309
// Handler data for startmap/endmap handlers.
typedef struct {
  size_t ofs;
310 311
  upb_fieldtype_t key_field_type;
  upb_fieldtype_t value_field_type;
312 313 314 315

  // We know that we can hold this reference because the handlerdata has the
  // same lifetime as the upb_handlers struct, and the upb_handlers struct holds
  // a reference to the upb_msgdef, which in turn has references to its subdefs.
316
  const upb_def* value_field_subdef;
317 318 319 320 321 322 323 324 325 326
} map_handlerdata_t;

// Temporary frame for map parsing: at the beginning of a map entry message, a
// submsg handler allocates a frame to hold (i) a reference to the Map object
// into which this message will be inserted and (ii) storage slots to
// temporarily hold the key and value for this map entry until the end of the
// submessage. When the submessage ends, another handler is called to insert the
// value into the map.
typedef struct {
  VALUE map;
327
  const map_handlerdata_t* handlerdata;
328 329 330 331
  char key_storage[NATIVE_SLOT_MAX_SIZE];
  char value_storage[NATIVE_SLOT_MAX_SIZE];
} map_parse_frame_t;

332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
static void MapParseFrame_mark(void* _self) {
  map_parse_frame_t* frame = _self;

  // This shouldn't strictly be necessary since this should be rooted by the
  // message itself, but it can't hurt.
  rb_gc_mark(frame->map);

  native_slot_mark(frame->handlerdata->key_field_type, &frame->key_storage);
  native_slot_mark(frame->handlerdata->value_field_type, &frame->value_storage);
}

// Free callback listed in MapParseFrame_type: releases the frame with xfree().
void MapParseFrame_free(void* self) {
  xfree(self);
}

// Ruby typed-data descriptor used when wrapping a map_parse_frame_t (see
// map_push_frame). Names the type "MapParseFrame" and supplies
// MapParseFrame_mark and MapParseFrame_free as its first two callbacks; the
// third is left NULL.
rb_data_type_t MapParseFrame_type = {
  "MapParseFrame",
  { MapParseFrame_mark, MapParseFrame_free, NULL },
};

// Allocates and initializes a parse frame for one entry of `map`, wraps it in
// a MapParseFrame Ruby object, attaches that object to the map with
// Map_set_frame(), and returns the frame.
static map_parse_frame_t* map_push_frame(VALUE map,
                                         const map_handlerdata_t* handlerdata) {
  map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
  VALUE frame_rb;

  frame->map = map;
  frame->handlerdata = handlerdata;
  native_slot_init(handlerdata->key_field_type, &frame->key_storage);
  native_slot_init(handlerdata->value_field_type, &frame->value_storage);

  frame_rb = TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame);
  Map_set_frame(map, frame_rb);

  return frame;
}

366 367 368 369 370
// Handler to begin a map entry: allocates a temporary frame. This is the
// 'startsubmsg' handler on the msgdef that contains the map field.
static void *startmapentry_handler(void *closure, const void *hd) {
  MessageHeader* msg = closure;
  const map_handlerdata_t* mapdata = hd;
371
  VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
372

373
  return map_push_frame(map_rb, mapdata);
374 375 376 377 378 379 380 381 382
}

// Handler to end a map entry: converts the key and value collected in the
// frame to Ruby values, inserts them into the map, and detaches the frame
// from the map. This is the 'endmsg' handler on the map entry msgdef.
static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
  map_parse_frame_t* frame = closure;
  const map_handlerdata_t* mapdata = hd;
  VALUE value_class = Qnil;
  VALUE key;
  VALUE value;

  key = native_slot_get(mapdata->key_field_type, Qnil, &frame->key_storage);

  // Only message and enum values carry a type class.
  switch (mapdata->value_field_type) {
    case UPB_TYPE_MESSAGE:
    case UPB_TYPE_ENUM:
      value_class = get_def_obj(mapdata->value_field_subdef);
      break;
    default:
      break;
  }

  value = native_slot_get(mapdata->value_field_type, value_class,
                          &frame->value_storage);

  Map_index_set(frame->map, key, value);
  Map_set_frame(frame->map, Qnil);

  return true;
}

// Allocates a new map_handlerdata_t given the map entry message definition. If
// the offset of the field within the parent message is also given, that is
// added to the handler data as well. Note that this is called *twice* per map
// field: once in the parent message handler setup when setting the startsubmsg
// handler and once in the map entry message handler setup when setting the
// key/value and endmsg handlers. The reason is that there is no easy way to
// pass the handlerdata down to the sub-message handler setup.
static map_handlerdata_t* new_map_handlerdata(
    size_t ofs,
413 414
    const upb_msgdef* mapentry_def,
    Descriptor* desc) {
415 416
  const upb_fielddef* key_field;
  const upb_fielddef* value_field;
417 418
  map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
  hd->ofs = ofs;
419
  key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
420 421
  assert(key_field != NULL);
  hd->key_field_type = upb_fielddef_type(key_field);
422
  value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
423 424
  assert(value_field != NULL);
  hd->value_field_type = upb_fielddef_type(value_field);
425
  hd->value_field_subdef = upb_fielddef_subdef(value_field);
426

427 428 429
  return hd;
}

430 431 432 433 434
// Handlers that set primitive values in oneofs.
#define DEFINE_ONEOF_HANDLER(type, ctype)                           \
  static bool oneof##type##_handler(void *closure, const void *hd,  \
                                     ctype val) {                   \
    const oneof_handlerdata_t *oneofdata = hd;                      \
435 436
    DEREF(closure, oneofdata->case_ofs, uint32_t) =                 \
        oneofdata->oneof_case_num;                                  \
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
    DEREF(closure, oneofdata->ofs, ctype) = val;                    \
    return true;                                                    \
  }

DEFINE_ONEOF_HANDLER(bool,   bool)
DEFINE_ONEOF_HANDLER(int32,  int32_t)
DEFINE_ONEOF_HANDLER(uint32, uint32_t)
DEFINE_ONEOF_HANDLER(float,  float)
DEFINE_ONEOF_HANDLER(int64,  int64_t)
DEFINE_ONEOF_HANDLER(uint64, uint64_t)
DEFINE_ONEOF_HANDLER(double, double)

#undef DEFINE_ONEOF_HANDLER

// Handlers for strings in a oneof.
static void *oneofstr_handler(void *closure,
                              const void *hd,
                              size_t size_hint) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyStringUtf8Encoding);
459 460
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
461 462 463 464 465 466 467 468 469 470 471
  DEREF(msg, oneofdata->ofs, VALUE) = str;
  return (void*)str;
}

static void *oneofbytes_handler(void *closure,
                                const void *hd,
                                size_t size_hint) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  VALUE str = rb_str_new2("");
  rb_enc_associate(str, kRubyString8bitEncoding);
472 473
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
474 475 476 477
  DEREF(msg, oneofdata->ofs, VALUE) = str;
  return (void*)str;
}

478
// End-of-string handler for string/bytes members of a oneof. Creates a new
// empty string, freezes it, and reports success; `closure` and `hd` are not
// used.
// NOTE(review): unlike the other end-of-string handlers this does not freeze
// the string that was parsed -- confirm whether that is intended.
static bool oneofstring_end_handler(void* closure, const void* hd) {
  rb_obj_freeze(rb_str_new2(""));
  return true;
}

484 485 486 487 488 489 490 491 492 493
// Handler for a submessage field in a oneof.
static void *oneofsubmsg_handler(void *closure,
                                 const void *hd) {
  MessageHeader* msg = closure;
  const oneof_handlerdata_t *oneofdata = hd;
  uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t);

  VALUE subdesc =
      get_def_obj((void*)oneofdata->md);
  VALUE subklass = Descriptor_msgclass(subdesc);
494 495
  VALUE submsg_rb;
  MessageHeader* submsg;
496

497
  if (oldcase != oneofdata->oneof_case_num ||
498 499 500 501
      DEREF(msg, oneofdata->ofs, VALUE) == Qnil) {
    DEREF(msg, oneofdata->ofs, VALUE) =
        rb_class_new_instance(0, NULL, subklass);
  }
502 503 504 505 506 507
  // Set the oneof case *after* allocating the new class instance -- otherwise,
  // if the Ruby GC is invoked as part of a call into the VM, it might invoke
  // our mark routines, and our mark routines might see the case value
  // indicating a VALUE is present and expect a valid VALUE. See comment in
  // layout_set() for more detail: basically, the change to the value and the
  // case must be atomic w.r.t. the Ruby VM.
508 509
  DEREF(msg, oneofdata->case_ofs, uint32_t) =
      oneofdata->oneof_case_num;
510

511
  submsg_rb = DEREF(msg, oneofdata->ofs, VALUE);
512 513 514 515
  TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
  return submsg;
}

516 517 518 519 520
// Set up handlers for a repeated field: a start-of-sequence handler that
// carries the field's offset, plus the per-element handler(s) matching the
// field's type.
static void add_handlers_for_repeated_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset) {
  upb_handlerattr seq_attr = UPB_HANDLERATTR_INITIALIZER;

  // Repeated fields have no hasbit, hence -1.
  upb_handlerattr_sethandlerdata(&seq_attr, newhandlerdata(h, offset, -1));
  upb_handlers_setstartseq(h, f, startseq_handler, &seq_attr);
  upb_handlerattr_uninit(&seq_attr);

  switch (upb_fielddef_type(f)) {

#define SET_HANDLER(utype, ltype)                                 \
  case utype:                                                     \
    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
    break;

    SET_HANDLER(UPB_TYPE_BOOL,   bool);
    SET_HANDLER(UPB_TYPE_INT32,  int32);
    SET_HANDLER(UPB_TYPE_UINT32, uint32);
    SET_HANDLER(UPB_TYPE_ENUM,   int32);
    SET_HANDLER(UPB_TYPE_FLOAT,  float);
    SET_HANDLER(UPB_TYPE_INT64,  int64);
    SET_HANDLER(UPB_TYPE_UINT64, uint64);
    SET_HANDLER(UPB_TYPE_DOUBLE, double);

#undef SET_HANDLER

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      // Only the start handler differs between strings and bytes.
      if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, appendbytes_handler, NULL);
      } else {
        upb_handlers_setstartstr(h, f, appendstr_handler, NULL);
      }
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
      upb_handlers_setendstr(h, f, appendstring_end_handler, NULL);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlerattr submsg_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&submsg_attr,
                                     newsubmsghandlerdata(h, 0, -1, f));
      upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &submsg_attr);
      upb_handlerattr_uninit(&submsg_attr);
      break;
    }
  }
}

// Set up handlers for a singular field stored at `offset`. `hasbit_off` is
// the field's hasbit index, or MESSAGE_FIELD_NO_HASBIT when it has none.
static void add_handlers_for_singular_field(upb_handlers *h,
                                            const upb_fielddef *f,
                                            size_t offset,
                                            size_t hasbit_off) {
  // The offset we pass to UPB points to the start of the Message,
  // rather than the start of where our data is stored.
  int32_t hasbit = (hasbit_off == MESSAGE_FIELD_NO_HASBIT)
                       ? -1
                       : (int32_t)(hasbit_off + sizeof(MessageHeader) * 8);

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
      upb_msg_setscalarhandler(h, f, offset, hasbit);
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      upb_handlerattr str_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(&str_attr,
                                     newhandlerdata(h, offset, hasbit));
      // Only the start handler differs between strings and bytes.
      if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, bytes_handler, &str_attr);
      } else {
        upb_handlers_setstartstr(h, f, str_handler, &str_attr);
      }
      upb_handlers_setstring(h, f, stringdata_handler, &str_attr);
      upb_handlers_setendstr(h, f, stringdata_end_handler, &str_attr);
      upb_handlerattr_uninit(&str_attr);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlerattr submsg_attr = UPB_HANDLERATTR_INITIALIZER;
      upb_handlerattr_sethandlerdata(
          &submsg_attr, newsubmsghandlerdata(h, offset, hasbit, f));
      upb_handlers_setstartsubmsg(h, f, submsg_handler, &submsg_attr);
      upb_handlerattr_uninit(&submsg_attr);
      break;
    }
  }
}

// Adds handlers to a map field: installs startmapentry_handler as the
// field's start-of-submessage handler, with handler data that records the
// map's offset and its key/value types.
static void add_handlers_for_mapfield(upb_handlers* h,
                                      const upb_fielddef* fielddef,
                                      size_t offset,
                                      Descriptor* desc) {
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  const upb_msgdef* entry_def = upb_fielddef_msgsubdef(fielddef);
  map_handlerdata_t* data = new_map_handlerdata(offset, entry_def, desc);

  // Hand ownership of the handler data to `h`.
  upb_handlers_addcleanup(h, data, xfree);

  upb_handlerattr_sethandlerdata(&attr, data);
  upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
  upb_handlerattr_uninit(&attr);
}

// Adds handlers to a map-entry msgdef.
static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
628 629
                                      upb_handlers* h,
                                      Descriptor* desc) {
630 631
  const upb_fielddef* key_field = map_entry_key(msgdef);
  const upb_fielddef* value_field = map_entry_value(msgdef);
632
  map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
633
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
634

635
  upb_handlers_addcleanup(h, hd, xfree);
636 637 638 639
  upb_handlerattr_sethandlerdata(&attr, hd);
  upb_handlers_setendmsg(h, endmap_handler, &attr);

  add_handlers_for_singular_field(
640
      h, key_field,
641 642
      offsetof(map_parse_frame_t, key_storage),
      MESSAGE_FIELD_NO_HASBIT);
643
  add_handlers_for_singular_field(
644
      h, value_field,
645 646
      offsetof(map_parse_frame_t, value_storage),
      MESSAGE_FIELD_NO_HASBIT);
647 648
}

649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683
// Set up handlers for a oneof field. Every handler except the string-data
// handler receives handler data holding the member's data offset, the
// oneof-case offset and the member's case number.
static void add_handlers_for_oneof_field(upb_handlers *h,
                                         const upb_fielddef *f,
                                         size_t offset,
                                         size_t oneof_case_offset) {
  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;

  upb_handlerattr_sethandlerdata(
      &attr, newoneofhandlerdata(h, offset, oneof_case_offset, f));

  switch (upb_fielddef_type(f)) {

#define SET_HANDLER(utype, ltype)                                 \
  case utype:                                                     \
    upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \
    break;

    SET_HANDLER(UPB_TYPE_BOOL,   bool);
    SET_HANDLER(UPB_TYPE_INT32,  int32);
    SET_HANDLER(UPB_TYPE_UINT32, uint32);
    SET_HANDLER(UPB_TYPE_ENUM,   int32);
    SET_HANDLER(UPB_TYPE_FLOAT,  float);
    SET_HANDLER(UPB_TYPE_INT64,  int64);
    SET_HANDLER(UPB_TYPE_UINT64, uint64);
    SET_HANDLER(UPB_TYPE_DOUBLE, double);

#undef SET_HANDLER

    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES: {
      // Only the start handler differs between strings and bytes.
      if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
        upb_handlers_setstartstr(h, f, oneofbytes_handler, &attr);
      } else {
        upb_handlers_setstartstr(h, f, oneofstr_handler, &attr);
      }
      upb_handlers_setstring(h, f, stringdata_handler, NULL);
      upb_handlers_setendstr(h, f, oneofstring_end_handler, &attr);
      break;
    }
    case UPB_TYPE_MESSAGE: {
      upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr);
      break;
    }
  }

  upb_handlerattr_uninit(&attr);
}

696 697 698 699 700 701 702 703 704 705 706 707 708 709
static bool unknown_field_handler(void* closure, const void* hd,
                                  const char* buf, size_t size) {
  UPB_UNUSED(hd);

  MessageHeader* msg = (MessageHeader*)closure;
  if (msg->unknown_fields == NULL) {
    msg->unknown_fields = malloc(sizeof(stringsink));
    stringsink_init(msg->unknown_fields);
  }

  stringsink_string(msg->unknown_fields, NULL, buf, size, NULL);

  return true;
}
710

Chris Fallin's avatar
Chris Fallin committed
711
// Registers parse handlers on `h` for the message type that `h` belongs to:
// map-entry types get the dedicated map-entry handlers; every other type gets
// an unknown-field handler plus one set of handlers per field, chosen by
// whether the field is a oneof member, a map, repeated, or singular.
// `closure` is not used.
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
  Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
  upb_msg_field_iter i;

  // If this is a mapentry message type, set up a special set of handlers and
  // bail out of the normal (user-defined) message type handling.
  if (upb_msgdef_mapentry(msgdef)) {
    add_handlers_for_mapentry(msgdef, h, desc);
    return;
  }

  // Ensure layout exists. We may be invoked to create handlers for a given
  // message if we are included as a submsg of another message type before our
  // class is actually built, so to work around this, we just create the layout
  // (and handlers, in the class-building function) on-demand.
  if (desc->layout == NULL) {
    desc->layout = create_layout(desc->msgdef);
  }

  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
  upb_handlers_setunknown(h, unknown_field_handler, &attr);
  // Release the attr once it has been consumed, as the other handler-setup
  // functions in this file do.
  upb_handlerattr_uninit(&attr);

  for (upb_msg_field_begin(&i, desc->msgdef);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    // Layout offsets are relative to the data that follows the MessageHeader;
    // the handlers want offsets from the start of the MessageHeader.
    size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset +
        sizeof(MessageHeader);

    if (upb_fielddef_containingoneof(f)) {
      size_t oneof_case_offset =
          desc->layout->fields[upb_fielddef_index(f)].case_offset +
          sizeof(MessageHeader);
      add_handlers_for_oneof_field(h, f, offset, oneof_case_offset);
    } else if (is_map_field(f)) {
      add_handlers_for_mapfield(h, f, offset, desc);
    } else if (upb_fielddef_isseq(f)) {
      add_handlers_for_repeated_field(h, f, offset);
    } else {
      add_handlers_for_singular_field(
          h, f, offset, desc->layout->fields[upb_fielddef_index(f)].hasbit);
    }
  }
}

// Creates frozen upb handlers for populating a message of the type described
// by `desc`; the individual handlers are registered by
// add_handlers_for_message.
static const upb_handlers *new_fill_handlers(Descriptor* desc,
                                             const void* owner) {
  // TODO(cfallin, haberman): once upb gets a caching/memoization layer for
  // handlers, reuse subdef handlers so that e.g. if we already parse
  // B-with-field-of-type-C, we don't have to rebuild the whole hierarchy to
  // parse A-with-field-of-type-B-with-field-of-type-C.
  const upb_handlers* handlers = upb_handlers_newfrozen(
      desc->msgdef, owner, add_handlers_for_message, NULL);

  return handlers;
}

// Returns the handlers that fill a message's data into an in-memory object,
// building them on first use and caching them in desc->fill_handlers. The
// address of the cache slot itself is passed as the owner.
const upb_handlers* get_fill_handlers(Descriptor* desc) {
  if (desc->fill_handlers) {
    return desc->fill_handlers;
  }

  desc->fill_handlers = new_fill_handlers(desc, &desc->fill_handlers);
  return desc->fill_handlers;
}

// Creates the upb decoder method used to parse messages of this type. The
// method is configured with the fill handlers for `desc`; `owner` is
// forwarded to upb_pbdecodermethod_new().
// This is called from the message class creation code.
const upb_pbdecodermethod *new_fillmsg_decodermethod(Descriptor* desc,
                                                     const void* owner) {
  upb_pbdecodermethodopts method_opts;

  upb_pbdecodermethodopts_init(&method_opts, get_fill_handlers(desc));
  return upb_pbdecodermethod_new(&method_opts, owner);
}

// Returns the binary-format decoder method for `desc`, creating it on first
// use and caching it in desc->fill_method (whose address doubles as owner).
static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) {
  if (desc->fill_method != NULL) {
    return desc->fill_method;
  }

  desc->fill_method = new_fillmsg_decodermethod(desc, &desc->fill_method);
  return desc->fill_method;
}

797 798 799 800 801 802 803 804
// Returns the JSON parser method for `desc`, creating it on first use and
// caching it in desc->json_fill_method (whose address doubles as owner).
static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) {
  if (desc->json_fill_method != NULL) {
    return desc->json_fill_method;
  }

  desc->json_fill_method =
      upb_json_parsermethod_new(desc->msgdef, &desc->json_fill_method);
  return desc->json_fill_method;
}

805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825

// Stack-allocated context during an encode/decode operation. Contains the upb
// environment and its stack-based allocator, an initial buffer for allocations
// to avoid malloc() when possible, and a template for Ruby exception messages
// if any error occurs.
//
// Lifecycle: set up with stackenv_init() and torn down with
// stackenv_uninit(); env_error_func() also tears it down before raising.
#define STACK_ENV_STACKBYTES 4096
typedef struct {
  // upb environment; initialized over `allocbuf` in stackenv_init().
  upb_env env;
  // printf-style template handed to rb_raise() in env_error_func(); it is
  // given one string argument (the upb status message).
  const char* ruby_error_template;
  // Initial allocation buffer passed to upb_env_init2().
  char allocbuf[STACK_ENV_STACKBYTES];
} stackenv;

// Forward declarations: env_error_func() below calls stackenv_uninit() before
// either function is defined.
static void stackenv_init(stackenv* se, const char* errmsg);
static void stackenv_uninit(stackenv* se);

// Callback invoked by upb if any error occurs during parsing or serialization.
static bool env_error_func(void* ud, const upb_status* status) {
  stackenv* se = ud;
  // Free the env -- rb_raise will longjmp up the stack past the encode/decode
  // function so it would not otherwise have been freed.
  stackenv_uninit(se);
826 827 828 829

  // TODO(haberman): have a way to verify that this is actually a parse error,
  // instead of just throwing "parse error" unconditionally.
  rb_raise(cParseError, se->ruby_error_template, upb_status_errmsg(status));
830 831 832 833 834 835 836
  // Never reached: rb_raise() always longjmp()s up the stack, past all of our
  // code, back to Ruby.
  return false;
}

// Prepares `se` for one encode/decode operation: initializes the upb env over
// the embedded buffer, records `errmsg` as the exception message template, and
// registers env_error_func() as the env's error callback with `se` as its
// user data.
static void stackenv_init(stackenv* se, const char* errmsg) {
  upb_env* env = &se->env;

  se->ruby_error_template = errmsg;
  upb_env_init2(env, se->allocbuf, sizeof(se->allocbuf), NULL);
  upb_env_seterrorfunc(env, env_error_func, se);
}

// Tears down the upb env embedded in `se`. The stackenv struct itself is not
// freed here.
static void stackenv_uninit(stackenv* se) {
  upb_env* env = &se->env;
  upb_env_uninit(env);
}

Chris Fallin's avatar
Chris Fallin committed
845 846 847 848 849 850 851 852 853
/*
 * call-seq:
 *     MessageClass.decode(data) => message
 *
 * Decodes the given data (as a string containing bytes in protocol buffers wire
 * format) under the interpretation given by this message class's definition
 * and returns a message object with the corresponding field values.
 *
 * Raises ArgumentError if data is not a String. Errors reported by upb while
 * parsing go through env_error_func(), which raises a parse error.
 */
VALUE Message_decode(VALUE klass, VALUE data) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  VALUE msgklass = Descriptor_msgclass(descriptor);
  VALUE msg_rb;
  MessageHeader* msg;
  const upb_pbdecodermethod* method;
  const upb_handlers* fill_handlers;
  stackenv se;
  upb_sink sink;
  upb_pbdecoder* decoder;

  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for binary protobuf data.");
  }

  // Create the empty destination message and get at its native storage.
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  method = msgdef_decodermethod(desc);
  fill_handlers = upb_pbdecodermethod_desthandlers(method);

  // Feed the whole input buffer through a decoder whose output sink writes
  // into `msg`.
  stackenv_init(&se, "Error occurred during parsing: %s");
  upb_sink_reset(&sink, fill_handlers, msg);
  decoder = upb_pbdecoder_create(&se.env, method, &sink);
  upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
                    upb_pbdecoder_input(decoder));
  stackenv_uninit(&se);

  return msg_rb;
}

/*
 * call-seq:
888
 *     MessageClass.decode_json(data, options = {}) => message
Chris Fallin's avatar
Chris Fallin committed
889 890 891 892
 *
 * Decodes the given data (as a string containing bytes in protocol buffers wire
 * format) under the interpretration given by this message class's definition
 * and returns a message object with the corresponding field values.
893 894 895
 *
 * @param options [Hash] options for the decoder
 *   ignore_unknown_fields: set true to ignore unknown fields (default is to raise an error)
Chris Fallin's avatar
Chris Fallin committed
896
 */
897
VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) {
898
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
Chris Fallin's avatar
Chris Fallin committed
899 900
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  VALUE msgklass = Descriptor_msgclass(descriptor);
901
  VALUE msg_rb;
902 903
  VALUE data = argv[0];
  VALUE ignore_unknown_fields = Qfalse;
904
  MessageHeader* msg;
Chris Fallin's avatar
Chris Fallin committed
905

906 907 908 909 910 911 912 913 914 915 916 917 918 919
  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }

    ignore_unknown_fields = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse);
  }

Chris Fallin's avatar
Chris Fallin committed
920 921 922 923 924 925 926
  if (TYPE(data) != T_STRING) {
    rb_raise(rb_eArgError, "Expected string for JSON data.");
  }
  // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to
  // convert, because string handlers pass data directly to message string
  // fields.

927
  msg_rb = rb_class_new_instance(0, NULL, msgklass);
Chris Fallin's avatar
Chris Fallin committed
928 929
  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

930
  {
931
    const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
932 933 934
    stackenv se;
    upb_sink sink;
    upb_json_parser* parser;
935
    DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);
936
    stackenv_init(&se, "Error occurred during parsing: %s");
Chris Fallin's avatar
Chris Fallin committed
937

938
    upb_sink_reset(&sink, get_fill_handlers(desc), msg);
939 940
    parser = upb_json_parser_create(&se.env, method, pool->symtab,
                                    &sink, ignore_unknown_fields);
941 942
    upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
                      upb_json_parser_input(parser));
Chris Fallin's avatar
Chris Fallin committed
943

944 945
    stackenv_uninit(&se);
  }
Chris Fallin's avatar
Chris Fallin committed
946 947 948 949 950 951 952 953 954 955 956

  return msg_rb;
}

// -----------------------------------------------------------------------------
// Serializing.
// -----------------------------------------------------------------------------

/* msgvisitor *****************************************************************/

static void putmsg(VALUE msg, const Descriptor* desc,
957 958
                   upb_sink *sink, int depth, bool emit_defaults,
                   bool is_json, bool open_msg);
Chris Fallin's avatar
Chris Fallin committed
959 960 961 962

// Looks up the handler selector for field `f` and handler type `type`. The
// lookup is asserted to succeed.
static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
  upb_selector_t selector;
  bool found = upb_handlers_getselector(f, type, &selector);

  UPB_ASSERT(found);
  return selector;
}

// Emits the Ruby string `str` for field `f` to `sink` as a start-string /
// string-data / end-string sequence. A nil `str` emits nothing.
static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
  upb_sink str_sink;

  if (str == Qnil) {
    return;
  }

  assert(BUILTIN_TYPE(str) == RUBY_T_STRING);

  // We should be guaranteed that the string has the correct encoding because
  // we ensured this at assignment time and then froze the string.
  if (upb_fielddef_type(f) != UPB_TYPE_STRING) {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyString8bitEncoding);
  } else {
    assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyStringUtf8Encoding);
  }

  upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str),
                    &str_sink);
  // The whole string is delivered in a single chunk.
  upb_sink_putstring(&str_sink, getsel(f, UPB_HANDLER_STRING),
                     RSTRING_PTR(str), RSTRING_LEN(str), NULL);
  upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
}

// Emits the sub-message `submsg` held in field `f` to `sink`, wrapped in
// start/end-submessage events. The nested message is serialized by putmsg()
// at depth + 1. A nil `submsg` emits nothing.
static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
                      int depth, bool emit_defaults, bool is_json) {
  upb_sink msg_sink;
  Descriptor* subdesc;

  if (submsg == Qnil) {
    return;
  }

  // The descriptor is read from the sub-message object itself.
  subdesc = ruby_to_Descriptor(
      rb_ivar_get(submsg, descriptor_instancevar_interned));

  upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &msg_sink);
  putmsg(submsg, subdesc, &msg_sink, depth + 1, emit_defaults, is_json, true);
  upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
}

// Emits the repeated field `ary` for field `f` to `sink` as a sequence:
// start-sequence, one event per element, end-sequence.
//
// A nil `ary` emits nothing. An empty `ary` emits nothing unless
// `emit_defaults` is set, in which case an empty sequence is emitted.
// Message elements are forwarded to putsubmsg() with the current `depth`
// (putsubmsg() itself adds one level).
static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
                   int depth, bool emit_defaults, bool is_json) {
  upb_sink subsink;
  upb_fieldtype_t type = upb_fielddef_type(f);
  upb_selector_t sel = 0;
  int size;

  if (ary == Qnil) return;

  // Compute the length once; it serves both the empty-array early-out and the
  // element loop below.
  size = NUM2INT(RepeatedField_length(ary));
  if (size == 0 && !emit_defaults) return;

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);

  // Only primitive element types use `sel`; strings and messages look up their
  // own selectors inside putstr()/putsubmsg().
  if (upb_fielddef_isprimitive(f)) {
    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  for (int i = 0; i < size; i++) {
    void* memory = RepeatedField_index_native(ary, i);
    switch (type) {
#define T(upbtypeconst, upbtype, ctype)                         \
  case upbtypeconst:                                            \
    upb_sink_put##upbtype(&subsink, sel, *((ctype *)memory));   \
    break;

      T(UPB_TYPE_FLOAT,  float,  float)
      T(UPB_TYPE_DOUBLE, double, double)
      T(UPB_TYPE_BOOL,   bool,   int8_t)
      // Enums share the int32 case that follows.
      case UPB_TYPE_ENUM:
      T(UPB_TYPE_INT32,  int32,  int32_t)
      T(UPB_TYPE_UINT32, uint32, uint32_t)
      T(UPB_TYPE_INT64,  int64,  int64_t)
      T(UPB_TYPE_UINT64, uint64, uint64_t)

      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        putstr(*((VALUE *)memory), f, &subsink);
        break;
      case UPB_TYPE_MESSAGE:
        putsubmsg(*((VALUE *)memory), f, &subsink, depth,
                  emit_defaults, is_json);
        break;

#undef T

    }
  }
  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}

1057 1058 1059 1060
static void put_ruby_value(VALUE value,
                           const upb_fielddef *f,
                           VALUE type_class,
                           int depth,
1061
                           upb_sink *sink,
1062 1063
                           bool emit_defaults,
                           bool is_json) {
1064 1065 1066 1067 1068
  if (depth > ENCODE_MAX_NESTING) {
    rb_raise(rb_eRuntimeError,
             "Maximum recursion depth exceeded during encoding.");
  }

1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107
  upb_selector_t sel = 0;
  if (upb_fielddef_isprimitive(f)) {
    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
      upb_sink_putint32(sink, sel, NUM2INT(value));
      break;
    case UPB_TYPE_INT64:
      upb_sink_putint64(sink, sel, NUM2LL(value));
      break;
    case UPB_TYPE_UINT32:
      upb_sink_putuint32(sink, sel, NUM2UINT(value));
      break;
    case UPB_TYPE_UINT64:
      upb_sink_putuint64(sink, sel, NUM2ULL(value));
      break;
    case UPB_TYPE_FLOAT:
      upb_sink_putfloat(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_DOUBLE:
      upb_sink_putdouble(sink, sel, NUM2DBL(value));
      break;
    case UPB_TYPE_ENUM: {
      if (TYPE(value) == T_SYMBOL) {
        value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
      }
      upb_sink_putint32(sink, sel, NUM2INT(value));
      break;
    }
    case UPB_TYPE_BOOL:
      upb_sink_putbool(sink, sel, value == Qtrue);
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      putstr(value, f, sink);
      break;
    case UPB_TYPE_MESSAGE:
1108
      putsubmsg(value, f, sink, depth, emit_defaults, is_json);
1109 1110 1111 1112
  }
}

// Emits the map `map` held in field `f` to `sink` as a sequence of entry
// sub-messages, each carrying a key and a value emitted via put_ruby_value()
// at depth + 1. A nil `map` emits nothing.
static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
                   int depth, bool emit_defaults, bool is_json) {
  Map* self;
  upb_sink seq_sink;
  const upb_fielddef* key_field;
  const upb_fielddef* value_field;
  Map_iter it;
  upb_status status;

  // NOTE(review): putary() converts RepeatedField_length() with NUM2INT before
  // comparing against 0, whereas Map_length() is compared directly here --
  // confirm Map_length() returns a C integer rather than a Ruby VALUE.
  if (map == Qnil || (!emit_defaults && Map_length(map) == 0)) {
    return;
  }

  self = ruby_to_Map(map);

  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &seq_sink);

  assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
  key_field = map_field_key(f);
  value_field = map_field_value(f);

  Map_begin(map, &it);
  while (!Map_done(&it)) {
    VALUE key = Map_iter_key(&it);
    VALUE value = Map_iter_value(&it);
    upb_sink entry_sink;

    // Each entry is written as its own sub-message containing key and value.
    upb_sink_startsubmsg(&seq_sink, getsel(f, UPB_HANDLER_STARTSUBMSG),
                         &entry_sink);
    upb_sink_startmsg(&entry_sink);

    // Keys are emitted with a nil type class; values use the map's value
    // type class.
    put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink,
                   emit_defaults, is_json);
    put_ruby_value(value, value_field, self->value_type_class, depth + 1,
                   &entry_sink, emit_defaults, is_json);

    upb_sink_endmsg(&entry_sink, &status);
    upb_sink_endsubmsg(&seq_sink, getsel(f, UPB_HANDLER_ENDSUBMSG));

    Map_next(&it);
  }

  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}

1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239
// Forward declaration: putjsonany() below needs the JSON serialize handlers
// of the payload type before this function is defined.
static const upb_handlers* msgdef_json_serialize_handlers(
    Descriptor* desc, bool preserve_proto_fieldnames);

// JSON-serializes a message whose well-known type is Any (see putmsg()).
//
// The type URL field is emitted as a string, then resolved against the
// generated descriptor pool. If the value field is non-empty, it is decoded as
// a message of the resolved type and that message is serialized into the same
// output via putmsg().
//
// Raises RuntimeError if the type URL does not consist of the
// "type.googleapis.com/" prefix followed by at least one character, or if the
// named type is not found in the pool.
static void putjsonany(VALUE msg_rb, const Descriptor* desc,
                       upb_sink* sink, int depth, bool emit_defaults) {
  // Required prefix of every type URL accepted here; the remainder of the URL
  // is looked up as a message name.
  static const char kTypeUrlPrefix[] = "type.googleapis.com/";
  const size_t kTypeUrlPrefixLen = sizeof(kTypeUrlPrefix) - 1;

  upb_status status;
  MessageHeader* msg = NULL;
  const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE);
  const upb_fielddef* value_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE);

  size_t type_url_offset;
  VALUE type_url_str_rb;
  const upb_msgdef *payload_type = NULL;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  /* Handle type url */
  type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset;
  type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE);
  if (RSTRING_LEN(type_url_str_rb) > 0) {
    putstr(type_url_str_rb, type_field, sink);
  }

  {
    const char* type_url_str = RSTRING_PTR(type_url_str_rb);
    size_t type_url_len = RSTRING_LEN(type_url_str_rb);
    DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool);

    // The URL must be strictly longer than the prefix so that a non-empty type
    // name remains after stripping it.
    if (type_url_len <= kTypeUrlPrefixLen ||
        strncmp(type_url_str, kTypeUrlPrefix, kTypeUrlPrefixLen) != 0) {
      rb_raise(rb_eRuntimeError, "Invalid type url: %s", type_url_str);
    }

    /* Resolve type url */
    type_url_str += kTypeUrlPrefixLen;
    type_url_len -= kTypeUrlPrefixLen;

    payload_type = upb_symtab_lookupmsg2(
        pool->symtab, type_url_str, type_url_len);
    if (payload_type == NULL) {
      rb_raise(rb_eRuntimeError, "Unknown type: %s", type_url_str);
    }
  }

  {
    size_t value_offset;
    VALUE value_str_rb;
    size_t value_len;

    value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset;
    value_str_rb = DEREF(Message_data(msg), value_offset, VALUE);
    value_len = RSTRING_LEN(value_str_rb);

    if (value_len > 0) {
      VALUE payload_desc_rb = get_def_obj(payload_type);
      Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb);
      VALUE payload_class = Descriptor_msgclass(payload_desc_rb);
      upb_sink subsink;
      bool is_wellknown;

      // The value field holds the serialized payload; decode it so it can be
      // re-emitted as JSON.
      VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb);

      is_wellknown =
          upb_msgdef_wellknowntype(payload_desc->msgdef) !=
              UPB_WELLKNOWN_UNSPECIFIED;
      if (is_wellknown) {
        upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0,
                          &subsink);
      }

      // Serialize the payload with its own JSON handlers but into the same
      // closure as the enclosing message. Message start/end events are only
      // emitted for well-known payload types (open_msg == is_wellknown).
      subsink.handlers =
          msgdef_json_serialize_handlers(payload_desc, true);
      subsink.closure = sink->closure;
      putmsg(payload_msg_rb, payload_desc, &subsink, depth, emit_defaults, true,
             is_wellknown);
    }
  }

  upb_sink_endmsg(sink, &status);
}

1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
// JSON-serializes a message whose well-known type is ListValue (see putmsg()).
// The repeated field with field number 1 is always written as a sequence: a
// nil or empty array is written as an explicit empty sequence, anything else
// is delegated to putary().
static void putjsonlistvalue(
    VALUE msg_rb, const Descriptor* desc,
    upb_sink* sink, int depth, bool emit_defaults) {
  upb_status status;
  upb_sink empty_seq_sink;
  MessageHeader* msg = NULL;
  const upb_fielddef* f = upb_msgdef_itof(desc->msgdef, 1);
  uint32_t offset =
      desc->layout->fields[upb_fielddef_index(f)].offset +
      sizeof(MessageHeader);
  VALUE ary;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

  upb_sink_startmsg(sink);

  ary = DEREF(msg, offset, VALUE);

  if (ary != Qnil && RepeatedField_size(ary) != 0) {
    putary(ary, f, sink, depth, emit_defaults, true);
  } else {
    // Emit the empty sequence directly.
    upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &empty_seq_sink);
    upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
  }

  upb_sink_endmsg(sink, &status);
}

Chris Fallin's avatar
Chris Fallin committed
1268
static void putmsg(VALUE msg_rb, const Descriptor* desc,
1269 1270
                   upb_sink *sink, int depth, bool emit_defaults,
                   bool is_json, bool open_msg) {
1271 1272 1273 1274
  MessageHeader* msg;
  upb_msg_field_iter i;
  upb_status status;

1275 1276
  if (is_json &&
      upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) {
1277 1278 1279 1280
    putjsonany(msg_rb, desc, sink, depth, emit_defaults);
    return;
  }

1281 1282 1283 1284 1285 1286
  if (is_json &&
      upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) {
    putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults);
    return;
  }

1287 1288 1289
  if (open_msg) {
    upb_sink_startmsg(sink);
  }
Chris Fallin's avatar
Chris Fallin committed
1290 1291 1292

  // Protect against cycles (possible because users may freely reassign message
  // and repeated fields) by imposing a maximum recursion depth.
1293
  if (depth > ENCODE_MAX_NESTING) {
Chris Fallin's avatar
Chris Fallin committed
1294 1295 1296 1297 1298 1299
    rb_raise(rb_eRuntimeError,
             "Maximum recursion depth exceeded during encoding.");
  }

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);

1300
  if (desc != msg->descriptor) {
1301
    rb_raise(rb_eArgError,
1302 1303 1304 1305 1306
             "The type of given msg is '%s', expect '%s'.",
             upb_msgdef_fullname(msg->descriptor->msgdef),
             upb_msgdef_fullname(desc->msgdef));
  }

1307 1308 1309
  for (upb_msg_field_begin(&i, desc->msgdef);
       !upb_msg_field_done(&i);
       upb_msg_field_next(&i)) {
Chris Fallin's avatar
Chris Fallin committed
1310
    upb_fielddef *f = upb_msg_iter_field(&i);
1311
    bool is_matching_oneof = false;
1312
    uint32_t offset =
1313 1314 1315 1316
        desc->layout->fields[upb_fielddef_index(f)].offset +
        sizeof(MessageHeader);

    if (upb_fielddef_containingoneof(f)) {
1317 1318 1319
      uint32_t oneof_case_offset =
          desc->layout->fields[upb_fielddef_index(f)].case_offset +
          sizeof(MessageHeader);
1320 1321 1322 1323 1324 1325 1326 1327
      // For a oneof, check that this field is actually present -- skip all the
      // below if not.
      if (DEREF(msg, oneof_case_offset, uint32_t) !=
          upb_fielddef_number(f)) {
        continue;
      }
      // Otherwise, fall through to the appropriate singular-field handler
      // below.
1328
      is_matching_oneof = true;
1329
    }
Chris Fallin's avatar
Chris Fallin committed
1330

1331
    if (is_map_field(f)) {
1332
      VALUE map = DEREF(msg, offset, VALUE);
1333
      if (map != Qnil || emit_defaults) {
1334
        putmap(map, f, sink, depth, emit_defaults, is_json);
1335 1336
      }
    } else if (upb_fielddef_isseq(f)) {
1337
      VALUE ary = DEREF(msg, offset, VALUE);
Chris Fallin's avatar
Chris Fallin committed
1338
      if (ary != Qnil) {
1339
        putary(ary, f, sink, depth, emit_defaults, is_json);
Chris Fallin's avatar
Chris Fallin committed
1340 1341
      }
    } else if (upb_fielddef_isstring(f)) {
1342
      VALUE str = DEREF(msg, offset, VALUE);
1343 1344 1345 1346 1347 1348 1349 1350 1351
      bool is_default = false;

      if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
        is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
      } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
        is_default = RSTRING_LEN(str) == 0;
      }

      if (is_matching_oneof || emit_defaults || !is_default) {
Chris Fallin's avatar
Chris Fallin committed
1352 1353 1354
        putstr(str, f, sink);
      }
    } else if (upb_fielddef_issubmsg(f)) {
1355 1356
      putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth,
                emit_defaults, is_json);
Chris Fallin's avatar
Chris Fallin committed
1357 1358 1359
    } else {
      upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));

1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372
#define T(upbtypeconst, upbtype, ctype, default_value)                          \
  case upbtypeconst: {                                                          \
      ctype value = DEREF(msg, offset, ctype);                                  \
      bool is_default = false;                                                  \
      if (upb_fielddef_haspresence(f)) {                                        \
        is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;  \
      } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {        \
        is_default = default_value == value;                                    \
      }                                                                         \
      if (is_matching_oneof || emit_defaults || !is_default) {                  \
        upb_sink_put##upbtype(sink, sel, value);                                \
      }                                                                         \
    }                                                                           \
Chris Fallin's avatar
Chris Fallin committed
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394
    break;

      switch (upb_fielddef_type(f)) {
        T(UPB_TYPE_FLOAT,  float,  float, 0.0)
        T(UPB_TYPE_DOUBLE, double, double, 0.0)
        T(UPB_TYPE_BOOL,   bool,   uint8_t, 0)
        case UPB_TYPE_ENUM:
        T(UPB_TYPE_INT32,  int32,  int32_t, 0)
        T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
        T(UPB_TYPE_INT64,  int64,  int64_t, 0)
        T(UPB_TYPE_UINT64, uint64, uint64_t, 0)

        case UPB_TYPE_STRING:
        case UPB_TYPE_BYTES:
        case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error.");
      }

#undef T

    }
  }

1395 1396 1397 1398 1399
  stringsink* unknown = msg->unknown_fields;
  if (unknown != NULL) {
    upb_sink_putunknown(sink, unknown->ptr, unknown->len);
  }

1400 1401 1402
  if (open_msg) {
    upb_sink_endmsg(sink, &status);
  }
Chris Fallin's avatar
Chris Fallin committed
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
}

// Returns the binary-format encoder handlers for `desc`, creating them on
// first use and caching them in desc->pb_serialize_handlers (whose address
// doubles as owner).
static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
  if (desc->pb_serialize_handlers != NULL) {
    return desc->pb_serialize_handlers;
  }

  desc->pb_serialize_handlers =
      upb_pb_encoder_newhandlers(desc->msgdef, &desc->pb_serialize_handlers);
  return desc->pb_serialize_handlers;
}

1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
// Returns the JSON printer handlers for `desc`, creating them on first use.
// Two variants are cached separately, one per value of
// `preserve_proto_fieldnames`, which is passed on to
// upb_json_printer_newhandlers().
//
// NOTE(review): the slot named json_serialize_handlers_preserve holds the
// handlers built with preserve == false, and json_serialize_handlers the ones
// built with preserve == true. The names look swapped relative to the flag --
// confirm against the Descriptor definition and its cleanup code before
// renaming anything.
static const upb_handlers* msgdef_json_serialize_handlers(
    Descriptor* desc, bool preserve_proto_fieldnames) {
  if (!preserve_proto_fieldnames) {
    if (desc->json_serialize_handlers_preserve == NULL) {
      desc->json_serialize_handlers_preserve = upb_json_printer_newhandlers(
          desc->msgdef, false, &desc->json_serialize_handlers_preserve);
    }
    return desc->json_serialize_handlers_preserve;
  }

  if (desc->json_serialize_handlers == NULL) {
    desc->json_serialize_handlers = upb_json_printer_newhandlers(
        desc->msgdef, true, &desc->json_serialize_handlers);
  }
  return desc->json_serialize_handlers;
}

/*
 * call-seq:
 *     MessageClass.encode(msg) => bytes
 *
 * Serializes the given message object to protocol buffers wire format and
 * returns the result as a new string.
 */
VALUE Message_encode(VALUE klass, VALUE msg_rb) {
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  Descriptor* desc = ruby_to_Descriptor(descriptor);
  stringsink sink;
  const upb_handlers* serialize_handlers;
  stackenv se;
  upb_pb_encoder* encoder;
  VALUE ret;

  stringsink_init(&sink);

  serialize_handlers = msgdef_pb_serialize_handlers(desc);

  // The encoder writes its output into `sink`; putmsg() drives the encoder's
  // input with the message contents (top level, defaults not emitted).
  stackenv_init(&se, "Error occurred during encoding: %s");
  encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);

  putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true);

  // Copy the bytes into a Ruby string before releasing the buffers.
  ret = rb_str_new(sink.ptr, sink.len);

  stackenv_uninit(&se);
  stringsink_uninit(&sink);

  return ret;
}

/*
 * call-seq:
1470
 *     MessageClass.encode_json(msg, options = {}) => json_string
Chris Fallin's avatar
Chris Fallin committed
1471 1472
 *
 * Encodes the given message object into its serialized JSON representation.
1473 1474 1475
 * @param options [Hash] options for the decoder
 *  preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase)
 *  emit_defaults: set true to emit 0/false values (default is to omit them)
Chris Fallin's avatar
Chris Fallin committed
1476
 */
1477
VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
1478
  VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
Chris Fallin's avatar
Chris Fallin committed
1479
  Descriptor* desc = ruby_to_Descriptor(descriptor);
1480 1481
  VALUE msg_rb;
  VALUE preserve_proto_fieldnames = Qfalse;
1482
  VALUE emit_defaults = Qfalse;
Chris Fallin's avatar
Chris Fallin committed
1483
  stringsink sink;
1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497

  if (argc < 1 || argc > 2) {
    rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
  }

  msg_rb = argv[0];

  if (argc == 2) {
    VALUE hash_args = argv[1];
    if (TYPE(hash_args) != T_HASH) {
      rb_raise(rb_eArgError, "Expected hash arguments.");
    }
    preserve_proto_fieldnames = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
1498 1499 1500

    emit_defaults = rb_hash_lookup2(
        hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse);
1501 1502
  }

Chris Fallin's avatar
Chris Fallin committed
1503 1504
  stringsink_init(&sink);

1505 1506
  {
    const upb_handlers* serialize_handlers =
1507
        msgdef_json_serialize_handlers(desc, RTEST(preserve_proto_fieldnames));
1508 1509 1510
    upb_json_printer* printer;
    stackenv se;
    VALUE ret;
Chris Fallin's avatar
Chris Fallin committed
1511

1512 1513
    stackenv_init(&se, "Error occurred during encoding: %s");
    printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
Chris Fallin's avatar
Chris Fallin committed
1514

1515 1516
    putmsg(msg_rb, desc, upb_json_printer_input(printer), 0,
           RTEST(emit_defaults), true, true);
Chris Fallin's avatar
Chris Fallin committed
1517

1518
    ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
Chris Fallin's avatar
Chris Fallin committed
1519

1520 1521
    stackenv_uninit(&se);
    stringsink_uninit(&sink);
Chris Fallin's avatar
Chris Fallin committed
1522

1523 1524
    return ret;
  }
Chris Fallin's avatar
Chris Fallin committed
1525 1526
}

1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
static void discard_unknown(VALUE msg_rb, const Descriptor* desc);

// Reads the descriptor instance variable stored on `submsg`, converts it to a
// Descriptor, and recursively discards unknown fields in that submessage.
static void discard_unknown_in_submsg(VALUE submsg) {
  VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
  discard_unknown(submsg, ruby_to_Descriptor(descriptor));
}

// Drops the unknown-field data attached to `msg_rb` and then walks every field
// of `desc->msgdef`, recursing into each submessage that is present: values of
// map fields, elements of repeated fields, and singular submessage fields.
// Fields that are not submessages, unset oneof members, and nil containers or
// submessages are skipped.
static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
  MessageHeader* header;
  upb_msg_field_iter field_it;

  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, header);

  // NOTE(review): only stringsink_uninit() is called before the pointer is
  // cleared. If unknown_fields is a separately heap-allocated struct, the
  // struct itself may also need to be released here -- confirm against the
  // allocation site.
  if (header->unknown_fields != NULL) {
    stringsink_uninit(header->unknown_fields);
    header->unknown_fields = NULL;
  }

  for (upb_msg_field_begin(&field_it, desc->msgdef);
       !upb_msg_field_done(&field_it);
       upb_msg_field_next(&field_it)) {
    upb_fielddef *field = upb_msg_iter_field(&field_it);
    // Field storage is addressed relative to the start of the MessageHeader.
    uint32_t value_offset =
        desc->layout->fields[upb_fielddef_index(field)].offset +
        sizeof(MessageHeader);

    if (upb_fielddef_containingoneof(field)) {
      uint32_t case_offset =
          desc->layout->fields[upb_fielddef_index(field)].case_offset +
          sizeof(MessageHeader);
      // A oneof member is only processed when the stored case matches this
      // field's number; otherwise there is nothing to visit for it.
      if (DEREF(header, case_offset, uint32_t) !=
          upb_fielddef_number(field)) {
        continue;
      }
    }

    // Only submessage-typed fields can contain nested messages.
    if (!upb_fielddef_issubmsg(field)) {
      continue;
    }

    if (is_map_field(field)) {
      VALUE map;
      Map_iter entry_it;

      // Maps whose values are not submessages have nothing to recurse into.
      if (!upb_fielddef_issubmsg(map_field_value(field))) {
        continue;
      }
      map = DEREF(header, value_offset, VALUE);
      if (map == Qnil) {
        continue;
      }
      for (Map_begin(map, &entry_it);
           !Map_done(&entry_it);
           Map_next(&entry_it)) {
        discard_unknown_in_submsg(Map_iter_value(&entry_it));
      }
    } else if (upb_fielddef_isseq(field)) {
      VALUE ary = DEREF(header, value_offset, VALUE);
      int count;
      int idx;

      if (ary == Qnil) {
        continue;
      }
      count = NUM2INT(RepeatedField_length(ary));
      for (idx = 0; idx < count; idx++) {
        // The native slot of a repeated submessage field holds a VALUE.
        VALUE* slot = (VALUE *)RepeatedField_index_native(ary, idx);
        discard_unknown_in_submsg(*slot);
      }
    } else {
      VALUE submsg = DEREF(header, value_offset, VALUE);
      if (submsg != Qnil) {
        discard_unknown_in_submsg(submsg);
      }
    }
  }
}

/*
 * call-seq:
 *     Google::Protobuf.discard_unknown(msg)
 *
 * Discard unknown fields in the given message object and recursively discard
 * unknown fields in submessages.
 *
 * Raises ArgumentError if msg is a RepeatedField or a Map instead of a
 * message. Returns nil.
 */
VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
  VALUE klass = CLASS_OF(msg_rb);
  VALUE descriptor;

  // Rule out the container classes before touching the descriptor, so that
  // the ArgumentError below is what callers see for them rather than whatever
  // the descriptor conversion does with a class that is not a message class.
  if (klass == cRepeatedField || klass == cMap) {
    rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
  }

  // The descriptor is read from the message's class.
  descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
  discard_unknown(msg_rb, ruby_to_Descriptor(descriptor));
  return Qnil;
}