Commit 97b663a8 authored by Chris Fallin's avatar Chris Fallin

Update upb amalgamation.

parent 4c922897
...@@ -3416,6 +3416,8 @@ char *upb_strdup(const char *s) { ...@@ -3416,6 +3416,8 @@ char *upb_strdup(const char *s) {
} }
char *upb_strdup2(const char *s, size_t len) { char *upb_strdup2(const char *s, size_t len) {
// Prevent overflow errors.
if (len == SIZE_MAX) return NULL;
// Always null-terminate, even if binary data; but don't rely on the input to // Always null-terminate, even if binary data; but don't rely on the input to
// have a null-terminating byte since it may be a raw binary buffer. // have a null-terminating byte since it may be a raw binary buffer.
size_t n = len + 1; size_t n = len + 1;
...@@ -4230,8 +4232,10 @@ static void nullz(upb_status *status) { ...@@ -4230,8 +4232,10 @@ static void nullz(upb_status *status) {
} }
void upb_status_clear(upb_status *status) { void upb_status_clear(upb_status *status) {
upb_status blank = UPB_STATUS_INIT; if (!status) return;
upb_status_copy(status, &blank); status->ok_ = true;
status->code_ = 0;
status->msg[0] = '\0';
} }
bool upb_ok(const upb_status *status) { return status->ok_; } bool upb_ok(const upb_status *status) { return status->ok_; }
...@@ -5998,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) { ...@@ -5998,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) {
case OP_SETDELIM: case OP_SETDELIM:
case OP_HALT: case OP_HALT:
case OP_RET: case OP_RET:
case OP_DISPATCH:
put32(c, op); put32(c, op);
break; break;
case OP_PARSE_DOUBLE: case OP_PARSE_DOUBLE:
...@@ -6078,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) { ...@@ -6078,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET), OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM), OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP), OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
OP(SETBIGGROUPNUM), OP(HALT), OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
}; };
return op > OP_HALT ? names[0] : names[op]; return op > OP_HALT ? names[0] : names[op];
#undef OP #undef OP
...@@ -6110,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { ...@@ -6110,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
upb_handlers_msgdef(method->dest_handlers_))); upb_handlers_msgdef(method->dest_handlers_)));
break; break;
} }
case OP_DISPATCH:
case OP_STARTMSG: case OP_STARTMSG:
case OP_ENDMSG: case OP_ENDMSG:
case OP_PUSHLENDELIM: case OP_PUSHLENDELIM:
...@@ -6455,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { ...@@ -6455,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
putop(c, OP_SETDISPATCH, &method->dispatch); putop(c, OP_SETDISPATCH, &method->dispatch);
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD); label(c, LABEL_FIELD);
uint32_t* start_pc = c->pc;
upb_msg_iter i; upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) { for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i); const upb_fielddef *f = upb_msg_iter_field(&i);
...@@ -6470,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) { ...@@ -6470,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
} }
} }
// If there were no fields, or if no handlers were defined, we need to
// generate a non-empty loop body so that we can at least dispatch for unknown
// fields and check for the end of the message.
if (c->pc == start_pc) {
// Check for end-of-message.
putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
// Unconditionally dispatch.
putop(c, OP_DISPATCH, 0);
}
// For now we just loop back to the last field of the message (or if none, // For now we just loop back to the last field of the message (or if none,
// the DISPATCH opcode for the message. // the DISPATCH opcode for the message).
putop(c, OP_BRANCH, -LABEL_FIELD); putop(c, OP_BRANCH, -LABEL_FIELD);
// Insert both a label and a dispatch table entry for this end-of-msg. // Insert both a label and a dispatch table entry for this end-of-msg.
...@@ -7455,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, ...@@ -7455,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
if (result == DECODE_MISMATCH) goto badtag; if (result == DECODE_MISMATCH) goto badtag;
if (result >= 0) return result; if (result >= 0) return result;
}) })
VMCASE(OP_DISPATCH, {
CHECK_RETURN(dispatch(d));
})
VMCASE(OP_HALT, { VMCASE(OP_HALT, {
return size; return size;
}) })
...@@ -7513,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) { ...@@ -7513,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
// Rewind from OP_TAG* to OP_CHECKDELIM. // Rewind from OP_TAG* to OP_CHECKDELIM.
assert(getop(*d->pc) == OP_TAG1 || assert(getop(*d->pc) == OP_TAG1 ||
getop(*d->pc) == OP_TAG2 || getop(*d->pc) == OP_TAG2 ||
getop(*d->pc) == OP_TAGN); getop(*d->pc) == OP_TAGN ||
getop(*d->pc == OP_DISPATCH));
d->pc = p; d->pc = p;
} }
upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
...@@ -8648,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { ...@@ -8648,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
#define PARSER_CHECK_RETURN(x) if (!(x)) return false #define PARSER_CHECK_RETURN(x) if (!(x)) return false
// Used to signal that a capture has been suspended.
static char suspend_capture;
static upb_selector_t getsel_for_handlertype(upb_json_parser *p, static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
upb_handlertype_t type) { upb_handlertype_t type) {
upb_selector_t sel; upb_selector_t sel;
...@@ -8661,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) { ...@@ -8661,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
p, upb_handlers_getprimitivehandlertype(p->top->f)); p, upb_handlers_getprimitivehandlertype(p->top->f));
} }
// Begins a new object member: no field may be selected on the top frame and
// no text may be accumulated yet.  Resets the accumulated length to zero.
static void start_member(upb_json_parser *p) {
  assert(p->top->f == NULL);
  assert(p->accumulated == NULL);

  p->accumulated_len = 0;
}
// Finishes an object member name: looks the accumulated text up as a field
// name in the top frame's message definition (p->top->m).
//
// On success the field is stored in p->top->f, the accumulated pointer is
// cleared and true is returned.  If no field has that name, an error is set
// on p->status and false is returned.
static bool end_member(upb_json_parser *p) {
  // TODO(haberman): support keys that span buffers or have escape sequences.
  assert(p->top->f == NULL);
  assert(p->accumulated != NULL);

  const upb_fielddef *field =
      upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);

  if (field) {
    p->top->f = field;
    p->accumulated = NULL;
    return true;
  }

  // TODO(haberman): Ignore unknown fields if requested/configured to do so.
  upb_status_seterrf(p->status, "No such field: %.*s\n",
                     (int)p->accumulated_len, p->accumulated);
  return false;
}
// Emits a start-of-message event on the sink of the current (top) frame.
static void start_object(upb_json_parser *p) {
  upb_jsonparser_frame *frame = p->top;
  upb_sink_startmsg(&frame->sink);
}
// Emits an end-of-message event on the sink of the current (top) frame.
//
// upb_sink_endmsg() requires a status object.  It is initialized here so that
// the callee never observes indeterminate contents.  The status is not
// inspected afterwards, so any error reported through it is dropped (this
// function has no way to report failure to its caller).
static void end_object(upb_json_parser *p) {
  upb_status status = UPB_STATUS_INIT;
  upb_sink_endmsg(&p->top->sink, &status);
}
static bool check_stack(upb_json_parser *p) { static bool check_stack(upb_json_parser *p) {
if ((p->top + 1) == p->limit) { if ((p->top + 1) == p->limit) {
upb_status_seterrmsg(p->status, "Nesting too deep"); upb_status_seterrmsg(p->status, "Nesting too deep");
...@@ -8705,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) { ...@@ -8705,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) {
return true; return true;
} }
static bool start_subobject(upb_json_parser *p) { // There are GCC/Clang built-ins for overflow checking which we could start
assert(p->top->f); // using if there was any performance benefit to it.
if (!upb_fielddef_issubmsg(p->top->f)) {
upb_status_seterrf(p->status,
"Object specified for non-message/group field: %s",
upb_fielddef_name(p->top->f));
return false;
}
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
inner->m = upb_fielddef_msgsubdef(p->top->f);
inner->f = NULL;
p->top = inner;
static bool checked_add(size_t a, size_t b, size_t *c) {
if (SIZE_MAX - a < b) return false;
*c = a + b;
return true; return true;
} }
static void end_subobject(upb_json_parser *p) { static size_t saturating_multiply(size_t a, size_t b) {
p->top--; // size_t is unsigned, so this is defined behavior even on overflow.
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); size_t ret = a * b;
upb_sink_endsubmsg(&p->top->sink, sel); if (b != 0 && ret / b != a) {
} ret = SIZE_MAX;
static bool start_array(upb_json_parser *p) {
assert(p->top->f);
if (!upb_fielddef_isseq(p->top->f)) {
upb_status_seterrf(p->status,
"Array specified for non-repeated field: %s",
upb_fielddef_name(p->top->f));
return false;
} }
return ret;
if (!check_stack(p)) return false;
upb_jsonparser_frame *inner = p->top + 1;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
upb_sink_startseq(&p->top->sink, sel, &inner->sink);
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true;
}
static void end_array(upb_json_parser *p) {
assert(p->top > p->stack);
p->top--;
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
upb_sink_endseq(&p->top->sink, sel);
} }
static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
static bool parser_putbool(upb_json_parser *p, bool val) {
if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
upb_status_seterrf(p->status,
"Boolean value specified for non-bool field: %s",
upb_fielddef_name(p->top->f));
return false;
}
bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); /* Base64 decoding ************************************************************/
UPB_ASSERT_VAR(ok, ok);
return true;
}
static void start_text(upb_json_parser *p, const char *ptr) { // TODO(haberman): make this streaming.
p->text_begin = ptr;
}
static const signed char b64table[] = { static const signed char b64table[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
...@@ -8901,148 +8835,231 @@ badpadding: ...@@ -8901,148 +8835,231 @@ badpadding:
return false; return false;
} }
static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
assert(!p->accumulated); // TODO: handle this case.
p->accumulated = p->text_begin;
p->accumulated_len = ptr - p->text_begin;
if (p->top->f && upb_fielddef_isstring(p->top->f)) { /* Accumulate buffer **********************************************************/
// This is a string field (as opposed to a member name).
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); // Functionality for accumulating a buffer.
if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) { //
PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated, // Some parts of the parser need an entire value as a contiguous string. For
p->accumulated_len)); // example, to look up a member name in a hash table, or to turn a string into
} else { // a number, the relevant library routines need the input string to be in
upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); // contiguous memory, even if the value spanned two or more buffers in the
} // input. These routines handle that.
//
// In the common case we can just point to the input buffer to get this
// contiguous string and avoid any actual copy. So we optimistically begin
// this way. But there are a few cases where we must instead copy into a
// separate buffer:
//
// 1. The string was not contiguous in the input (it spanned buffers).
//
// 2. The string included escape sequences that need to be interpreted to get
// the true value in a contiguous buffer.
// Debug check: asserts that nothing is currently accumulated, i.e. that the
// accumulated pointer is NULL and the accumulated length is zero.
static void assert_accumulate_empty(upb_json_parser *p) {
  assert(!p->accumulated);
  assert(!p->accumulated_len);
}
static void accumulate_clear(upb_json_parser *p) {
p->accumulated = NULL; p->accumulated = NULL;
} else if (p->top->f && p->accumulated_len = 0;
upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && }
!is_num) {
// Enum case: resolve enum symbolic name to integer value. // Used internally by accumulate_append().
const upb_enumdef *enumdef = static bool accumulate_realloc(upb_json_parser *p, size_t need) {
(const upb_enumdef*)upb_fielddef_subdef(p->top->f); size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
while (new_size < need) {
new_size = saturating_multiply(new_size, 2);
}
int32_t int_val = 0; void *mem = realloc(p->accumulate_buf, new_size);
if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, if (!mem) {
&int_val)) { upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
upb_selector_t sel = parser_getsel(p);
upb_sink_putint32(&p->top->sink, sel, int_val);
} else {
upb_status_seterrmsg(p->status, "Enum value name unknown");
return false; return false;
} }
p->accumulated = NULL;
}
p->accumulate_buf = mem;
p->accumulate_buf_size = new_size;
return true; return true;
} }
static bool start_stringval(upb_json_parser *p) { // Logically appends the given data to the append buffer.
assert(p->top->f); // If "can_alias" is true, we will try to avoid actually copying, but the buffer
// must be valid until the next accumulate_append() call (if any).
if (upb_fielddef_isstring(p->top->f)) { static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
if (!check_stack(p)) return false; bool can_alias) {
if (!p->accumulated && can_alias) {
p->accumulated = buf;
p->accumulated_len = len;
return true;
}
// Start a new parser frame: parser frames correspond one-to-one with size_t need;
// handler frames, and string events occur in a sub-frame. if (!checked_add(p->accumulated_len, len, &need)) {
upb_jsonparser_frame *inner = p->top + 1; upb_status_seterrmsg(p->status, "Integer overflow.");
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); return false;
upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); }
inner->m = p->top->m;
inner->f = p->top->f;
p->top = inner;
return true; if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
} else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
// Do nothing -- symbolic enum names in quotes remain in the
// current parser frame.
return true;
} else {
upb_status_seterrf(p->status,
"String specified for non-string/non-enum field: %s",
upb_fielddef_name(p->top->f));
return false; return false;
} }
if (p->accumulated != p->accumulate_buf) {
memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
p->accumulated = p->accumulate_buf;
}
memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
p->accumulated_len += len;
return true;
} }
static void end_stringval(upb_json_parser *p) { // Returns a pointer to the data accumulated since the last accumulate_clear()
if (upb_fielddef_isstring(p->top->f)) { // call, and writes the length to *len. This will point either to the input
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); // buffer or a temporary accumulate buffer.
upb_sink_endstr(&p->top->sink, sel); static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
p->top--; assert(p->accumulated);
} *len = p->accumulated_len;
return p->accumulated;
} }
static void start_number(upb_json_parser *p, const char *ptr) {
start_text(p, ptr); /* Multi-part text data *******************************************************/
assert(p->accumulated == NULL);
// When we have text data in the input, it can often come in multiple segments.
// For example, there may be some raw string data followed by an escape
// sequence. The two segments are processed with different logic. Also buffer
// seams in the input can cause multiple segments.
//
// As we see segments, there are two main cases for how we want to process them:
//
// 1. we want to push the captured input directly to string handlers.
//
// 2. we need to accumulate all the parts into a contiguous buffer for further
// processing (field name lookup, string->number conversion, etc).
// This is the set of states for p->multipart_state.
enum {
// We are not currently processing multipart data.
MULTIPART_INACTIVE = 0,
// We are processing multipart data by accumulating it into a contiguous
// buffer.
MULTIPART_ACCUMULATE = 1,
// We are processing multipart data by pushing each part directly to the
// current string handlers.
MULTIPART_PUSHEAGERLY = 2
};
// Start a multi-part text value where we accumulate the data for processing at
// the end.
static void multipart_startaccum(upb_json_parser *p) {
assert_accumulate_empty(p);
assert(p->multipart_state == MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_ACCUMULATE;
} }
static void end_number(upb_json_parser *p, const char *ptr) { // Start a multi-part text value where we immediately push text data to a string
end_text(p, ptr, true); // value with the given selector.
const char *myend = p->accumulated + p->accumulated_len; static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
char *end; assert_accumulate_empty(p);
assert(p->multipart_state == MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_PUSHEAGERLY;
p->string_selector = sel;
}
switch (upb_fielddef_type(p->top->f)) { static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
case UPB_TYPE_ENUM: bool can_alias) {
case UPB_TYPE_INT32: { switch (p->multipart_state) {
long val = strtol(p->accumulated, &end, 0); case MULTIPART_INACTIVE:
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend) upb_status_seterrmsg(
assert(false); p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
else return false;
upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
break; case MULTIPART_ACCUMULATE:
} if (!accumulate_append(p, buf, len, can_alias)) {
case UPB_TYPE_INT64: { return false;
long long val = strtoll(p->accumulated, &end, 0);
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
assert(false);
else
upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
break;
} }
case UPB_TYPE_UINT32: {
unsigned long val = strtoul(p->accumulated, &end, 0);
if (val > UINT32_MAX || errno == ERANGE || end != myend)
assert(false);
else
upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
break; break;
}
case UPB_TYPE_UINT64: { case MULTIPART_PUSHEAGERLY: {
unsigned long long val = strtoull(p->accumulated, &end, 0); const upb_bufhandle *handle = can_alias ? p->handle : NULL;
if (val > UINT64_MAX || errno == ERANGE || end != myend) upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
assert(false);
else
upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
break; break;
} }
case UPB_TYPE_DOUBLE: {
double val = strtod(p->accumulated, &end);
if (errno == ERANGE || end != myend)
assert(false);
else
upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
break;
} }
case UPB_TYPE_FLOAT: {
float val = strtof(p->accumulated, &end); return true;
if (errno == ERANGE || end != myend) }
assert(false);
else // Note: this invalidates the accumulate buffer! Call only after reading its
upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); // contents.
break; static void multipart_end(upb_json_parser *p) {
assert(p->multipart_state != MULTIPART_INACTIVE);
p->multipart_state = MULTIPART_INACTIVE;
accumulate_clear(p);
}
/* Input capture **************************************************************/
// Functionality for capturing a region of the input as text. Gracefully
// handles the case where a buffer seam occurs in the middle of the captured
// region.
// Starts capturing input at "ptr".  A multipart value must already be active
// and no capture may be in progress.
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(MULTIPART_INACTIVE != p->multipart_state);
  assert(!p->capture);

  p->capture = ptr;
}
static bool capture_end(upb_json_parser *p, const char *ptr) {
assert(p->capture);
if (multipart_text(p, p->capture, ptr - p->capture, true)) {
p->capture = NULL;
return true;
} else {
return false;
} }
default: }
assert(false);
// This is called at the end of each input buffer (ie. when we have hit a
// buffer seam). If we are in the middle of capturing the input, this
// processes the unprocessed capture region.
static void capture_suspend(upb_json_parser *p, const char **ptr) {
if (!p->capture) return;
if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
// We use this as a signal that we were in the middle of capturing, and
// that capturing should resume at the beginning of the next buffer.
//
// We can't use *ptr here, because we have no guarantee that this pointer
// will be valid when we resume (if the underlying memory is freed, then
// using the pointer at all, even to compare to NULL, is likely undefined
// behavior).
p->capture = &suspend_capture;
} else {
// Need to back up the pointer to the beginning of the capture, since
// we were not able to actually preserve it.
*ptr = p->capture;
} }
}
p->accumulated = NULL; static void capture_resume(upb_json_parser *p, const char *ptr) {
if (p->capture) {
assert(p->capture == &suspend_capture);
p->capture = ptr;
}
} }
/* Callbacks from the parser **************************************************/
// These are the functions called directly from the parser itself.
// We define these in the same order as their declarations in the parser.
static char escape_char(char in) { static char escape_char(char in) {
switch (in) { switch (in) {
case 'r': return '\r'; case 'r': return '\r';
...@@ -9059,35 +9076,33 @@ static char escape_char(char in) { ...@@ -9059,35 +9076,33 @@ static char escape_char(char in) {
} }
} }
static void escape(upb_json_parser *p, const char *ptr) { static bool escape(upb_json_parser *p, const char *ptr) {
char ch = escape_char(*ptr); char ch = escape_char(*ptr);
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); return multipart_text(p, &ch, 1, false);
upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL); }
static void start_hex(upb_json_parser *p) {
p->digit = 0;
} }
static uint8_t hexdigit(char ch) { static void hexdigit(upb_json_parser *p, const char *ptr) {
char ch = *ptr;
p->digit <<= 4;
if (ch >= '0' && ch <= '9') { if (ch >= '0' && ch <= '9') {
return ch - '0'; p->digit += (ch - '0');
} else if (ch >= 'a' && ch <= 'f') { } else if (ch >= 'a' && ch <= 'f') {
return ch - 'a' + 10; p->digit += ((ch - 'a') + 10);
} else { } else {
assert(ch >= 'A' && ch <= 'F'); assert(ch >= 'A' && ch <= 'F');
return ch - 'A' + 10; p->digit += ((ch - 'A') + 10);
} }
} }
static void start_hex(upb_json_parser *p, const char *ptr) { static bool end_hex(upb_json_parser *p) {
start_text(p, ptr); uint32_t codepoint = p->digit;
}
static void hex(upb_json_parser *p, const char *end) {
const char *start = p->text_begin;
UPB_ASSERT_VAR(end, end - start == 4);
uint16_t codepoint =
(hexdigit(start[0]) << 12) |
(hexdigit(start[1]) << 8) |
(hexdigit(start[2]) << 4) |
hexdigit(start[3]);
// emit the codepoint as UTF-8. // emit the codepoint as UTF-8.
char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
int length = 0; int length = 0;
...@@ -9110,160 +9125,466 @@ static void hex(upb_json_parser *p, const char *end) { ...@@ -9110,160 +9125,466 @@ static void hex(upb_json_parser *p, const char *end) {
// TODO(haberman): Handle high surrogates: if codepoint is a high surrogate // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
// we have to wait for the next escape to get the full code point). // we have to wait for the next escape to get the full code point).
upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); return multipart_text(p, utf8, length, false);
upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
} }
#define CHECK_RETURN_TOP(x) if (!(x)) goto error static void start_text(upb_json_parser *p, const char *ptr) {
capture_begin(p, ptr);
}
// Parser callback: the current run of text ends at "ptr".  Forwards to
// capture_end() and returns its result.
static bool end_text(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  return captured;
}
// Parser callback: a number begins at "ptr".
//
// Numbers are accumulated into a contiguous buffer so that end_number() can
// convert them; accumulation must be started before the capture, because
// capture_begin() asserts that a multipart value is active.
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);  // Switch to accumulate mode first.
  capture_begin(p, ptr);    // Then start capturing the digits.
}
// Parser callback: the number that began at start_number() ends at "ptr".
//
// Finishes the capture, NUL-terminates the accumulated text, converts it
// according to the type of the current field (p->top->f) and pushes the
// result to the current sink.
//
// Returns true on success.  Returns false if the capture or the
// NUL-termination fails, or (after setting "error parsing number" on
// p->status) if the text is out of range for the field type or is not
// consumed entirely by the conversion.  multipart_end() is called on both the
// success path and the conversion-error path.
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  // strtol() and friends unfortunately do not support specifying the length of
  // the input string, so we need to force a copy into a NUL-terminated buffer.
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  const char *myend = buf + len - 1;  // One for the NUL terminator.
  char *end;

  // The strto*() family only ever sets errno; it never clears it.  Reset it so
  // that a stale ERANGE left behind by earlier code cannot make a valid number
  // look like an overflow.
  errno = 0;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32: {
      long val = strtol(buf, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_INT64: {
      long long val = strtoll(buf, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(buf, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT64: {
      unsigned long long val = strtoull(buf, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_FLOAT: {
      float val = strtof(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
      break;
    }
    default:
      assert(false);
  }

  multipart_end(p);
  return true;

err:
  // "buf" is still valid here: multipart_end() is what invalidates it, so the
  // message must be formatted first.
  upb_status_seterrf(p->status, "error parsing number: %s", buf);
  multipart_end(p);
  return false;
}
// Pushes a boolean value to the current sink.
//
// Returns false, after setting an error on p->status, if the current field
// (p->top->f) is not of type UPB_TYPE_BOOL; returns true otherwise.
static bool parser_putbool(upb_json_parser *p, bool val) {
  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(ok, ok);
    return true;
  }

  upb_status_seterrf(p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
// Parser callback: a quoted string value begins for the current field.
//
//  - String-typed fields (per upb_fielddef_isstring()) get a new parser frame
//    and a STARTSTR event.  UPB_TYPE_STRING data is then pushed eagerly to
//    the string handler; anything else is accumulated.
//  - Enum fields stay in the current frame and accumulate the symbolic name.
//  - Any other field type is an error.
//
// Returns false if the frame stack is full (see check_stack()) or if the
// field cannot take a string; in the latter case an error is set on
// p->status here.
static bool start_stringval(upb_json_parser *p) {
  assert(p->top->f);

  if (!upb_fielddef_isstring(p->top->f)) {
    if (upb_fielddef_type(p->top->f) != UPB_TYPE_ENUM) {
      upb_status_seterrf(p->status,
                         "String specified for non-string/non-enum field: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    // No need to push a frame -- symbolic enum names in quotes remain in the
    // current parser frame.
    //
    // Enum string values must accumulate so we can look up the value in a
    // table once it is complete.
    multipart_startaccum(p);
    return true;
  }

  if (!check_stack(p)) return false;

  // Start a new parser frame: parser frames correspond one-to-one with
  // handler frames, and string events occur in a sub-frame.
  upb_jsonparser_frame *outer = p->top;
  upb_jsonparser_frame *inner = outer + 1;
  upb_selector_t startstr_sel =
      getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(&outer->sink, startstr_sel, 0, &inner->sink);
  inner->m = outer->m;
  inner->f = outer->f;
  p->top = inner;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
    // For STRING fields we push data directly to the handlers as it is
    // parsed.  We don't do this yet for BYTES fields, because our base64
    // decoder is not streaming.
    //
    // TODO(haberman): make base64 decoding streaming also.
    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
  } else {
    multipart_startaccum(p);
  }
  return true;
}
// Parser callback: the quoted string value for the current field is complete.
//
//  - UPB_TYPE_BYTES: the accumulated text is handed to base64_push(); on
//    failure this returns false immediately (without ending the multipart
//    value).  On success it continues exactly like a STRING field.
//  - UPB_TYPE_STRING: emits ENDSTR on the current sink and pops the frame
//    that start_stringval() pushed.
//  - UPB_TYPE_ENUM: resolves the accumulated symbolic name to its integer
//    value and pushes it as an int32; an unknown name sets an error on
//    p->status.
//
// In all cases other than the base64 failure, multipart_end() is called
// before returning.  Returns true on success, false on error.
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      // Fall through.

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      // Resolve enum symbolic name to integer value.
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        // The precision argument of "%.*s" must be an int, not a size_t.
        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
      }

      break;
    }

    default:
      assert(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);
  return ok;
}
// Parser callback: an object member name begins.  No field may be selected
// on the current frame yet; the name is accumulated so that end_member() can
// look it up once it is complete.
static void start_member(upb_json_parser *p) {
  assert(p->top->f == NULL);
  multipart_startaccum(p);
}
// Parser callback: an object member name is complete.
//
// Looks the accumulated name up in the current frame's message definition.
// On success the field is stored in p->top->f, the multipart value is ended
// and true is returned.  If no such field exists, an error is set on
// p->status and false is returned (the multipart value is left as is).
static bool end_member(upb_json_parser *p) {
  assert(p->top->f == NULL);

  size_t name_len;
  const char *name = accumulate_getptr(p, &name_len);
  const upb_fielddef *field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field) {
    p->top->f = field;
    multipart_end(p);
    return true;
  }

  // TODO(haberman): Ignore unknown fields if requested/configured to do so.
  upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len, name);
  return false;
}
// Parser callback: deselects the current field on the top frame.
static void clear_member(upb_json_parser *p) {
  upb_jsonparser_frame *frame = p->top;
  frame->f = NULL;
}
// Parser callback: a nested object begins as the value of the current field.
//
// Pushes a new parser frame whose message definition is the field's
// sub-message definition and whose field is unset, after emitting STARTSUBMSG
// on the outer sink.
//
// Returns false if the current field is not a sub-message field (an error is
// set on p->status here) or if the frame stack is full (see check_stack()).
static bool start_subobject(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_issubmsg(p->top->f)) {
    if (!check_stack(p)) return false;

    upb_jsonparser_frame *outer = p->top;
    upb_jsonparser_frame *sub = outer + 1;

    upb_selector_t startsub_sel =
        getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&outer->sink, startsub_sel, &sub->sink);
    sub->m = upb_fielddef_msgsubdef(outer->f);
    sub->f = NULL;
    p->top = sub;

    return true;
  }

  upb_status_seterrf(p->status,
                     "Object specified for non-message/group field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
// Parser callback: a nested object is complete.  Pops the sub-message frame
// first, then emits ENDSUBMSG on the (now current) outer frame's sink.
static void end_subobject(upb_json_parser *p) {
  --p->top;

  // The selector is computed after the pop, i.e. for the outer frame.
  upb_selector_t endsub_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
  upb_sink_endsubmsg(&p->top->sink, endsub_sel);
}
// Parser callback: an array begins as the value of the current field.
//
// Pushes a new parser frame that shares the outer frame's message definition
// and field, after emitting STARTSEQ on the outer sink.
//
// Returns false if the current field is not a sequence per
// upb_fielddef_isseq() (an error is set on p->status here) or if the frame
// stack is full (see check_stack()).
static bool start_array(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_isseq(p->top->f)) {
    if (!check_stack(p)) return false;

    upb_jsonparser_frame *outer = p->top;
    upb_jsonparser_frame *seq = outer + 1;

    upb_selector_t startseq_sel =
        getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&outer->sink, startseq_sel, &seq->sink);
    seq->m = outer->m;
    seq->f = outer->f;
    p->top = seq;

    return true;
  }

  upb_status_seterrf(p->status,
                     "Array specified for non-repeated field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
// Parser callback: an array is complete.  Pops the sequence frame (there must
// be one above the bottom of the stack), then emits ENDSEQ on the outer
// frame's sink.
static void end_array(upb_json_parser *p) {
  assert(p->top > p->stack);
  --p->top;

  // The selector is computed after the pop, i.e. for the outer frame.
  upb_selector_t endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq_sel);
}
// Parser callback: an object begins.  Emits a start-of-message event on the
// sink of the current (top) frame.
static void start_object(upb_json_parser *p) {
  upb_jsonparser_frame *frame = p->top;
  upb_sink_startmsg(&frame->sink);
}
// Parser callback: an object is complete.  Emits an end-of-message event on
// the sink of the current (top) frame.
//
// upb_sink_endmsg() requires a status object.  It is initialized here so that
// the callee never observes indeterminate contents.  The status is not
// inspected afterwards, so any error reported through it is dropped (this
// callback has no way to report failure to the parser).
static void end_object(upb_json_parser *p) {
  upb_status status = UPB_STATUS_INIT;
  upb_sink_endmsg(&p->top->sink, &status);
}
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
/* The actual parser **********************************************************/
// What follows is the Ragel parser itself. The language is specified in Ragel // What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above. // and the actions call our C functions above.
//
// Ragel has an extensive set of functionality, and we use only a small part of
// it. There are many action types but we only use a few:
//
// ">" -- transition into a machine
// "%" -- transition out of a machine
// "@" -- transition into a final state of a machine.
//
// "@" transitions are tricky because a machine can transition into a final
// state repeatedly. But in some cases we know this can't happen, for example
// a string which is delimited by a final '"' can only transition into its
// final state once, when the closing '"' is seen.
#line 596 "upb/json/parser.rl" #line 904 "upb/json/parser.rl"
#line 514 "upb/json/parser.c" #line 816 "upb/json/parser.c"
static const char _json_actions[] = { static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1, 0, 1, 0, 1, 2, 1, 3, 1,
4, 1, 5, 1, 6, 1, 7, 1, 5, 1, 6, 1, 7, 1, 8, 1,
9, 1, 11, 1, 12, 1, 13, 1, 10, 1, 12, 1, 13, 1, 14, 1,
14, 1, 15, 1, 16, 1, 24, 1, 15, 1, 16, 1, 17, 1, 21, 1,
26, 2, 3, 7, 2, 5, 2, 2, 25, 1, 27, 2, 3, 8, 2, 4,
5, 7, 2, 10, 8, 2, 12, 14, 5, 2, 6, 2, 2, 6, 8, 2,
2, 13, 14, 2, 17, 1, 2, 18, 11, 9, 2, 13, 15, 2, 14, 15,
26, 2, 19, 8, 2, 20, 26, 2, 2, 18, 1, 2, 19, 27, 2, 20,
21, 26, 2, 22, 26, 2, 23, 26, 9, 2, 22, 27, 2, 23, 27, 2,
2, 25, 26, 3, 13, 10, 8 24, 27, 2, 26, 27, 3, 14, 11,
9
}; };
static const unsigned char _json_key_offsets[] = { static const unsigned char _json_key_offsets[] = {
0, 0, 4, 9, 14, 18, 22, 27, 0, 0, 4, 9, 14, 15, 19, 24,
32, 37, 41, 45, 48, 51, 53, 57, 29, 34, 38, 42, 45, 48, 50, 54,
61, 63, 65, 70, 72, 74, 83, 89, 58, 60, 62, 67, 69, 71, 80, 86,
95, 101, 107, 109, 118, 118, 118, 123, 92, 98, 104, 106, 115, 116, 116, 116,
128, 133, 133, 134, 135, 136, 137, 137, 121, 126, 131, 132, 133, 134, 135, 135,
138, 139, 140, 140, 141, 142, 143, 143, 136, 137, 138, 138, 139, 140, 141, 141,
148, 153, 157, 161, 166, 171, 176, 180, 146, 151, 152, 156, 161, 166, 171, 175,
180, 183, 183, 183 175, 178, 178, 178
}; };
static const char _json_trans_keys[] = { static const char _json_trans_keys[] = {
32, 123, 9, 13, 32, 34, 125, 9, 32, 123, 9, 13, 32, 34, 125, 9,
13, 32, 34, 125, 9, 13, 32, 58, 13, 32, 34, 125, 9, 13, 34, 32,
9, 13, 32, 58, 9, 13, 32, 93, 58, 9, 13, 32, 93, 125, 9, 13,
125, 9, 13, 32, 44, 125, 9, 13, 32, 44, 125, 9, 13, 32, 44, 125,
32, 44, 125, 9, 13, 32, 34, 9, 9, 13, 32, 34, 9, 13, 45, 48,
13, 45, 48, 49, 57, 48, 49, 57, 49, 57, 48, 49, 57, 46, 69, 101,
46, 69, 101, 48, 57, 69, 101, 48, 48, 57, 69, 101, 48, 57, 43, 45,
57, 43, 45, 48, 57, 48, 57, 48, 48, 57, 48, 57, 48, 57, 46, 69,
57, 46, 69, 101, 48, 57, 34, 92, 101, 48, 57, 34, 92, 34, 92, 34,
34, 92, 34, 47, 92, 98, 102, 110, 47, 92, 98, 102, 110, 114, 116, 117,
114, 116, 117, 48, 57, 65, 70, 97, 48, 57, 65, 70, 97, 102, 48, 57,
102, 48, 57, 65, 70, 97, 102, 48, 65, 70, 97, 102, 48, 57, 65, 70,
57, 65, 70, 97, 102, 48, 57, 65, 97, 102, 48, 57, 65, 70, 97, 102,
70, 97, 102, 34, 92, 34, 45, 91, 34, 92, 34, 45, 91, 102, 110, 116,
102, 110, 116, 123, 48, 57, 32, 93, 123, 48, 57, 34, 32, 93, 125, 9,
125, 9, 13, 32, 44, 93, 9, 13, 13, 32, 44, 93, 9, 13, 32, 93,
32, 93, 125, 9, 13, 97, 108, 115, 125, 9, 13, 97, 108, 115, 101, 117,
101, 117, 108, 108, 114, 117, 101, 32, 108, 108, 114, 117, 101, 32, 34, 125,
34, 125, 9, 13, 32, 34, 125, 9, 9, 13, 32, 34, 125, 9, 13, 34,
13, 32, 58, 9, 13, 32, 58, 9, 32, 58, 9, 13, 32, 93, 125, 9,
13, 32, 93, 125, 9, 13, 32, 44, 13, 32, 44, 125, 9, 13, 32, 44,
125, 9, 13, 32, 44, 125, 9, 13, 125, 9, 13, 32, 34, 9, 13, 32,
32, 34, 9, 13, 32, 9, 13, 0 9, 13, 0
}; };
static const char _json_single_lengths[] = { static const char _json_single_lengths[] = {
0, 2, 3, 3, 2, 2, 3, 3, 0, 2, 3, 3, 1, 2, 3, 3,
3, 2, 2, 1, 3, 0, 2, 2, 3, 2, 2, 1, 3, 0, 2, 2,
0, 0, 3, 2, 2, 9, 0, 0, 0, 0, 3, 2, 2, 9, 0, 0,
0, 0, 2, 7, 0, 0, 3, 3, 0, 0, 2, 7, 1, 0, 0, 3,
3, 0, 1, 1, 1, 1, 0, 1, 3, 3, 1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1, 0, 3, 1, 1, 0, 1, 1, 1, 0, 3,
3, 2, 2, 3, 3, 3, 2, 0, 3, 1, 2, 3, 3, 3, 2, 0,
1, 0, 0, 0 1, 0, 0, 0
}; };
static const char _json_range_lengths[] = { static const char _json_range_lengths[] = {
0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 3, 3, 1, 1, 1, 0, 0, 0, 3, 3,
3, 3, 0, 1, 0, 0, 1, 1, 3, 3, 0, 1, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0 1, 0, 0, 0
}; };
static const short _json_index_offsets[] = { static const short _json_index_offsets[] = {
0, 0, 4, 9, 14, 18, 22, 27, 0, 0, 4, 9, 14, 16, 20, 25,
32, 37, 41, 45, 48, 52, 54, 58, 30, 35, 39, 43, 46, 50, 52, 56,
62, 64, 66, 71, 74, 77, 87, 91, 60, 62, 64, 69, 72, 75, 85, 89,
95, 99, 103, 106, 115, 116, 117, 122, 93, 97, 101, 104, 113, 115, 116, 117,
127, 132, 133, 135, 137, 139, 141, 142, 122, 127, 132, 134, 136, 138, 140, 141,
144, 146, 148, 149, 151, 153, 155, 156, 143, 145, 147, 148, 150, 152, 154, 155,
161, 166, 170, 174, 179, 184, 189, 193, 160, 165, 167, 171, 176, 181, 186, 190,
194, 197, 198, 199 191, 194, 195, 196
}; };
static const char _json_indicies[] = { static const char _json_indicies[] = {
0, 2, 0, 1, 3, 4, 5, 3, 0, 2, 0, 1, 3, 4, 5, 3,
1, 6, 7, 8, 6, 1, 9, 10, 1, 6, 7, 8, 6, 1, 9, 1,
9, 1, 11, 12, 11, 1, 12, 1, 10, 11, 10, 1, 11, 1, 1, 11,
1, 12, 13, 14, 15, 16, 14, 1, 12, 13, 14, 15, 13, 1, 16, 17,
17, 18, 8, 17, 1, 18, 7, 18, 8, 16, 1, 17, 7, 17, 1, 18,
1, 19, 20, 21, 1, 20, 21, 1, 19, 20, 1, 19, 20, 1, 22, 23,
23, 24, 24, 22, 25, 1, 24, 24, 23, 21, 24, 1, 23, 23, 24, 21,
25, 22, 26, 26, 27, 1, 27, 1, 25, 25, 26, 1, 26, 1, 26, 21,
27, 22, 23, 24, 24, 21, 22, 29, 22, 23, 23, 20, 21, 28, 29, 27,
30, 28, 32, 33, 31, 34, 34, 34, 31, 32, 30, 33, 33, 33, 33, 33,
34, 34, 34, 34, 34, 35, 1, 36, 33, 33, 33, 34, 1, 35, 35, 35,
36, 36, 1, 37, 37, 37, 1, 38, 1, 36, 36, 36, 1, 37, 37, 37,
38, 38, 1, 39, 39, 39, 1, 41, 1, 38, 38, 38, 1, 40, 41, 39,
42, 40, 43, 44, 45, 46, 47, 48, 42, 43, 44, 45, 46, 47, 48, 43,
49, 44, 1, 50, 51, 53, 54, 1, 1, 49, 1, 50, 51, 53, 54, 1,
53, 52, 55, 56, 54, 55, 1, 56, 53, 52, 55, 56, 54, 55, 1, 56,
1, 1, 56, 52, 57, 58, 1, 59, 1, 1, 56, 52, 57, 1, 58, 1,
1, 60, 1, 61, 1, 62, 63, 1, 59, 1, 60, 1, 61, 62, 1, 63,
64, 1, 65, 1, 66, 67, 1, 68, 1, 64, 1, 65, 66, 1, 67, 1,
1, 69, 1, 70, 71, 72, 73, 71, 68, 1, 69, 70, 71, 72, 70, 1,
1, 74, 75, 76, 74, 1, 77, 78, 73, 74, 75, 73, 1, 76, 1, 77,
77, 1, 79, 80, 79, 1, 80, 1, 78, 77, 1, 78, 1, 1, 78, 79,
1, 80, 81, 82, 83, 84, 82, 1, 80, 81, 82, 80, 1, 83, 84, 75,
85, 86, 76, 85, 1, 86, 75, 86, 83, 1, 84, 74, 84, 1, 85, 86,
1, 87, 88, 88, 1, 1, 1, 1, 86, 1, 1, 1, 1, 0
0
}; };
static const char _json_trans_targs[] = { static const char _json_trans_targs[] = {
1, 0, 2, 3, 4, 56, 3, 4, 1, 0, 2, 3, 4, 56, 3, 4,
56, 5, 6, 5, 6, 7, 8, 9, 56, 5, 5, 6, 7, 8, 9, 56,
56, 8, 9, 11, 12, 18, 57, 13, 8, 9, 11, 12, 18, 57, 13, 15,
15, 14, 16, 17, 20, 58, 21, 20, 14, 16, 17, 20, 58, 21, 20, 58,
58, 21, 19, 22, 23, 24, 25, 26, 21, 19, 22, 23, 24, 25, 26, 20,
20, 58, 21, 28, 29, 30, 34, 39, 58, 21, 28, 30, 31, 34, 39, 43,
43, 47, 59, 59, 31, 30, 33, 31, 47, 29, 59, 59, 32, 31, 29, 32,
32, 59, 35, 36, 37, 38, 59, 40, 33, 35, 36, 37, 38, 59, 40, 41,
41, 42, 59, 44, 45, 46, 59, 48, 42, 59, 44, 45, 46, 59, 48, 49,
49, 55, 48, 49, 55, 50, 51, 50, 55, 48, 49, 55, 50, 50, 51, 52,
51, 52, 53, 54, 55, 53, 54, 59, 53, 54, 55, 53, 54, 59, 56
56
}; };
static const char _json_trans_actions[] = { static const char _json_trans_actions[] = {
0, 0, 0, 21, 75, 48, 0, 42, 0, 0, 0, 21, 77, 53, 0, 47,
23, 17, 17, 0, 0, 15, 19, 19, 23, 17, 0, 0, 15, 19, 19, 50,
45, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 3, 13, 0, 0, 0, 0, 0, 3, 13, 0, 0, 35,
33, 5, 11, 0, 7, 0, 0, 0, 5, 11, 0, 38, 7, 7, 7, 41,
36, 39, 9, 57, 51, 25, 0, 0, 44, 9, 62, 56, 25, 0, 0, 0,
0, 29, 60, 54, 15, 0, 27, 0, 31, 29, 33, 59, 15, 0, 27, 0,
0, 31, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0, 68, 0, 0,
0, 0, 69, 0, 0, 0, 63, 21, 0, 71, 0, 0, 0, 65, 21, 77,
75, 48, 0, 42, 23, 17, 17, 0, 53, 0, 47, 23, 17, 0, 0, 15,
0, 15, 19, 19, 45, 0, 0, 72, 19, 19, 50, 0, 0, 74, 0
0
}; };
static const int json_start = 1; static const int json_start = 1;
...@@ -9276,13 +9597,14 @@ static const int json_en_value_machine = 27; ...@@ -9276,13 +9597,14 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1; static const int json_en_main = 1;
#line 599 "upb/json/parser.rl" #line 907 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size, size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) { const upb_bufhandle *handle) {
UPB_UNUSED(hd); UPB_UNUSED(hd);
UPB_UNUSED(handle); UPB_UNUSED(handle);
upb_json_parser *parser = closure; upb_json_parser *parser = closure;
parser->handle = handle;
// Variables used by Ragel's generated code. // Variables used by Ragel's generated code.
int cs = parser->current_state; int cs = parser->current_state;
...@@ -9292,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, ...@@ -9292,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *p = buf; const char *p = buf;
const char *pe = buf + size; const char *pe = buf + size;
capture_resume(parser, buf);
#line 684 "upb/json/parser.c"
#line 987 "upb/json/parser.c"
{ {
int _klen; int _klen;
unsigned int _trans; unsigned int _trans;
...@@ -9368,114 +9692,118 @@ _match: ...@@ -9368,114 +9692,118 @@ _match:
switch ( *_acts++ ) switch ( *_acts++ )
{ {
case 0: case 0:
#line 517 "upb/json/parser.rl" #line 819 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
case 1: case 1:
#line 518 "upb/json/parser.rl" #line 820 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} } { p--; {stack[top++] = cs; cs = 10; goto _again;} }
break; break;
case 2: case 2:
#line 522 "upb/json/parser.rl" #line 824 "upb/json/parser.rl"
{ start_text(parser, p); } { start_text(parser, p); }
break; break;
case 3: case 3:
#line 523 "upb/json/parser.rl" #line 825 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_text(parser, p, false)); } { CHECK_RETURN_TOP(end_text(parser, p)); }
break; break;
case 4: case 4:
#line 529 "upb/json/parser.rl" #line 831 "upb/json/parser.rl"
{ start_hex(parser, p); } { start_hex(parser); }
break; break;
case 5: case 5:
#line 530 "upb/json/parser.rl" #line 832 "upb/json/parser.rl"
{ hex(parser, p); } { hexdigit(parser, p); }
break; break;
case 6: case 6:
#line 536 "upb/json/parser.rl" #line 833 "upb/json/parser.rl"
{ escape(parser, p); } { CHECK_RETURN_TOP(end_hex(parser)); }
break; break;
case 7: case 7:
#line 539 "upb/json/parser.rl" #line 839 "upb/json/parser.rl"
{ {cs = stack[--top]; goto _again;} } { CHECK_RETURN_TOP(escape(parser, p)); }
break; break;
case 8: case 8:
#line 540 "upb/json/parser.rl" #line 845 "upb/json/parser.rl"
{ {stack[top++] = cs; cs = 19; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
case 9: case 9:
#line 542 "upb/json/parser.rl" #line 848 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 27; goto _again;} } { {stack[top++] = cs; cs = 19; goto _again;} }
break; break;
case 10: case 10:
#line 547 "upb/json/parser.rl" #line 850 "upb/json/parser.rl"
{ start_member(parser); } { p--; {stack[top++] = cs; cs = 27; goto _again;} }
break; break;
case 11: case 11:
#line 548 "upb/json/parser.rl" #line 855 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(end_member(parser)); } { start_member(parser); }
break; break;
case 12: case 12:
#line 551 "upb/json/parser.rl" #line 856 "upb/json/parser.rl"
{ clear_member(parser); } { CHECK_RETURN_TOP(end_member(parser)); }
break; break;
case 13: case 13:
#line 557 "upb/json/parser.rl" #line 859 "upb/json/parser.rl"
{ start_object(parser); } { clear_member(parser); }
break; break;
case 14: case 14:
#line 560 "upb/json/parser.rl" #line 865 "upb/json/parser.rl"
{ end_object(parser); } { start_object(parser); }
break; break;
case 15: case 15:
#line 566 "upb/json/parser.rl" #line 868 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_array(parser)); } { end_object(parser); }
break; break;
case 16: case 16:
#line 570 "upb/json/parser.rl" #line 874 "upb/json/parser.rl"
{ end_array(parser); } { CHECK_RETURN_TOP(start_array(parser)); }
break; break;
case 17: case 17:
#line 575 "upb/json/parser.rl" #line 878 "upb/json/parser.rl"
{ start_number(parser, p); } { end_array(parser); }
break; break;
case 18: case 18:
#line 576 "upb/json/parser.rl" #line 883 "upb/json/parser.rl"
{ end_number(parser, p); } { start_number(parser, p); }
break; break;
case 19: case 19:
#line 578 "upb/json/parser.rl" #line 884 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_stringval(parser)); } { CHECK_RETURN_TOP(end_number(parser, p)); }
break; break;
case 20: case 20:
#line 579 "upb/json/parser.rl" #line 886 "upb/json/parser.rl"
{ end_stringval(parser); } { CHECK_RETURN_TOP(start_stringval(parser)); }
break; break;
case 21: case 21:
#line 581 "upb/json/parser.rl" #line 887 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, true)); } { CHECK_RETURN_TOP(end_stringval(parser)); }
break; break;
case 22: case 22:
#line 583 "upb/json/parser.rl" #line 889 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(parser_putbool(parser, false)); } { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break; break;
case 23: case 23:
#line 585 "upb/json/parser.rl" #line 891 "upb/json/parser.rl"
{ /* null value */ } { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break; break;
case 24: case 24:
#line 587 "upb/json/parser.rl" #line 893 "upb/json/parser.rl"
{ CHECK_RETURN_TOP(start_subobject(parser)); } { /* null value */ }
break; break;
case 25: case 25:
#line 588 "upb/json/parser.rl" #line 895 "upb/json/parser.rl"
{ end_subobject(parser); } { CHECK_RETURN_TOP(start_subobject(parser)); }
break; break;
case 26: case 26:
#line 593 "upb/json/parser.rl" #line 896 "upb/json/parser.rl"
{ end_subobject(parser); }
break;
case 27:
#line 901 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} } { p--; {cs = stack[--top]; goto _again;} }
break; break;
#line 866 "upb/json/parser.c" #line 1173 "upb/json/parser.c"
} }
} }
...@@ -9488,10 +9816,12 @@ _again: ...@@ -9488,10 +9816,12 @@ _again:
_out: {} _out: {}
} }
#line 615 "upb/json/parser.rl" #line 926 "upb/json/parser.rl"
if (p != pe) { if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p); upb_status_seterrf(parser->status, "Parse error at %s\n", p);
} else {
capture_suspend(parser, &p);
} }
error: error:
...@@ -9508,8 +9838,13 @@ bool end(void *closure, const void *hd) { ...@@ -9508,8 +9838,13 @@ bool end(void *closure, const void *hd) {
return true; return true;
} }
/* Public API *****************************************************************/
void upb_json_parser_init(upb_json_parser *p, upb_status *status) { void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
p->limit = p->stack + UPB_JSON_MAX_DEPTH; p->limit = p->stack + UPB_JSON_MAX_DEPTH;
p->accumulate_buf = NULL;
p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_); upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL); upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL); upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
...@@ -9519,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) { ...@@ -9519,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
void upb_json_parser_uninit(upb_json_parser *p) { void upb_json_parser_uninit(upb_json_parser *p) {
upb_byteshandler_uninit(&p->input_handler_); upb_byteshandler_uninit(&p->input_handler_);
free(p->accumulate_buf);
} }
void upb_json_parser_reset(upb_json_parser *p) { void upb_json_parser_reset(upb_json_parser *p) {
...@@ -9529,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) { ...@@ -9529,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) {
int top; int top;
// Emit Ragel initialization of the parser. // Emit Ragel initialization of the parser.
#line 920 "upb/json/parser.c" #line 1235 "upb/json/parser.c"
{ {
cs = json_start; cs = json_start;
top = 0; top = 0;
} }
#line 655 "upb/json/parser.rl" #line 974 "upb/json/parser.rl"
p->current_state = cs; p->current_state = cs;
p->parser_top = top; p->parser_top = top;
p->text_begin = NULL; accumulate_clear(p);
p->accumulated = NULL; p->multipart_state = MULTIPART_INACTIVE;
p->accumulated_len = 0; p->capture = NULL;
} }
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) { void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
......
...@@ -6662,7 +6662,9 @@ typedef enum { ...@@ -6662,7 +6662,9 @@ typedef enum {
// | unused (24) | opc | // | unused (24) | opc |
// | upb_inttable* (32 or 64) | // | upb_inttable* (32 or 64) |
OP_HALT = 36, // No arg. OP_DISPATCH = 36, // No arg.
OP_HALT = 37, // No arg.
} opcode; } opcode;
#define OP_MAX OP_HALT #define OP_MAX OP_HALT
...@@ -7339,15 +7341,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser, ...@@ -7339,15 +7341,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser,
int parser_stack[UPB_JSON_MAX_DEPTH]; int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top; int parser_top;
// A pointer to the beginning of whatever text we are currently parsing. // The handle for the current buffer.
const char *text_begin; const upb_bufhandle *handle;
// We have to accumulate text for member names, integers, unicode escapes, and // Accumulate buffer. See details in parser.rl.
// base64 partial results.
const char *accumulated; const char *accumulated;
size_t accumulated_len; size_t accumulated_len;
// TODO: add members and code for allocating a buffer when necessary (when the char *accumulate_buf;
// member spans input buffers or contains escapes). size_t accumulate_buf_size;
// Multi-part text data. See details in parser.rl.
int multipart_state;
upb_selector_t string_selector;
// Input capture. See details in parser.rl.
const char *capture;
// Intermediate result of parsing a unicode escape sequence.
uint32_t digit;
)); ));
UPB_BEGIN_EXTERN_C UPB_BEGIN_EXTERN_C
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment