Commit 1e37a94b authored by Joshua Haberman's avatar Joshua Haberman

Optimized away the creation of empty string objects.

Prior to this CL, creating an empty message object would create
two empty string objects for every declared field.  First we
created a unique string object for the field's default.  Then
we created yet another string object when we assigned the
default value into the message: we called #encode to ensure
that the string would have the correct encoding and be frozen.

I optimized these unnecessary objects away with two fixes:

1. Memoize the empty string so that we don't create a new empty
   string for every field's default.
2. If we are assigning a string to a message object, avoid creating
   a new string if the assigned string has the correct encoding and
   is already frozen.
parent c132a4aa
......@@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
}
static VALUE cached_empty_string = Qnil;
static VALUE cached_empty_bytes = Qnil;
static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
VALUE str_rb = rb_str_new(str, size);
rb_enc_associate(str_rb,
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
}
VALUE get_frozen_string(const char* str, size_t size, bool binary) {
if (size == 0) {
return binary ? cached_empty_bytes : cached_empty_string;
} else {
// It is harder to memoize non-empty strings. The obvious approach would be
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
// requires constructing a string (the very thing we're trying to avoid).
//
// Since few fields have defaults, we will just optimize the empty string
// case for now.
return create_frozen_string(str, size, binary);
}
}
// -----------------------------------------------------------------------------
// Utilities.
// -----------------------------------------------------------------------------
......@@ -118,4 +144,9 @@ void Init_protobuf_c() {
rb_gc_register_address(&upb_def_to_ruby_obj_map);
upb_def_to_ruby_obj_map = rb_hash_new();
rb_gc_register_address(&cached_empty_string);
rb_gc_register_address(&cached_empty_bytes);
cached_empty_string = create_frozen_string("", 0, false);
cached_empty_bytes = create_frozen_string("", 0, true);
}
......@@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
// cycles.
#define ENCODE_MAX_NESTING 63
// -----------------------------------------------------------------------------
// A cache of frozen string objects to use as field defaults.
// -----------------------------------------------------------------------------
VALUE get_frozen_string(const char* data, size_t size, bool binary);
// -----------------------------------------------------------------------------
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
// instances.
......
......@@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
kRubyStringUtf8Encoding : kRubyString8bitEncoding;
VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
}
return value;
}
......@@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
case UPB_TYPE_BYTES: {
size_t size;
const char *str = upb_fielddef_defaultstr(field, &size);
VALUE str_rb = rb_str_new(str, size);
rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
return get_frozen_string(str, size,
upb_fielddef_type(field) == UPB_TYPE_BYTES);
}
default: return Qnil;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment