Commit daee0516 authored by kenton@google.com

Optimize Java string serialization. Patch from Evan Jones.

parent ab6950d7
????-??-?? version 2.3.1:
Java
* Improved performance of string serialization.
2010-01-08 version 2.3.0:
General
......
......@@ -80,6 +80,8 @@ Patch contributors:
* Fixes for Solaris 10 32/64-bit confusion.
Evan Jones <evanj@mit.edu>
* Optimize Java serialization code when writing a small message to a stream.
* Optimize Java serialization of strings so that UTF-8 encoding happens only
once per string per serialization call.
* Clean up some Java warnings.
Michael Kucharski <m.kucharski@gmail.com>
* Added CodedInputStream.getTotalBytesRead().
......@@ -193,6 +193,23 @@ public final class CodedOutputStream {
writeStringNoTag(value);
}
/**
 * Write a {@code string} field, including tag, to the stream, reusing an
 * already-encoded form of the value when one is supplied. Used by the SPEED
 * version of messages to avoid performing the UTF-8 conversion twice.
 *
 * @param fieldNumber the field number passed on to {@code writeBytes}
 * @param value the string to write; it is only encoded here when
 *     {@code bytes} is {@code null}
 * @param bytes the encoded version of {@code value}; this is only a hint and
 *     may be {@code null}, in which case {@code value} is converted as usual
 */
public void writeStringCached(final int fieldNumber, final String value,
                              ByteString bytes)
                              throws IOException {
  // No cached encoding is available when serializing without getting the
  // size first, or when there are multiple threads; encode on the spot.
  final ByteString encoded =
      (bytes != null) ? bytes : ByteString.copyFromUtf8(value);
  writeBytes(fieldNumber, encoded);
}
/** Write a {@code group} field, including tag, to the stream. */
public void writeGroup(final int fieldNumber, final MessageLite value)
throws IOException {
......
......@@ -36,6 +36,7 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
import junit.framework.TestCase;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
......@@ -211,6 +212,29 @@ public class CodedOutputStreamTest extends TestCase {
0x9abcdef012345678L);
}
/** Checks writeStringCached() both without and with a pre-encoded value. */
public void testWriteStringCached() throws IOException {
  final int expectedTag =
      WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED);
  final ByteArrayOutputStream rawOutput = new ByteArrayOutputStream();
  final CodedOutputStream coded = CodedOutputStream.newInstance(rawOutput);

  // No cached bytes supplied: the string argument itself is written.
  coded.writeStringCached(5, "hello", null);
  coded.flush();
  CodedInputStream reader =
      CodedInputStream.newInstance(rawOutput.toByteArray());
  assertEquals(expectedTag, reader.readTag());
  assertEquals("hello", reader.readString());

  // Cached bytes supplied: they are written and the string is ignored.
  rawOutput.reset();
  coded.writeStringCached(5, "ignored", ByteString.copyFromUtf8("hello"));
  coded.flush();
  reader = CodedInputStream.newInstance(rawOutput.toByteArray());
  assertEquals(expectedTag, reader.readTag());
  assertEquals("hello", reader.readString());
}
/** Test encodeZigZag32() and encodeZigZag64(). */
public void testEncodeZigZag() throws Exception {
assertEquals(0, CodedOutputStream.encodeZigZag32( 0));
......
......@@ -199,6 +199,14 @@ GenerateMembers(io::Printer* printer) const {
"private $type$ $name$_ = $default$;\n"
"public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
"public $type$ get$capitalized_name$() { return $name$_; }\n");
// Avoid double encoding for Java strings
// This field does not need to be volatile because ByteString is immutable.
// http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html#finalRight
// However, it seems better to be safe than sorry.
if (ShouldUseStringEncodingCache()) {
printer->Print(variables_,
"private volatile com.google.protobuf.ByteString $name$EncodedCache_;\n");
}
}
void PrimitiveFieldGenerator::
......@@ -259,25 +267,57 @@ GenerateParsingCode(io::Printer* printer) const {
// Emits the Java statements that write this field to `output` when it is set.
//
// Exactly one write statement is emitted per field.  An earlier unconditional
// Print() ahead of the if/else also emitted the plain write, which made the
// generated code serialize every field twice; it has been removed.
void PrimitiveFieldGenerator::
GenerateSerializationCode(io::Printer* printer) const {
  if (ShouldUseStringEncodingCache()) {
    // Pass the cached serialized version, then forget it.
    // The cached version could be null if we didn't compute the size first,
    // or if there are two threads attempting to serialize simultaneously.
    // CodedOutputStream.writeStringCached handles this for us.
    printer->Print(variables_,
      "if (has$capitalized_name$()) {\n"
      "  output.write$capitalized_type$Cached($number$,\n"
      "                                       get$capitalized_name$(),\n"
      "                                       $name$EncodedCache_);\n"
      "  $name$EncodedCache_ = null;\n"
      "}\n");
  } else {
    // No encoding cache for this field: emit the plain typed write.
    printer->Print(variables_,
      "if (has$capitalized_name$()) {\n"
      "  output.write$capitalized_type$($number$, get$capitalized_name$());\n"
      "}\n");
  }
}
// Emits the Java statements that add this field's encoded size to `size`
// when the field is set.
//
// Exactly one size contribution is emitted per field.  An earlier
// unconditional Print() ahead of the if/else also emitted the plain size
// computation, which made the generated code count every field twice; it has
// been removed.
void PrimitiveFieldGenerator::
GenerateSerializedSizeCode(io::Printer* printer) const {
  if (ShouldUseStringEncodingCache()) {
    // Avoid double encoding for strings: serialize the string here and stash
    // the bytes in the cache field so the serialization code can reuse them.
    printer->Print(variables_,
      "if (has$capitalized_name$()) {\n"
      "  com.google.protobuf.ByteString serialized = \n"
      "    com.google.protobuf.ByteString.copyFromUtf8(\n"
      "      get$capitalized_name$());\n"
      "  $name$EncodedCache_ = serialized;\n"
      "  size += com.google.protobuf.CodedOutputStream\n"
      "    .computeBytesSize($number$, serialized);\n"
      "}\n");
  } else {
    // No encoding cache for this field: emit the plain typed size computation.
    printer->Print(variables_,
      "if (has$capitalized_name$()) {\n"
      "  size += com.google.protobuf.CodedOutputStream\n"
      "    .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
      "}\n");
  }
}
// Returns BoxedPrimitiveTypeName() applied to the Java type that
// GetJavaType() reports for this generator's field descriptor.
string PrimitiveFieldGenerator::GetBoxedType() const {
return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
}
// Returns true when this field should get an encoded-string cache: the field
// is of type string and its file is compiled with optimize_for = SPEED.
bool PrimitiveFieldGenerator::ShouldUseStringEncodingCache() const {
  // Fields of any other type never use the cache.
  if (GetType(descriptor_) != FieldDescriptor::TYPE_STRING) {
    return false;
  }
  return descriptor_->file()->options().optimize_for() == FileOptions::SPEED;
}
// ===================================================================
RepeatedPrimitiveFieldGenerator::
......
......@@ -62,6 +62,8 @@ class PrimitiveFieldGenerator : public FieldGenerator {
string GetBoxedType() const;
private:
bool ShouldUseStringEncodingCache() const;
const FieldDescriptor* descriptor_;
map<string, string> variables_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment