Optimize Java string serialization. Patch from Evan Jones.

pull/3335/head
kenton@google.com 15 years ago
parent ab6950d75d
commit daee05168e
  1. 5
      CHANGES.txt
  2. 2
      CONTRIBUTORS.txt
  3. 17
      java/src/main/java/com/google/protobuf/CodedOutputStream.java
  4. 24
      java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
  5. 58
      src/google/protobuf/compiler/java/java_primitive_field.cc
  6. 2
      src/google/protobuf/compiler/java/java_primitive_field.h

@ -1,3 +1,8 @@
????-??-?? version 2.3.1:
Java
* Improved performance of string serialization.
2010-01-08 version 2.3.0: 2010-01-08 version 2.3.0:
General General

@ -80,6 +80,8 @@ Patch contributors:
* Fixes for Solaris 10 32/64-bit confusion. * Fixes for Solaris 10 32/64-bit confusion.
Evan Jones <evanj@mit.edu> Evan Jones <evanj@mit.edu>
* Optimize Java serialization code when writing a small message to a stream. * Optimize Java serialization code when writing a small message to a stream.
* Optimize Java serialization of strings so that UTF-8 encoding happens only
once per string per serialization call.
* Clean up some Java warnings. * Clean up some Java warnings.
Michael Kucharski <m.kucharski@gmail.com> Michael Kucharski <m.kucharski@gmail.com>
* Added CodedInputStream.getTotalBytesRead(). * Added CodedInputStream.getTotalBytesRead().

@ -193,6 +193,23 @@ public final class CodedOutputStream {
writeStringNoTag(value); writeStringNoTag(value);
} }
/**
 * Writes a {@code string} field, tag included, reusing an already-encoded
 * copy of the value when one is supplied. The SPEED version of messages
 * calls this so that the UTF-8 conversion is not performed twice.
 *
 * @param fieldNumber field number to write with the value
 * @param value the string to write; it is only encoded here when
 *     {@code bytes} is null
 * @param bytes the encoded version of {@code value}; merely a hint, and
 *     may be null
 * @throws IOException declared for the underlying write
 */
public void writeStringCached(final int fieldNumber, final String value,
    ByteString bytes)
    throws IOException {
  // There may be no cached encoding: the size might not have been computed
  // before serializing, or multiple threads may be involved. Encode on
  // demand in that case.
  final ByteString encoded =
      (bytes != null) ? bytes : ByteString.copyFromUtf8(value);
  writeBytes(fieldNumber, encoded);
}
/** Write a {@code group} field, including tag, to the stream. */ /** Write a {@code group} field, including tag, to the stream. */
public void writeGroup(final int fieldNumber, final MessageLite value) public void writeGroup(final int fieldNumber, final MessageLite value)
throws IOException { throws IOException {

@ -36,6 +36,7 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
import junit.framework.TestCase; import junit.framework.TestCase;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -211,6 +212,29 @@ public class CodedOutputStreamTest extends TestCase {
0x9abcdef012345678L); 0x9abcdef012345678L);
} }
/** Checks writeStringCached() both without and with pre-encoded bytes. */
public void testWriteStringCached() throws IOException {
  final ByteArrayOutputStream rawOutput = new ByteArrayOutputStream();
  final CodedOutputStream coded = CodedOutputStream.newInstance(rawOutput);
  final int expectedTag =
      WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED);

  // No cached bytes supplied: the string argument itself is written.
  coded.writeStringCached(5, "hello", null);
  coded.flush();
  CodedInputStream reader =
      CodedInputStream.newInstance(rawOutput.toByteArray());
  assertEquals(expectedTag, reader.readTag());
  assertEquals("hello", reader.readString());

  // Cached bytes supplied: they are written and the string is ignored.
  rawOutput.reset();
  coded.writeStringCached(5, "ignored", ByteString.copyFromUtf8("hello"));
  coded.flush();
  reader = CodedInputStream.newInstance(rawOutput.toByteArray());
  assertEquals(expectedTag, reader.readTag());
  assertEquals("hello", reader.readString());
}
/** Test encodeZigZag32() and encodeZigZag64(). */ /** Test encodeZigZag32() and encodeZigZag64(). */
public void testEncodeZigZag() throws Exception { public void testEncodeZigZag() throws Exception {
assertEquals(0, CodedOutputStream.encodeZigZag32( 0)); assertEquals(0, CodedOutputStream.encodeZigZag32( 0));

@ -199,6 +199,14 @@ GenerateMembers(io::Printer* printer) const {
"private $type$ $name$_ = $default$;\n" "private $type$ $name$_ = $default$;\n"
"public boolean has$capitalized_name$() { return has$capitalized_name$; }\n" "public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
"public $type$ get$capitalized_name$() { return $name$_; }\n"); "public $type$ get$capitalized_name$() { return $name$_; }\n");
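// Cache the UTF-8 encoding of the Java string so that it is not encoded
// twice (once when computing the size and once when serializing).
// Strictly speaking, this field does not need to be volatile because
// ByteString is immutable:
// http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html#finalRight
// It is declared volatile anyway, on the principle of better safe than sorry.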
if (ShouldUseStringEncodingCache()) {
printer->Print(variables_,
"private volatile com.google.protobuf.ByteString $name$EncodedCache_;\n");
}
} }
void PrimitiveFieldGenerator:: void PrimitiveFieldGenerator::
@ -259,25 +267,57 @@ GenerateParsingCode(io::Printer* printer) const {
void PrimitiveFieldGenerator:: void PrimitiveFieldGenerator::
GenerateSerializationCode(io::Printer* printer) const { GenerateSerializationCode(io::Printer* printer) const {
printer->Print(variables_, if (ShouldUseStringEncodingCache()) {
"if (has$capitalized_name$()) {\n" // Pass the cached serialized version, then forget it.
" output.write$capitalized_type$($number$, get$capitalized_name$());\n" // The cached version could be null if we didn't compute the size first,
"}\n"); // or if there are two threads attempting to serialize simultaneously.
// CodedOutputStream.writeStringCached handles this for us.
printer->Print(variables_,
"if (has$capitalized_name$()) {\n"
" output.write$capitalized_type$Cached($number$,\n"
" get$capitalized_name$(),\n"
" $name$EncodedCache_);\n"
" $name$EncodedCache_ = null;\n"
"}\n");
} else {
printer->Print(variables_,
"if (has$capitalized_name$()) {\n"
" output.write$capitalized_type$($number$, get$capitalized_name$());\n"
"}\n");
}
} }
void PrimitiveFieldGenerator:: void PrimitiveFieldGenerator::
GenerateSerializedSizeCode(io::Printer* printer) const { GenerateSerializedSizeCode(io::Printer* printer) const {
printer->Print(variables_, // Avoid double encoding for strings: serialize the string here
"if (has$capitalized_name$()) {\n" if (ShouldUseStringEncodingCache()) {
" size += com.google.protobuf.CodedOutputStream\n" printer->Print(variables_,
" .compute$capitalized_type$Size($number$, get$capitalized_name$());\n" "if (has$capitalized_name$()) {\n"
"}\n"); " com.google.protobuf.ByteString serialized = \n"
" com.google.protobuf.ByteString.copyFromUtf8(\n"
" get$capitalized_name$());\n"
" $name$EncodedCache_ = serialized;\n"
" size += com.google.protobuf.CodedOutputStream\n"
" .computeBytesSize($number$, serialized);\n"
"}\n");
} else {
printer->Print(variables_,
"if (has$capitalized_name$()) {\n"
" size += com.google.protobuf.CodedOutputStream\n"
" .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
"}\n");
}
} }
string PrimitiveFieldGenerator::GetBoxedType() const { string PrimitiveFieldGenerator::GetBoxedType() const {
return BoxedPrimitiveTypeName(GetJavaType(descriptor_)); return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
} }
// Returns true only when the field is of type string and its file is
// optimized for SPEED; the string encoding cache is used in that case.
bool PrimitiveFieldGenerator::ShouldUseStringEncodingCache() const {
  if (GetType(descriptor_) != FieldDescriptor::TYPE_STRING) {
    return false;
  }
  return descriptor_->file()->options().optimize_for() == FileOptions::SPEED;
}
// =================================================================== // ===================================================================
RepeatedPrimitiveFieldGenerator:: RepeatedPrimitiveFieldGenerator::

@ -62,6 +62,8 @@ class PrimitiveFieldGenerator : public FieldGenerator {
string GetBoxedType() const; string GetBoxedType() const;
private: private:
bool ShouldUseStringEncodingCache() const;
const FieldDescriptor* descriptor_; const FieldDescriptor* descriptor_;
map<string, string> variables_; map<string, string> variables_;

Loading…
Cancel
Save