perf: String#getBytes(Charset) vs getBytes(String)

pull/264/head
Viktor Szathmáry 10 years ago committed by Tamir Duberstein
parent 7139d1eff7
commit e84893f676
  1. 49
      java/src/main/java/com/google/protobuf/ByteString.java
  2. 10
      java/src/main/java/com/google/protobuf/LiteralByteString.java
  3. 10
      java/src/main/java/com/google/protobuf/RopeByteString.java
  4. 13
      java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
  5. 7
      java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
  6. 30
      java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
  7. 26
      java/src/test/java/com/google/protobuf/RopeByteStringTest.java

@ -37,6 +37,8 @@ import java.io.OutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
// Defined by java.nio.charset.Charset
protected static final String UTF_8 = "UTF-8";
protected static final Charset UTF_8 = Charset.forName("UTF-8");
/**
* Empty {@code ByteString}.
@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new {@code ByteString}
*/
public static ByteString copyFromUtf8(String text) {
try {
return new LiteralByteString(text.getBytes(UTF_8));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
return new LiteralByteString(text.getBytes(UTF_8));
}
// =================================================================
@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string
* @throws UnsupportedEncodingException if charset isn't recognized
*/
public abstract String toString(String charsetName)
throws UnsupportedEncodingException;
public String toString(String charsetName)
    throws UnsupportedEncodingException {
  // Charset.forName reports failures with two unchecked exceptions:
  // UnsupportedCharsetException for a legal but unavailable name, and
  // IllegalCharsetNameException for a syntactically illegal name. Both mean
  // the charset is not recognized, so both are translated into the checked
  // UnsupportedEncodingException that this method declares.
  try {
    return toString(Charset.forName(charsetName));
  } catch (UnsupportedCharsetException e) {
    throw newUnsupportedEncodingException(charsetName, e);
  } catch (java.nio.charset.IllegalCharsetNameException e) {
    throw newUnsupportedEncodingException(charsetName, e);
  }
}

// Builds the checked exception for an unrecognized charset name, keeping the
// original lookup failure as its cause.
private static UnsupportedEncodingException newUnsupportedEncodingException(
    String charsetName, IllegalArgumentException cause) {
  UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
  exception.initCause(cause);
  return exception;
}
/**
 * Decodes the bytes of this {@code ByteString} into a {@code String} using
 * the given charset. When this {@code ByteString} is empty, the {@code ""}
 * literal is returned directly and no decoding is attempted.
 *
 * @param charset charset used to decode the bytes
 * @return the decoded string
 */
public String toString(Charset charset) {
  if (size() == 0) {
    return "";
  }
  return toStringInternal(charset);
}
/**
 * Constructs a new {@code String} by decoding the bytes using the
 * specified charset.
 *
 * <p>{@link #toString(Charset)} handles the empty case itself and only
 * delegates here when {@code size()} is non-zero.
 *
 * @param charset charset used to decode the bytes
 * @return new string
 */
protected abstract String toStringInternal(Charset charset);
// =================================================================
// UTF-8 decoding
@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string using UTF-8 encoding
*/
public String toStringUtf8() {
try {
return toString(UTF_8);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
return toString(UTF_8);
}
/**

@ -36,6 +36,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
@ -152,13 +153,8 @@ class LiteralByteString extends ByteString {
}
@Override
public String toString(String charsetName)
throws UnsupportedEncodingException {
// Optimize for empty strings, but ensure we don't silently ignore invalid
// encodings.
return size() == 0 && UTF_8.equals(charsetName)
? ""
: new String(bytes, getOffsetIntoBytes(), size(), charsetName);
protected String toStringInternal(Charset charset) {
return new String(bytes, getOffsetIntoBytes(), size(), charset);
}
// =================================================================

@ -38,6 +38,7 @@ import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.io.ByteArrayInputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
@ -418,13 +419,8 @@ class RopeByteString extends ByteString {
}
@Override
public String toString(String charsetName)
throws UnsupportedEncodingException {
// Optimize for empty strings, but ensure we don't silently ignore invalid
// encodings.
return size() == 0 && UTF_8.equals(charsetName)
? ""
: new String(toByteArray(), charsetName);
protected String toStringInternal(Charset charset) {
return new String(toByteArray(), charset);
}
// =================================================================

@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
testString.substring(2, testString.length() - 6), roundTripString);
}
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String original = "I love unicode \u1234\u5678 characters";
  final byte[] encoded = original.getBytes(ByteString.UTF_8);
  final LiteralByteString whole = new LiteralByteString(encoded);

  // Drop 2 bytes from the front and 6 from the back. Only ASCII characters
  // are removed, so the same offsets apply to the bytes and to the string.
  final ByteString trimmed = whole.substring(2, whole.size() - 6);
  assertEquals(classUnderTest + ".substring() must have the expected type",
      classUnderTest, getActualClassName(trimmed));

  final String expected = original.substring(2, original.length() - 6);
  final String decoded = trimmed.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode bytes must match", expected, decoded);
}
public void testJavaSerialization() throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(out);

@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase {
assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
}
public void testCharsetToString() throws UnsupportedEncodingException {
  // Round trip: encode the text, wrap the bytes, then decode them back with
  // the same charset constant.
  final String original = "I love unicode \u1234\u5678 characters";
  final byte[] encoded = original.getBytes(ByteString.UTF_8);
  final LiteralByteString wrapped = new LiteralByteString(encoded);
  final String decoded = wrapped.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode must match", original, decoded);
}
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
assertSame(classUnderTest + " must be the same string references",
ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));

@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
flatString.hashCode(), unicode.hashCode());
}
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String piece = "I love unicode \u1234\u5678 characters";
  final ByteString pieceBytes = ByteString.copyFromUtf8(piece);
  final int copies = 250;

  // Grow the expected text and the concatenated ByteString in lockstep, one
  // piece per iteration.
  final StringBuilder expectedText = new StringBuilder(copies * piece.length());
  ByteString rope = ByteString.EMPTY;
  int appended = 0;
  while (appended < copies) {
    expectedText.append(piece);
    rope = RopeByteString.concatenate(rope, pieceBytes);
    appended++;
  }

  // Apply the same trim (2 from the front, 6 from the back) to both the
  // expected text and the ByteString under test.
  final String fullText = expectedText.toString();
  final String expected = fullText.substring(2, fullText.length() - 6);
  final ByteString trimmed = rope.substring(2, rope.size() - 6);
  assertEquals(classUnderTest + " from string must have the expected type",
      classUnderTest, getActualClassName(trimmed));

  final String decoded = trimmed.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode bytes must match",
      expected, decoded);

  // The trimmed value must be indistinguishable from a ByteString built
  // directly from the expected text.
  final ByteString flat = ByteString.copyFromUtf8(expected);
  assertEquals(classUnderTest + " string must equal the flat string", flat, trimmed);
  assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
      flat.hashCode(), trimmed.hashCode());
}
}

@ -118,6 +118,32 @@ public class RopeByteStringTest extends LiteralByteStringTest {
flatString.hashCode(), unicode.hashCode());
}
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String piece = "I love unicode \u1234\u5678 characters";
  final ByteString pieceBytes = ByteString.copyFromUtf8(piece);
  final int copies = 250;

  // Grow the expected text and the concatenated ByteString in lockstep, one
  // piece per iteration.
  final StringBuilder expectedText = new StringBuilder(copies * piece.length());
  ByteString rope = ByteString.EMPTY;
  int appended = 0;
  while (appended < copies) {
    expectedText.append(piece);
    rope = RopeByteString.concatenate(rope, pieceBytes);
    appended++;
  }
  final String expected = expectedText.toString();

  assertEquals(classUnderTest + " from string must have the expected type",
      classUnderTest, getActualClassName(rope));

  final String decoded = rope.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode bytes must match",
      expected, decoded);

  // The concatenated value must be indistinguishable from a ByteString built
  // directly from the expected text.
  final ByteString flat = ByteString.copyFromUtf8(expected);
  assertEquals(classUnderTest + " string must equal the flat string", flat, rope);
  assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
      flat.hashCode(), rope.hashCode());
}
@Override
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
RopeByteString ropeByteString =

Loading…
Cancel
Save