Avoid interface calls in hot loop

Before, every charAt call would emit the following (on Android):
```
    0x00002104    adrp x17, #+0x1000 (addr 0x3000)
    0x00002108    ldr w17, [x17, #20]
    0x0000210c    ldr x0, [x0, #128]
    0x00002110    ldr x0, [x0, #328]
    0x00002114    ldr lr, [x0, #24]
    0x00002118    blr lr <-- Call into String.charAt(int)
```
Now it emits the inlined implementation of charAt (the branch handles possibly compressed strings):
```
    0x000020b4    ldur w16, [x4, #-8]
    0x000020b8    tbnz w16, #0, #+0xc (addr 0x20c4)
    0x000020bc    ldrb w4, [x4, x0]
    0x000020c0    b #+0x8 (addr 0x20c8)
    0x000020c4    ldrh w4, [x4, x0, lsl #1]
```

PiperOrigin-RevId: 591147406
pull/15087/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent 220415ddfb
commit b10d3f93b6
  1. 40
      java/core/src/main/java/com/google/protobuf/Utf8.java
  2. 2
      java/core/src/test/java/com/google/protobuf/Utf8Test.java

@ -214,24 +214,24 @@ final class Utf8 {
* @throws IllegalArgumentException if {@code string} contains ill-formed UTF-16 (unpaired
* surrogates)
*/
static int encodedLength(CharSequence sequence) {
static int encodedLength(String string) {
// Warning to maintainers: this implementation is highly optimized.
int utf16Length = sequence.length();
int utf16Length = string.length();
int utf8Length = utf16Length;
int i = 0;
// This loop optimizes for pure ASCII.
while (i < utf16Length && sequence.charAt(i) < 0x80) {
while (i < utf16Length && string.charAt(i) < 0x80) {
i++;
}
// This loop optimizes for chars less than 0x800.
for (; i < utf16Length; i++) {
char c = sequence.charAt(i);
char c = string.charAt(i);
if (c < 0x800) {
utf8Length += ((0x7f - c) >>> 31); // branch free!
} else {
utf8Length += encodedLengthGeneral(sequence, i);
utf8Length += encodedLengthGeneral(string, i);
break;
}
}
@ -244,11 +244,11 @@ final class Utf8 {
return utf8Length;
}
private static int encodedLengthGeneral(CharSequence sequence, int start) {
int utf16Length = sequence.length();
private static int encodedLengthGeneral(String string, int start) {
int utf16Length = string.length();
int utf8Length = 0;
for (int i = start; i < utf16Length; i++) {
char c = sequence.charAt(i);
char c = string.charAt(i);
if (c < 0x800) {
utf8Length += (0x7f - c) >>> 31; // branch free!
} else {
@ -256,7 +256,7 @@ final class Utf8 {
// jdk7+: if (Character.isSurrogate(c)) {
if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
// Check that we have a well-formed surrogate pair.
int cp = Character.codePointAt(sequence, i);
int cp = Character.codePointAt(string, i);
if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
throw new UnpairedSurrogateException(i, utf16Length);
}
@ -267,7 +267,7 @@ final class Utf8 {
return utf8Length;
}
static int encode(CharSequence in, byte[] out, int offset, int length) {
static int encode(String in, byte[] out, int offset, int length) {
return processor.encodeUtf8(in, out, offset, length);
}
// End Guava UTF-8 methods.
@ -326,9 +326,9 @@ final class Utf8 {
*
* @param in the source string to be encoded
* @param out the target buffer to receive the encoded string.
* @see Utf8#encode(CharSequence, byte[], int, int)
* @see Utf8#encode(String, byte[], int, int)
*/
static void encodeUtf8(CharSequence in, ByteBuffer out) {
static void encodeUtf8(String in, ByteBuffer out) {
processor.encodeUtf8(in, out);
}
@ -724,7 +724,7 @@ final class Utf8 {
* {@code bytes.length - offset}
* @return the new offset, equivalent to {@code offset + Utf8.encodedLength(in)}
*/
abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
abstract int encodeUtf8(String in, byte[] out, int offset, int length);
/**
* Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
@ -743,7 +743,7 @@ final class Utf8 {
* @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than {@code
* out.remaining()}
*/
final void encodeUtf8(CharSequence in, ByteBuffer out) {
final void encodeUtf8(String in, ByteBuffer out) {
if (out.hasArray()) {
final int offset = out.arrayOffset();
int endIndex = Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
@ -756,13 +756,13 @@ final class Utf8 {
}
/** Encodes the input character sequence to a direct {@link ByteBuffer} instance. */
abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
abstract void encodeUtf8Direct(String in, ByteBuffer out);
/**
* Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
* ByteBuffer} API, rather than potentially faster approaches.
*/
final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
final void encodeUtf8Default(String in, ByteBuffer out) {
final int inLength = in.length();
int outIx = out.position();
int inIx = 0;
@ -1013,7 +1013,7 @@ final class Utf8 {
}
@Override
int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
int encodeUtf8(String in, byte[] out, int offset, int length) {
int utf16Length = in.length();
int j = offset;
int i = 0;
@ -1065,7 +1065,7 @@ final class Utf8 {
}
@Override
void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
void encodeUtf8Direct(String in, ByteBuffer out) {
// For safe processing, we have to use the ByteBuffer API.
encodeUtf8Default(in, out);
}
@ -1442,7 +1442,7 @@ final class Utf8 {
}
@Override
int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
int encodeUtf8(final String in, final byte[] out, final int offset, final int length) {
long outIx = offset;
final long outLimit = outIx + length;
final int inLimit = in.length();
@ -1503,7 +1503,7 @@ final class Utf8 {
}
@Override
void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
void encodeUtf8Direct(String in, ByteBuffer out) {
final long address = addressOffset(out);
long outIx = address + out.position();
final long outLimit = address + out.limit();

@ -194,7 +194,7 @@ public class Utf8Test {
private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
byte[] output = new byte[length];
processor.encodeUtf8(message, output, 0, output.length);
int unused = processor.encodeUtf8(message, output, 0, output.length);
return output;
}

Loading…
Cancel
Save