|
|
|
@ -31,6 +31,9 @@ |
|
|
|
|
package com.google.protobuf.nano; |
|
|
|
|
|
|
|
|
|
import java.io.IOException; |
|
|
|
|
import java.nio.BufferOverflowException; |
|
|
|
|
import java.nio.ByteBuffer; |
|
|
|
|
import java.nio.ReadOnlyBufferException; |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Encodes and writes protocol message fields. |
|
|
|
@ -47,15 +50,17 @@ import java.io.IOException; |
|
|
|
|
* @author kenton@google.com Kenton Varda |
|
|
|
|
*/ |
|
|
|
|
public final class CodedOutputByteBufferNano { |
|
|
|
|
private final byte[] buffer; |
|
|
|
|
private final int limit; |
|
|
|
|
private int position; |
|
|
|
|
/* max bytes per java UTF-16 char in UTF-8 */ |
|
|
|
|
private static final int MAX_UTF8_EXPANSION = 3; |
|
|
|
|
private final ByteBuffer buffer; |
|
|
|
|
|
|
|
|
|
/**
 * Creates an encoder that writes into a region of {@code buffer}.
 *
 * <p>The array is wrapped with {@link ByteBuffer#wrap(byte[], int, int)} and handed to the
 * {@code ByteBuffer} constructor.
 *
 * @param buffer the array that receives the encoded bytes
 * @param offset index of the first byte that may be written
 * @param length number of bytes that may be written starting at {@code offset}
 */
private CodedOutputByteBufferNano(final byte[] buffer, final int offset,
                                  final int length) {
  this(ByteBuffer.wrap(buffer, offset, length));
}
|
|
|
|
|
|
|
|
|
/**
 * Creates an encoder that writes into {@code buffer}.
 *
 * <p>The buffer's own position and limit serve as the write cursor and the end of the
 * writable space, so no separate offset bookkeeping is kept here.
 *
 * @param buffer the buffer that receives the encoded bytes
 */
private CodedOutputByteBufferNano(final ByteBuffer buffer) {
  this.buffer = buffer;
}
|
|
|
|
|
|
|
|
|
/** |
|
|
|
@ -287,14 +292,213 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
|
|
|
|
|
/** Write a {@code string} field to the stream. */ |
|
|
|
|
public void writeStringNoTag(final String value) throws IOException { |
|
|
|
|
// Unfortunately there does not appear to be any way to tell Java to encode
|
|
|
|
|
// UTF-8 directly into our buffer, so we have to let it create its own byte
|
|
|
|
|
// array and then copy.
|
|
|
|
|
final byte[] bytes = value.getBytes(InternalNano.UTF_8); |
|
|
|
|
writeRawVarint32(bytes.length); |
|
|
|
|
writeRawBytes(bytes); |
|
|
|
|
// UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()),
|
|
|
|
|
// and at most 3 times of it. Optimize for the case where we know this length results in a
|
|
|
|
|
// constant varint length - saves measuring length of the string.
|
|
|
|
|
try { |
|
|
|
|
final int minLengthVarIntSize = computeRawVarint32Size(value.length()); |
|
|
|
|
final int maxLengthVarIntSize = computeRawVarint32Size(value.length() * MAX_UTF8_EXPANSION); |
|
|
|
|
if (minLengthVarIntSize == maxLengthVarIntSize) { |
|
|
|
|
int oldPosition = buffer.position(); |
|
|
|
|
// Buffer.position, when passed a position that is past its limit, throws
|
|
|
|
|
// IllegalArgumentException, and this class is documented to throw
|
|
|
|
|
// OutOfSpaceException instead.
|
|
|
|
|
if (buffer.remaining() < minLengthVarIntSize) { |
|
|
|
|
throw new OutOfSpaceException(oldPosition + minLengthVarIntSize, buffer.limit()); |
|
|
|
|
} |
|
|
|
|
buffer.position(oldPosition + minLengthVarIntSize); |
|
|
|
|
encode(value, buffer); |
|
|
|
|
int newPosition = buffer.position(); |
|
|
|
|
buffer.position(oldPosition); |
|
|
|
|
writeRawVarint32(newPosition - oldPosition - minLengthVarIntSize); |
|
|
|
|
buffer.position(newPosition); |
|
|
|
|
} else { |
|
|
|
|
writeRawVarint32(encodedLength(value)); |
|
|
|
|
encode(value, buffer); |
|
|
|
|
} |
|
|
|
|
} catch (BufferOverflowException e) { |
|
|
|
|
final OutOfSpaceException outOfSpaceException = new OutOfSpaceException(buffer.position(), |
|
|
|
|
buffer.limit()); |
|
|
|
|
outOfSpaceException.initCause(e); |
|
|
|
|
throw outOfSpaceException; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// These UTF-8 handling methods are copied from Guava's Utf8 class.
|
|
|
|
|
/** |
|
|
|
|
* Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, |
|
|
|
|
* this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in |
|
|
|
|
* both time and space. |
|
|
|
|
* |
|
|
|
|
* @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired |
|
|
|
|
* surrogates) |
|
|
|
|
*/ |
|
|
|
|
private static int encodedLength(CharSequence sequence) { |
|
|
|
|
// Warning to maintainers: this implementation is highly optimized.
|
|
|
|
|
int utf16Length = sequence.length(); |
|
|
|
|
int utf8Length = utf16Length; |
|
|
|
|
int i = 0; |
|
|
|
|
|
|
|
|
|
// This loop optimizes for pure ASCII.
|
|
|
|
|
while (i < utf16Length && sequence.charAt(i) < 0x80) { |
|
|
|
|
i++; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// This loop optimizes for chars less than 0x800.
|
|
|
|
|
for (; i < utf16Length; i++) { |
|
|
|
|
char c = sequence.charAt(i); |
|
|
|
|
if (c < 0x800) { |
|
|
|
|
utf8Length += ((0x7f - c) >>> 31); // branch free!
|
|
|
|
|
} else { |
|
|
|
|
utf8Length += encodedLengthGeneral(sequence, i); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (utf8Length < utf16Length) { |
|
|
|
|
// Necessary and sufficient condition for overflow because of maximum 3x expansion
|
|
|
|
|
throw new IllegalArgumentException("UTF-8 length does not fit in int: " |
|
|
|
|
+ (utf8Length + (1L << 32))); |
|
|
|
|
} |
|
|
|
|
return utf8Length; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private static int encodedLengthGeneral(CharSequence sequence, int start) { |
|
|
|
|
int utf16Length = sequence.length(); |
|
|
|
|
int utf8Length = 0; |
|
|
|
|
for (int i = start; i < utf16Length; i++) { |
|
|
|
|
char c = sequence.charAt(i); |
|
|
|
|
if (c < 0x800) { |
|
|
|
|
utf8Length += (0x7f - c) >>> 31; // branch free!
|
|
|
|
|
} else { |
|
|
|
|
utf8Length += 2; |
|
|
|
|
// jdk7+: if (Character.isSurrogate(c)) {
|
|
|
|
|
if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) { |
|
|
|
|
// Check that we have a well-formed surrogate pair.
|
|
|
|
|
int cp = Character.codePointAt(sequence, i); |
|
|
|
|
if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
|
|
|
|
throw new IllegalArgumentException("Unpaired surrogate at index " + i); |
|
|
|
|
} |
|
|
|
|
i++; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return utf8Length; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Encodes {@code sequence} into UTF-8, in {@code byteBuffer}. For a string, this method is |
|
|
|
|
* equivalent to {@code buffer.put(string.getBytes(UTF_8))}, but is more efficient in both time |
|
|
|
|
* and space. Bytes are written starting at the current position. This method requires paired |
|
|
|
|
* surrogates, and therefore does not support chunking. |
|
|
|
|
* |
|
|
|
|
* <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to |
|
|
|
|
* compute the exact amount needed, or leave room for {@code 3 * sequence.length()}, which is the |
|
|
|
|
* largest possible number of bytes that any input can be encoded to. |
|
|
|
|
* |
|
|
|
|
* @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired |
|
|
|
|
* surrogates) |
|
|
|
|
* @throws BufferOverflowException if {@code sequence} encoded in UTF-8 does not fit in |
|
|
|
|
* {@code byteBuffer}'s remaining space. |
|
|
|
|
* @throws ReadOnlyBufferException if {@code byteBuffer} is a read-only buffer. |
|
|
|
|
*/ |
|
|
|
|
private static void encode(CharSequence sequence, ByteBuffer byteBuffer) { |
|
|
|
|
if (byteBuffer.isReadOnly()) { |
|
|
|
|
throw new ReadOnlyBufferException(); |
|
|
|
|
} else if (byteBuffer.hasArray()) { |
|
|
|
|
try { |
|
|
|
|
int encoded = encode(sequence, |
|
|
|
|
byteBuffer.array(), |
|
|
|
|
byteBuffer.arrayOffset() + byteBuffer.position(), |
|
|
|
|
byteBuffer.remaining()); |
|
|
|
|
byteBuffer.position(encoded - byteBuffer.arrayOffset()); |
|
|
|
|
} catch (ArrayIndexOutOfBoundsException e) { |
|
|
|
|
BufferOverflowException boe = new BufferOverflowException(); |
|
|
|
|
boe.initCause(e); |
|
|
|
|
throw boe; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
encodeDirect(sequence, byteBuffer); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private static void encodeDirect(CharSequence sequence, ByteBuffer byteBuffer) { |
|
|
|
|
int utf16Length = sequence.length(); |
|
|
|
|
for (int i = 0; i < utf16Length; i++) { |
|
|
|
|
final char c = sequence.charAt(i); |
|
|
|
|
if (c < 0x80) { // ASCII
|
|
|
|
|
byteBuffer.put((byte) c); |
|
|
|
|
} else if (c < 0x800) { // 11 bits, two UTF-8 bytes
|
|
|
|
|
byteBuffer.put((byte) ((0xF << 6) | (c >>> 6))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & c))); |
|
|
|
|
} else if (c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) { |
|
|
|
|
// Maximium single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
|
|
|
|
|
byteBuffer.put((byte) ((0xF << 5) | (c >>> 12))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & (c >>> 6)))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & c))); |
|
|
|
|
} else { |
|
|
|
|
final char low; |
|
|
|
|
if (i + 1 == sequence.length() |
|
|
|
|
|| !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { |
|
|
|
|
throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); |
|
|
|
|
} |
|
|
|
|
int codePoint = Character.toCodePoint(c, low); |
|
|
|
|
byteBuffer.put((byte) ((0xF << 4) | (codePoint >>> 18))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 12)))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 6)))); |
|
|
|
|
byteBuffer.put((byte) (0x80 | (0x3F & codePoint))); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private static int encode(CharSequence sequence, byte[] bytes, int offset, int length) { |
|
|
|
|
int utf16Length = sequence.length(); |
|
|
|
|
int j = offset; |
|
|
|
|
int i = 0; |
|
|
|
|
int limit = offset + length; |
|
|
|
|
// Designed to take advantage of
|
|
|
|
|
// https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
|
|
|
|
|
for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) { |
|
|
|
|
bytes[j + i] = (byte) c; |
|
|
|
|
} |
|
|
|
|
if (i == utf16Length) { |
|
|
|
|
return j + utf16Length; |
|
|
|
|
} |
|
|
|
|
j += i; |
|
|
|
|
for (char c; i < utf16Length; i++) { |
|
|
|
|
c = sequence.charAt(i); |
|
|
|
|
if (c < 0x80 && j < limit) { |
|
|
|
|
bytes[j++] = (byte) c; |
|
|
|
|
} else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
|
|
|
|
|
bytes[j++] = (byte) ((0xF << 6) | (c >>> 6)); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & c)); |
|
|
|
|
} else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) { |
|
|
|
|
// Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
|
|
|
|
|
bytes[j++] = (byte) ((0xF << 5) | (c >>> 12)); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6))); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & c)); |
|
|
|
|
} else if (j <= limit - 4) { |
|
|
|
|
// Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes
|
|
|
|
|
final char low; |
|
|
|
|
if (i + 1 == sequence.length() |
|
|
|
|
|| !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { |
|
|
|
|
throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); |
|
|
|
|
} |
|
|
|
|
int codePoint = Character.toCodePoint(c, low); |
|
|
|
|
bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18)); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12))); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6))); |
|
|
|
|
bytes[j++] = (byte) (0x80 | (0x3F & codePoint)); |
|
|
|
|
} else { |
|
|
|
|
throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return j; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// End guava UTF-8 methods
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Write a {@code group} field to the stream. */ |
|
|
|
|
public void writeGroupNoTag(final MessageNano value) throws IOException { |
|
|
|
|
value.writeTo(this); |
|
|
|
@ -602,9 +806,8 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
* {@code string} field. |
|
|
|
|
*/ |
|
|
|
|
public static int computeStringSizeNoTag(final String value) { |
|
|
|
|
final byte[] bytes = value.getBytes(InternalNano.UTF_8); |
|
|
|
|
return computeRawVarint32Size(bytes.length) + |
|
|
|
|
bytes.length; |
|
|
|
|
final int length = encodedLength(value); |
|
|
|
|
return computeRawVarint32Size(length) + length; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
@ -687,7 +890,7 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
* Otherwise, throws {@code UnsupportedOperationException}. |
|
|
|
|
*/ |
|
|
|
|
public int spaceLeft() { |
|
|
|
|
return limit - position; |
|
|
|
|
return buffer.remaining(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
@ -704,6 +907,23 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Returns the position within the internal buffer. |
|
|
|
|
*/ |
|
|
|
|
public int position() { |
|
|
|
|
return buffer.position(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Resets the position within the internal buffer to zero. |
|
|
|
|
* |
|
|
|
|
* @see #position |
|
|
|
|
* @see #spaceLeft |
|
|
|
|
*/ |
|
|
|
|
public void reset() { |
|
|
|
|
buffer.clear(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* If you create a CodedOutputStream around a simple flat array, you must |
|
|
|
|
* not attempt to write more bytes than the array has space. Otherwise, |
|
|
|
@ -720,12 +940,12 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
|
|
|
|
|
/** Write a single byte. */ |
|
|
|
|
public void writeRawByte(final byte value) throws IOException { |
|
|
|
|
if (position == limit) { |
|
|
|
|
if (!buffer.hasRemaining()) { |
|
|
|
|
// We're writing to a single buffer.
|
|
|
|
|
throw new OutOfSpaceException(position, limit); |
|
|
|
|
throw new OutOfSpaceException(buffer.position(), buffer.limit()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
buffer[position++] = value; |
|
|
|
|
buffer.put(value); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** Write a single byte, represented by an integer value. */ |
|
|
|
@ -741,13 +961,11 @@ public final class CodedOutputByteBufferNano { |
|
|
|
|
/** Write part of an array of bytes. */ |
|
|
|
|
public void writeRawBytes(final byte[] value, int offset, int length) |
|
|
|
|
throws IOException { |
|
|
|
|
if (limit - position >= length) { |
|
|
|
|
// We have room in the current buffer.
|
|
|
|
|
System.arraycopy(value, offset, buffer, position, length); |
|
|
|
|
position += length; |
|
|
|
|
if (buffer.remaining() >= length) { |
|
|
|
|
buffer.put(value, offset, length); |
|
|
|
|
} else { |
|
|
|
|
// We're writing to a single buffer.
|
|
|
|
|
throw new OutOfSpaceException(position, limit); |
|
|
|
|
throw new OutOfSpaceException(buffer.position(), buffer.limit()); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|