@ -42,6 +42,7 @@ import static java.lang.Character.isSurrogatePair;
import static java.lang.Character.toCodePoint ;
import static java.lang.Character.toCodePoint ;
import java.nio.ByteBuffer ;
import java.nio.ByteBuffer ;
import java.util.Arrays ;
/ * *
/ * *
* A set of low - level , high - performance static utility methods related to the UTF - 8 character
* A set of low - level , high - performance static utility methods related to the UTF - 8 character
@ -1365,88 +1366,30 @@ final class Utf8 {
}
}
@Override
@Override
String decodeUtf8 ( byte [ ] bytes , int index , int size ) throws InvalidProtocolBufferException {
String decodeUtf8 ( byte [ ] bytes , int index , int size )
if ( ( index | size | bytes . length - index - size ) < 0 ) {
throws InvalidProtocolBufferException {
throw new ArrayIndexOutOfBoundsException (
try {
String . format ( "buffer length=%d, index=%d, size=%d" , bytes . length , index , size ) ) ;
String s = new String ( bytes , index , size , Internal . UTF_8 ) ;
}
int offset = index + unsafeEstimateConsecutiveAscii ( bytes , index , size ) ;
final int limit = index + size ;
// get an "exact" consecutive ASCII
// "\uFFFD" is UTF-8 default replacement string, which illegal byte sequences get replaced with.
while ( offset < limit ) {
if ( ! s . contains ( "\uFFFD" ) ) {
byte b = UnsafeUtil . getByte ( bytes , offset ) ;
return s ;
if ( b < 0 ) {
break ;
}
}
offset + + ;
}
if ( offset = = limit ) {
// The entire byte sequence is ASCII. Don't bother copying to a char[], JVMs using
// compact strings will just turn it back into the same byte[].
return new String ( bytes , index , size , Internal . US_ASCII ) ;
}
// It's not all ASCII, at this point. This may over-allocate, but we will truncate in the
// end.
char [ ] resultArr = new char [ size ] ;
int resultPos = 0 ;
// Copy over the initial run of ASCII.
for ( int i = index ; i < offset ; i + + ) {
DecodeUtil . handleOneByte ( UnsafeUtil . getByte ( bytes , i ) , resultArr , resultPos + + ) ;
}
while ( offset < limit ) {
byte byte1 = UnsafeUtil . getByte ( bytes , offset + + ) ;
if ( DecodeUtil . isOneByte ( byte1 ) ) {
DecodeUtil . handleOneByte ( byte1 , resultArr , resultPos + + ) ;
// It's common for there to be multiple ASCII characters in a run mixed in, so add an
// Since s contains "\uFFFD" there are 2 options:
// extra optimized loop to take care of these runs.
// 1) The byte array slice is invalid UTF-8.
while ( offset < limit ) {
// 2) The byte array slice is valid UTF-8 and contains encodings for "\uFFFD".
byte b = UnsafeUtil . getByte ( bytes , offset ) ;
// To rule out (1), we encode s and compare it to the byte array slice.
if ( ! DecodeUtil . isOneByte ( b ) ) {
// If the byte array slice was invalid UTF-8, then we would get a different sequence of bytes.
break ;
if ( Arrays . equals ( s . getBytes ( Internal . UTF_8 ) , Arrays . copyOfRange ( bytes , index , index + size ) ) ) {
}
return s ;
offset + + ;
DecodeUtil . handleOneByte ( b , resultArr , resultPos + + ) ;
}
} else if ( DecodeUtil . isTwoBytes ( byte1 ) ) {
if ( offset > = limit ) {
throw InvalidProtocolBufferException . invalidUtf8 ( ) ;
}
DecodeUtil . handleTwoBytes (
byte1 , /* byte2 */ UnsafeUtil . getByte ( bytes , offset + + ) , resultArr , resultPos + + ) ;
} else if ( DecodeUtil . isThreeBytes ( byte1 ) ) {
if ( offset > = limit - 1 ) {
throw InvalidProtocolBufferException . invalidUtf8 ( ) ;
}
DecodeUtil . handleThreeBytes (
byte1 ,
/* byte2 */ UnsafeUtil . getByte ( bytes , offset + + ) ,
/* byte3 */ UnsafeUtil . getByte ( bytes , offset + + ) ,
resultArr ,
resultPos + + ) ;
} else {
if ( offset > = limit - 2 ) {
throw InvalidProtocolBufferException . invalidUtf8 ( ) ;
}
DecodeUtil . handleFourBytes (
byte1 ,
/* byte2 */ UnsafeUtil . getByte ( bytes , offset + + ) ,
/* byte3 */ UnsafeUtil . getByte ( bytes , offset + + ) ,
/* byte4 */ UnsafeUtil . getByte ( bytes , offset + + ) ,
resultArr ,
resultPos + + ) ;
// 4-byte case requires two chars.
resultPos + + ;
}
}
}
return new String ( resultArr , 0 , resultPos ) ;
throw InvalidProtocolBufferException . invalidUtf8 ( ) ;
} catch ( IndexOutOfBoundsException e ) {
throw new ArrayIndexOutOfBoundsException (
String . format ( "buffer length=%d, index=%d, size=%d" , bytes . length , index , size ) ) ;
}
}
}
@Override
@Override