From 278dfb24964ff12702dc21dc9d49e20f0a4345b4 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Mon, 29 Nov 2021 16:07:20 -0500 Subject: [PATCH] remove UTF8Utils class that hasn't been needed since Java 6 --- Makefile.am | 1 - .../java/com/google/protobuf/Utf8Test.java | 2 +- .../java/com/google/protobuf/Utf8Utils.java | 192 ------------------ 3 files changed, 1 insertion(+), 194 deletions(-) delete mode 100644 java/core/src/test/java/com/google/protobuf/Utf8Utils.java diff --git a/Makefile.am b/Makefile.am index 712f5ae397..7e21cc0a61 100644 --- a/Makefile.am +++ b/Makefile.am @@ -494,7 +494,6 @@ java_EXTRA_DIST= java/core/src/test/java/com/google/protobuf/UnknownFieldSetTest.java \ java/core/src/test/java/com/google/protobuf/UnmodifiableLazyStringListTest.java \ java/core/src/test/java/com/google/protobuf/Utf8Test.java \ - java/core/src/test/java/com/google/protobuf/Utf8Utils.java \ java/core/src/test/java/com/google/protobuf/WellKnownTypesTest.java \ java/core/src/test/java/com/google/protobuf/WireFormatLiteTest.java \ java/core/src/test/java/com/google/protobuf/WireFormatTest.java \ diff --git a/java/core/src/test/java/com/google/protobuf/Utf8Test.java b/java/core/src/test/java/com/google/protobuf/Utf8Test.java index 5fdc61b5e5..787cb11cb9 100644 --- a/java/core/src/test/java/com/google/protobuf/Utf8Test.java +++ b/java/core/src/test/java/com/google/protobuf/Utf8Test.java @@ -102,7 +102,7 @@ public class Utf8Test { int codePoint; do { codePoint = rnd.nextInt(maxCodePoint); - } while (Utf8Utils.isSurrogate(codePoint)); + } while (Character.MIN_SURROGATE <= codePoint && codePoint <= Character.MAX_SURROGATE); sb.appendCodePoint(codePoint); } return sb.toString(); diff --git a/java/core/src/test/java/com/google/protobuf/Utf8Utils.java b/java/core/src/test/java/com/google/protobuf/Utf8Utils.java deleted file mode 100644 index 6b031867a3..0000000000 --- a/java/core/src/test/java/com/google/protobuf/Utf8Utils.java +++ /dev/null @@ -1,192 +0,0 @@ -// Protocol Buffers - Google's data interchange 
format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -package com.google.protobuf; - -import static java.lang.Character.MIN_HIGH_SURROGATE; -import static java.lang.Character.MIN_LOW_SURROGATE; -import static java.lang.Character.MIN_SURROGATE; - -import java.util.Random; - -/** Utilities for benchmarking UTF-8. 
*/ -final class Utf8Utils { - private Utf8Utils() {} - - static class MaxCodePoint { - final int value; - - /** - * Convert the input string to a code point. Accepts regular decimal numerals, hex strings, and - * some symbolic names meaningful to humans. - */ - private static int decode(String userFriendly) { - try { - return Integer.decode(userFriendly); - } catch (NumberFormatException ignored) { - if (userFriendly.matches("(?i)(?:American|English|ASCII)")) { - // 1-byte UTF-8 sequences - "American" ASCII text - return 0x80; - } else if (userFriendly.matches("(?i)(?:Danish|Latin|Western.*European)")) { - // Mostly 1-byte UTF-8 sequences, mixed with occasional 2-byte - // sequences - "Western European" text - return 0x90; - } else if (userFriendly.matches("(?i)(?:Greek|Cyrillic|European|ISO.?8859)")) { - // Mostly 2-byte UTF-8 sequences - "European" text - return 0x800; - } else if (userFriendly.matches("(?i)(?:Chinese|Han|Asian|BMP)")) { - // Mostly 3-byte UTF-8 sequences - "Asian" text - return Character.MIN_SUPPLEMENTARY_CODE_POINT; - } else if (userFriendly.matches("(?i)(?:Cuneiform|rare|exotic|supplementary.*)")) { - // Mostly 4-byte UTF-8 sequences - "rare exotic" text - return Character.MAX_CODE_POINT; - } else { - throw new IllegalArgumentException("Can't decode codepoint " + userFriendly); - } - } - } - - public static MaxCodePoint valueOf(String userFriendly) { - return new MaxCodePoint(userFriendly); - } - - public MaxCodePoint(String userFriendly) { - value = decode(userFriendly); - } - } - - /** - * The Utf8 distribution of real data. The distribution is an array with length 4. - * "distribution[i]" means the total number of characters who are encoded with (i + 1) bytes. - * - *

GMM_UTF8_DISTRIBUTION is the distribution of gmm data set. GSR_UTF8_DISTRIBUTION is the - * distribution of gsreq/gsresp data set - */ - public enum Utf8Distribution { - GMM_UTF8_DISTRIBUTION { - @Override - public int[] getDistribution() { - return new int[] {53059, 104, 0, 0}; - } - }, - GSR_UTF8_DISTRIBUTION { - @Override - public int[] getDistribution() { - return new int[] {119458, 74, 2706, 0}; - } - }; - - public abstract int[] getDistribution(); - } - - /** - * Creates an array of random strings. - * - * @param stringCount the number of strings to be created. - * @param charCount the number of characters per string. - * @param maxCodePoint the maximum code point for the characters in the strings. - * @return an array of random strings. - */ - static String[] randomStrings(int stringCount, int charCount, MaxCodePoint maxCodePoint) { - final long seed = 99; - final Random rnd = new Random(seed); - String[] strings = new String[stringCount]; - for (int i = 0; i < stringCount; i++) { - strings[i] = randomString(rnd, charCount, maxCodePoint); - } - return strings; - } - - /** - * Creates a random string - * - * @param rnd the random generator. - * @param charCount the number of characters per string. - * @param maxCodePoint the maximum code point for the characters in the strings. - */ - static String randomString(Random rnd, int charCount, MaxCodePoint maxCodePoint) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < charCount; i++) { - int codePoint; - do { - codePoint = rnd.nextInt(maxCodePoint.value); - } while (Utf8Utils.isSurrogate(codePoint)); - sb.appendCodePoint(codePoint); - } - return sb.toString(); - } - - /** Character.isSurrogate was added in Java SE 7. */ - static boolean isSurrogate(int c) { - return Character.MIN_HIGH_SURROGATE <= c && c <= Character.MAX_LOW_SURROGATE; - } - - /** - * Creates an array of random strings according to UTF8 distribution. - * - * @param stringCount the number of strings to be created. 
- * @param charCount the number of characters per string. - */ - static String[] randomStringsWithDistribution( - int stringCount, int charCount, Utf8Distribution utf8Distribution) { - final int[] distribution = utf8Distribution.getDistribution(); - for (int i = 0; i < 3; i++) { - distribution[i + 1] += distribution[i]; - } - final long seed = 99; - final Random rnd = new Random(seed); - String[] strings = new String[stringCount]; - for (int i = 0; i < stringCount; i++) { - StringBuilder sb = new StringBuilder(); - for (int j = 0; j < charCount; j++) { - int codePoint; - do { - codePoint = rnd.nextInt(distribution[3]); - if (codePoint < distribution[0]) { - // 1 bytes - sb.append((char) 0x7F); - } else if (codePoint < distribution[1]) { - // 2 bytes - sb.append((char) 0x7FF); - } else if (codePoint < distribution[2]) { - // 3 bytes - sb.append((char) (MIN_SURROGATE - 1)); - } else { - // 4 bytes - sb.append(MIN_HIGH_SURROGATE); - sb.append(MIN_LOW_SURROGATE); - } - } while (Utf8Utils.isSurrogate(codePoint)); - } - strings[i] = sb.toString(); - } - return strings; - } -}