Merge pull request #264 from tamird/getbytes

perf: String#getBytes(Charset) vs getBytes(String)

Merge pull request #264 from tamird/getbytes
perf: String#getBytes(Charset) vs getBytes(String)
4990875f · Feng Xiao · 8d9e51cf · 654bd317 · 4990875f · 4990875f
Commit 4990875f authored Apr 02, 2015 by Feng Xiao
25 changed files
--- a/.travis.yml
+++ b/.travis.yml
 sudo: false
-language: cpp
+language: java
+jdk:
+  - openjdk6
+  - openjdk7
+  - oraclejdk7
 os:
  - linux
  - osx
-script: ./autogen.sh && ./configure && make distcheck -j2
+script:
+  - ./autogen.sh && ./configure && make -j2
+  - cd java && mvn test && cd ..
+  - cd javanano && mvn test && cd ..
+  - make distcheck -j2
 notifications:
  email: false
--- a/java/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/src/main/java/com/google/protobuf/ByteString.java
@@ -37,6 +37,8 @@ import java.io.OutputStream;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
@@ -76,9 +78,6 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
  static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
  static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k

-  // Defined by java.nio.charset.Charset
-  protected static final String UTF_8 = "UTF-8";
-
  /**
   * Empty {@code ByteString}.
   */
@@ -261,6 +260,18 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    return new LiteralByteString(text.getBytes(charsetName));
  }

+  /**
+   * Encodes {@code text} into a sequence of bytes using the given charset
+   * and returns the result as a {@code ByteString}.
+   *
+   * @param text source string
+   * @param charset encode using this charset
+   * @return new {@code ByteString}
+   */
+  public static ByteString copyFrom(String text, Charset charset) {
+    return new LiteralByteString(text.getBytes(charset));
+  }
+
  /**
   * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
   * result as a {@code ByteString}.
@@ -269,11 +280,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   * @return new {@code ByteString}
   */
  public static ByteString copyFromUtf8(String text) {
-    try {
-      return new LiteralByteString(text.getBytes(UTF_8));
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return new LiteralByteString(text.getBytes(Internal.UTF_8));
  }

  // =================================================================
@@ -612,8 +619,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   * @return new string
   * @throws UnsupportedEncodingException if charset isn't recognized
   */
-  public abstract String toString(String charsetName)
-      throws UnsupportedEncodingException;
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    // Resolve the name first so that only lookup failures are translated;
+    // anything thrown while decoding propagates unchanged.
+    final Charset charset;
+    try {
+      charset = Charset.forName(charsetName);
+    } catch (IllegalArgumentException e) {
+      // Charset.forName reports an unknown name (UnsupportedCharsetException),
+      // a syntactically illegal name (IllegalCharsetNameException) and a null
+      // name with IllegalArgumentException or a subclass of it. Map them all
+      // to the checked exception this method declares, keeping the cause.
+      UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
+      exception.initCause(e);
+      throw exception;
+    }
+    return toString(charset);
+  }
+
+  /**
+   * Constructs a new {@code String} by decoding the bytes using the
+   * specified charset. Returns the empty string literal {@code ""} when this
+   * {@code ByteString} is empty.
+   *
+   * @param charset decode using this charset
+   * @return new string
+   */
+  public String toString(Charset charset) {
+    return size() == 0 ? "" : toStringInternal(charset);
+  }
+
+  /**
+   * Constructs a new {@code String} by decoding the bytes using the
+   * specified charset.
+   *
+   * @param charset decode using this charset
+   * @return new string
+   */
+  protected abstract String toStringInternal(Charset charset);

  // =================================================================
  // UTF-8 decoding
@@ -624,11 +659,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   * @return new string using UTF-8 encoding
   */
  public String toStringUtf8() {
-    try {
-      return toString(UTF_8);
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return toString(Internal.UTF_8);
  }

  /**

--- a/java/src/main/java/com/google/protobuf/CodedInputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedInputStream.java
@@ -373,14 +373,14 @@ public final class CodedInputStream {
    if (size <= (bufferSize - bufferPos) && size > 0) {
      // Fast path:  We already have the bytes in a contiguous buffer, so
      //   just copy directly from it.
-      final String result = new String(buffer, bufferPos, size, "UTF-8");
+      final String result = new String(buffer, bufferPos, size, Internal.UTF_8);
      bufferPos += size;
      return result;
    } else if (size == 0) {
      return "";
    } else {
      // Slow path:  Build a byte array first then copy it.
-      return new String(readRawBytesSlowPath(size), "UTF-8");
+      return new String(readRawBytesSlowPath(size), Internal.UTF_8);
    }
  }

@@ -409,7 +409,7 @@ public final class CodedInputStream {
    if (!Utf8.isValidUtf8(bytes, pos, pos + size)) {
      throw InvalidProtocolBufferException.invalidUtf8();
    }
-    return new String(bytes, pos, size, "UTF-8");
+    return new String(bytes, pos, size, Internal.UTF_8);
  }

  /** Read a {@code group} field value from the stream. */

--- a/java/src/main/java/com/google/protobuf/CodedOutputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -32,7 +32,6 @@ package com.google.protobuf;

 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;

 /**
@@ -420,7 +419,7 @@ public final class CodedOutputStream {
    // Unfortunately there does not appear to be any way to tell Java to encode
    // UTF-8 directly into our buffer, so we have to let it create its own byte
    // array and then copy.
-    final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(Internal.UTF_8);
    writeRawVarint32(bytes.length);
    writeRawBytes(bytes);
  }
@@ -827,13 +826,9 @@ public final class CodedOutputStream {
   * {@code string} field.
   */
  public static int computeStringSizeNoTag(final String value) {
-    try {
-      final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(Internal.UTF_8);
    return computeRawVarint32Size(bytes.length) +
           bytes.length;
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.", e);
-    }
  }

  /**

--- a/java/src/main/java/com/google/protobuf/Descriptors.java
+++ b/java/src/main/java/com/google/protobuf/Descriptors.java
@@ -43,7 +43,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.WeakHashMap;
 import java.util.logging.Logger;
-import java.io.UnsupportedEncodingException;

 /**
 * Contains a collection of classes which describe protocol message types.
@@ -319,12 +318,7 @@ public final class Descriptors {
      }

      final byte[] descriptorBytes;
-      try {
-        descriptorBytes = descriptorData.toString().getBytes("ISO-8859-1");
-      } catch (UnsupportedEncodingException e) {
-        throw new RuntimeException(
-          "Standard encoding ISO-8859-1 not supported by JVM.", e);
-      }
+      descriptorBytes = descriptorData.toString().getBytes(Internal.ISO_8859_1);

      FileDescriptorProto proto;
      try {

--- a/java/src/main/java/com/google/protobuf/Internal.java
+++ b/java/src/main/java/com/google/protobuf/Internal.java
@@ -30,9 +30,8 @@

 package com.google.protobuf;

-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.AbstractList;
 import java.util.AbstractMap;
 import java.util.AbstractSet;
@@ -51,6 +50,10 @@ import java.util.Set;
 * @author kenton@google.com (Kenton Varda)
 */
 public class Internal {
+
+  protected static final Charset UTF_8 = Charset.forName("UTF-8");
+  protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
+
  /**
   * Helper called by generated code to construct default values for string
   * fields.
@@ -80,14 +83,7 @@ public class Internal {
   * generated code calls this automatically.
   */
  public static String stringDefaultValue(String bytes) {
-    try {
-      return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // both of the above character sets.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return new String(bytes.getBytes(ISO_8859_1), UTF_8);
  }

  /**
@@ -99,14 +95,7 @@ public class Internal {
   * embed raw bytes as a string literal with ISO-8859-1 encoding.
   */
  public static ByteString bytesDefaultValue(String bytes) {
-    try {
-      return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
  }
  /**
   * Helper called by generated code to construct default values for bytes
@@ -115,14 +104,7 @@ public class Internal {
   * This is like {@link #bytesDefaultValue}, but returns a byte array.
   */
  public static byte[] byteArrayDefaultValue(String bytes) {
-    try {
-      return bytes.getBytes("ISO-8859-1");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return bytes.getBytes(ISO_8859_1);
  }

  /**
@@ -197,22 +179,14 @@ public class Internal {
   * Helper method to get the UTF-8 bytes of a string.
   */
  public static byte[] toByteArray(String value) {
-    try {
-      return value.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return value.getBytes(UTF_8);
  }

  /**
   * Helper method to convert a byte array to a string using UTF-8 encoding.
   */
  public static String toStringUtf8(byte[] bytes) {
-    try {
-      return new String(bytes, "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return new String(bytes, UTF_8);
  }

  /**

--- a/java/src/main/java/com/google/protobuf/LiteralByteString.java
+++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java
@@ -34,8 +34,8 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -152,13 +152,8 @@ class LiteralByteString extends ByteString {
  }

  @Override
-  public String toString(String charsetName)
-      throws UnsupportedEncodingException {
-    // Optimize for empty strings, but ensure we don't silently ignore invalid
-    // encodings.
-    return size() == 0 && UTF_8.equals(charsetName)
-        ? ""
-        : new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+  protected String toStringInternal(Charset charset) {
+    return new String(bytes, getOffsetIntoBytes(), size(), charset);
  }

  // =================================================================

--- a/java/src/main/java/com/google/protobuf/RopeByteString.java
+++ b/java/src/main/java/com/google/protobuf/RopeByteString.java
@@ -30,14 +30,14 @@

 package com.google.protobuf;

+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InvalidObjectException;
 import java.io.ObjectInputStream;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.io.ByteArrayInputStream;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
@@ -418,13 +418,8 @@ class RopeByteString extends ByteString {
  }

  @Override
-  public String toString(String charsetName)
-      throws UnsupportedEncodingException {
-    // Optimize for empty strings, but ensure we don't silently ignore invalid
-    // encodings.
-    return size() == 0 && UTF_8.equals(charsetName)
-        ? ""
-        : new String(toByteArray(), charsetName);
+  protected String toStringInternal(Charset charset) {
+    return new String(toByteArray(), charset);
  }

  // =================================================================

--- a/java/src/main/java/com/google/protobuf/Utf8.java
+++ b/java/src/main/java/com/google/protobuf/Utf8.java
@@ -46,7 +46,7 @@ package com.google.protobuf;
 * <p>The byte sequences considered valid by this class are exactly
 * those that can be roundtrip converted to Strings and back to bytes
 * using the UTF-8 charset, without loss: <pre> {@code
- * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8"))
+ * Arrays.equals(bytes, new String(bytes, Internal.UTF_8).getBytes(Internal.UTF_8))
 * }</pre>
 *
 * <p>See the Unicode Standard,</br>

--- a/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
@@ -62,7 +62,7 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
  @Override
  public void testToString() throws UnsupportedEncodingException {
    String testString = "I love unicode \u1234\u5678 characters";
-    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
    ByteString chopped = unicode.substring(2, unicode.size() - 6);
    assertEquals(classUnderTest + ".substring() must have the expected type",
        classUnderTest, getActualClassName(chopped));
@@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
        testString.substring(2, testString.length() - 6), roundTripString);
  }

+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
+    ByteString chopped = unicode.substring(2, unicode.size() - 6);
+    assertEquals(classUnderTest + ".substring() must have the expected type",
+        classUnderTest, getActualClassName(chopped));
+
+    String roundTripString = chopped.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString.substring(2, testString.length() - 6), roundTripString);
+  }
+
  public void testJavaSerialization() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(out);

--- a/java/src/test/java/com/google/protobuf/ByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/ByteStringTest.java
@@ -41,6 +41,7 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
@@ -56,7 +57,7 @@ import java.util.Random;
 */
 public class ByteStringTest extends TestCase {

-  private static final String UTF_16 = "UTF-16";
+  private static final Charset UTF_16 = Charset.forName("UTF-16");

  static byte[] getTestBytes(int size, long seed) {
    Random random = new Random(seed);
@@ -139,7 +140,7 @@ public class ByteStringTest extends TestCase {
  public void testCopyFrom_Utf8() throws UnsupportedEncodingException {
    String testString = "I love unicode \u1234\u5678 characters";
    ByteString byteString = ByteString.copyFromUtf8(testString);
-    byte[] testBytes = testString.getBytes("UTF-8");
+    byte[] testBytes = testString.getBytes(Internal.UTF_8);
    assertTrue("copyFromUtf8 string must respect the charset",
        isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length));
  }
@@ -400,7 +401,7 @@ public class ByteStringTest extends TestCase {

  public void testToStringUtf8() throws UnsupportedEncodingException {
    String testString = "I love unicode \u1234\u5678 characters";
-    byte[] testBytes = testString.getBytes("UTF-8");
+    byte[] testBytes = testString.getBytes(Internal.UTF_8);
    ByteString byteString = ByteString.copyFrom(testBytes);
    assertEquals("copyToStringUtf8 must respect the charset",
        testString, byteString.toStringUtf8());

--- a/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
+++ b/java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
@@ -321,7 +321,7 @@ public class CodedOutputStreamTest extends TestCase {
    final int BUFFER_SIZE = 4 * 1024;
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream(BUFFER_SIZE);
    CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
-    byte[] value = "abcde".getBytes("UTF-8");
+    byte[] value = "abcde".getBytes(Internal.UTF_8);
    for (int i = 0; i < 1024; ++i) {
      codedStream.writeRawBytes(value, 0, value.length);
    }
@@ -367,7 +367,7 @@ public class CodedOutputStreamTest extends TestCase {
  }

  public void testWriteByteBuffer() throws Exception {
-    byte[] value = "abcde".getBytes("UTF-8");
+    byte[] value = "abcde".getBytes(Internal.UTF_8);
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
    ByteBuffer byteBuffer = ByteBuffer.wrap(value, 0, 1);

--- a/java/src/test/java/com/google/protobuf/DescriptorsTest.java
+++ b/java/src/test/java/com/google/protobuf/DescriptorsTest.java
@@ -35,32 +35,28 @@ import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
 import com.google.protobuf.DescriptorProtos.EnumValueDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
-import com.google.protobuf.Descriptors.DescriptorValidationException;
-import com.google.protobuf.Descriptors.FileDescriptor;
 import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
-import com.google.protobuf.Descriptors.OneofDescriptor;
+import com.google.protobuf.Descriptors.DescriptorValidationException;
 import com.google.protobuf.Descriptors.EnumDescriptor;
 import com.google.protobuf.Descriptors.EnumValueDescriptor;
-import com.google.protobuf.Descriptors.ServiceDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.Descriptors.FileDescriptor;
 import com.google.protobuf.Descriptors.MethodDescriptor;
-
+import com.google.protobuf.Descriptors.OneofDescriptor;
+import com.google.protobuf.Descriptors.ServiceDescriptor;
 import com.google.protobuf.test.UnittestImport;
 import com.google.protobuf.test.UnittestImport.ImportEnum;
-import com.google.protobuf.test.UnittestImport.ImportMessage;
+import protobuf_unittest.TestCustomOptions;
+import protobuf_unittest.UnittestCustomOptions;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignEnum;
 import protobuf_unittest.UnittestProto.ForeignMessage;
-import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestExtremeDefaultValues;
 import protobuf_unittest.UnittestProto.TestMultipleExtensionRanges;
 import protobuf_unittest.UnittestProto.TestRequired;
 import protobuf_unittest.UnittestProto.TestService;
-import protobuf_unittest.UnittestCustomOptions;
-
-import protobuf_unittest.TestCustomOptions;
-

 import junit.framework.TestCase;

@@ -286,7 +282,7 @@ public class DescriptorsTest extends TestCase {
    d = TestExtremeDefaultValues.getDescriptor();
    assertEquals(
      ByteString.copyFrom(
-        "\0\001\007\b\f\n\r\t\013\\\'\"\u00fe".getBytes("ISO-8859-1")),
+        "\0\001\007\b\f\n\r\t\013\\\'\"\u00fe".getBytes(Internal.ISO_8859_1)),
      d.findFieldByName("escaped_bytes").getDefaultValue());
    assertEquals(-1, d.findFieldByName("large_uint32").getDefaultValue());
    assertEquals(-1L, d.findFieldByName("large_uint64").getDefaultValue());

--- a/java/src/test/java/com/google/protobuf/IsValidUtf8Test.java
+++ b/java/src/test/java/com/google/protobuf/IsValidUtf8Test.java
@@ -72,9 +72,12 @@ public class IsValidUtf8Test extends TestCase {
   * Tests that round tripping of all three byte permutations work.
   */
  public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException {
+    // Skipped when the TRAVIS environment variable is set: Travis' OOM killer
+    // doesn't like this test.
+    if (System.getenv("TRAVIS") == null) {
      IsValidUtf8TestUtil.testBytes(3,
          IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
    }
+  }

  /**
   * Tests that round tripping of a sample of four byte permutations work.

--- a/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
+++ b/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
@@ -33,18 +33,17 @@ package com.google.protobuf;
 import static junit.framework.Assert.*;

 import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 import java.util.logging.Logger;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.Charset;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;

 /**
 * Shared testing code for {@link IsValidUtf8Test} and
@@ -220,8 +219,8 @@ class IsValidUtf8TestUtil {
      }
      ByteString bs = ByteString.copyFrom(bytes);
      boolean isRoundTrippable = bs.isValidUtf8();
-      String s = new String(bytes, "UTF-8");
-      byte[] bytesReencoded = s.getBytes("UTF-8");
+      String s = new String(bytes, Internal.UTF_8);
+      byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);

      if (bytesEqual != isRoundTrippable) {
@@ -313,10 +312,10 @@ class IsValidUtf8TestUtil {
  void testBytesUsingByteBuffers(
      int numBytes, long expectedCount, long start, long lim)
      throws UnsupportedEncodingException {
-    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+    CharsetDecoder decoder = Internal.UTF_8.newDecoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
-    CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
+    CharsetEncoder encoder = Internal.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
    byte[] bytes = new byte[numBytes];

--- a/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
@@ -293,14 +293,21 @@ public class LiteralByteStringTest extends TestCase {

  public void testToString() throws UnsupportedEncodingException {
    String testString = "I love unicode \u1234\u5678 characters";
-    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
    String roundTripString = unicode.toString(UTF_8);
    assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
  }

+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
+  }
+
  public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
    assertSame(classUnderTest + " must be the same string references",
-        ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));
+        ByteString.EMPTY.toString(Internal.UTF_8), new LiteralByteString(new byte[]{}).toString(Internal.UTF_8));
  }

  public void testToString_raisesException() throws UnsupportedEncodingException{

--- a/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
+++ b/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
@@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
        flatString.hashCode(), unicode.hashCode());
  }
+
+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    // Do the substring part
+    testString = testString.substring(2, testString.length() - 6);
+    unicode = unicode.substring(2, unicode.size() - 6);
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
 }
--- a/java/src/test/java/com/google/protobuf/RopeByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/RopeByteStringTest.java
@@ -118,12 +118,38 @@ public class RopeByteStringTest extends LiteralByteStringTest {
        flatString.hashCode(), unicode.hashCode());
  }

+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
+
  @Override
  public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
    RopeByteString ropeByteString =
        RopeByteString.newInstanceForTest(ByteString.EMPTY, ByteString.EMPTY);
    assertSame(classUnderTest + " must be the same string references",
-        ByteString.EMPTY.toString(UTF_8), ropeByteString.toString(UTF_8));
+        ByteString.EMPTY.toString(Internal.UTF_8), ropeByteString.toString(Internal.UTF_8));
  }

  public void testToString_raisesException() throws UnsupportedEncodingException{

--- a/java/src/test/java/com/google/protobuf/TestUtil.java
+++ b/java/src/test/java/com/google/protobuf/TestUtil.java
@@ -276,11 +276,7 @@ public final class TestUtil {

  /** Helper to convert a String to ByteString. */
  static ByteString toBytes(String str) {
-    try {
-      return ByteString.copyFrom(str.getBytes("UTF-8"));
-    } catch(java.io.UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.", e);
-    }
+    return ByteString.copyFrom(str.getBytes(Internal.UTF_8));
  }

  /**

--- a/java/src/test/java/com/google/protobuf/TextFormatTest.java
+++ b/java/src/test/java/com/google/protobuf/TextFormatTest.java
@@ -243,8 +243,8 @@ public class TextFormatTest extends TestCase {
   * characters.  The characters are converted directly to bytes, *not*
   * encoded using UTF-8.
   */
-  private ByteString bytes(String str) throws Exception {
-    return ByteString.copyFrom(str.getBytes("ISO-8859-1"));
+  private ByteString bytes(String str) {
+    return ByteString.copyFrom(str.getBytes(Internal.ISO_8859_1));
  }

  /**

--- a/java/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java
+++ b/java/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java
@@ -229,7 +229,7 @@ public class UnknownFieldSetLiteTest extends TestCase {

  public void testMalformedBytes() throws Exception {
    try {
-      Foo.parseFrom("this is a malformed protocol buffer".getBytes("UTF-8"));
+      Foo.parseFrom("this is a malformed protocol buffer".getBytes(Internal.UTF_8));
      fail();
    } catch (InvalidProtocolBufferException e) {
      // Expected.

--- a/javanano/src/main/java/com/google/protobuf/nano/CodedInputByteBufferNano.java
+++ b/javanano/src/main/java/com/google/protobuf/nano/CodedInputByteBufferNano.java
@@ -190,12 +190,12 @@ public final class CodedInputByteBufferNano {
    if (size <= (bufferSize - bufferPos) && size > 0) {
      // Fast path:  We already have the bytes in a contiguous buffer, so
      //   just copy directly from it.
-      final String result = new String(buffer, bufferPos, size, "UTF-8");
+      final String result = new String(buffer, bufferPos, size, InternalNano.UTF_8);
      bufferPos += size;
      return result;
    } else {
      // Slow path:  Build a byte array first then copy it.
-      return new String(readRawBytes(size), "UTF-8");
+      return new String(readRawBytes(size), InternalNano.UTF_8);
    }
  }


--- a/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
+++ b/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
@@ -31,7 +31,6 @@
 package com.google.protobuf.nano;

 import java.io.IOException;
-import java.io.UnsupportedEncodingException;

 /**
 * Encodes and writes protocol message fields.
@@ -291,7 +290,7 @@ public final class CodedOutputByteBufferNano {
    // Unfortunately there does not appear to be any way to tell Java to encode
    // UTF-8 directly into our buffer, so we have to let it create its own byte
    // array and then copy.
-    final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(InternalNano.UTF_8);
    writeRawVarint32(bytes.length);
    writeRawBytes(bytes);
  }
@@ -603,13 +602,9 @@ public final class CodedOutputByteBufferNano {
   * {@code string} field.
   */
  public static int computeStringSizeNoTag(final String value) {
-    try {
-      final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(InternalNano.UTF_8);
    return computeRawVarint32Size(bytes.length) +
           bytes.length;
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.");
-    }
  }

  /**

--- a/javanano/src/main/java/com/google/protobuf/nano/InternalNano.java
+++ b/javanano/src/main/java/com/google/protobuf/nano/InternalNano.java
@@ -33,7 +33,7 @@ package com.google.protobuf.nano;
 import com.google.protobuf.nano.MapFactories.MapFactory;

 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -67,6 +67,8 @@ public final class InternalNano {
  public static final int TYPE_SINT32   = 17;
  public static final int TYPE_SINT64   = 18;

+  protected static final Charset UTF_8 = Charset.forName("UTF-8");
+  protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

  private InternalNano() {}

@@ -111,14 +113,7 @@ public final class InternalNano {
   * generated code calls this automatically.
   */
  public static String stringDefaultValue(String bytes) {
-    try {
-      return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // both of the above character sets.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return new String(bytes.getBytes(ISO_8859_1), InternalNano.UTF_8);
  }

  /**
@@ -130,14 +125,7 @@ public final class InternalNano {
   * embed raw bytes as a string literal with ISO-8859-1 encoding.
   */
  public static byte[] bytesDefaultValue(String bytes) {
-    try {
-      return bytes.getBytes("ISO-8859-1");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return bytes.getBytes(ISO_8859_1);
  }

  /**
@@ -145,11 +133,7 @@ public final class InternalNano {
   * UnsupportedEncodingException to a RuntimeException.
   */
  public static byte[] copyFromUtf8(final String text) {
-    try {
-      return text.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?");
-    }
+    return text.getBytes(InternalNano.UTF_8);
  }

  /**

--- a/javanano/src/test/java/com/google/protobuf/nano/NanoTest.java
+++ b/javanano/src/test/java/com/google/protobuf/nano/NanoTest.java
@@ -458,7 +458,7 @@ public class NanoTest extends TestCase {
    assertFalse(msg.optionalBytes.length > 0);
    msg.optionalBytes = InternalNano.copyFromUtf8("hello");
    assertTrue(msg.optionalBytes.length > 0);
-    assertEquals("hello", new String(msg.optionalBytes, "UTF-8"));
+    assertEquals("hello", new String(msg.optionalBytes, InternalNano.UTF_8));
    msg.clear();
    assertFalse(msg.optionalBytes.length > 0);
    msg.clear()
@@ -476,7 +476,7 @@ public class NanoTest extends TestCase {

    TestAllTypesNano newMsg = TestAllTypesNano.parseFrom(result);
    assertTrue(newMsg.optionalBytes.length > 0);
-    assertEquals("bye", new String(newMsg.optionalBytes, "UTF-8"));
+    assertEquals("bye", new String(newMsg.optionalBytes, InternalNano.UTF_8));
  }

  public void testNanoOptionalGroup() throws Exception {
@@ -1346,14 +1346,14 @@ public class NanoTest extends TestCase {
        InternalNano.copyFromUtf8("bye"),
        InternalNano.copyFromUtf8("boo")
    };
-    assertEquals("bye", new String(msg.repeatedBytes[1], "UTF-8"));
-    assertEquals("boo", new String(msg.repeatedBytes[2], "UTF-8"));
+    assertEquals("bye", new String(msg.repeatedBytes[1], InternalNano.UTF_8));
+    assertEquals("boo", new String(msg.repeatedBytes[2], InternalNano.UTF_8));
    msg.clear();
    assertEquals(0, msg.repeatedBytes.length);
    msg.clear()
       .repeatedBytes = new byte[][] { InternalNano.copyFromUtf8("boo") };
    assertEquals(1, msg.repeatedBytes.length);
-    assertEquals("boo", new String(msg.repeatedBytes[0], "UTF-8"));
+    assertEquals("boo", new String(msg.repeatedBytes[0], InternalNano.UTF_8));
    msg.clear();
    assertEquals(0, msg.repeatedBytes.length);

@@ -1385,8 +1385,8 @@ public class NanoTest extends TestCase {

    newMsg = TestAllTypesNano.parseFrom(result);
    assertEquals(2, newMsg.repeatedBytes.length);
-    assertEquals("hello", new String(newMsg.repeatedBytes[0], "UTF-8"));
-    assertEquals("world", new String(newMsg.repeatedBytes[1], "UTF-8"));
+    assertEquals("hello", new String(newMsg.repeatedBytes[0], InternalNano.UTF_8));
+    assertEquals("world", new String(newMsg.repeatedBytes[1], InternalNano.UTF_8));
  }

  public void testNanoRepeatedGroup() throws Exception {
@@ -2277,9 +2277,9 @@ public class NanoTest extends TestCase {
      assertTrue(52.0e3 == msg.defaultDouble);
      assertEquals(true, msg.defaultBool);
      assertEquals("hello", msg.defaultString);
-      assertEquals("world", new String(msg.defaultBytes, "UTF-8"));
+      assertEquals("world", new String(msg.defaultBytes, InternalNano.UTF_8));
      assertEquals("dünya", msg.defaultStringNonascii);
-      assertEquals("dünyab", new String(msg.defaultBytesNonascii, "UTF-8"));
+      assertEquals("dünyab", new String(msg.defaultBytesNonascii, InternalNano.UTF_8));
      assertEquals(TestAllTypesNano.BAR, msg.defaultNestedEnum);
      assertEquals(NanoOuterClass.FOREIGN_NANO_BAR, msg.defaultForeignEnum);
      assertEquals(UnittestImportNano.IMPORT_NANO_BAR, msg.defaultImportEnum);
@@ -2385,7 +2385,7 @@ public class NanoTest extends TestCase {
    assertEquals(TestAllTypesNanoHas.FOO, newMsg.optionalNestedEnum);
    assertEquals(41, newMsg.defaultInt32);
    assertEquals("hello", newMsg.defaultString);
-    assertEquals("world", new String(newMsg.defaultBytes, "UTF-8"));
+    assertEquals("world", new String(newMsg.defaultBytes, InternalNano.UTF_8));
    assertEquals(TestAllTypesNanoHas.BAR, newMsg.defaultNestedEnum);
    assertEquals(Float.NaN, newMsg.defaultFloatNan);
    assertEquals(0, newMsg.id);
@@ -2567,7 +2567,7 @@ public class NanoTest extends TestCase {
    assertEquals(TestNanoAccessors.FOO, newMsg.getOptionalNestedEnum());
    assertEquals(41, newMsg.getDefaultInt32());
    assertEquals("hello", newMsg.getDefaultString());
-    assertEquals("world", new String(newMsg.getDefaultBytes(), "UTF-8"));
+    assertEquals("world", new String(newMsg.getDefaultBytes(), InternalNano.UTF_8));
    assertEquals(TestNanoAccessors.BAR, newMsg.getDefaultNestedEnum());
    assertEquals(Float.NaN, newMsg.getDefaultFloatNan());
    assertEquals(0, newMsg.id);