Commit e84893f6 authored by Viktor Szathmáry, committed by Tamir Duberstein

perf: String#getBytes(Charset) vs getBytes(String)

parent 7139d1ef
...@@ -37,6 +37,8 @@ import java.io.OutputStream; ...@@ -37,6 +37,8 @@ import java.io.OutputStream;
import java.io.Serializable; import java.io.Serializable;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
...@@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { ...@@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
// Defined by java.nio.charset.Charset protected static final Charset UTF_8 = Charset.forName("UTF-8");
protected static final String UTF_8 = "UTF-8";
/** /**
* Empty {@code ByteString}. * Empty {@code ByteString}.
...@@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { ...@@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new {@code ByteString} * @return new {@code ByteString}
*/ */
public static ByteString copyFromUtf8(String text) { public static ByteString copyFromUtf8(String text) {
try { return new LiteralByteString(text.getBytes(UTF_8));
return new LiteralByteString(text.getBytes(UTF_8));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
} }
// ================================================================= // =================================================================
...@@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { ...@@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string * @return new string
* @throws UnsupportedEncodingException if charset isn't recognized * @throws UnsupportedEncodingException if charset isn't recognized
*/ */
public abstract String toString(String charsetName) public String toString(String charsetName)
throws UnsupportedEncodingException; throws UnsupportedEncodingException {
try {
return toString(Charset.forName(charsetName));
} catch (UnsupportedCharsetException e) {
UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
exception.initCause(e);
throw exception;
}
}
/**
 * Decodes the bytes of this {@code ByteString} into a {@code String} using
 * the given charset. When {@code size()} is zero the {@code ""} literal is
 * returned directly and {@link #toStringInternal(Charset)} is not invoked.
 *
 * @param charset charset used to decode the bytes
 * @return the decoded string
 */
public String toString(Charset charset) {
  if (size() == 0) {
    return "";
  }
  return toStringInternal(charset);
}
/**
 * Constructs a new {@code String} by decoding the bytes using the
 * specified charset. {@link #toString(Charset)} delegates to this method
 * only when {@code size()} is non-zero.
 *
 * @param charset decode using this charset
 * @return new string
 */
protected abstract String toStringInternal(Charset charset);
// ================================================================= // =================================================================
// UTF-8 decoding // UTF-8 decoding
...@@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { ...@@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string using UTF-8 encoding * @return new string using UTF-8 encoding
*/ */
public String toStringUtf8() { public String toStringUtf8() {
try { return toString(UTF_8);
return toString(UTF_8);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
} }
/** /**
......
...@@ -36,6 +36,7 @@ import java.io.InputStream; ...@@ -36,6 +36,7 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
...@@ -152,13 +153,8 @@ class LiteralByteString extends ByteString { ...@@ -152,13 +153,8 @@ class LiteralByteString extends ByteString {
} }
@Override @Override
public String toString(String charsetName) protected String toStringInternal(Charset charset) {
throws UnsupportedEncodingException { return new String(bytes, getOffsetIntoBytes(), size(), charset);
// Optimize for empty strings, but ensure we don't silently ignore invalid
// encodings.
return size() == 0 && UTF_8.equals(charsetName)
? ""
: new String(bytes, getOffsetIntoBytes(), size(), charsetName);
} }
// ================================================================= // =================================================================
......
...@@ -38,6 +38,7 @@ import java.io.OutputStream; ...@@ -38,6 +38,7 @@ import java.io.OutputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
...@@ -418,13 +419,8 @@ class RopeByteString extends ByteString { ...@@ -418,13 +419,8 @@ class RopeByteString extends ByteString {
} }
@Override @Override
public String toString(String charsetName) protected String toStringInternal(Charset charset) {
throws UnsupportedEncodingException { return new String(toByteArray(), charset);
// Optimize for empty strings, but ensure we don't silently ignore invalid
// encodings.
return size() == 0 && UTF_8.equals(charsetName)
? ""
: new String(toByteArray(), charsetName);
} }
// ================================================================= // =================================================================
......
...@@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest { ...@@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
testString.substring(2, testString.length() - 6), roundTripString); testString.substring(2, testString.length() - 6), roundTripString);
} }
/**
 * Checks that a substring of a {@code LiteralByteString} has the expected
 * runtime type and decodes, via {@code toString(ByteString.UTF_8)}, to the
 * matching slice of the source text.
 */
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String original = "I love unicode \u1234\u5678 characters";
  final byte[] encoded = original.getBytes(ByteString.UTF_8);
  final LiteralByteString whole = new LiteralByteString(encoded);

  // The 2 leading and 6 trailing characters of the literal are ASCII, so the
  // same offsets are used for the byte slice and for the expected text.
  final ByteString trimmed = whole.substring(2, whole.size() - 6);
  assertEquals(classUnderTest + ".substring() must have the expected type",
      classUnderTest, getActualClassName(trimmed));

  final String decoded = trimmed.toString(ByteString.UTF_8);
  final String expected = original.substring(2, original.length() - 6);
  assertEquals(classUnderTest + " unicode bytes must match", expected, decoded);
}
public void testJavaSerialization() throws Exception { public void testJavaSerialization() throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(out); ObjectOutputStream oos = new ObjectOutputStream(out);
......
...@@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase { ...@@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase {
assertEquals(classUnderTest + " unicode must match", testString, roundTripString); assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
} }
/**
 * Checks that text encoded with {@code ByteString.UTF_8} and wrapped in a
 * {@code LiteralByteString} decodes back to the same text through
 * {@code toString(ByteString.UTF_8)}.
 */
public void testCharsetToString() throws UnsupportedEncodingException {
  final String original = "I love unicode \u1234\u5678 characters";
  final byte[] encoded = original.getBytes(ByteString.UTF_8);
  final LiteralByteString wrapped = new LiteralByteString(encoded);
  final String decoded = wrapped.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode must match", original, decoded);
}
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{ public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
assertSame(classUnderTest + " must be the same string references", assertSame(classUnderTest + " must be the same string references",
ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8)); ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));
......
...@@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest { ...@@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
assertEquals(classUnderTest + " string must must have same hashCode as the flat string", assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
flatString.hashCode(), unicode.hashCode()); flatString.hashCode(), unicode.hashCode());
} }
/**
 * Builds a rope from 250 concatenated copies of a unicode sample, takes a
 * substring of it, and checks the substring's runtime type, its decoded
 * text, and its equality and hash code against a flat {@code ByteString}
 * built from the same text.
 */
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String piece = "I love unicode \u1234\u5678 characters";
  final ByteString pieceBytes = ByteString.copyFromUtf8(piece);
  final int repetitions = 250;

  // Grow the expected text and the rope in lock-step, one piece per pass.
  final StringBuilder textBuilder = new StringBuilder(repetitions * piece.length());
  ByteString rope = ByteString.EMPTY;
  int appended = 0;
  while (appended < repetitions) {
    textBuilder.append(piece);
    rope = RopeByteString.concatenate(rope, pieceBytes);
    ++appended;
  }

  // Slice both representations with the same offsets.
  final String fullText = textBuilder.toString();
  final String expected = fullText.substring(2, fullText.length() - 6);
  final ByteString slice = rope.substring(2, rope.size() - 6);

  assertEquals(classUnderTest + " from string must have the expected type",
      classUnderTest, getActualClassName(slice));

  final String decoded = slice.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode bytes must match",
      expected, decoded);

  final ByteString flat = ByteString.copyFromUtf8(expected);
  assertEquals(classUnderTest + " string must equal the flat string", flat, slice);
  assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
      flat.hashCode(), slice.hashCode());
}
} }
...@@ -118,6 +118,32 @@ public class RopeByteStringTest extends LiteralByteStringTest { ...@@ -118,6 +118,32 @@ public class RopeByteStringTest extends LiteralByteStringTest {
flatString.hashCode(), unicode.hashCode()); flatString.hashCode(), unicode.hashCode());
} }
/**
 * Builds a rope from 250 concatenated copies of a unicode sample and checks
 * the rope's runtime type, its decoded text, and its equality and hash code
 * against a flat {@code ByteString} built from the same text.
 */
@Override
public void testCharsetToString() throws UnsupportedEncodingException {
  final String piece = "I love unicode \u1234\u5678 characters";
  final ByteString pieceBytes = ByteString.copyFromUtf8(piece);
  final int repetitions = 250;

  // Grow the expected text and the rope in lock-step, one piece per pass.
  final StringBuilder textBuilder = new StringBuilder(repetitions * piece.length());
  ByteString rope = ByteString.EMPTY;
  int appended = 0;
  while (appended < repetitions) {
    textBuilder.append(piece);
    rope = RopeByteString.concatenate(rope, pieceBytes);
    ++appended;
  }
  final String expected = textBuilder.toString();

  assertEquals(classUnderTest + " from string must have the expected type",
      classUnderTest, getActualClassName(rope));

  final String decoded = rope.toString(ByteString.UTF_8);
  assertEquals(classUnderTest + " unicode bytes must match",
      expected, decoded);

  final ByteString flat = ByteString.copyFromUtf8(expected);
  assertEquals(classUnderTest + " string must equal the flat string", flat, rope);
  assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
      flat.hashCode(), rope.hashCode());
}
@Override @Override
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException { public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
RopeByteString ropeByteString = RopeByteString ropeByteString =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment