Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
P
protobuf
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
protobuf
Commits
3e944aec
Commit
3e944aec
authored
Oct 31, 2017
by
Anuraag Agrawal
Committed by
Anuraag Agrawal
Nov 29, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add a UTF-8 decoder that uses Unsafe to directly decode a byte buffer.
parent
3c6fd3f7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
113 additions
and
36 deletions
+113
-36
Makefile.am
Makefile.am
+1
-0
CodedInputStream.java
...e/src/main/java/com/google/protobuf/CodedInputStream.java
+71
-33
UnsafeUtil.java
java/core/src/main/java/com/google/protobuf/UnsafeUtil.java
+32
-3
Utf8.java
java/core/src/main/java/com/google/protobuf/Utf8.java
+0
-0
DecodeUtf8Test.java
...ore/src/test/java/com/google/protobuf/DecodeUtf8Test.java
+0
-0
IsValidUtf8TestUtil.java
...rc/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
+9
-0
No files found.
Makefile.am
View file @
3e944aec
...
...
@@ -286,6 +286,7 @@ java_EXTRA_DIST=
java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java
\
java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java
\
java/core/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
\
java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java
\
java/core/src/test/java/com/google/protobuf/DeprecatedFieldTest.java
\
java/core/src/test/java/com/google/protobuf/DescriptorsTest.java
\
java/core/src/test/java/com/google/protobuf/DiscardUnknownFieldsTest.java
\
...
...
java/core/src/main/java/com/google/protobuf/CodedInputStream.java
View file @
3e944aec
...
...
@@ -64,6 +64,14 @@ public abstract class CodedInputStream {
// Integer.MAX_VALUE == 0x7FFFFFFF == INT_MAX from limits.h
private
static
final
int
DEFAULT_SIZE_LIMIT
=
Integer
.
MAX_VALUE
;
/**
* Whether to enable our custom UTF-8 decode codepath which does not use {@link StringCoding}.
* Enabled by default, disable by setting
* {@code -Dcom.google.protobuf.enableCustomUtf8Decode=false} in JVM args.
*/
private
static
final
boolean
ENABLE_CUSTOM_UTF8_DECODE
=
!
"false"
.
equals
(
System
.
getProperty
(
"com.google.protobuf.enableCustomUtf8Decode"
));
/** Visible for subclasses. See setRecursionLimit() */
int
recursionDepth
;
...
...
@@ -825,13 +833,19 @@ public abstract class CodedInputStream {
public
String
readStringRequireUtf8
()
throws
IOException
{
final
int
size
=
readRawVarint32
();
if
(
size
>
0
&&
size
<=
(
limit
-
pos
))
{
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
buffer
,
pos
,
pos
+
size
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
if
(
ENABLE_CUSTOM_UTF8_DECODE
)
{
String
result
=
Utf8
.
decodeUtf8
(
buffer
,
pos
,
size
);
pos
+=
size
;
return
result
;
}
else
{
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
buffer
,
pos
,
pos
+
size
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
final
int
tempPos
=
pos
;
pos
+=
size
;
return
new
String
(
buffer
,
tempPos
,
size
,
UTF_8
);
}
final
int
tempPos
=
pos
;
pos
+=
size
;
return
new
String
(
buffer
,
tempPos
,
size
,
UTF_8
);
}
if
(
size
==
0
)
{
...
...
@@ -1524,6 +1538,8 @@ public abstract class CodedInputStream {
final
int
size
=
readRawVarint32
();
if
(
size
>
0
&&
size
<=
remaining
())
{
// TODO(nathanmittler): Is there a way to avoid this copy?
// TODO(anuraaga): It might be possible to share the optimized loop with
// readStringRequireUtf8 by implementing Java replacement logic there.
// The same as readBytes' logic
byte
[]
bytes
=
new
byte
[
size
];
UnsafeUtil
.
copyMemory
(
pos
,
bytes
,
0
,
size
);
...
...
@@ -1544,19 +1560,26 @@ public abstract class CodedInputStream {
@Override
public
String
readStringRequireUtf8
()
throws
IOException
{
final
int
size
=
readRawVarint32
();
if
(
size
>=
0
&&
size
<=
remaining
())
{
// TODO(nathanmittler): Is there a way to avoid this copy?
// The same as readBytes' logic
byte
[]
bytes
=
new
byte
[
size
];
UnsafeUtil
.
copyMemory
(
pos
,
bytes
,
0
,
size
);
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
if
(
size
>
0
&&
size
<=
remaining
())
{
if
(
ENABLE_CUSTOM_UTF8_DECODE
)
{
final
int
bufferPos
=
bufferPos
(
pos
);
String
result
=
Utf8
.
decodeUtf8
(
buffer
,
bufferPos
,
size
);
pos
+=
size
;
return
result
;
}
else
{
// TODO(nathanmittler): Is there a way to avoid this copy?
// The same as readBytes' logic
byte
[]
bytes
=
new
byte
[
size
];
UnsafeUtil
.
copyMemory
(
pos
,
bytes
,
0
,
size
);
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
String
result
=
new
String
(
bytes
,
UTF_8
);
pos
+=
size
;
return
result
;
String
result
=
new
String
(
bytes
,
UTF_8
);
pos
+=
size
;
return
result
;
}
}
if
(
size
==
0
)
{
...
...
@@ -2324,11 +2347,15 @@ public abstract class CodedInputStream {
bytes
=
readRawBytesSlowPath
(
size
);
tempPos
=
0
;
}
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
bytes
,
tempPos
,
tempPos
+
size
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
if
(
ENABLE_CUSTOM_UTF8_DECODE
)
{
return
Utf8
.
decodeUtf8
(
bytes
,
tempPos
,
size
);
}
else
{
// TODO(martinrb): We could save a pass by validating while decoding.
if
(!
Utf8
.
isValidUtf8
(
bytes
,
tempPos
,
tempPos
+
size
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
return
new
String
(
bytes
,
tempPos
,
size
,
UTF_8
);
}
return
new
String
(
bytes
,
tempPos
,
size
,
UTF_8
);
}
@Override
...
...
@@ -3348,23 +3375,34 @@ public abstract class CodedInputStream {
public
String
readStringRequireUtf8
()
throws
IOException
{
final
int
size
=
readRawVarint32
();
if
(
size
>
0
&&
size
<=
currentByteBufferLimit
-
currentByteBufferPos
)
{
byte
[]
bytes
=
new
byte
[
size
];
UnsafeUtil
.
copyMemory
(
currentByteBufferPos
,
bytes
,
0
,
size
);
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
if
(
ENABLE_CUSTOM_UTF8_DECODE
)
{
final
int
bufferPos
=
(
int
)
(
currentByteBufferPos
-
currentByteBufferStartPos
);
String
result
=
Utf8
.
decodeUtf8
(
currentByteBuffer
,
bufferPos
,
size
);
currentByteBufferPos
+=
size
;
return
result
;
}
else
{
byte
[]
bytes
=
new
byte
[
size
];
UnsafeUtil
.
copyMemory
(
currentByteBufferPos
,
bytes
,
0
,
size
);
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
String
result
=
new
String
(
bytes
,
UTF_8
);
currentByteBufferPos
+=
size
;
return
result
;
}
String
result
=
new
String
(
bytes
,
UTF_8
);
currentByteBufferPos
+=
size
;
return
result
;
}
if
(
size
>=
0
&&
size
<=
remaining
())
{
byte
[]
bytes
=
new
byte
[
size
];
readRawBytesTo
(
bytes
,
0
,
size
);
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
if
(
ENABLE_CUSTOM_UTF8_DECODE
)
{
return
Utf8
.
decodeUtf8
(
bytes
,
0
,
size
);
}
else
{
if
(!
Utf8
.
isValidUtf8
(
bytes
))
{
throw
InvalidProtocolBufferException
.
invalidUtf8
();
}
String
result
=
new
String
(
bytes
,
UTF_8
);
return
result
;
}
String
result
=
new
String
(
bytes
,
UTF_8
);
return
result
;
}
if
(
size
==
0
)
{
...
...
java/core/src/main/java/com/google/protobuf/UnsafeUtil.java
View file @
3e944aec
...
...
@@ -33,7 +33,6 @@ package com.google.protobuf;
import
java.lang.reflect.Field
;
import
java.nio.Buffer
;
import
java.nio.ByteBuffer
;
import
java.nio.ByteOrder
;
import
java.security.AccessController
;
import
java.security.PrivilegedExceptionAction
;
import
java.util.logging.Level
;
...
...
@@ -72,6 +71,8 @@ final class UnsafeUtil {
private
static
final
long
BUFFER_ADDRESS_OFFSET
=
fieldOffset
(
bufferAddressField
());
private
static
final
long
STRING_VALUE_OFFSET
=
fieldOffset
(
stringValueField
());
private
UnsafeUtil
()
{}
static
boolean
hasUnsafeArrayOperations
()
{
...
...
@@ -259,6 +260,26 @@ final class UnsafeUtil {
return
MEMORY_ACCESSOR
.
getLong
(
buffer
,
BUFFER_ADDRESS_OFFSET
);
}
/**
 * Wraps {@code chars} in a {@link String} without copying when possible: a blank {@code String}
 * instance is allocated and the given array is stored into it at {@code STRING_VALUE_OFFSET}.
 * Because the returned string may share the array, callers must not modify {@code chars} after
 * handing it to this method.
 *
 * <p>Falls back to {@code new String(chars)} (a copy) when {@code STRING_VALUE_OFFSET} is
 * {@code -1} or when the blank instance cannot be allocated.
 */
static String moveToString(char[] chars) {
  String wrapper = null;
  if (STRING_VALUE_OFFSET != -1) {
    try {
      wrapper = (String) UNSAFE.allocateInstance(String.class);
    } catch (InstantiationException e) {
      // Not expected for String; leave wrapper unset so the copying path below is taken.
    }
  }
  if (wrapper == null) {
    // Either the String layout is not the one we expect or allocation failed: just do a copy.
    return new String(chars);
  }
  // Install the caller's array directly into the freshly allocated instance.
  putObject(wrapper, STRING_VALUE_OFFSET, chars);
  return wrapper;
}
static
Object
getStaticObject
(
Field
field
)
{
return
MEMORY_ACCESSOR
.
getStaticObject
(
field
);
}
...
...
@@ -375,7 +396,12 @@ final class UnsafeUtil {
/** Finds the address field within a direct {@link Buffer}. */
private
static
Field
bufferAddressField
()
{
return
field
(
Buffer
.
class
,
"address"
);
return
field
(
Buffer
.
class
,
"address"
,
long
.
class
);
}
/** Looks up the {@code value} field declared on {@link String}, expecting it to be a {@code char[]}. */
private static Field stringValueField() {
  final Field valueField = field(String.class, "value", char[].class);
  return valueField;
}
/**
...
...
@@ -390,11 +416,14 @@ final class UnsafeUtil {
* Gets the field with the given name within the class, or {@code null} if it is not found or its
* declared type is not {@code expectedType}. If found, the field is made accessible.
*/
private
static
Field
field
(
Class
<?>
clazz
,
String
fieldName
)
{
private
static
Field
field
(
Class
<?>
clazz
,
String
fieldName
,
Class
<?>
expectedType
)
{
Field
field
;
try
{
field
=
clazz
.
getDeclaredField
(
fieldName
);
field
.
setAccessible
(
true
);
if
(!
field
.
getType
().
equals
(
expectedType
))
{
return
null
;
}
}
catch
(
Throwable
t
)
{
// Failed to access the fields.
field
=
null
;
...
...
java/core/src/main/java/com/google/protobuf/Utf8.java
View file @
3e944aec
This diff is collapsed.
Click to expand it.
java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java
0 → 100644
View file @
3e944aec
This diff is collapsed.
Click to expand it.
java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
View file @
3e944aec
...
...
@@ -273,6 +273,15 @@ final class IsValidUtf8TestUtil {
assertEquals
(
isRoundTrippable
,
Utf8
.
isValidUtf8
(
bytes
));
assertEquals
(
isRoundTrippable
,
Utf8
.
isValidUtf8
(
bytes
,
0
,
numBytes
));
try
{
assertEquals
(
s
,
Utf8
.
decodeUtf8
(
bytes
,
0
,
numBytes
));
}
catch
(
InvalidProtocolBufferException
e
)
{
if
(
isRoundTrippable
)
{
System
.
out
.
println
(
"Could not decode utf-8"
);
outputFailure
(
byteChar
,
bytes
,
bytesReencoded
);
}
}
// Test partial sequences.
// Partition numBytes into three segments (not necessarily non-empty).
int
i
=
rnd
.
nextInt
(
numBytes
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment