Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
P
protobuf
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
protobuf
Commits
162b656f
Commit
162b656f
authored
Feb 18, 2010
by
Ayende Rahien
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implementing string interning
parent
e3aff478
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
209 additions
and
14 deletions
+209
-14
ByteBuffer.cs
src/ProtocolBuffers/ByteBuffer.cs
+57
-0
ByteStringStringInterning.cs
src/ProtocolBuffers/ByteStringStringInterning.cs
+132
-0
CodedInputStream.cs
src/ProtocolBuffers/CodedInputStream.cs
+18
-14
ProtocolBuffers.csproj
src/ProtocolBuffers/ProtocolBuffers.csproj
+2
-0
No files found.
src/ProtocolBuffers/ByteBuffer.cs
0 → 100644
View file @
162b656f
using
System
;
namespace
Google.ProtocolBuffers
{
/// <summary>
/// A view over a slice of a byte array: the bytes <c>Buffer[Offset]</c> up to (but not including)
/// <c>Buffer[Offset + Length]</c>. The view does not copy the array.
/// The hash code is computed from the viewed bytes and cached; because the fields are public and
/// mutable, callers that change <see cref="Buffer"/> contents, <see cref="Offset"/> or
/// <see cref="Length"/> must call <see cref="ResetHash"/> afterwards.
/// </summary>
public class ByteBuffer
{
    /// <summary>The backing array that this view points into.</summary>
    public byte[] Buffer;

    /// <summary>Index in <see cref="Buffer"/> of the first viewed byte.</summary>
    public int Offset;

    /// <summary>Number of viewed bytes.</summary>
    public int Length;

    // Cached content hash; only refreshed by ResetHash().
    private int hash;

    /// <summary>
    /// Recomputes the cached hash code from the currently viewed bytes
    /// (seed 23, then <c>hash = (hash * 23) ^ b</c> for every byte).
    /// </summary>
    public void ResetHash()
    {
        int end = Offset + Length;
        int computed = 23;

        // The multiplication is expected to wrap around; make that explicit so the
        // code also works when the assembly is compiled with overflow checking enabled.
        unchecked
        {
            for (int i = Offset; i < end; i++)
            {
                computed = (computed * 23) ^ Buffer[i];
            }
        }

        hash = computed;
    }

    /// <summary>
    /// Creates a view over <paramref name="length"/> bytes of <paramref name="buffer"/>
    /// starting at <paramref name="offset"/> and computes its hash.
    /// </summary>
    /// <param name="buffer">Backing array (not copied).</param>
    /// <param name="offset">Index of the first viewed byte.</param>
    /// <param name="length">Number of viewed bytes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    public ByteBuffer(byte[] buffer, int offset, int length)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }

        Buffer = buffer;
        Offset = offset;
        Length = length;
        ResetHash();
    }

    /// <summary>
    /// Converts the viewed bytes to a <see cref="ByteString"/> via
    /// <c>ByteString.CopyFrom(Buffer, Offset, Length)</c>.
    /// </summary>
    public ByteString ToByteString()
    {
        return ByteString.CopyFrom(Buffer, Offset, Length);
    }

    /// <summary>
    /// Returns the hash cached by the last call to <see cref="ResetHash"/>.
    /// </summary>
    public override int GetHashCode()
    {
        return hash;
    }

    /// <summary>
    /// Two buffers are equal when they view the same sequence of bytes. Where the bytes
    /// sit inside their backing arrays (the offsets) does not matter, which keeps
    /// equality consistent with the content-only hash code.
    /// </summary>
    public override bool Equals(object obj)
    {
        var other = obj as ByteBuffer;
        if (ReferenceEquals(other, null))
        {
            return false;
        }
        if (ReferenceEquals(this, other))
        {
            return true;
        }
        if (other.Length != Length)
        {
            return false;
        }

        // Compare relative to each view's own offset.
        for (int i = 0; i < Length; i++)
        {
            if (Buffer[Offset + i] != other.Buffer[other.Offset + i])
            {
                return false;
            }
        }
        return true;
    }
}
}
\ No newline at end of file
src/ProtocolBuffers/ByteStringStringInterning.cs
0 → 100644
View file @
162b656f
using
System
;
using
System.Collections.Generic
;
using
System.Text
;
using
System.Threading
;
namespace
Google.ProtocolBuffers
{
/// <summary>
/// A bounded pool that maps UTF-8 byte sequences to the strings decoded from them, so that a
/// byte sequence that was seen before yields the already existing string without having to
/// decode it again. When the pool grows past its limit, the entries that were used least
/// recently are evicted.
///
/// Note, non thread safe
/// </summary>
public class ByteStringStringInterning
{
    /// <summary>
    /// Dictionary key that wraps either a <see cref="ByteString"/> (used for stored entries)
    /// or a <see cref="ByteBuffer"/> (used for lookups). Both forms compare and hash by
    /// byte content, so a buffer can find the entry stored under an equal byte string.
    /// </summary>
    private class ByteStringOrByteBuffer : IEquatable<ByteStringOrByteBuffer>
    {
        private readonly ByteString str;
        private readonly ByteBuffer buffer;

        // Hash of str, computed once with the same formula ByteBuffer uses.
        // Only meaningful when str is set.
        private readonly int strHash;

        public ByteStringOrByteBuffer(ByteString str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            this.str = str;
            strHash = ComputeHash(str);
        }

        public ByteStringOrByteBuffer(ByteBuffer buffer)
        {
            if (buffer == null)
            {
                throw new ArgumentNullException("buffer");
            }
            this.buffer = buffer;
        }

        /// <summary>
        /// Hashes the bytes of <paramref name="byteString"/> with the formula used by
        /// <c>ByteBuffer.ResetHash</c> (seed 23, <c>h = (h * 23) ^ b</c>). Keys that are
        /// equal must hash equally no matter which of the two forms they wrap; computing
        /// the hash here avoids depending on how ByteString.GetHashCode is implemented.
        /// </summary>
        private static int ComputeHash(ByteString byteString)
        {
            int length = byteString.Length;
            int computed = 23;

            // Wrap-around of the multiplication is intended.
            unchecked
            {
                for (int i = 0; i < length; i++)
                {
                    computed = (computed * 23) ^ byteString.bytes[i];
                }
            }
            return computed;
        }

        /// <summary>
        /// Content equality across all four combinations of wrapped forms.
        /// </summary>
        public bool Equals(ByteStringOrByteBuffer other)
        {
            if (ReferenceEquals(null, other))
            {
                return false;
            }
            if (ReferenceEquals(this, other))
            {
                return true;
            }

            if (other.str != null && str != null)
            {
                return Equals(other.str, str);
            }
            if (other.buffer != null && buffer != null)
            {
                return Equals(other.buffer, buffer);
            }

            // Mixed case: one side wraps a byte string, the other a buffer.
            if (other.str != null && str == null)
            {
                return StringEqualsToBuffer(other.str, buffer);
            }
            return StringEqualsToBuffer(str, other.buffer);
        }

        /// <summary>
        /// Compares a byte string with the bytes viewed by a buffer, byte by byte.
        /// </summary>
        private static bool StringEqualsToBuffer(ByteString byteString, ByteBuffer byteBuffer)
        {
            var strLen = byteString.Length;
            if (strLen != byteBuffer.Length)
            {
                return false;
            }
            for (int i = 0; i < strLen; i++)
            {
                if (byteString.bytes[i] != byteBuffer.Buffer[byteBuffer.Offset + i])
                {
                    return false;
                }
            }
            return true;
        }

        public override bool Equals(object obj)
        {
            if (ReferenceEquals(null, obj))
            {
                return false;
            }
            if (ReferenceEquals(this, obj))
            {
                return true;
            }
            return Equals(obj as ByteStringOrByteBuffer);
        }

        public override int GetHashCode()
        {
            // The buffer may be re-used and re-hashed by its owner, so its hash is read
            // on demand rather than cached here.
            return str != null ? strHash : buffer.GetHashCode();
        }
    }

    // Maximum number of entries tolerated before a cleanup is triggered.
    private readonly int limit;

    // Monotonic counter used as a logical clock for "last used" tracking.
    private int timestamp;

    private readonly IDictionary<ByteStringOrByteBuffer, Data> strings =
        new Dictionary<ByteStringOrByteBuffer, Data>();

    /// <summary>
    /// Creates a pool with a limit of 65536 entries.
    /// </summary>
    public static ByteStringStringInterning CreateInstance()
    {
        return new ByteStringStringInterning(65536);
    }

    /// <summary>
    /// Pool entry: the decoded string and the logical time it was last used.
    /// </summary>
    [Serializable]
    private class Data
    {
        public string Value;
        public int Timestamp;
    }

    private ByteStringStringInterning(int limit)
    {
        this.limit = limit;
    }

    /// <summary>
    /// Removes every entry from the pool.
    /// </summary>
    public void Clear()
    {
        strings.Clear();
    }

    /// <summary>
    /// Returns the string for the UTF-8 bytes viewed by <paramref name="str"/>. If the same
    /// bytes were interned before, the pooled string is returned and marked as recently
    /// used; otherwise the bytes are copied, decoded, added to the pool and returned.
    /// </summary>
    /// <param name="str">The bytes to look up; the buffer itself is not retained.</param>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public string Intern(ByteBuffer str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        Data val;
        // Interlocked is kept from the original code; it does NOT make this class
        // thread safe (the dictionary below is unsynchronized).
        int currentTimestamp = Interlocked.Increment(ref timestamp);
        if (strings.TryGetValue(new ByteStringOrByteBuffer(str), out val))
        {
            Interlocked.Exchange(ref val.Timestamp, currentTimestamp);
            return val.Value;
        }

        // Store under a ByteString copy: the caller's buffer may be re-used afterwards.
        var byteString = str.ToByteString();
        val = new Data
        {
            Timestamp = currentTimestamp,
            Value = byteString.ToStringUtf8()
        };
        strings.Add(new ByteStringOrByteBuffer(byteString), val);
        DoCleanupIfNeeded();
        return val.Value;
    }

    /// <summary>
    /// When the pool holds more than <c>limit</c> entries, evicts the entries with the
    /// oldest timestamps.
    /// </summary>
    private void DoCleanupIfNeeded()
    {
        if (strings.Count <= limit)
        {
            return;
        }

        // to avoid frequent thrashing, we remove the oldest entries in one go, a tenth of the limit
        // at a time; that means that we will hit the limit fairly infrequently
        var list = new List<KeyValuePair<ByteStringOrByteBuffer, Data>>(strings);

        // CompareTo instead of subtraction: a difference of two ints can overflow and
        // yield an inconsistent ordering once the timestamp counter has wrapped.
        list.Sort((x, y) => x.Value.Timestamp.CompareTo(y.Value.Timestamp));

        // Always evict at least one entry, otherwise limits below 10 would never shrink the pool.
        int toRemove = Math.Max(1, limit / 10);
        for (int i = 0; i < toRemove; i++)
        {
            strings.Remove(list[i].Key);
        }
    }
}
}
\ No newline at end of file
src/ProtocolBuffers/CodedInputStream.cs
View file @
162b656f
...
@@ -63,6 +63,9 @@ namespace Google.ProtocolBuffers {
...
@@ -63,6 +63,9 @@ namespace Google.ProtocolBuffers {
private
int
bufferPos
=
0
;
private
int
bufferPos
=
0
;
private
readonly
Stream
input
;
private
readonly
Stream
input
;
private
uint
lastTag
=
0
;
private
uint
lastTag
=
0
;
private
readonly
ByteBuffer
rawBytesBuffer
=
new
ByteBuffer
(
new
byte
[
BufferSize
],
0
,
0
);
private
readonly
ByteStringStringInterning
byteStringStringInterning
=
ByteStringStringInterning
.
CreateInstance
();
internal
const
int
DefaultRecursionLimit
=
64
;
internal
const
int
DefaultRecursionLimit
=
64
;
internal
const
int
DefaultSizeLimit
=
64
<<
20
;
// 64MB
internal
const
int
DefaultSizeLimit
=
64
<<
20
;
// 64MB
...
@@ -238,12 +241,12 @@ namespace Google.ProtocolBuffers {
...
@@ -238,12 +241,12 @@ namespace Google.ProtocolBuffers {
if
(
size
<=
bufferSize
-
bufferPos
)
{
if
(
size
<=
bufferSize
-
bufferPos
)
{
// Fast path: We already have the bytes in a contiguous buffer, so
// Fast path: We already have the bytes in a contiguous buffer, so
// just copy directly from it.
// just copy directly from it.
String
result
=
Encoding
.
UTF8
.
GetString
(
buffer
,
bufferPos
,
size
);
String
result
=
byteStringStringInterning
.
Intern
(
new
ByteBuffer
(
buffer
,
bufferPos
,
size
)
);
bufferPos
+=
size
;
bufferPos
+=
size
;
return
result
;
return
result
;
}
}
// Slow path: Build a byte array first then copy it.
// Slow path: Build a byte array first then copy it.
return
Encoding
.
UTF8
.
GetString
(
ReadRawBytes
(
size
),
0
,
size
);
return
byteStringStringInterning
.
Intern
(
ReadRawBytes
(
size
)
);
}
}
/// <summary>
/// <summary>
...
@@ -303,7 +306,8 @@ namespace Google.ProtocolBuffers {
...
@@ -303,7 +306,8 @@ namespace Google.ProtocolBuffers {
return
result
;
return
result
;
}
else
{
}
else
{
// Slow path: Build a byte array first then copy it.
// Slow path: Build a byte array first then copy it.
return
ByteString
.
CopyFrom
(
ReadRawBytes
(
size
));
ByteBuffer
rawBytes
=
ReadRawBytes
(
size
);
return
ByteString
.
CopyFrom
(
rawBytes
.
Buffer
,
rawBytes
.
Offset
,
rawBytes
.
Length
);
}
}
}
}
...
@@ -763,7 +767,7 @@ namespace Google.ProtocolBuffers {
...
@@ -763,7 +767,7 @@ namespace Google.ProtocolBuffers {
/// <exception cref="InvalidProtocolBufferException">
/// <exception cref="InvalidProtocolBufferException">
/// the end of the stream or the current limit was reached
/// the end of the stream or the current limit was reached
/// </exception>
/// </exception>
public
byte
[]
ReadRawBytes
(
int
size
)
{
public
ByteBuffer
ReadRawBytes
(
int
size
)
{
if
(
size
<
0
)
{
if
(
size
<
0
)
{
throw
InvalidProtocolBufferException
.
NegativeSize
();
throw
InvalidProtocolBufferException
.
NegativeSize
();
}
}
...
@@ -777,18 +781,18 @@ namespace Google.ProtocolBuffers {
...
@@ -777,18 +781,18 @@ namespace Google.ProtocolBuffers {
if
(
size
<=
bufferSize
-
bufferPos
)
{
if
(
size
<=
bufferSize
-
bufferPos
)
{
// We have all the bytes we need already.
// We have all the bytes we need already.
byte
[]
bytes
=
new
byte
[
size
];
var
result
=
new
ByteBuffer
(
buffer
,
bufferPos
,
size
);
Array
.
Copy
(
buffer
,
bufferPos
,
bytes
,
0
,
size
);
bufferPos
+=
size
;
bufferPos
+=
size
;
return
bytes
;
return
result
;
}
else
if
(
size
<
BufferSize
)
{
}
else
if
(
size
<
BufferSize
)
{
// Reading more bytes than are in the buffer, but not an excessive number
// Reading more bytes than are in the buffer, but not an excessive number
// of bytes. We can safely allocate the resulting array ahead of time.
// of bytes. We can safely allocate the resulting array ahead of time.
// First copy what we have.
// First copy what we have.
byte
[]
bytes
=
new
byte
[
size
];
rawBytesBuffer
.
Length
=
size
;
rawBytesBuffer
.
Offset
=
0
;
int
pos
=
bufferSize
-
bufferPos
;
int
pos
=
bufferSize
-
bufferPos
;
Array
.
Copy
(
buffer
,
bufferPos
,
bytes
,
0
,
pos
);
Array
.
Copy
(
buffer
,
bufferPos
,
rawBytesBuffer
.
Buffer
,
0
,
pos
);
bufferPos
=
bufferSize
;
bufferPos
=
bufferSize
;
// We want to use RefillBuffer() and then copy from the buffer into our
// We want to use RefillBuffer() and then copy from the buffer into our
...
@@ -797,16 +801,16 @@ namespace Google.ProtocolBuffers {
...
@@ -797,16 +801,16 @@ namespace Google.ProtocolBuffers {
RefillBuffer
(
true
);
RefillBuffer
(
true
);
while
(
size
-
pos
>
bufferSize
)
{
while
(
size
-
pos
>
bufferSize
)
{
Array
.
Copy
(
buffer
,
0
,
bytes
,
pos
,
bufferSize
);
Array
.
Copy
(
buffer
,
0
,
rawBytesBuffer
.
Buffer
,
pos
,
bufferSize
);
pos
+=
bufferSize
;
pos
+=
bufferSize
;
bufferPos
=
bufferSize
;
bufferPos
=
bufferSize
;
RefillBuffer
(
true
);
RefillBuffer
(
true
);
}
}
Array
.
Copy
(
buffer
,
0
,
bytes
,
pos
,
size
-
pos
);
Array
.
Copy
(
buffer
,
0
,
rawBytesBuffer
.
Buffer
,
pos
,
size
-
pos
);
bufferPos
=
size
-
pos
;
bufferPos
=
size
-
pos
;
rawBytesBuffer
.
ResetHash
();
return
bytes
;
return
rawBytesBuffer
;
}
else
{
}
else
{
// The size is very large. For security reasons, we can't allocate the
// The size is very large. For security reasons, we can't allocate the
// entire byte array yet. The size comes directly from the input, so a
// entire byte array yet. The size comes directly from the input, so a
...
@@ -859,7 +863,7 @@ namespace Google.ProtocolBuffers {
...
@@ -859,7 +863,7 @@ namespace Google.ProtocolBuffers {
}
}
// Done.
// Done.
return
bytes
;
return
new
ByteBuffer
(
buffer
,
0
,
size
)
;
}
}
}
}
...
...
src/ProtocolBuffers/ProtocolBuffers.csproj
View file @
162b656f
...
@@ -50,7 +50,9 @@
...
@@ -50,7 +50,9 @@
<ItemGroup>
<ItemGroup>
<Compile
Include=
"AbstractBuilder.cs"
/>
<Compile
Include=
"AbstractBuilder.cs"
/>
<Compile
Include=
"AbstractMessage.cs"
/>
<Compile
Include=
"AbstractMessage.cs"
/>
<Compile
Include=
"ByteBuffer.cs"
/>
<Compile
Include=
"ByteString.cs"
/>
<Compile
Include=
"ByteString.cs"
/>
<Compile
Include=
"ByteStringStringInterning.cs"
/>
<Compile
Include=
"Collections\Enumerables.cs"
/>
<Compile
Include=
"Collections\Enumerables.cs"
/>
<Compile
Include=
"Collections\IPopsicleList.cs"
/>
<Compile
Include=
"Collections\IPopsicleList.cs"
/>
<Compile
Include=
"Collections\PopsicleList.cs"
/>
<Compile
Include=
"Collections\PopsicleList.cs"
/>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment