capnproto / Commits / 91acb5b2

Commit 91acb5b2, authored Nov 30, 2013 by Kenton Varda

Make kj::Arena not thread-safe since it hurts performance even when used single-threaded.

Parent: c5bed0d2
Showing 4 changed files, with 72 additions and 182 deletions:

  c++/src/capnp/compiler/compiler.c++   +4   -5
  c++/src/kj/arena-test.c++             +0   -66
  c++/src/kj/arena.c++                  +41  -73
  c++/src/kj/arena.h                    +27  -38
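Note: the practical effect of this commit is that kj::Arena's allocation methods are no longer const and no longer use any locking or atomics, so an Arena must be used from a single thread (or guarded externally). A minimal usage sketch against the post-commit API; the function name and values are illustrative, not taken from the repository:

// Sketch only: basic single-threaded use of kj::Arena after this change.
#include "kj/arena.h"

void buildSomething() {
  kj::Arena arena;  // default chunkSizeHint = 1024; chunks grow geometrically as needed

  // Values live until the Arena itself is destroyed; there is no per-object free.
  int& counter = arena.allocate<int>(0);
  kj::StringPtr name = arena.copyString("example");
  kj::ArrayPtr<char> scratch = arena.allocateArray<char>(name.size() + 8);

  // ... use counter, name, scratch ...
  (void)counter; (void)scratch;
}  // The Arena destructor frees all chunks and runs any registered destructors
   // in reverse order of allocation.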
c++/src/capnp/compiler/compiler.c++
@@ -185,8 +185,7 @@ private:
   // Extract the ID from the declaration, or if it has none, generate one based on the name and
   // parent ID.

-  static kj::StringPtr joinDisplayName(
-      const kj::Arena& arena, Node& parent, kj::StringPtr declName);
+  static kj::StringPtr joinDisplayName(kj::Arena& arena, Node& parent, kj::StringPtr declName);
   // Join the parent's display name with the child's unqualified name to construct the child's
   // display name.
@@ -274,10 +273,10 @@ public:
         bootstrapLoader(loaderCallback) {}
   };

-  const kj::Arena& getNodeArena() { return nodeArena; }
+  kj::Arena& getNodeArena() { return nodeArena; }
   // Arena where nodes and other permanent objects should be allocated.

-  const Workspace& getWorkspace() { return workspace; }
+  Workspace& getWorkspace() { return workspace; }
   // Temporary workspace that can be used to construct bootstrap objects.

   inline bool shouldCompileAnnotations() {
@@ -395,7 +394,7 @@ uint64_t Compiler::Node::generateId(uint64_t parentId, kj::StringPtr declName,
 }

 kj::StringPtr Compiler::Node::joinDisplayName(
-    const kj::Arena& arena, Node& parent, kj::StringPtr declName) {
+    kj::Arena& arena, Node& parent, kj::StringPtr declName) {
   kj::ArrayPtr<char> result = arena.allocateArray<char>(
       parent.displayName.size() + declName.size() + 2);
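The compiler.c++ edits above are purely mechanical fallout: since Arena's methods are no longer const, any signature that took const kj::Arena& must now take kj::Arena&. A hypothetical caller written against the new API (not code from the repository) looks like:

// Hypothetical caller: the arena must now be passed by non-const reference because
// allocateArray() and friends mutate the Arena.
#include <string.h>
#include "kj/arena.h"

kj::StringPtr joinWithDot(kj::Arena& arena, kj::StringPtr prefix, kj::StringPtr suffix) {
  kj::ArrayPtr<char> buf = arena.allocateArray<char>(prefix.size() + suffix.size() + 2);
  char* pos = buf.begin();
  memcpy(pos, prefix.cStr(), prefix.size());  pos += prefix.size();
  *pos++ = '.';
  memcpy(pos, suffix.cStr(), suffix.size());  pos += suffix.size();
  *pos = '\0';
  return kj::StringPtr(buf.begin(), buf.size() - 1);
}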
c++/src/kj/arena-test.c++
@@ -23,7 +23,6 @@
 #include "arena.h"
 #include "debug.h"
-#include "thread.h"
 #include <gtest/gtest.h>
 #include <stdint.h>
@@ -307,70 +306,5 @@ TEST(Arena, Strings) {
   EXPECT_EQ(quux.end() + 1, corge.begin());
 }

-struct ThreadTestObject {
-  ThreadTestObject* next;
-  void* owner;  // points into the owning thread's stack
-
-  ThreadTestObject(ThreadTestObject* next, void* owner)
-      : next(next), owner(owner) {}
-  ~ThreadTestObject() { ++destructorCount; }
-
-  static uint destructorCount;
-};
-uint ThreadTestObject::destructorCount = 0;
-
-TEST(Arena, Threads) {
-  // Test thread-safety.  We allocate objects in four threads simultaneously, verify that they
-  // are not corrupted, then verify that their destructors are all called when the Arena is
-  // destroyed.
-
-  {
-    MutexGuarded<Arena> arena;
-
-    // Func to run in each thread.
-    auto threadFunc = [&]() {
-      int me;
-      ThreadTestObject* head = nullptr;
-
-      {
-        auto lock = arena.lockShared();
-
-        // Allocate a huge linked list.
-        for (uint i = 0; i < 100000; i++) {
-          head = &lock->allocate<ThreadTestObject>(head, &me);
-        }
-      }
-
-      // Wait until all other threads are done before verifying.
-      arena.lockExclusive();
-
-      // Verify that the list hasn't been corrupted.
-      while (head != nullptr) {
-        ASSERT_EQ(&me, head->owner);
-        head = head->next;
-      }
-    };
-
-    {
-      auto lock = arena.lockExclusive();
-      Thread thread1(threadFunc);
-      Thread thread2(threadFunc);
-      Thread thread3(threadFunc);
-      Thread thread4(threadFunc);
-
-      // Wait for threads to be ready.
-      usleep(10000);
-
-      auto release = kj::mv(lock);
-      // As we go out of scope, the lock will be released (since `release` is destroyed first),
-      // allowing all the threads to start running.  We'll then join each thread.
-    }
-
-    EXPECT_EQ(0u, ThreadTestObject::destructorCount);
-  }
-  EXPECT_EQ(400000u, ThreadTestObject::destructorCount);
-}
-
 }  // namespace
 }  // namespace kj
c++/src/kj/arena.c++
@@ -27,10 +27,10 @@
 namespace kj {

-Arena::Arena(size_t chunkSizeHint): state(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {}
+Arena::Arena(size_t chunkSizeHint): nextChunkSize(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {}

 Arena::Arena(ArrayPtr<byte> scratch)
-    : state(kj::max(sizeof(ChunkHeader), scratch.size())) {
+    : nextChunkSize(kj::max(sizeof(ChunkHeader), scratch.size())) {
   if (scratch.size() > sizeof(ChunkHeader)) {
     ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(scratch.begin());
     chunk->end = scratch.end();
@@ -39,19 +39,19 @@ Arena::Arena(ArrayPtr<byte> scratch)
     // Don't place the chunk in the chunk list because it's not ours to delete.  Just make it the
     // current chunk so that we'll allocate from it until it is empty.
-    state.getWithoutLock().currentChunk = chunk;
+    currentChunk = chunk;
   }
 }

 Arena::~Arena() noexcept(false) {
-  // Run cleanup() explicitly.  It will be executed again implicitly when state's destructor is
-  // called.  This ensures that if the first pass throws an exception, remaining objects are still
-  // destroyed.  If the second pass throws, the program terminates, but any destructors that could
-  // throw should be using UnwindDetector to avoid this.
-  state.getWithoutLock().cleanup();
+  // Run cleanup() explicitly, but if it throws an exception, make sure to run it again as part of
+  // unwind.  The second call will not throw because destructors are required to guard against
+  // exceptions when already unwinding.
+  KJ_ON_SCOPE_FAILURE(cleanup());
+  cleanup();
 }

-void Arena::State::cleanup() {
+void Arena::cleanup() {
   while (objectList != nullptr) {
     void* ptr = objectList + 1;
     auto destructor = objectList->destructor;
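The rewritten destructor above keeps the old guarantee (remaining objects still get destroyed even if one destructor throws) but uses an explicit scope guard instead of relying on State's own destructor. A rough illustration of the idiom behind KJ_ON_SCOPE_FAILURE, hand-rolled here rather than taken from kj:

// Illustration only: if the first cleanup() throws, run it once more during unwind so the
// remaining objects are still destroyed.  The second pass is expected not to throw.
#include <exception>

class ArenaLike {
public:
  ~ArenaLike() noexcept(false) {
    struct RetryOnUnwind {
      ArenaLike& self;
      ~RetryOnUnwind() { if (std::uncaught_exception()) self.cleanup(); }
    } retry{*this};
    cleanup();  // may throw; the guard above re-runs it while unwinding
  }

private:
  void cleanup() {
    // Pop each object off the list *before* destroying it, so a repeated call simply
    // resumes with whatever is left.
  }
};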
@@ -91,17 +91,13 @@ inline size_t alignTo(size_t s, uint alignment) {
 }  // namespace

-void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) const {
+void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) {
   if (hasDisposer) {
     alignment = kj::max(alignment, alignof(ObjectHeader));
     amount += alignTo(sizeof(ObjectHeader), alignment);
   }

-  void* result = allocateBytesLockless(amount, alignment);
-
-  if (result == nullptr) {
-    result = allocateBytesFallback(amount, alignment);
-  }
+  void* result = allocateBytesInternal(amount, alignment);

   if (hasDisposer) {
     // Reserve space for the ObjectHeader, but don't add it to the object list yet.
@@ -112,90 +108,62 @@ void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) const
   return result;
 }

-void* Arena::allocateBytesLockless(size_t amount, uint alignment) const {
-  for (;;) {
-    ChunkHeader* chunk = __atomic_load_n(&state.getWithoutLock().currentChunk, __ATOMIC_ACQUIRE);
-    if (chunk == nullptr) {
-      // No chunks allocated yet.
-      return nullptr;
-    }
-
-    byte* pos = __atomic_load_n(&chunk->pos, __ATOMIC_RELAXED);
-    byte* alignedPos = alignTo(pos, alignment);
-    byte* endPos = alignedPos + amount;
-
-    // Careful about pointer wrapping (e.g. if the chunk is near the end of the address space).
-    if (chunk->end - endPos < 0) {
-      // Not enough space.
-      return nullptr;
-    }
-
-    // There appears to be enough space in this chunk, unless another thread stole it.
-    if (KJ_LIKELY(__atomic_compare_exchange_n(
-        &chunk->pos, &pos, endPos, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
-      return alignedPos;
-    }
-
-    // Contention.  Retry.
-  }
-}
-
-void* Arena::allocateBytesFallback(size_t amount, uint alignment) const {
-  auto lock = state.lockExclusive();
-
-  // Now that we have the lock, try one more time to allocate from the current chunk.  This could
-  // work if another thread allocated a new chunk while we were waiting for the lock.
-  void* locklessResult = allocateBytesLockless(amount, alignment);
-  if (locklessResult != nullptr) {
-    return locklessResult;
-  }
-
-  // OK, we know the current chunk is out of space and we hold the lock so no one else is
-  // allocating a new one.  Let's do it!
+void* Arena::allocateBytesInternal(size_t amount, uint alignment) {
+  if (currentChunk != nullptr) {
+    ChunkHeader* chunk = currentChunk;
+    byte* alignedPos = alignTo(chunk->pos, alignment);
+
+    // Careful about overflow here.
+    if (amount + (alignedPos - chunk->pos) <= chunk->end - chunk->pos) {
+      // There's enough space in this chunk.
+      chunk->pos = alignedPos + amount;
+      return alignedPos;
+    }
+  }
+
+  // Not enough space in the current chunk.  Allocate a new one.

   // We need to allocate at least enough space for the ChunkHeader and the requested allocation.
+
   // If the alignment is less than that of the chunk header, we'll need to increase it.
   alignment = kj::max(alignment, alignof(ChunkHeader));
+
   // If the ChunkHeader size does not match the alignment, we'll need to pad it up.
   amount += alignTo(sizeof(ChunkHeader), alignment);

-  while (lock->nextChunkSize < amount) {
-    lock->nextChunkSize *= 2;
+  // Make sure we're going to allocate enough space.
+  while (nextChunkSize < amount) {
+    nextChunkSize *= 2;
   }

-  byte* bytes = reinterpret_cast<byte*>(operator new(lock->nextChunkSize));
+  // Allocate.
+  byte* bytes = reinterpret_cast<byte*>(operator new(nextChunkSize));

+  // Set up the ChunkHeader at the beginning of the allocation.
   ChunkHeader* newChunk = reinterpret_cast<ChunkHeader*>(bytes);
-  newChunk->next = lock->chunkList;
+  newChunk->next = chunkList;
   newChunk->pos = bytes + amount;
-  newChunk->end = bytes + lock->nextChunkSize;
-  __atomic_store_n(&lock->currentChunk, newChunk, __ATOMIC_RELEASE);
-
-  lock->nextChunkSize *= 2;
-
-  byte* result = alignTo(bytes + sizeof(ChunkHeader), alignment);
-  lock->chunkList = newChunk;
-
-  return result;
+  newChunk->end = bytes + nextChunkSize;
+  currentChunk = newChunk;
+  chunkList = newChunk;
+  nextChunkSize *= 2;
+
+  // Move past the ChunkHeader to find the position of the allocated object.
+  return alignTo(bytes + sizeof(ChunkHeader), alignment);
 }

-StringPtr Arena::copyString(StringPtr content) const {
+StringPtr Arena::copyString(StringPtr content) {
   char* data = reinterpret_cast<char*>(allocateBytes(content.size() + 1, 1, false));
   memcpy(data, content.cStr(), content.size() + 1);
   return StringPtr(data, content.size());
 }

-void Arena::setDestructor(void* ptr, void (*destructor)(void*)) const {
+void Arena::setDestructor(void* ptr, void (*destructor)(void*)) {
   ObjectHeader* header = reinterpret_cast<ObjectHeader*>(ptr) - 1;
   KJ_DASSERT(reinterpret_cast<uintptr_t>(header) % alignof(ObjectHeader) == 0);
   header->destructor = destructor;
-  header->next = state.getWithoutLock().objectList;
-
-  // We can use relaxed atomics here because the object list is not actually traversed until the
-  // destructor, which needs to be synchronized in its own way.
-  while (!__atomic_compare_exchange_n(
-      &state.getWithoutLock().objectList, &header->next, header, true,
-      __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
-    // Retry.
-  }
+  header->next = objectList;
+  objectList = header;
 }

 }  // namespace kj
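For intuition, the new single-threaded fast path in allocateBytesInternal() is an ordinary bump allocator: align the current position, check the remaining space in a form that cannot run past the chunk end, and bump. A standalone sketch of that arithmetic (illustrative only, not the kj code):

// Sketch of the bump-allocation check used by allocateBytesInternal().
// `pos` and `end` delimit the free space of the current chunk.
#include <cstddef>
#include <cstdint>

inline char* alignUp(char* p, size_t alignment) {
  return reinterpret_cast<char*>(
      (reinterpret_cast<uintptr_t>(p) + alignment - 1) & ~uintptr_t(alignment - 1));
}

char* tryBumpAllocate(char*& pos, char* end, size_t amount, size_t alignment) {
  char* aligned = alignUp(pos, alignment);
  // Compare sizes, not raw end pointers: `aligned + amount` could wrap past the end of the
  // address space, so test amount plus alignment padding against the space actually remaining.
  if (amount + size_t(aligned - pos) <= size_t(end - pos)) {
    pos = aligned + amount;   // bump
    return aligned;           // aligned start of the new object
  }
  return nullptr;             // caller must start a new chunk (doubling nextChunkSize)
}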
c++/src/kj/arena.h
@@ -27,7 +27,6 @@
 #include "memory.h"
 #include "array.h"
 #include "string.h"
-#include "mutex.h"

 namespace kj {
@@ -35,9 +34,10 @@ class Arena {
   // A class which allows several objects to be allocated in contiguous chunks of memory, then
   // frees them all at once.
   //
-  // Allocating from the same Arena in multiple threads concurrently is safe but not particularly
-  // performant due to contention.  The class could be optimized in the future to use per-thread
-  // chunks to solve this.
+  // Allocating from the same Arena in multiple threads concurrently is NOT safe, because making
+  // it safe would require atomic operations that would slow down allocation even when
+  // single-threaded.  If you need to use arena allocation in a multithreaded context, consider
+  // allocating thread-local arenas.

 public:
   explicit Arena(size_t chunkSizeHint = 1024);
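Under the new contract, concurrent use needs either external synchronization (for example wrapping the arena in kj::MutexGuarded, as the deleted test did) or, as the new comment recommends, one arena per thread. A sketch of the per-thread approach using kj::Thread, with illustrative function names:

// Sketch: give each thread its own Arena instead of sharing one synchronized Arena.
#include "kj/arena.h"
#include "kj/thread.h"

void perThreadWork() {
  kj::Arena arena;  // owned exclusively by this thread: no locks, no atomics
  for (int i = 0; i < 1000; i++) {
    arena.allocate<int>(i);
  }
}  // everything allocated above is freed when this thread's arena goes away

void runWorkers() {
  kj::Thread t1([]() { perThreadWork(); });
  kj::Thread t2([]() { perThreadWork(); });
}  // kj::Thread joins in its destructor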
@@ -52,20 +52,20 @@ public:
   ~Arena() noexcept(false);

   template <typename T, typename... Params>
-  T& allocate(Params&&... params) const;
+  T& allocate(Params&&... params);
   template <typename T>
-  ArrayPtr<T> allocateArray(size_t size) const;
+  ArrayPtr<T> allocateArray(size_t size);
   // Allocate an object or array of type T.  If T has a non-trivial destructor, that destructor
   // will be run during the Arena's destructor.  Such destructors are run in opposite order of
   // allocation.  Note that these methods must maintain a list of destructors to call, which has
   // overhead, but this overhead only applies if T has a non-trivial destructor.

   template <typename T, typename... Params>
-  Own<T> allocateOwn(Params&&... params) const;
+  Own<T> allocateOwn(Params&&... params);
   template <typename T>
-  Array<T> allocateOwnArray(size_t size) const;
+  Array<T> allocateOwnArray(size_t size);
   template <typename T>
-  ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity) const;
+  ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity);
   // Allocate an object or array of type T.  Destructors are executed when the returned Own<T>
   // or Array<T> goes out-of-scope, which must happen before the Arena is destroyed.  This variant
   // is useful when you need to control when the destructor is called.  This variant also avoids
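The two families documented above differ only in when destructors run. A brief hedged example (Logger is a made-up type, not part of kj):

// Sketch: allocate<T>() defers destruction to the Arena; allocateOwn<T>() ties it to the Own<T>.
struct Logger {
  ~Logger() { /* flush, close, ... */ }
};

void destructorTiming(kj::Arena& arena) {
  Logger& a = arena.allocate<Logger>();      // ~Logger() for `a` runs when `arena` is destroyed,
                                             // in reverse order of allocation
  {
    kj::Own<Logger> b = arena.allocateOwn<Logger>();
    // ...
  }                                          // ~Logger() for `b` runs here, at end of scope
  (void)a;
}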
@@ -73,11 +73,11 @@ public:
   // slightly more efficient.

   template <typename T>
-  inline T& copy(T&& value) const { return allocate<Decay<T>>(kj::fwd<T>(value)); }
+  inline T& copy(T&& value) { return allocate<Decay<T>>(kj::fwd<T>(value)); }
   // Allocate a copy of the given value in the arena.  This is just a shortcut for calling the
   // type's copy (or move) constructor.

-  StringPtr copyString(StringPtr content) const;
+  StringPtr copyString(StringPtr content);
   // Make a copy of the given string inside the arena, and return a pointer to the copy.

 private:
@@ -91,37 +91,26 @@ private:
     ObjectHeader* next;
   };

-  struct State {
-    size_t nextChunkSize;
-    ChunkHeader* chunkList;
-    mutable ObjectHeader* objectList;
-
-    ChunkHeader* currentChunk;
-
-    inline State(size_t nextChunkSize)
-        : nextChunkSize(nextChunkSize), chunkList(nullptr),
-          objectList(nullptr), currentChunk(nullptr) {}
-    inline ~State() noexcept(false) { cleanup(); }
-
-    void cleanup();
-    // Run all destructors, leaving the above pointers null.  If a destructor throws, the State is
-    // left in a consistent state, such that if cleanup() is called again, it will pick up where
-    // it left off.
-  };
-  MutexGuarded<State> state;
+  size_t nextChunkSize;
+  ChunkHeader* chunkList = nullptr;
+  ObjectHeader* objectList = nullptr;
+
+  ChunkHeader* currentChunk = nullptr;
+
+  void cleanup();
+  // Run all destructors, leaving the above pointers null.  If a destructor throws, the State is
+  // left in a consistent state, such that if cleanup() is called again, it will pick up where
+  // it left off.

-  void* allocateBytes(size_t amount, uint alignment, bool hasDisposer) const;
+  void* allocateBytes(size_t amount, uint alignment, bool hasDisposer);
   // Allocate the given number of bytes.  `hasDisposer` must be true if `setDisposer()` may be
   // called on this pointer later.

-  void* allocateBytesLockless(size_t amount, uint alignment) const;
-  // Try to allocate the given number of bytes without taking a lock.  Fails if and only if there
-  // is no space left in the current chunk.
-
-  void* allocateBytesFallback(size_t amount, uint alignment) const;
-  // Fallback used when the current chunk is out of space.
+  void* allocateBytesInternal(size_t amount, uint alignment);

-  void setDestructor(void* ptr, void (*destructor)(void*)) const;
+  void setDestructor(void* ptr, void (*destructor)(void*));
   // Schedule the given destructor to be executed when the Arena is destroyed.  `ptr` must be a
   // pointer previously returned by an `allocateBytes()` call for which `hasDisposer` was true.
@@ -144,7 +133,7 @@ private:
 // Inline implementation details

 template <typename T, typename... Params>
-T& Arena::allocate(Params&&... params) const {
+T& Arena::allocate(Params&&... params) {
   T& result = *reinterpret_cast<T*>(allocateBytes(
       sizeof(T), alignof(T), !__has_trivial_destructor(T)));
   if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
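Note that the !__has_trivial_destructor(T) argument here is what keeps the destructor list free for plain data: only types with non-trivial destructors pay for an ObjectHeader and a setDestructor() call. A small illustrative pair of types (both hypothetical):

// Sketch: which allocations register a destructor with the arena.
struct Pod { int x; };                      // trivial destructor: no ObjectHeader, no list entry
struct Handle { ~Handle() { /* ... */ } };  // non-trivial: allocateBytes(..., hasDisposer = true)
                                            // followed by setDestructor()

void demo(kj::Arena& arena) {
  Pod& p = arena.allocate<Pod>();        // plain bump allocation
  Handle& h = arena.allocate<Handle>();  // its destructor runs when `arena` is destroyed
  (void)p; (void)h;
}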
@@ -157,7 +146,7 @@ T& Arena::allocate(Params&&... params) const {
 }

 template <typename T>
-ArrayPtr<T> Arena::allocateArray(size_t size) const {
+ArrayPtr<T> Arena::allocateArray(size_t size) {
   if (__has_trivial_destructor(T)) {
     ArrayPtr<T> result = arrayPtr(reinterpret_cast<T*>(allocateBytes(
@@ -193,7 +182,7 @@ ArrayPtr<T> Arena::allocateArray(size_t size) const {
 }

 template <typename T, typename... Params>
-Own<T> Arena::allocateOwn(Params&&... params) const {
+Own<T> Arena::allocateOwn(Params&&... params) {
   T& result = *reinterpret_cast<T*>(allocateBytes(sizeof(T), alignof(T), false));
   if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
     ctor(result, kj::fwd<Params>(params)...);
@@ -202,7 +191,7 @@ Own<T> Arena::allocateOwn(Params&&... params) const {
 }

 template <typename T>
-Array<T> Arena::allocateOwnArray(size_t size) const {
+Array<T> Arena::allocateOwnArray(size_t size) {
   ArrayBuilder<T> result = allocateOwnArrayBuilder<T>(size);
   for (size_t i = 0; i < size; i++) {
     result.add();
@@ -211,7 +200,7 @@ Array<T> Arena::allocateOwnArray(size_t size) const {
 }

 template <typename T>
-ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) const {
+ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) {
   return ArrayBuilder<T>(
       reinterpret_cast<T*>(allocateBytes(sizeof(T) * capacity, alignof(T), false)),
       capacity, DestructorOnlyArrayDisposer::instance);