Commit 91acb5b2 authored by Kenton Varda's avatar Kenton Varda

Make kj::Arena not thread-safe since it hurts performance even when used single-threaded.

parent c5bed0d2
...@@ -185,8 +185,7 @@ private: ...@@ -185,8 +185,7 @@ private:
// Extract the ID from the declaration, or if it has none, generate one based on the name and // Extract the ID from the declaration, or if it has none, generate one based on the name and
// parent ID. // parent ID.
static kj::StringPtr joinDisplayName(const kj::Arena& arena, Node& parent, static kj::StringPtr joinDisplayName(kj::Arena& arena, Node& parent, kj::StringPtr declName);
kj::StringPtr declName);
// Join the parent's display name with the child's unqualified name to construct the child's // Join the parent's display name with the child's unqualified name to construct the child's
// display name. // display name.
...@@ -274,10 +273,10 @@ public: ...@@ -274,10 +273,10 @@ public:
bootstrapLoader(loaderCallback) {} bootstrapLoader(loaderCallback) {}
}; };
const kj::Arena& getNodeArena() { return nodeArena; } kj::Arena& getNodeArena() { return nodeArena; }
// Arena where nodes and other permanent objects should be allocated. // Arena where nodes and other permanent objects should be allocated.
const Workspace& getWorkspace() { return workspace; } Workspace& getWorkspace() { return workspace; }
// Temporary workspace that can be used to construct bootstrap objects. // Temporary workspace that can be used to construct bootstrap objects.
inline bool shouldCompileAnnotations() { inline bool shouldCompileAnnotations() {
...@@ -395,7 +394,7 @@ uint64_t Compiler::Node::generateId(uint64_t parentId, kj::StringPtr declName, ...@@ -395,7 +394,7 @@ uint64_t Compiler::Node::generateId(uint64_t parentId, kj::StringPtr declName,
} }
kj::StringPtr Compiler::Node::joinDisplayName( kj::StringPtr Compiler::Node::joinDisplayName(
const kj::Arena& arena, Node& parent, kj::StringPtr declName) { kj::Arena& arena, Node& parent, kj::StringPtr declName) {
kj::ArrayPtr<char> result = arena.allocateArray<char>( kj::ArrayPtr<char> result = arena.allocateArray<char>(
parent.displayName.size() + declName.size() + 2); parent.displayName.size() + declName.size() + 2);
......
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#include "arena.h" #include "arena.h"
#include "debug.h" #include "debug.h"
#include "thread.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <stdint.h> #include <stdint.h>
...@@ -307,70 +306,5 @@ TEST(Arena, Strings) { ...@@ -307,70 +306,5 @@ TEST(Arena, Strings) {
EXPECT_EQ(quux.end() + 1, corge.begin()); EXPECT_EQ(quux.end() + 1, corge.begin());
} }
struct ThreadTestObject {
ThreadTestObject* next;
void* owner; // points into the owning thread's stack
ThreadTestObject(ThreadTestObject* next, void* owner)
: next(next), owner(owner) {}
~ThreadTestObject() { ++destructorCount; }
static uint destructorCount;
};
uint ThreadTestObject::destructorCount = 0;
TEST(Arena, Threads) {
// Test thread-safety. We allocate objects in four threads simultaneously, verify that they
// are not corrupted, then verify that their destructors are all called when the Arena is
// destroyed.
{
MutexGuarded<Arena> arena;
// Func to run in each thread.
auto threadFunc = [&]() {
int me;
ThreadTestObject* head = nullptr;
{
auto lock = arena.lockShared();
// Allocate a huge linked list.
for (uint i = 0; i < 100000; i++) {
head = &lock->allocate<ThreadTestObject>(head, &me);
}
}
// Wait until all other threads are done before verifying.
arena.lockExclusive();
// Verify that the list hasn't been corrupted.
while (head != nullptr) {
ASSERT_EQ(&me, head->owner);
head = head->next;
}
};
{
auto lock = arena.lockExclusive();
Thread thread1(threadFunc);
Thread thread2(threadFunc);
Thread thread3(threadFunc);
Thread thread4(threadFunc);
// Wait for threads to be ready.
usleep(10000);
auto release = kj::mv(lock);
// As we go out of scope, the lock will be released (since `release` is destroyed first),
// allowing all the threads to start running. We'll then join each thread.
}
EXPECT_EQ(0u, ThreadTestObject::destructorCount);
}
EXPECT_EQ(400000u, ThreadTestObject::destructorCount);
}
} // namespace } // namespace
} // namespace kj } // namespace kj
...@@ -27,10 +27,10 @@ ...@@ -27,10 +27,10 @@
namespace kj { namespace kj {
Arena::Arena(size_t chunkSizeHint): state(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {} Arena::Arena(size_t chunkSizeHint): nextChunkSize(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {}
Arena::Arena(ArrayPtr<byte> scratch) Arena::Arena(ArrayPtr<byte> scratch)
: state(kj::max(sizeof(ChunkHeader), scratch.size())) { : nextChunkSize(kj::max(sizeof(ChunkHeader), scratch.size())) {
if (scratch.size() > sizeof(ChunkHeader)) { if (scratch.size() > sizeof(ChunkHeader)) {
ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(scratch.begin()); ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(scratch.begin());
chunk->end = scratch.end(); chunk->end = scratch.end();
...@@ -39,19 +39,19 @@ Arena::Arena(ArrayPtr<byte> scratch) ...@@ -39,19 +39,19 @@ Arena::Arena(ArrayPtr<byte> scratch)
// Don't place the chunk in the chunk list because it's not ours to delete. Just make it the // Don't place the chunk in the chunk list because it's not ours to delete. Just make it the
// current chunk so that we'll allocate from it until it is empty. // current chunk so that we'll allocate from it until it is empty.
state.getWithoutLock().currentChunk = chunk; currentChunk = chunk;
} }
} }
Arena::~Arena() noexcept(false) { Arena::~Arena() noexcept(false) {
// Run cleanup explicitly. It will be executed again implicitly when state's destructor is // Run cleanup() explicitly, but if it throws an exception, make sure to run it again as part of
// called. This ensures that if the first pass throws an exception, remaining objects are still // unwind. The second call will not throw because destructors are required to guard against
// destroyed. If the second pass throws, the program terminates, but any destructors that could // exceptions when already unwinding.
// throw should be using UnwindDetector to avoid this. KJ_ON_SCOPE_FAILURE(cleanup());
state.getWithoutLock().cleanup(); cleanup();
} }
void Arena::State::cleanup() { void Arena::cleanup() {
while (objectList != nullptr) { while (objectList != nullptr) {
void* ptr = objectList + 1; void* ptr = objectList + 1;
auto destructor = objectList->destructor; auto destructor = objectList->destructor;
...@@ -91,17 +91,13 @@ inline size_t alignTo(size_t s, uint alignment) { ...@@ -91,17 +91,13 @@ inline size_t alignTo(size_t s, uint alignment) {
} // namespace } // namespace
void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) const { void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) {
if (hasDisposer) { if (hasDisposer) {
alignment = kj::max(alignment, alignof(ObjectHeader)); alignment = kj::max(alignment, alignof(ObjectHeader));
amount += alignTo(sizeof(ObjectHeader), alignment); amount += alignTo(sizeof(ObjectHeader), alignment);
} }
void* result = allocateBytesLockless(amount, alignment); void* result = allocateBytesInternal(amount, alignment);
if (result == nullptr) {
result = allocateBytesFallback(amount, alignment);
}
if (hasDisposer) { if (hasDisposer) {
// Reserve space for the ObjectHeader, but don't add it to the object list yet. // Reserve space for the ObjectHeader, but don't add it to the object list yet.
...@@ -112,90 +108,62 @@ void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) cons ...@@ -112,90 +108,62 @@ void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) cons
return result; return result;
} }
void* Arena::allocateBytesLockless(size_t amount, uint alignment) const { void* Arena::allocateBytesInternal(size_t amount, uint alignment) {
for (;;) { if (currentChunk != nullptr) {
ChunkHeader* chunk = __atomic_load_n(&state.getWithoutLock().currentChunk, __ATOMIC_ACQUIRE); ChunkHeader* chunk = currentChunk;
byte* alignedPos = alignTo(chunk->pos, alignment);
if (chunk == nullptr) {
// No chunks allocated yet.
return nullptr;
}
byte* pos = __atomic_load_n(&chunk->pos, __ATOMIC_RELAXED);
byte* alignedPos = alignTo(pos, alignment);
byte* endPos = alignedPos + amount;
// Careful about pointer wrapping (e.g. if the chunk is near the end of the address space).
if (chunk->end - endPos < 0) {
// Not enough space.
return nullptr;
}
// There appears to be enough space in this chunk, unless another thread stole it. // Careful about overflow here.
if (KJ_LIKELY(__atomic_compare_exchange_n( if (amount + (alignedPos - chunk->pos) <= chunk->end - chunk->pos) {
&chunk->pos, &pos, endPos, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) { // There's enough space in this chunk.
chunk->pos = alignedPos + amount;
return alignedPos; return alignedPos;
} }
// Contention. Retry.
} }
}
void* Arena::allocateBytesFallback(size_t amount, uint alignment) const {
auto lock = state.lockExclusive();
// Now that we have the lock, try one more time to allocate from the current chunk. This could // Not enough space in the current chunk. Allocate a new one.
// work if another thread allocated a new chunk while we were waiting for the lock.
void* locklessResult = allocateBytesLockless(amount, alignment);
if (locklessResult != nullptr) {
return locklessResult;
}
// OK, we know the current chunk is out of space and we hold the lock so no one else is // We need to allocate at least enough space for the ChunkHeader and the requested allocation.
// allocating a new one. Let's do it!
// If the alignment is less than that of the chunk header, we'll need to increase it.
alignment = kj::max(alignment, alignof(ChunkHeader)); alignment = kj::max(alignment, alignof(ChunkHeader));
// If the ChunkHeader size does not match the alignment, we'll need to pad it up.
amount += alignTo(sizeof(ChunkHeader), alignment); amount += alignTo(sizeof(ChunkHeader), alignment);
while (lock->nextChunkSize < amount) { // Make sure we're going to allocate enough space.
lock->nextChunkSize *= 2; while (nextChunkSize < amount) {
nextChunkSize *= 2;
} }
byte* bytes = reinterpret_cast<byte*>(operator new(lock->nextChunkSize)); // Allocate.
byte* bytes = reinterpret_cast<byte*>(operator new(nextChunkSize));
// Set up the ChunkHeader at the beginning of the allocation.
ChunkHeader* newChunk = reinterpret_cast<ChunkHeader*>(bytes); ChunkHeader* newChunk = reinterpret_cast<ChunkHeader*>(bytes);
newChunk->next = lock->chunkList; newChunk->next = chunkList;
newChunk->pos = bytes + amount; newChunk->pos = bytes + amount;
newChunk->end = bytes + lock->nextChunkSize; newChunk->end = bytes + nextChunkSize;
__atomic_store_n(&lock->currentChunk, newChunk, __ATOMIC_RELEASE); currentChunk = newChunk;
chunkList = newChunk;
nextChunkSize *= 2;
lock->nextChunkSize *= 2; // Move past the ChunkHeader to find the position of the allocated object.
return alignTo(bytes + sizeof(ChunkHeader), alignment);
byte* result = alignTo(bytes + sizeof(ChunkHeader), alignment);
lock->chunkList = newChunk;
return result;
} }
StringPtr Arena::copyString(StringPtr content) const { StringPtr Arena::copyString(StringPtr content) {
char* data = reinterpret_cast<char*>(allocateBytes(content.size() + 1, 1, false)); char* data = reinterpret_cast<char*>(allocateBytes(content.size() + 1, 1, false));
memcpy(data, content.cStr(), content.size() + 1); memcpy(data, content.cStr(), content.size() + 1);
return StringPtr(data, content.size()); return StringPtr(data, content.size());
} }
void Arena::setDestructor(void* ptr, void (*destructor)(void*)) const { void Arena::setDestructor(void* ptr, void (*destructor)(void*)) {
ObjectHeader* header = reinterpret_cast<ObjectHeader*>(ptr) - 1; ObjectHeader* header = reinterpret_cast<ObjectHeader*>(ptr) - 1;
KJ_DASSERT(reinterpret_cast<uintptr_t>(header) % alignof(ObjectHeader) == 0); KJ_DASSERT(reinterpret_cast<uintptr_t>(header) % alignof(ObjectHeader) == 0);
header->destructor = destructor; header->destructor = destructor;
header->next = state.getWithoutLock().objectList; header->next = objectList;
objectList = header;
// We can use relaxed atomics here because the object list is not actually traversed until the
// destructor, which needs to be synchronized in its own way.
while (!__atomic_compare_exchange_n(
&state.getWithoutLock().objectList, &header->next, header, true,
__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
// Retry.
}
} }
} // namespace kj } // namespace kj
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include "memory.h" #include "memory.h"
#include "array.h" #include "array.h"
#include "string.h" #include "string.h"
#include "mutex.h"
namespace kj { namespace kj {
...@@ -35,9 +34,10 @@ class Arena { ...@@ -35,9 +34,10 @@ class Arena {
// A class which allows several objects to be allocated in contiguous chunks of memory, then // A class which allows several objects to be allocated in contiguous chunks of memory, then
// frees them all at once. // frees them all at once.
// //
// Allocating from the same Arena in multiple threads concurrently is safe but not particularly // Allocating from the same Arena in multiple threads concurrently is NOT safe, because making
// performant due to contention. The class could be optimized in the future to use per-thread // it safe would require atomic operations that would slow down allocation even when
// chunks to solve this. // single-threaded. If you need to use arena allocation in a multithreaded context, consider
// allocating thread-local arenas.
public: public:
explicit Arena(size_t chunkSizeHint = 1024); explicit Arena(size_t chunkSizeHint = 1024);
...@@ -52,20 +52,20 @@ public: ...@@ -52,20 +52,20 @@ public:
~Arena() noexcept(false); ~Arena() noexcept(false);
template <typename T, typename... Params> template <typename T, typename... Params>
T& allocate(Params&&... params) const; T& allocate(Params&&... params);
template <typename T> template <typename T>
ArrayPtr<T> allocateArray(size_t size) const; ArrayPtr<T> allocateArray(size_t size);
// Allocate an object or array of type T. If T has a non-trivial destructor, that destructor // Allocate an object or array of type T. If T has a non-trivial destructor, that destructor
// will be run during the Arena's destructor. Such destructors are run in opposite order of // will be run during the Arena's destructor. Such destructors are run in opposite order of
// allocation. Note that these methods must maintain a list of destructors to call, which has // allocation. Note that these methods must maintain a list of destructors to call, which has
// overhead, but this overhead only applies if T has a non-trivial destructor. // overhead, but this overhead only applies if T has a non-trivial destructor.
template <typename T, typename... Params> template <typename T, typename... Params>
Own<T> allocateOwn(Params&&... params) const; Own<T> allocateOwn(Params&&... params);
template <typename T> template <typename T>
Array<T> allocateOwnArray(size_t size) const; Array<T> allocateOwnArray(size_t size);
template <typename T> template <typename T>
ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity) const; ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity);
// Allocate an object or array of type T. Destructors are executed when the returned Own<T> // Allocate an object or array of type T. Destructors are executed when the returned Own<T>
// or Array<T> goes out-of-scope, which must happen before the Arena is destroyed. This variant // or Array<T> goes out-of-scope, which must happen before the Arena is destroyed. This variant
// is useful when you need to control when the destructor is called. This variant also avoids // is useful when you need to control when the destructor is called. This variant also avoids
...@@ -73,11 +73,11 @@ public: ...@@ -73,11 +73,11 @@ public:
// slightly more efficient. // slightly more efficient.
template <typename T> template <typename T>
inline T& copy(T&& value) const { return allocate<Decay<T>>(kj::fwd<T>(value)); } inline T& copy(T&& value) { return allocate<Decay<T>>(kj::fwd<T>(value)); }
// Allocate a copy of the given value in the arena. This is just a shortcut for calling the // Allocate a copy of the given value in the arena. This is just a shortcut for calling the
// type's copy (or move) constructor. // type's copy (or move) constructor.
StringPtr copyString(StringPtr content) const; StringPtr copyString(StringPtr content);
// Make a copy of the given string inside the arena, and return a pointer to the copy. // Make a copy of the given string inside the arena, and return a pointer to the copy.
private: private:
...@@ -91,37 +91,26 @@ private: ...@@ -91,37 +91,26 @@ private:
ObjectHeader* next; ObjectHeader* next;
}; };
struct State {
size_t nextChunkSize; size_t nextChunkSize;
ChunkHeader* chunkList; ChunkHeader* chunkList = nullptr;
mutable ObjectHeader* objectList; ObjectHeader* objectList = nullptr;
ChunkHeader* currentChunk; ChunkHeader* currentChunk = nullptr;
inline State(size_t nextChunkSize)
: nextChunkSize(nextChunkSize), chunkList(nullptr),
objectList(nullptr), currentChunk(nullptr) {}
inline ~State() noexcept(false) { cleanup(); }
void cleanup(); void cleanup();
// Run all destructors, leaving the above pointers null. If a destructor throws, the State is // Run all destructors, leaving the above pointers null. If a destructor throws, the State is
// left in a consistent state, such that if cleanup() is called again, it will pick up where // left in a consistent state, such that if cleanup() is called again, it will pick up where
// it left off. // it left off.
};
MutexGuarded<State> state;
void* allocateBytes(size_t amount, uint alignment, bool hasDisposer) const; void* allocateBytes(size_t amount, uint alignment, bool hasDisposer);
// Allocate the given number of bytes. `hasDisposer` must be true if `setDisposer()` may be // Allocate the given number of bytes. `hasDisposer` must be true if `setDisposer()` may be
// called on this pointer later. // called on this pointer later.
void* allocateBytesLockless(size_t amount, uint alignment) const; void* allocateBytesInternal(size_t amount, uint alignment);
// Try to allocate the given number of bytes without taking a lock. Fails if and only if there // Try to allocate the given number of bytes without taking a lock. Fails if and only if there
// is no space left in the current chunk. // is no space left in the current chunk.
void* allocateBytesFallback(size_t amount, uint alignment) const; void setDestructor(void* ptr, void (*destructor)(void*));
// Fallback used when the current chunk is out of space.
void setDestructor(void* ptr, void (*destructor)(void*)) const;
// Schedule the given destructor to be executed when the Arena is destroyed. `ptr` must be a // Schedule the given destructor to be executed when the Arena is destroyed. `ptr` must be a
// pointer previously returned by an `allocateBytes()` call for which `hasDisposer` was true. // pointer previously returned by an `allocateBytes()` call for which `hasDisposer` was true.
...@@ -144,7 +133,7 @@ private: ...@@ -144,7 +133,7 @@ private:
// Inline implementation details // Inline implementation details
template <typename T, typename... Params> template <typename T, typename... Params>
T& Arena::allocate(Params&&... params) const { T& Arena::allocate(Params&&... params) {
T& result = *reinterpret_cast<T*>(allocateBytes( T& result = *reinterpret_cast<T*>(allocateBytes(
sizeof(T), alignof(T), !__has_trivial_destructor(T))); sizeof(T), alignof(T), !__has_trivial_destructor(T)));
if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) { if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
...@@ -157,7 +146,7 @@ T& Arena::allocate(Params&&... params) const { ...@@ -157,7 +146,7 @@ T& Arena::allocate(Params&&... params) const {
} }
template <typename T> template <typename T>
ArrayPtr<T> Arena::allocateArray(size_t size) const { ArrayPtr<T> Arena::allocateArray(size_t size) {
if (__has_trivial_destructor(T)) { if (__has_trivial_destructor(T)) {
ArrayPtr<T> result = ArrayPtr<T> result =
arrayPtr(reinterpret_cast<T*>(allocateBytes( arrayPtr(reinterpret_cast<T*>(allocateBytes(
...@@ -193,7 +182,7 @@ ArrayPtr<T> Arena::allocateArray(size_t size) const { ...@@ -193,7 +182,7 @@ ArrayPtr<T> Arena::allocateArray(size_t size) const {
} }
template <typename T, typename... Params> template <typename T, typename... Params>
Own<T> Arena::allocateOwn(Params&&... params) const { Own<T> Arena::allocateOwn(Params&&... params) {
T& result = *reinterpret_cast<T*>(allocateBytes(sizeof(T), alignof(T), false)); T& result = *reinterpret_cast<T*>(allocateBytes(sizeof(T), alignof(T), false));
if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) { if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
ctor(result, kj::fwd<Params>(params)...); ctor(result, kj::fwd<Params>(params)...);
...@@ -202,7 +191,7 @@ Own<T> Arena::allocateOwn(Params&&... params) const { ...@@ -202,7 +191,7 @@ Own<T> Arena::allocateOwn(Params&&... params) const {
} }
template <typename T> template <typename T>
Array<T> Arena::allocateOwnArray(size_t size) const { Array<T> Arena::allocateOwnArray(size_t size) {
ArrayBuilder<T> result = allocateOwnArrayBuilder<T>(size); ArrayBuilder<T> result = allocateOwnArrayBuilder<T>(size);
for (size_t i = 0; i < size; i++) { for (size_t i = 0; i < size; i++) {
result.add(); result.add();
...@@ -211,7 +200,7 @@ Array<T> Arena::allocateOwnArray(size_t size) const { ...@@ -211,7 +200,7 @@ Array<T> Arena::allocateOwnArray(size_t size) const {
} }
template <typename T> template <typename T>
ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) const { ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) {
return ArrayBuilder<T>( return ArrayBuilder<T>(
reinterpret_cast<T*>(allocateBytes(sizeof(T) * capacity, alignof(T), false)), reinterpret_cast<T*>(allocateBytes(sizeof(T) * capacity, alignof(T), false)),
capacity, DestructorOnlyArrayDisposer::instance); capacity, DestructorOnlyArrayDisposer::instance);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment