Commit 91acb5b2 authored by Kenton Varda

Make kj::Arena not thread-safe, since thread safety hurts performance even when the arena is used single-threaded.

parent c5bed0d2
......@@ -185,8 +185,7 @@ private:
// Extract the ID from the declaration, or if it has none, generate one based on the name and
// parent ID.
static kj::StringPtr joinDisplayName(const kj::Arena& arena, Node& parent,
kj::StringPtr declName);
static kj::StringPtr joinDisplayName(kj::Arena& arena, Node& parent, kj::StringPtr declName);
// Join the parent's display name with the child's unqualified name to construct the child's
// display name.
......@@ -274,10 +273,10 @@ public:
bootstrapLoader(loaderCallback) {}
};
const kj::Arena& getNodeArena() { return nodeArena; }
kj::Arena& getNodeArena() { return nodeArena; }
// Arena where nodes and other permanent objects should be allocated.
const Workspace& getWorkspace() { return workspace; }
Workspace& getWorkspace() { return workspace; }
// Temporary workspace that can be used to construct bootstrap objects.
inline bool shouldCompileAnnotations() {
......@@ -395,7 +394,7 @@ uint64_t Compiler::Node::generateId(uint64_t parentId, kj::StringPtr declName,
}
kj::StringPtr Compiler::Node::joinDisplayName(
const kj::Arena& arena, Node& parent, kj::StringPtr declName) {
kj::Arena& arena, Node& parent, kj::StringPtr declName) {
kj::ArrayPtr<char> result = arena.allocateArray<char>(
parent.displayName.size() + declName.size() + 2);
......
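The body of joinDisplayName is elided by this hunk. As a hedged sketch of what such a join over an arena-allocated buffer typically looks like (the separator choice below is purely illustrative, not the elided logic):

  // Hedged sketch only -- not the elided body. Copies the parent's display name,
  // a separator, and the child's name into one arena-allocated buffer.
  kj::ArrayPtr<char> buffer = arena.allocateArray<char>(
      parent.displayName.size() + declName.size() + 2);
  char* pos = buffer.begin();
  memcpy(pos, parent.displayName.begin(), parent.displayName.size());
  pos += parent.displayName.size();
  *pos++ = '.';                       // illustrative separator; the real choice is elided above
  memcpy(pos, declName.begin(), declName.size());
  pos += declName.size();
  *pos = '\0';                        // the "+ 2" reserves room for separator and NUL
  return kj::StringPtr(buffer.begin(), pos - buffer.begin());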
......@@ -23,7 +23,6 @@
#include "arena.h"
#include "debug.h"
#include "thread.h"
#include <gtest/gtest.h>
#include <stdint.h>
......@@ -307,70 +306,5 @@ TEST(Arena, Strings) {
EXPECT_EQ(quux.end() + 1, corge.begin());
}
struct ThreadTestObject {
ThreadTestObject* next;
void* owner; // points into the owning thread's stack
ThreadTestObject(ThreadTestObject* next, void* owner)
: next(next), owner(owner) {}
~ThreadTestObject() { ++destructorCount; }
static uint destructorCount;
};
uint ThreadTestObject::destructorCount = 0;
TEST(Arena, Threads) {
// Test thread-safety. We allocate objects in four threads simultaneously, verify that they
// are not corrupted, then verify that their destructors are all called when the Arena is
// destroyed.
{
MutexGuarded<Arena> arena;
// Func to run in each thread.
auto threadFunc = [&]() {
int me;
ThreadTestObject* head = nullptr;
{
auto lock = arena.lockShared();
// Allocate a huge linked list.
for (uint i = 0; i < 100000; i++) {
head = &lock->allocate<ThreadTestObject>(head, &me);
}
}
// Wait until all other threads are done before verifying.
arena.lockExclusive();
// Verify that the list hasn't been corrupted.
while (head != nullptr) {
ASSERT_EQ(&me, head->owner);
head = head->next;
}
};
{
auto lock = arena.lockExclusive();
Thread thread1(threadFunc);
Thread thread2(threadFunc);
Thread thread3(threadFunc);
Thread thread4(threadFunc);
// Wait for threads to be ready.
usleep(10000);
auto release = kj::mv(lock);
// As we go out of scope, the lock will be released (since `release` is destroyed first),
// allowing all the threads to start running. We'll then join each thread.
}
EXPECT_EQ(0u, ThreadTestObject::destructorCount);
}
EXPECT_EQ(400000u, ThreadTestObject::destructorCount);
}
} // namespace
} // namespace kj
......@@ -27,10 +27,10 @@
namespace kj {
Arena::Arena(size_t chunkSizeHint): state(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {}
Arena::Arena(size_t chunkSizeHint): nextChunkSize(kj::max(sizeof(ChunkHeader), chunkSizeHint)) {}
Arena::Arena(ArrayPtr<byte> scratch)
: state(kj::max(sizeof(ChunkHeader), scratch.size())) {
: nextChunkSize(kj::max(sizeof(ChunkHeader), scratch.size())) {
if (scratch.size() > sizeof(ChunkHeader)) {
ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(scratch.begin());
chunk->end = scratch.end();
......@@ -39,19 +39,19 @@ Arena::Arena(ArrayPtr<byte> scratch)
// Don't place the chunk in the chunk list because it's not ours to delete. Just make it the
// current chunk so that we'll allocate from it until it is empty.
state.getWithoutLock().currentChunk = chunk;
currentChunk = chunk;
}
}
Arena::~Arena() noexcept(false) {
// Run cleanup explicitly. It will be executed again implicitly when state's destructor is
// called. This ensures that if the first pass throws an exception, remaining objects are still
// destroyed. If the second pass throws, the program terminates, but any destructors that could
// throw should be using UnwindDetector to avoid this.
state.getWithoutLock().cleanup();
// Run cleanup() explicitly, but if it throws an exception, make sure to run it again as part of
// unwind. The second call will not throw because destructors are required to guard against
// exceptions when already unwinding.
KJ_ON_SCOPE_FAILURE(cleanup());
cleanup();
}
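The comment above describes a two-pass cleanup idiom. Here is a minimal standalone sketch of the same idea; the guard below is an illustrative stand-in for KJ_ON_SCOPE_FAILURE (which packages this up as a macro), not kj's implementation, and ExampleArena is a hypothetical type:

  #include <exception>

  // Illustrative stand-in for KJ_ON_SCOPE_FAILURE: runs the callback only when a
  // new exception is unwinding through the enclosing scope.
  template <typename Func>
  class ScopeFailureGuard {
  public:
    explicit ScopeFailureGuard(Func func)
        : func(func), exceptionsAtEntry(std::uncaught_exceptions()) {}
    ~ScopeFailureGuard() {
      if (std::uncaught_exceptions() > exceptionsAtEntry) func();
    }
  private:
    Func func;
    int exceptionsAtEntry;
  };

  struct ExampleArena {               // hypothetical type, for illustration only
    void cleanup() {}                 // would destroy remaining objects; may throw on the first pass
    ~ExampleArena() noexcept(false) {
      auto retry = [this]() { cleanup(); };
      ScopeFailureGuard<decltype(retry)> guard(retry);  // second pass during unwind
      cleanup();                                        // explicit first pass
    }
  };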
void Arena::State::cleanup() {
void Arena::cleanup() {
while (objectList != nullptr) {
void* ptr = objectList + 1;
auto destructor = objectList->destructor;
......@@ -91,17 +91,13 @@ inline size_t alignTo(size_t s, uint alignment) {
} // namespace
void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) const {
void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) {
if (hasDisposer) {
alignment = kj::max(alignment, alignof(ObjectHeader));
amount += alignTo(sizeof(ObjectHeader), alignment);
}
void* result = allocateBytesLockless(amount, alignment);
if (result == nullptr) {
result = allocateBytesFallback(amount, alignment);
}
void* result = allocateBytesInternal(amount, alignment);
if (hasDisposer) {
// Reserve space for the ObjectHeader, but don't add it to the object list yet.
......@@ -112,90 +108,62 @@ void* Arena::allocateBytes(size_t amount, uint alignment, bool hasDisposer) cons
return result;
}
void* Arena::allocateBytesLockless(size_t amount, uint alignment) const {
for (;;) {
ChunkHeader* chunk = __atomic_load_n(&state.getWithoutLock().currentChunk, __ATOMIC_ACQUIRE);
if (chunk == nullptr) {
// No chunks allocated yet.
return nullptr;
}
byte* pos = __atomic_load_n(&chunk->pos, __ATOMIC_RELAXED);
byte* alignedPos = alignTo(pos, alignment);
byte* endPos = alignedPos + amount;
// Careful about pointer wrapping (e.g. if the chunk is near the end of the address space).
if (chunk->end - endPos < 0) {
// Not enough space.
return nullptr;
}
void* Arena::allocateBytesInternal(size_t amount, uint alignment) {
if (currentChunk != nullptr) {
ChunkHeader* chunk = currentChunk;
byte* alignedPos = alignTo(chunk->pos, alignment);
// There appears to be enough space in this chunk, unless another thread stole it.
if (KJ_LIKELY(__atomic_compare_exchange_n(
&chunk->pos, &pos, endPos, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
// Careful about overflow here.
if (amount + (alignedPos - chunk->pos) <= chunk->end - chunk->pos) {
// There's enough space in this chunk.
chunk->pos = alignedPos + amount;
return alignedPos;
}
// Contention. Retry.
}
}
void* Arena::allocateBytesFallback(size_t amount, uint alignment) const {
auto lock = state.lockExclusive();
// Now that we have the lock, try one more time to allocate from the current chunk. This could
// work if another thread allocated a new chunk while we were waiting for the lock.
void* locklessResult = allocateBytesLockless(amount, alignment);
if (locklessResult != nullptr) {
return locklessResult;
}
// Not enough space in the current chunk. Allocate a new one.
// OK, we know the current chunk is out of space and we hold the lock so no one else is
// allocating a new one. Let's do it!
// We need to allocate at least enough space for the ChunkHeader and the requested allocation.
// If the alignment is less than that of the chunk header, we'll need to increase it.
alignment = kj::max(alignment, alignof(ChunkHeader));
// If the ChunkHeader size does not match the alignment, we'll need to pad it up.
amount += alignTo(sizeof(ChunkHeader), alignment);
while (lock->nextChunkSize < amount) {
lock->nextChunkSize *= 2;
// Make sure we're going to allocate enough space.
while (nextChunkSize < amount) {
nextChunkSize *= 2;
}
byte* bytes = reinterpret_cast<byte*>(operator new(lock->nextChunkSize));
// Allocate.
byte* bytes = reinterpret_cast<byte*>(operator new(nextChunkSize));
// Set up the ChunkHeader at the beginning of the allocation.
ChunkHeader* newChunk = reinterpret_cast<ChunkHeader*>(bytes);
newChunk->next = lock->chunkList;
newChunk->next = chunkList;
newChunk->pos = bytes + amount;
newChunk->end = bytes + lock->nextChunkSize;
__atomic_store_n(&lock->currentChunk, newChunk, __ATOMIC_RELEASE);
newChunk->end = bytes + nextChunkSize;
currentChunk = newChunk;
chunkList = newChunk;
nextChunkSize *= 2;
lock->nextChunkSize *= 2;
byte* result = alignTo(bytes + sizeof(ChunkHeader), alignment);
lock->chunkList = newChunk;
return result;
// Move past the ChunkHeader to find the position of the allocated object.
return alignTo(bytes + sizeof(ChunkHeader), alignment);
}
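To make the pointer arithmetic above easier to follow, here is a hedged standalone sketch of the bump-pointer fast path; alignTo() below is an assumption about the elided helper (round up to a power-of-two alignment), not a copy of it:

  #include <cstddef>
  #include <cstdint>

  using byte = unsigned char;

  // Assumed behavior of the elided alignTo(): round `p` up to the next multiple
  // of a power-of-two `alignment`.
  static byte* alignTo(byte* p, unsigned alignment) {
    uintptr_t value = reinterpret_cast<uintptr_t>(p);
    value = (value + alignment - 1) & ~uintptr_t(alignment - 1);
    return reinterpret_cast<byte*>(value);
  }

  struct Chunk {
    byte* pos;   // next free byte
    byte* end;   // one past the last byte of the chunk
  };

  // Returns the allocation, or nullptr if the chunk is full; the caller would
  // then allocate a new chunk and keep doubling nextChunkSize, as in the diff.
  static void* bumpAllocate(Chunk& chunk, size_t amount, unsigned alignment) {
    byte* alignedPos = alignTo(chunk.pos, alignment);
    // Written to avoid overflow: compare sizes, not raw end pointers.
    if (amount + size_t(alignedPos - chunk.pos) <= size_t(chunk.end - chunk.pos)) {
      chunk.pos = alignedPos + amount;
      return alignedPos;
    }
    return nullptr;
  }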
StringPtr Arena::copyString(StringPtr content) const {
StringPtr Arena::copyString(StringPtr content) {
char* data = reinterpret_cast<char*>(allocateBytes(content.size() + 1, 1, false));
memcpy(data, content.cStr(), content.size() + 1);
return StringPtr(data, content.size());
}
void Arena::setDestructor(void* ptr, void (*destructor)(void*)) const {
void Arena::setDestructor(void* ptr, void (*destructor)(void*)) {
ObjectHeader* header = reinterpret_cast<ObjectHeader*>(ptr) - 1;
KJ_DASSERT(reinterpret_cast<uintptr_t>(header) % alignof(ObjectHeader) == 0);
header->destructor = destructor;
header->next = state.getWithoutLock().objectList;
// We can use relaxed atomics here because the object list is not actually traversed until the
// destructor, which needs to be synchronized in its own way.
while (!__atomic_compare_exchange_n(
&state.getWithoutLock().objectList, &header->next, header, true,
__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
// Retry.
}
header->next = objectList;
objectList = header;
}
} // namespace kj
......@@ -27,7 +27,6 @@
#include "memory.h"
#include "array.h"
#include "string.h"
#include "mutex.h"
namespace kj {
......@@ -35,9 +34,10 @@ class Arena {
// A class which allows several objects to be allocated in contiguous chunks of memory, then
// frees them all at once.
//
// Allocating from the same Arena in multiple threads concurrently is safe but not particularly
// performant due to contention. The class could be optimized in the future to use per-thread
// chunks to solve this.
// Allocating from the same Arena in multiple threads concurrently is NOT safe, because making
// it safe would require atomic operations that would slow down allocation even when
// single-threaded. If you need to use arena allocation in a multithreaded context, consider
// allocating thread-local arenas.
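// As a hedged sketch of the "thread-local arenas" suggestion (kj::Arena, kj::Thread, and
// allocate<T>() are real; the function name and workload are hypothetical):
//
//   #include <kj/arena.h>
//   #include <kj/thread.h>
//
//   void runIndependentWorker() {           // hypothetical function, for illustration
//     kj::Thread worker([]() {
//       kj::Arena arena;                      // owned by this thread only, so no locking
//       int& value = arena.allocate<int>(42); // fast, unsynchronized bump allocation
//       (void)value;
//       // The arena and everything allocated from it are destroyed when the
//       // lambda returns.
//     });
//     // kj::Thread joins in its destructor, before runIndependentWorker() returns.
//   }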
public:
explicit Arena(size_t chunkSizeHint = 1024);
......@@ -52,20 +52,20 @@ public:
~Arena() noexcept(false);
template <typename T, typename... Params>
T& allocate(Params&&... params) const;
T& allocate(Params&&... params);
template <typename T>
ArrayPtr<T> allocateArray(size_t size) const;
ArrayPtr<T> allocateArray(size_t size);
// Allocate an object or array of type T. If T has a non-trivial destructor, that destructor
// will be run during the Arena's destructor. Such destructors are run in opposite order of
// allocation. Note that these methods must maintain a list of destructors to call, which has
// overhead, but this overhead only applies if T has a non-trivial destructor.
template <typename T, typename... Params>
Own<T> allocateOwn(Params&&... params) const;
Own<T> allocateOwn(Params&&... params);
template <typename T>
Array<T> allocateOwnArray(size_t size) const;
Array<T> allocateOwnArray(size_t size);
template <typename T>
ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity) const;
ArrayBuilder<T> allocateOwnArrayBuilder(size_t capacity);
// Allocate an object or array of type T. Destructors are executed when the returned Own<T>
// or Array<T> goes out-of-scope, which must happen before the Arena is destroyed. This variant
// is useful when you need to control when the destructor is called. This variant also avoids
......@@ -73,11 +73,11 @@ public:
// slightly more efficient.
template <typename T>
inline T& copy(T&& value) const { return allocate<Decay<T>>(kj::fwd<T>(value)); }
inline T& copy(T&& value) { return allocate<Decay<T>>(kj::fwd<T>(value)); }
// Allocate a copy of the given value in the arena. This is just a shortcut for calling the
// type's copy (or move) constructor.
StringPtr copyString(StringPtr content) const;
StringPtr copyString(StringPtr content);
// Make a copy of the given string inside the arena, and return a pointer to the copy.
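// A hedged usage sketch of the interface above (Widget and example() are hypothetical;
// only the kj calls are real):
//
//   #include <kj/arena.h>
//   #include <kj/string.h>
//
//   struct Widget {                              // hypothetical type
//     kj::StringPtr name;
//     explicit Widget(kj::StringPtr name): name(name) {}
//     ~Widget() {}                               // non-trivial destructor, so the Arena tracks it
//   };
//
//   void example() {
//     kj::Arena arena;
//     kj::StringPtr label = arena.copyString("widget-0");  // copy lives inside the arena
//     Widget& w = arena.allocate<Widget>(label);           // destroyed with the arena, in
//                                                          // reverse order of allocation
//     kj::Own<Widget> owned = arena.allocateOwn<Widget>(label);
//     (void)w;
//     // `owned` is destroyed at the end of this scope, before `arena`, as required.
//   }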
private:
......@@ -91,37 +91,26 @@ private:
ObjectHeader* next;
};
struct State {
size_t nextChunkSize;
ChunkHeader* chunkList;
mutable ObjectHeader* objectList;
size_t nextChunkSize;
ChunkHeader* chunkList = nullptr;
ObjectHeader* objectList = nullptr;
ChunkHeader* currentChunk;
ChunkHeader* currentChunk = nullptr;
inline State(size_t nextChunkSize)
: nextChunkSize(nextChunkSize), chunkList(nullptr),
objectList(nullptr), currentChunk(nullptr) {}
inline ~State() noexcept(false) { cleanup(); }
void cleanup();
// Run all destructors, leaving the above pointers null. If a destructor throws, the State is
// left in a consistent state, such that if cleanup() is called again, it will pick up where
// it left off.
void cleanup();
// Run all destructors, leaving the above pointers null. If a destructor throws, the State is
// left in a consistent state, such that if cleanup() is called again, it will pick up where
// it left off.
};
MutexGuarded<State> state;
void* allocateBytes(size_t amount, uint alignment, bool hasDisposer) const;
void* allocateBytes(size_t amount, uint alignment, bool hasDisposer);
// Allocate the given number of bytes. `hasDisposer` must be true if `setDisposer()` may be
// called on this pointer later.
void* allocateBytesLockless(size_t amount, uint alignment) const;
void* allocateBytesInternal(size_t amount, uint alignment);
// Try to allocate the given number of bytes without taking a lock. Fails if and only if there
// is no space left in the current chunk.
void* allocateBytesFallback(size_t amount, uint alignment) const;
// Fallback used when the current chunk is out of space.
void setDestructor(void* ptr, void (*destructor)(void*)) const;
void setDestructor(void* ptr, void (*destructor)(void*));
// Schedule the given destructor to be executed when the Arena is destroyed. `ptr` must be a
// pointer previously returned by an `allocateBytes()` call for which `hasDisposer` was true.
......@@ -144,7 +133,7 @@ private:
// Inline implementation details
template <typename T, typename... Params>
T& Arena::allocate(Params&&... params) const {
T& Arena::allocate(Params&&... params) {
T& result = *reinterpret_cast<T*>(allocateBytes(
sizeof(T), alignof(T), !__has_trivial_destructor(T)));
if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
......@@ -157,7 +146,7 @@ T& Arena::allocate(Params&&... params) const {
}
template <typename T>
ArrayPtr<T> Arena::allocateArray(size_t size) const {
ArrayPtr<T> Arena::allocateArray(size_t size) {
if (__has_trivial_destructor(T)) {
ArrayPtr<T> result =
arrayPtr(reinterpret_cast<T*>(allocateBytes(
......@@ -193,7 +182,7 @@ ArrayPtr<T> Arena::allocateArray(size_t size) const {
}
template <typename T, typename... Params>
Own<T> Arena::allocateOwn(Params&&... params) const {
Own<T> Arena::allocateOwn(Params&&... params) {
T& result = *reinterpret_cast<T*>(allocateBytes(sizeof(T), alignof(T), false));
if (!__has_trivial_constructor(T) || sizeof...(Params) > 0) {
ctor(result, kj::fwd<Params>(params)...);
......@@ -202,7 +191,7 @@ Own<T> Arena::allocateOwn(Params&&... params) const {
}
template <typename T>
Array<T> Arena::allocateOwnArray(size_t size) const {
Array<T> Arena::allocateOwnArray(size_t size) {
ArrayBuilder<T> result = allocateOwnArrayBuilder<T>(size);
for (size_t i = 0; i < size; i++) {
result.add();
......@@ -211,7 +200,7 @@ Array<T> Arena::allocateOwnArray(size_t size) const {
}
template <typename T>
ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) const {
ArrayBuilder<T> Arena::allocateOwnArrayBuilder(size_t capacity) {
return ArrayBuilder<T>(
reinterpret_cast<T*>(allocateBytes(sizeof(T) * capacity, alignof(T), false)),
capacity, DestructorOnlyArrayDisposer::instance);
......