Mirror of https://github.com/LadybirdBrowser/ladybird
Use a direct anonymous mapping for POSIX BlockAllocator chunks, and trim any temporary padding needed to make the live 2 MiB chunk HeapBlock-aligned. The GC only needs each 16 KiB HeapBlock slot aligned so that from_cell() can recover the block base by masking low bits. Request that alignment from mach_vm_map() as well, rather than aligning whole chunks to 2 MiB.
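To illustrate the invariant this relies on, here is a minimal sketch (a hypothetical helper, assuming BLOCK_SIZE is a power of two; the real lookup is HeapBlock::from_cell()):

    // Any pointer into a 16 KiB-aligned HeapBlock can recover the block's
    // base address by clearing the low bits -- no 2 MiB chunk alignment is
    // required for this to work.
    static GC::HeapBlock* block_containing(void const* cell)
    {
        auto address = reinterpret_cast<uintptr_t>(cell);
        return reinterpret_cast<GC::HeapBlock*>(address & ~static_cast<uintptr_t>(GC::HeapBlock::BLOCK_SIZE - 1));
    }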
/*
 * Copyright (c) 2021-2023, Andreas Kling <andreas@ladybird.org>
 * Copyright (c) 2025, Ryszard Goc <ryszardgoc@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Assertions.h>
#include <AK/NeverDestroyed.h>
#include <AK/Platform.h>
#include <AK/Vector.h>
#include <LibGC/BlockAllocator.h>
#include <LibGC/HeapBlock.h>
#include <LibThreading/Thread.h>
#include <sys/mman.h>

#if defined(AK_OS_MACOS)
#    include <mach/mach.h>
#    include <mach/mach_vm.h>
#endif

#ifdef HAS_ADDRESS_SANITIZER
#    include <sanitizer/asan_interface.h>
#    include <sanitizer/lsan_interface.h>
#endif

#if defined(AK_OS_WINDOWS)
#    include <AK/Windows.h>
#    include <memoryapi.h>
#else
#    include <sched.h>
#    include <unistd.h>
#endif

namespace GC {

// Each BlockAllocator carves its 16 KiB HeapBlock slots out of 2 MiB
// chunks. Chunks are owned exclusively by a single BlockAllocator and are
// never released back to the OS or shared across allocators -- the heap's
// VM is permanently type-isolated.
//
// Per-block madvise() is deferred to a single global background "decommit
// worker" so it never costs us GC pause time, and slots that are recycled
// before the worker sees them skip the madvise pair entirely.
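//
// Slot lifecycle, summarizing the protocol implemented below:
//
//   deallocate_block()  --> m_freshly_freed   (bookkeeping only, no syscalls)
//   decommit worker     :   m_freshly_freed --madvise()--> m_blocks
//   allocate_block()    <-- m_freshly_freed   (hot recycle, no madvise)
//   allocate_block()    <-- m_blocks          (MADV_FREE_REUSE first on macOS)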
static constexpr size_t CHUNK_SIZE = 2 * MiB;
static constexpr size_t BLOCKS_PER_CHUNK = CHUNK_SIZE / HeapBlock::BLOCK_SIZE;
static_assert((HeapBlock::BLOCK_SIZE & (HeapBlock::BLOCK_SIZE - 1)) == 0);
static_assert(CHUNK_SIZE % HeapBlock::BLOCK_SIZE == 0);
static_assert(BLOCKS_PER_CHUNK == 128);

#if !defined(AK_OS_MACOS) && !defined(AK_OS_WINDOWS)
static auto const s_page_size = [] {
    auto page_size_result = sysconf(_SC_PAGESIZE);
    VERIFY(page_size_result > 0);
    return static_cast<size_t>(page_size_result);
}();

static void* allocate_chunk_with_aligned_heap_blocks()
{
    auto const page_size = s_page_size;
    VERIFY((page_size & (page_size - 1)) == 0);
    VERIFY(HeapBlock::BLOCK_SIZE % page_size == 0 || page_size % HeapBlock::BLOCK_SIZE == 0);
    VERIFY(CHUNK_SIZE % page_size == 0);

    auto const extra_size = page_size < HeapBlock::BLOCK_SIZE ? HeapBlock::BLOCK_SIZE - page_size : 0;
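    // Worked example (assuming hypothetical 4 KiB pages): extra_size is
    // 16 KiB - 4 KiB = 12 KiB, so we map 2 MiB + 12 KiB. A page-aligned base
    // sits at most 12 KiB below the next 16 KiB boundary, so the aligned
    // 2 MiB chunk always fits; the left/right trims below unmap the slack.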
    auto* mapped = mmap(nullptr, CHUNK_SIZE + extra_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    VERIFY(mapped != MAP_FAILED);

    auto const mapped_address = reinterpret_cast<FlatPtr>(mapped);
    auto const aligned_address = align_up_to(mapped_address, HeapBlock::BLOCK_SIZE);
    VERIFY(aligned_address % HeapBlock::BLOCK_SIZE == 0);

    auto const left_padding = aligned_address - mapped_address;
    if (left_padding > 0) {
        VERIFY(left_padding % page_size == 0);
        if (munmap(mapped, left_padding) < 0) {
            perror("munmap");
            VERIFY_NOT_REACHED();
        }
    }

    auto const mapped_end = mapped_address + CHUNK_SIZE + extra_size;
    auto const chunk_end = aligned_address + CHUNK_SIZE;
    VERIFY(chunk_end <= mapped_end);

    auto const right_padding = mapped_end - chunk_end;
    if (right_padding > 0) {
        VERIFY(right_padding % page_size == 0);
        if (munmap(reinterpret_cast<void*>(chunk_end), right_padding) < 0) {
            perror("munmap");
            VERIFY_NOT_REACHED();
        }
    }

    return reinterpret_cast<void*>(aligned_address);
}
#endif

static void madvise_block_for_decommit(void* block)
{
#if defined(AK_OS_WINDOWS)
    DWORD ret = DiscardVirtualMemory(block, HeapBlock::BLOCK_SIZE);
    if (ret != ERROR_SUCCESS) {
        warnln("{}", Error::from_windows_error(ret));
        VERIFY_NOT_REACHED();
    }
#elif defined(MADV_FREE_REUSE) && defined(MADV_FREE_REUSABLE)
    // macOS uses the paired MADV_FREE_REUSABLE/MADV_FREE_REUSE protocol,
    // which keeps the kernel's RSS accounting for reclaimed pages accurate.
    if (madvise(block, HeapBlock::BLOCK_SIZE, MADV_FREE_REUSABLE) < 0) {
        perror("madvise(MADV_FREE_REUSABLE)");
        VERIFY_NOT_REACHED();
    }
#elif defined(MADV_DONTNEED)
    // Prefer MADV_DONTNEED over MADV_FREE on Linux: MADV_FREE is lazy and
    // only releases pages under memory pressure, which can leave freed blocks
    // counted in RSS arbitrarily long after a busy phase goes idle.
    if (madvise(block, HeapBlock::BLOCK_SIZE, MADV_DONTNEED) < 0) {
        perror("madvise(MADV_DONTNEED)");
        VERIFY_NOT_REACHED();
    }
#elif defined(MADV_FREE)
    if (madvise(block, HeapBlock::BLOCK_SIZE, MADV_FREE) < 0) {
        perror("madvise(MADV_FREE)");
        VERIFY_NOT_REACHED();
    }
#endif
}

static void sleep_before_decommit()
{
#if defined(AK_OS_WINDOWS)
    Sleep(50);
#else
    usleep(50 * 1000);
#endif
}

static void yield_during_decommit()
{
#if defined(AK_OS_WINDOWS)
    Sleep(0);
#else
    sched_yield();
#endif
}
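
// Process-global background worker that performs the deferred madvise()
// calls on behalf of every BlockAllocator; see the file-level comment above.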
class DecommitWorker {
public:
    static DecommitWorker& the();

    void register_pending(BlockAllocator&);
    void deregister(BlockAllocator&);
    void kick();

    DecommitWorker();

private:
    void run();
    void process_one(BlockAllocator&);

    Sync::Mutex m_mutex;
    Sync::ConditionVariable m_cv { m_mutex };
    RefPtr<Threading::Thread> m_thread;
    Vector<BlockAllocator*> m_pending;
    bool m_kicked { false };
};

DecommitWorker& DecommitWorker::the()
{
    static AK::NeverDestroyed<DecommitWorker> instance;
    return *instance;
}

DecommitWorker::DecommitWorker()
{
    m_thread = Threading::Thread::construct("DecommitWorker"sv, [this] {
        run();
        return static_cast<intptr_t>(0);
    });
    m_thread->start();
    m_thread->detach();
}

void DecommitWorker::register_pending(BlockAllocator& a)
{
    Sync::MutexLocker locker(m_mutex);
    m_pending.append(&a);
}

void DecommitWorker::deregister(BlockAllocator& a)
{
    Sync::MutexLocker locker(m_mutex);
    m_pending.remove_first_matching([&](auto* p) { return p == &a; });
}

void DecommitWorker::kick()
{
    {
        Sync::MutexLocker locker(m_mutex);
        m_kicked = true;
    }
    m_cv.signal();
}

void DecommitWorker::run()
{
    while (true) {
        Vector<BlockAllocator*> snapshot;
        {
            Sync::MutexLocker locker(m_mutex);
            while (!m_kicked)
                m_cv.wait();
            m_kicked = false;
            snapshot = move(m_pending);
            // Pin every allocator we're about to process so destructors
            // block until we drop our reference.
            for (auto* a : snapshot)
                a->m_worker_refcount.fetch_add(1);
        }

        if (snapshot.is_empty())
            continue;

        // Stagger: give the JS thread some breathing room after the kick
        // (typically right after sweep ends) before we consume CPU and
        // syscall bandwidth.
        sleep_before_decommit();

        for (auto* a : snapshot) {
            process_one(*a);
            int prev_refcount = a->m_worker_refcount.fetch_sub(1);
            if (prev_refcount == 1) {
                Sync::MutexLocker locker(a->m_mutex);
                a->m_worker_cv.broadcast();
            }
        }
    }
}

void DecommitWorker::process_one(BlockAllocator& a)
{
    Vector<void*> to_process;
    {
        Sync::MutexLocker locker(a.m_mutex);
        a.m_in_decommit_registry = false;
        to_process = move(a.m_freshly_freed);
    }

    // Madvise each slot outside the per-allocator lock so the JS thread can
    // continue to allocate/free; yield every 64 slots to avoid hogging the
    // kernel's mm subsystem.
    constexpr size_t BATCH = 64;
    for (size_t i = 0; i < to_process.size(); ++i) {
        madvise_block_for_decommit(to_process[i]);
        if ((i + 1) % BATCH == 0)
            yield_during_decommit();
    }

    {
        Sync::MutexLocker locker(a.m_mutex);
        for (auto* slot : to_process)
            a.m_blocks.append(slot);
    }
}

void BlockAllocator::wake_decommit_worker_async()
{
    DecommitWorker::the().kick();
}

BlockAllocator::BlockAllocator()
    : m_worker_cv(m_mutex)
{
}

BlockAllocator::~BlockAllocator()
{
    // Chunks are permanent -- we never tear them down. The destructor only
    // exists to make sure the global decommit worker has finished any
    // in-flight processing of *this before our storage goes away.
    DecommitWorker::the().deregister(*this);

    Sync::MutexLocker locker(m_mutex);
    while (m_worker_refcount.load() != 0)
        m_worker_cv.wait();
}

size_t BlockAllocator::block_count()
{
    Sync::MutexLocker locker(m_mutex);
    return m_blocks.size();
}

void* BlockAllocator::allocate_block([[maybe_unused]] char const* name)
{
    void* block = nullptr;
    bool needs_madvise_reuse = false;

    {
        Sync::MutexLocker locker(m_mutex);

        // Prefer m_freshly_freed: those slots were never madvised, so we
        // can hand them back out with zero syscalls. This is the
        // deferred-decommit payoff -- hot recycle skips both
        // MADV_FREE_REUSABLE and MADV_FREE_REUSE.
        if (!m_freshly_freed.is_empty()) {
            block = m_freshly_freed.take_last();
        } else if (!m_blocks.is_empty()) {
            block = m_blocks.take_last();
            needs_madvise_reuse = true;
        }
    }

    if (block == nullptr) {
        // Both pools empty: allocate a fresh 2 MiB chunk and slice it.
        void* chunk_base = nullptr;
#if defined(AK_OS_MACOS)
        mach_vm_address_t address = 0;
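        // The mask argument to mach_vm_map() constrains address alignment:
        // passing HeapBlock::BLOCK_SIZE - 1 asks the kernel for a 16 KiB-
        // aligned address directly, so macOS needs no over-allocate-and-trim
        // pass like the generic POSIX path above.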
        kern_return_t kr = mach_vm_map(
            mach_task_self(),
            &address,
            CHUNK_SIZE,
            HeapBlock::BLOCK_SIZE - 1,
            VM_FLAGS_ANYWHERE,
            MEMORY_OBJECT_NULL,
            0,
            false,
            VM_PROT_READ | VM_PROT_WRITE,
            VM_PROT_READ | VM_PROT_WRITE,
            VM_INHERIT_DEFAULT);
        VERIFY(kr == KERN_SUCCESS);
        chunk_base = reinterpret_cast<void*>(address);
#elif defined(AK_OS_WINDOWS)
        chunk_base = VirtualAlloc(nullptr, CHUNK_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
        VERIFY(chunk_base);
#else
        chunk_base = allocate_chunk_with_aligned_heap_blocks();
#endif

#if defined(MADV_FREE_REUSE) && defined(MADV_FREE_REUSABLE)
        // Mark the whole chunk reusable upfront so MADV_FREE_REUSE pairs
        // symmetrically when slots are popped from m_blocks later. (On Linux
        // and Windows this block compiles out.)
        if (madvise(chunk_base, CHUNK_SIZE, MADV_FREE_REUSABLE) < 0) {
            perror("madvise(MADV_FREE_REUSABLE)");
            VERIFY_NOT_REACHED();
        }
#endif

        ASAN_POISON_MEMORY_REGION(chunk_base, CHUNK_SIZE);

        Sync::MutexLocker locker(m_mutex);
        for (size_t i = 0; i < BLOCKS_PER_CHUNK; ++i)
            m_blocks.append(static_cast<u8*>(chunk_base) + i * HeapBlock::BLOCK_SIZE);
        block = m_blocks.take_last();
        needs_madvise_reuse = true;
    }

    ASAN_UNPOISON_MEMORY_REGION(block, HeapBlock::BLOCK_SIZE);
    LSAN_REGISTER_ROOT_REGION(block, HeapBlock::BLOCK_SIZE);
#if defined(MADV_FREE_REUSE) && defined(MADV_FREE_REUSABLE)
    if (needs_madvise_reuse) {
        if (madvise(block, HeapBlock::BLOCK_SIZE, MADV_FREE_REUSE) < 0) {
            perror("madvise(MADV_FREE_REUSE)");
            VERIFY_NOT_REACHED();
        }
    }
#else
    (void)needs_madvise_reuse;
#endif
    return block;
}

void BlockAllocator::deallocate_block(void* block)
{
    VERIFY(block);

    // Fast path: bookkeep only. The actual madvise is deferred to the
    // global decommit worker, which the GC kicks at the end of sweep.
    ASAN_POISON_MEMORY_REGION(block, HeapBlock::BLOCK_SIZE);
    LSAN_UNREGISTER_ROOT_REGION(block, HeapBlock::BLOCK_SIZE);

    bool need_to_register = false;
    {
        Sync::MutexLocker locker(m_mutex);
        m_freshly_freed.append(block);
        if (!m_in_decommit_registry) {
            m_in_decommit_registry = true;
            need_to_register = true;
        }
    }
    if (need_to_register)
        DecommitWorker::the().register_pending(*this);
}

}