AK: Hide tentative HashTable bucket from iterators across ensure()

HashMap<_, GC::Ref<_>>::ensure() crashed under UBSan whenever the
initialization callback triggered a GC: lookup_for_writing() stamped
the target bucket as used and added it to the ordered list before the
callback ran, so the marking visitor walked the map, read the
uninitialized slot, and failed the returns_nonnull check in GC::Ref.

Split bucket reservation into two phases. lookup_for_writing() now
hands back the target in the Free state (not in the ordered list,
m_size unchanged); callers placement-new the value and then commit via
commit_inserted_bucket(). The Robin Hood displacement loop still
stamps the slot internally and un-stamps before returning, so probing
is unchanged and the whole operation still costs a single hash
computation and a single probe sequence.
This commit is contained in:
Andreas Kling
2026-04-24 19:45:23 +02:00
committed by Andreas Kling
parent 0b9636fadf
commit c66cab7e6b
Notes: github-actions[bot] 2026-04-25 04:22:37 +00:00
2 changed files with 83 additions and 26 deletions

View File

@@ -406,10 +406,14 @@ public:
if (should_grow())
rehash(max(capacity() * 2, grow_capacity_at_least));
auto [result, bucket] = lookup_for_writing<T>(hash, move(predicate), existing_entry_behavior);
auto [result, bucket, probe_length] = lookup_for_writing<T>(hash, move(predicate), existing_entry_behavior);
switch (result) {
case HashSetResult::InsertedNewEntry:
// The bucket is in a tentative state (Free, not in the ordered list). Construct the
// value first so that if the callback triggers traversal of this hash table,
// iterators skip this bucket. Commit only after construction succeeds.
new (bucket.slot()) T(initialization_callback());
commit_inserted_bucket(bucket, hash, probe_length);
break;
case HashSetResult::ReplacedExistingEntry:
(*bucket.slot()) = T(initialization_callback());
@@ -696,23 +700,19 @@ private:
struct LookupForWritingResult {
HashSetResult result;
BucketType& bucket;
// Only meaningful for InsertedNewEntry: the probe length to stamp when the caller commits.
size_t probe_length;
};
template<typename U, typename TUnaryPredicate>
ALWAYS_INLINE LookupForWritingResult lookup_for_writing(u32 const hash, TUnaryPredicate predicate,
HashSetExistingEntryBehavior existing_entry_behavior)
{
auto update_collection_for_new_bucket = [&](BucketType& bucket) {
if constexpr (IsOrdered) {
if (!m_collection_data.head) [[unlikely]] {
m_collection_data.head = &bucket;
} else {
bucket.previous = m_collection_data.tail;
m_collection_data.tail->next = &bucket;
}
m_collection_data.tail = &bucket;
}
};
// NB: For InsertedNewEntry results we hand the bucket back in a tentative state:
// state=Free, not in the ordered list, and m_size unchanged. The caller must
// placement-new the value and then call commit_inserted_bucket() to finalize.
// Leaving state=Free across construction makes the bucket invisible to iterators,
// which is essential for ensure() callbacks that may trigger traversal.
auto update_collection_for_swapped_buckets = [&](BucketType* left_bucket, BucketType* right_bucket) {
if constexpr (IsOrdered) {
if (m_collection_data.head == left_bucket)
@@ -747,21 +747,17 @@ private:
for (;;) {
auto* bucket = &m_buckets[bucket_index];
// We found a free bucket, write to it and stop
// We found a free bucket. Return it tentatively; caller commits after construction.
if (bucket->state == BucketState::Free) {
bucket->state = bucket_state_for_probe_length(probe_length);
bucket->hash.set(hash);
update_collection_for_new_bucket(*bucket);
++m_size;
return { HashSetResult::InsertedNewEntry, *bucket };
return { HashSetResult::InsertedNewEntry, *bucket, probe_length };
}
// The bucket is already used, does it have an identical value?
if (bucket->hash.check(hash) && predicate(*bucket->slot())) {
if (existing_entry_behavior == HashSetExistingEntryBehavior::Replace) {
return { HashSetResult::ReplacedExistingEntry, *bucket };
return { HashSetResult::ReplacedExistingEntry, *bucket, 0 };
}
return { HashSetResult::KeptExistingEntry, *bucket };
return { HashSetResult::KeptExistingEntry, *bucket, 0 };
}
// Robin hood: if our probe length is larger (poor) than this bucket's (rich), steal its position!
@@ -773,15 +769,18 @@ private:
relocate_bucket(&bucket_to_move, bucket);
update_collection_for_swapped_buckets(bucket, &bucket_to_move);
// Write new bucket
// Tentatively occupy the stolen slot so the displacement loop below sees it as
// used with the correct probe length. We will un-stamp it before returning so
// that the caller can commit after constructing the value.
BucketType* inserted_bucket = bucket;
size_t const inserted_probe_length = probe_length;
bucket->state = bucket_state_for_probe_length(probe_length);
bucket->hash.set(hash);
probe_length = target_probe_length;
if constexpr (IsOrdered)
if constexpr (IsOrdered) {
bucket->previous = nullptr;
bucket->next = nullptr;
update_collection_for_new_bucket(*bucket);
++m_size;
}
// Find a free bucket, swapping with smaller probe length buckets along the way
for (;;) {
@@ -805,7 +804,15 @@ private:
}
}
return { HashSetResult::InsertedNewEntry, *inserted_bucket };
// Un-stamp the target bucket: state back to Free, not in the ordered list,
// m_size unchanged. The caller will commit after placement-new.
inserted_bucket->state = BucketState::Free;
if constexpr (IsOrdered) {
inserted_bucket->previous = nullptr;
inserted_bucket->next = nullptr;
}
return { HashSetResult::InsertedNewEntry, *inserted_bucket, inserted_probe_length };
}
// Try next bucket
@@ -814,17 +821,36 @@ private:
}
}
// Finalizes a bucket tentatively reserved by lookup_for_writing. Must be called after
// the caller has constructed the value in bucket.slot() via placement-new.
// Until this runs, the bucket is state=Free, absent from the ordered list, and not
// counted in m_size, so iterators cannot observe the (possibly uninitialized) slot.
ALWAYS_INLINE void commit_inserted_bucket(BucketType& bucket, u32 hash, size_t probe_length)
{
// Stamp the bucket as occupied with its probe length, and record the hash.
bucket.state = bucket_state_for_probe_length(probe_length);
bucket.hash.set(hash);
// For ordered tables, append the bucket to the tail of the iteration list.
if constexpr (IsOrdered) {
if (!m_collection_data.head) [[unlikely]] {
m_collection_data.head = &bucket;
} else {
bucket.previous = m_collection_data.tail;
m_collection_data.tail->next = &bucket;
}
m_collection_data.tail = &bucket;
}
// The entry only counts toward the table's size once fully committed.
++m_size;
}
template<typename U = T>
ALWAYS_INLINE HashSetResult write_value(U&& value, HashSetExistingEntryBehavior existing_entry_behavior)
{
u32 const hash = TraitsForT::hash(value);
auto [result, bucket] = lookup_for_writing<U>(hash, [&](auto& candidate) { return TraitsForT::equals(candidate, static_cast<T const&>(value)); }, existing_entry_behavior);
auto [result, bucket, probe_length] = lookup_for_writing<U>(hash, [&](auto& candidate) { return TraitsForT::equals(candidate, static_cast<T const&>(value)); }, existing_entry_behavior);
switch (result) {
case HashSetResult::ReplacedExistingEntry:
(*bucket.slot()) = forward<U>(value);
break;
case HashSetResult::InsertedNewEntry:
new (bucket.slot()) T(forward<U>(value));
commit_inserted_bucket(bucket, hash, probe_length);
break;
case HashSetResult::KeptExistingEntry:
break;

View File

@@ -406,3 +406,34 @@ TEST_CASE(compare)
first.set(2, 20);
EXPECT_NE(second, first);
}
TEST_CASE(ensure_callback_sees_consistent_table)
{
    // Regression test: while HashMap::ensure() runs its initialization callback, the
    // freshly reserved bucket must remain invisible to iteration. If the bucket were
    // stamped "used" before the callback ran, any traversal performed inside the
    // callback would read the uninitialized slot -- the exact crash previously hit by
    // HashMap<_, GC::Ref<_>>::ensure() callbacks that triggered GC marking.
    static constexpr u64 MAGIC = 0xC0FFEECAFEBABEULL;
    struct Sentinel {
        u64 magic = MAGIC;
    };

    HashMap<int, Sentinel> map;
    for (int key = 0; key < 100; ++key)
        map.set(key, Sentinel {});

    size_t entries_seen = 0;
    auto& value = map.ensure(9999, [&] {
        // Every entry visible during the callback must be fully constructed,
        // and the tentative bucket for key 9999 must not show up at all.
        for (auto& entry : map) {
            EXPECT_EQ(entry.value.magic, MAGIC);
            ++entries_seen;
        }
        return Sentinel {};
    });

    EXPECT_EQ(value.magic, MAGIC);
    EXPECT_EQ(entries_seen, 100u);
    EXPECT_EQ(map.size(), 101u);
}