diff --git a/AK/HashTable.h b/AK/HashTable.h index 72365b5f518..188e472ff29 100644 --- a/AK/HashTable.h +++ b/AK/HashTable.h @@ -406,10 +406,14 @@ public: if (should_grow()) rehash(max(capacity() * 2, grow_capacity_at_least)); - auto [result, bucket] = lookup_for_writing(hash, move(predicate), existing_entry_behavior); + auto [result, bucket, probe_length] = lookup_for_writing(hash, move(predicate), existing_entry_behavior); switch (result) { case HashSetResult::InsertedNewEntry: + // The bucket is in a tentative state (Free, not in the ordered list). Construct the + // value first so that if the callback triggers traversal of this hash table, + // iterators skip this bucket. Commit only after construction succeeds. new (bucket.slot()) T(initialization_callback()); + commit_inserted_bucket(bucket, hash, probe_length); break; case HashSetResult::ReplacedExistingEntry: (*bucket.slot()) = T(initialization_callback()); @@ -696,23 +700,19 @@ private: struct LookupForWritingResult { HashSetResult result; BucketType& bucket; + // Only meaningful for InsertedNewEntry: the probe length to stamp when the caller commits. + size_t probe_length; }; template<typename TUnaryPredicate> ALWAYS_INLINE LookupForWritingResult lookup_for_writing(u32 const hash, TUnaryPredicate predicate, HashSetExistingEntryBehavior existing_entry_behavior) { - auto update_collection_for_new_bucket = [&](BucketType& bucket) { - if constexpr (IsOrdered) { - if (!m_collection_data.head) [[unlikely]] { - m_collection_data.head = &bucket; - } else { - bucket.previous = m_collection_data.tail; - m_collection_data.tail->next = &bucket; - } - m_collection_data.tail = &bucket; - } - }; + // NB: For InsertedNewEntry results we hand the bucket back in a tentative state: + // state=Free, not in the ordered list, and m_size unchanged. The caller must + // placement-new the value and then call commit_inserted_bucket() to finalize.
+ // Leaving state=Free across construction makes the bucket invisible to iterators, + // which is essential for ensure() callbacks that may trigger traversal. auto update_collection_for_swapped_buckets = [&](BucketType* left_bucket, BucketType* right_bucket) { if constexpr (IsOrdered) { if (m_collection_data.head == left_bucket) @@ -747,21 +747,17 @@ private: for (;;) { auto* bucket = &m_buckets[bucket_index]; - // We found a free bucket, write to it and stop + // We found a free bucket. Return it tentatively; caller commits after construction. if (bucket->state == BucketState::Free) { - bucket->state = bucket_state_for_probe_length(probe_length); - bucket->hash.set(hash); - update_collection_for_new_bucket(*bucket); - ++m_size; - return { HashSetResult::InsertedNewEntry, *bucket }; + return { HashSetResult::InsertedNewEntry, *bucket, probe_length }; } // The bucket is already used, does it have an identical value? if (bucket->hash.check(hash) && predicate(*bucket->slot())) { if (existing_entry_behavior == HashSetExistingEntryBehavior::Replace) { - return { HashSetResult::ReplacedExistingEntry, *bucket }; + return { HashSetResult::ReplacedExistingEntry, *bucket, 0 }; } - return { HashSetResult::KeptExistingEntry, *bucket }; + return { HashSetResult::KeptExistingEntry, *bucket, 0 }; } // Robin hood: if our probe length is larger (poor) than this bucket's (rich), steal its position! @@ -773,15 +769,18 @@ private: relocate_bucket(&bucket_to_move, bucket); update_collection_for_swapped_buckets(bucket, &bucket_to_move); - // Write new bucket + // Tentatively occupy the stolen slot so the displacement loop below sees it as + // used with the correct probe length. We will un-stamp it before returning so + // that the caller can commit after constructing the value.
BucketType* inserted_bucket = bucket; + size_t const inserted_probe_length = probe_length; bucket->state = bucket_state_for_probe_length(probe_length); bucket->hash.set(hash); probe_length = target_probe_length; - if constexpr (IsOrdered) + if constexpr (IsOrdered) { + bucket->previous = nullptr; bucket->next = nullptr; - update_collection_for_new_bucket(*bucket); - ++m_size; + } // Find a free bucket, swapping with smaller probe length buckets along the way for (;;) { @@ -805,7 +804,15 @@ private: } } - return { HashSetResult::InsertedNewEntry, *inserted_bucket }; + // Un-stamp the target bucket: state back to Free, not in the ordered list, + // m_size unchanged. The caller will commit after placement-new. + inserted_bucket->state = BucketState::Free; + if constexpr (IsOrdered) { + inserted_bucket->previous = nullptr; + inserted_bucket->next = nullptr; + } + + return { HashSetResult::InsertedNewEntry, *inserted_bucket, inserted_probe_length }; } // Try next bucket @@ -814,17 +821,36 @@ private: } } + // Finalizes a bucket tentatively reserved by lookup_for_writing. Must be called after + // the caller has constructed the value in bucket.slot() via placement-new.
+ ALWAYS_INLINE void commit_inserted_bucket(BucketType& bucket, u32 hash, size_t probe_length) + { + bucket.state = bucket_state_for_probe_length(probe_length); + bucket.hash.set(hash); + if constexpr (IsOrdered) { + if (!m_collection_data.head) [[unlikely]] { + m_collection_data.head = &bucket; + } else { + bucket.previous = m_collection_data.tail; + m_collection_data.tail->next = &bucket; + } + m_collection_data.tail = &bucket; + } + ++m_size; + } + template<typename U = T> ALWAYS_INLINE HashSetResult write_value(U&& value, HashSetExistingEntryBehavior existing_entry_behavior) { u32 const hash = TraitsForT::hash(value); - auto [result, bucket] = lookup_for_writing(hash, [&](auto& candidate) { return TraitsForT::equals(candidate, static_cast<U const&>(value)); }, existing_entry_behavior); + auto [result, bucket, probe_length] = lookup_for_writing(hash, [&](auto& candidate) { return TraitsForT::equals(candidate, static_cast<U const&>(value)); }, existing_entry_behavior); switch (result) { case HashSetResult::ReplacedExistingEntry: (*bucket.slot()) = forward<U>(value); break; case HashSetResult::InsertedNewEntry: new (bucket.slot()) T(forward<U>(value)); + commit_inserted_bucket(bucket, hash, probe_length); break; case HashSetResult::KeptExistingEntry: break; diff --git a/Tests/AK/TestHashMap.cpp b/Tests/AK/TestHashMap.cpp index 3ed97f22933..449bd9538b2 100644 --- a/Tests/AK/TestHashMap.cpp +++ b/Tests/AK/TestHashMap.cpp @@ -406,3 +406,34 @@ TEST_CASE(compare) first.set(2, 20); EXPECT_NE(second, first); } + +TEST_CASE(ensure_callback_sees_consistent_table) +{ + // HashMap::ensure() must run the initialization callback while the tentative + // target bucket is invisible to iterators. If the bucket were stamped as "used" + // before the callback ran, its uninitialized slot would be exposed to anything + // that iterated the map during the callback -- this was a real crash for + // HashMap<_, GC::Ref<_>>::ensure() callbacks that triggered GC marking.
+ static constexpr u64 MAGIC = 0xC0FFEECAFEBABEULL; + struct Sentinel { + u64 magic = MAGIC; + }; + + HashMap<int, Sentinel> map; + for (int i = 0; i < 100; ++i) + map.set(i, Sentinel {}); + + size_t visited = 0; + auto& value = map.ensure(9999, [&] { + for (auto& it : map) { + EXPECT_EQ(it.value.magic, MAGIC); + ++visited; + } + return Sentinel {}; + }); + + EXPECT_EQ(value.magic, MAGIC); + EXPECT_EQ(visited, 100u); + EXPECT_EQ(map.size(), 101u); +}