From 577bc7ef95c7eb2d2e02a2f7c31e8efa70a4c025 Mon Sep 17 00:00:00 2001
From: Sönke Holz
Date: Fri, 19 Dec 2025 17:27:31 +0100
Subject: [PATCH] Kernel/MM: Handle concurrent page faults properly in handle_zero_fault()

This essentially reverts 5ada38f9c3c525938ef9032b4d551d050661e2e9.

Previously, two threads could end up trying to allocate a committed
page at once, possibly resulting in a panic because we tried to
allocate more pages than we had committed.

Another problem was that a thread could incorrectly think that a page
fault was already handled. Whether a fault was already handled is
determined from the contents of the physical page slot, so this can
happen if the thread handling the fault has already set the physical
page slot to the newly allocated page but hasn't remapped the page
yet. This issue doesn't cause any problems currently: a thread that
incorrectly thinks the fault was already handled and returns early
will simply fault again until the other thread is done remapping the
page. However, a future commit will add extra assertions that check
that a fault was handled appropriately whenever we can't find a
reason for it. These assertions would trip on this.

Prevent these issues by holding the lock for a longer amount of time.
There might be a better solution to this, but that would likely
require more complex code changes.

Also modify the code in handle_fault() a bit to avoid using
should_cow() for zero faults: the checks in should_cow() can refer to
a different physical page if the fault is handled by another thread
immediately after the check.
---
 Kernel/Memory/Region.cpp   | 39 ++++++++++++++++++++++-----------------
 Kernel/Memory/VMObject.cpp |  3 ++-
 Kernel/Memory/VMObject.h   | 12 ++++++++++++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/Kernel/Memory/Region.cpp b/Kernel/Memory/Region.cpp
index fc138fc0891..4fcc79ce3b3 100644
--- a/Kernel/Memory/Region.cpp
+++ b/Kernel/Memory/Region.cpp
@@ -433,15 +433,17 @@ PageFaultResponse Region::handle_fault(PageFault const& fault)
     }
 
     if (fault.is_write()) {
+        auto phys_page = physical_page(page_index_in_region);
+        if (phys_page && (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page())) {
+            dbgln_if(PAGE_FAULT_DEBUG, "Zero page fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
+            return handle_zero_fault(page_index_in_region, *phys_page);
+        }
+
         if (should_cow(page_index_in_region)) {
             dbgln_if(PAGE_FAULT_DEBUG, "CoW page fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
-            auto phys_page = physical_page(page_index_in_region);
-            if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
-                dbgln_if(PAGE_FAULT_DEBUG, "Zero page fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
-                return handle_zero_fault(page_index_in_region, *phys_page);
-            }
             return handle_cow_fault(page_index_in_region);
         }
+
         if (vmobject().is_inode()) {
             dbgln_if(PAGE_FAULT_DEBUG, "Inode write page fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
             return handle_inode_write_fault(page_index_in_region);
@@ -462,8 +464,22 @@ PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region, PhysicalRAMPage& page_in_slot_at_time_of_fault)
     VERIFY(vmobject().is_anonymous());
 
     auto& anonymous_vmobject = static_cast<AnonymousVMObject&>(vmobject());
+
+    SpinlockLocker locker(anonymous_vmobject.m_lock);
+
+    auto& page_slot = physical_page_slot(page_index_in_region);
     auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
 
+    bool already_handled = !page_slot.is_null() && !page_slot->is_shared_zero_page() && !page_slot->is_lazy_committed_page();
+
+    if (already_handled) {
+        if (!remap_vmobject_page(page_index_in_vmobject, *page_slot)) {
+            dmesgln("MM: handle_zero_fault was unable to allocate a physical page");
+            return PageFaultResponse::OutOfMemory;
+        }
+        return PageFaultResponse::Continue;
+    }
+
     auto current_thread = Thread::current();
     if (current_thread != nullptr)
         current_thread->did_zero_fault();
@@ -483,18 +499,7 @@ PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region, PhysicalRAMPage& page_in_slot_at_time_of_fault)
         dbgln_if(PAGE_FAULT_DEBUG, "      >> ALLOCATED {}", new_physical_page->paddr());
     }
 
-    {
-        SpinlockLocker locker(anonymous_vmobject.m_lock);
-        auto& page_slot = physical_page_slot(page_index_in_region);
-        bool already_handled = !page_slot.is_null() && !page_slot->is_shared_zero_page() && !page_slot->is_lazy_committed_page();
-        if (already_handled) {
-            // Someone else already faulted in a new page in this slot. That's fine, we'll just remap with their page.
-            new_physical_page = page_slot;
-        } else {
-            // Install the newly allocated page into the VMObject.
-            page_slot = new_physical_page;
-        }
-    }
+    page_slot = new_physical_page;
 
     if (m_shared) {
         if (!anonymous_vmobject.remap_regions_one_page(page_index_in_vmobject, *new_physical_page)) {
diff --git a/Kernel/Memory/VMObject.cpp b/Kernel/Memory/VMObject.cpp
index 05f92fd2e13..e86c0b6f7ff 100644
--- a/Kernel/Memory/VMObject.cpp
+++ b/Kernel/Memory/VMObject.cpp
@@ -54,8 +54,9 @@ void VMObject::remap_regions()
 
 bool VMObject::remap_regions_one_page(size_t page_index, NonnullRefPtr<PhysicalRAMPage> page)
 {
+    VERIFY(m_lock.is_locked());
     bool success = true;
-    for_each_region([&](Region& region) {
+    for_each_region_locked([&](Region& region) {
         if (!region.remap_vmobject_page(page_index, *page))
             success = false;
     });
diff --git a/Kernel/Memory/VMObject.h b/Kernel/Memory/VMObject.h
index 5ddc42945a9..18537e3c6a0 100644
--- a/Kernel/Memory/VMObject.h
+++ b/Kernel/Memory/VMObject.h
@@ -63,6 +63,9 @@ protected:
     template<typename Callback>
     void for_each_region(Callback);
 
+    template<typename Callback>
+    void for_each_region_locked(Callback);
+
     void remap_regions_locked();
     void remap_regions();
     bool remap_regions_one_page(size_t page_index, NonnullRefPtr<PhysicalRAMPage> page);
@@ -93,4 +96,13 @@ inline void VMObject::for_each_region(Callback callback)
     }
 }
 
+template<typename Callback>
+inline void VMObject::for_each_region_locked(Callback callback)
+{
+    VERIFY(m_lock.is_locked());
+    for (auto& region : m_regions) {
+        callback(region);
+    }
+}
+
 }
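
Below is a minimal userspace sketch (not part of the patch) of the race
described in the commit message, with std::mutex and std::thread standing in
for the kernel's Spinlock and two concurrently faulting threads. CommittedPool,
g_slot and the two fault_in_zero_page_* functions are hypothetical stand-ins
for the committed page pool, the physical page slot and handle_zero_fault();
they are not SerenityOS APIs.

// race_sketch.cpp - build with: c++ -std=c++17 -pthread race_sketch.cpp
#include <cassert>
#include <mutex>
#include <optional>
#include <thread>

struct CommittedPool {
    int committed = 1; // exactly one page committed for this slot
    int take()
    {
        assert(committed > 0); // models the panic on over-allocation
        return --committed;
    }
};

static std::mutex g_lock;         // stands in for the VMObject lock
static std::optional<int> g_slot; // empty = slot still holds the shared zero page
static CommittedPool g_pool;

// Pre-patch shape: the "already handled?" check and the installation of the
// new page are separate critical sections. Two faulting threads can both pass
// the check before either installs a page, so both call take().
void fault_in_zero_page_racy()
{
    {
        std::scoped_lock guard(g_lock);
        if (g_slot.has_value())
            return; // another thread already handled this fault
    }
    int page = g_pool.take(); // both threads may get here -> assertion trips
    std::scoped_lock guard(g_lock);
    if (!g_slot.has_value())
        g_slot = page;
}

// Post-patch shape: one critical section covers the check, the allocation and
// the installation, so the second thread reliably sees the installed page.
void fault_in_zero_page_fixed()
{
    std::scoped_lock guard(g_lock);
    if (g_slot.has_value())
        return;
    g_slot = g_pool.take();
}

int main()
{
    std::thread t1(fault_in_zero_page_fixed);
    std::thread t2(fault_in_zero_page_fixed);
    t1.join();
    t2.join();
    assert(g_pool.committed == 0); // exactly one page was taken
}

The fixed variant trades lock hold time for simplicity, which matches the
commit message's reasoning: holding the lock across the check, the allocation
and the installation closes both the double-allocation window and the window
in which the slot is already filled but the page is not yet remapped.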