Files
serenity/Kernel/Syscalls/fork.cpp
Liav A. b93ca74d81 Kernel: Add a prctl option to enter jail mode until an execve syscall
In addition to the existing option to enter jail mode (which is set
indefinitely), there should be a less restrictive option that allows
a process to exit jail mode when it performs the execve syscall.

This option is useful for programs that need this kind of security
layer only while they themselves are running, but whose purpose is to
eventually launch another program.
2024-10-03 12:39:45 +02:00

171 lines
7.0 KiB
C++

/*
* Copyright (c) 2018-2023, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2023, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Debug.h>
#include <Kernel/Devices/TTY/TTY.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Tasks/PerformanceManager.h>
#include <Kernel/Tasks/Process.h>
#include <Kernel/Tasks/Scheduler.h>
#include <Kernel/Tasks/ScopedProcessList.h>
namespace Kernel {
// Implements the fork syscall for the calling process.
//
// Creates a child process via Process::create_with_forked_name() and copies into it
// the parent's unveil state, file descriptors, protected data, signal mask,
// alternate signal stack, register state and memory regions. The child is then
// registered and its first thread is made runnable.
//
// Parameters:
//   regs - register state used to seed the child's first thread.
//          NOTE(review): presumably the caller's userspace registers captured at
//          syscall entry; confirm against the syscall dispatcher.
//
// Returns the child's PID (this is the value the parent sees). The child's own
// return-value register is forced to 0 in its saved register state. Any error
// produced by a TRY() step is propagated to the caller.
ErrorOr<FlatPtr> Process::sys$fork(RegisterState& regs)
{
// NOTE(review): presumably asserts that the process big lock is not held while
// running this syscall; confirm against the macro definition.
VERIFY_NO_PROCESS_BIG_LOCK(this);
// Bail out early if the Pledge::proc promise check fails.
TRY(require_promise(Pledge::proc));
auto credentials = this->credentials();
// The child is created from the caller's uid/gid, PID, kernel-process flag, VFS root
// and hostname contexts, current directory, executable and TTY.
auto child_and_first_thread = TRY(Process::create_with_forked_name(credentials->uid(), credentials->gid(), pid(), m_is_kernel_process, vfs_root_context(), hostname_context(), current_directory(), executable(), tty(), this));
auto& child = child_and_first_thread.process;
auto& child_first_thread = child_and_first_thread.first_thread;
// Cleanup for early returns: while this guard is armed, leaving the function
// (e.g. through a failing TRY below) detaches the child's first thread and marks
// it Dying under g_scheduler_lock. It is disarmed after the last fallible step.
ArmedScopeGuard thread_finalizer_guard = [&child_first_thread]() {
SpinlockLocker lock(g_scheduler_lock);
child_first_thread->detach();
child_first_thread->set_state(Thread::State::Dying);
};
// NOTE: All user processes have a leaked ref on them. It's balanced by Thread::WaitBlockerSet::finalize().
child->ref();
// Copy the unveil state and deep-copy the unveiled paths into the child.
// The parent's unveil data is accessed first, then the child's.
TRY(m_unveil_data.with([&](auto& parent_unveil_data) -> ErrorOr<void> {
return child->m_unveil_data.with([&](auto& child_unveil_data) -> ErrorOr<void> {
child_unveil_data.state = parent_unveil_data.state;
child_unveil_data.paths = TRY(parent_unveil_data.paths.deep_copy());
return {};
});
}));
// Same as above, for the separate exec unveil data.
TRY(m_exec_unveil_data.with([&](auto& parent_exec_unveil_data) -> ErrorOr<void> {
return child->m_exec_unveil_data.with([&](auto& child_exec_unveil_data) -> ErrorOr<void> {
child_exec_unveil_data.state = parent_exec_unveil_data.state;
child_exec_unveil_data.paths = TRY(parent_exec_unveil_data.paths.deep_copy());
return {};
});
}));
// Clone the parent's file descriptor table into the child's. Note that exclusive
// access is taken on the child's table first and on the parent's second.
TRY(child->m_fds.with_exclusive([&](auto& child_fds) {
return m_fds.with_exclusive([&](auto& parent_fds) {
return child_fds.try_clone(parent_fds);
});
}));
// Copy the inherited fields of the parent's protected data into the child.
with_protected_data([&](auto& my_protected_data) {
child->with_mutable_protected_data([&](auto& child_protected_data) {
child_protected_data.promises = my_protected_data.promises;
child_protected_data.execpromises = my_protected_data.execpromises;
child_protected_data.has_promises = my_protected_data.has_promises;
child_protected_data.has_execpromises = my_protected_data.has_execpromises;
child_protected_data.credentials = my_protected_data.credentials;
child_protected_data.umask = my_protected_data.umask;
child_protected_data.signal_trampoline = my_protected_data.signal_trampoline;
child_protected_data.dumpable = my_protected_data.dumpable;
child_protected_data.process_group = my_protected_data.process_group;
// NOTE: Propagate the jailed_until_exit property to child processes.
// The jailed_until_exec property is also propagated, but will be
// set to false once the child process calls the execve syscall.
// jailed_until_exit is only ever set here, never cleared: the child's flag is
// left untouched when the parent's was not set.
if (my_protected_data.jailed_until_exit.was_set())
child_protected_data.jailed_until_exit.set();
child_protected_data.jailed_until_exec = my_protected_data.jailed_until_exec;
});
});
dbgln_if(FORK_DEBUG, "fork: child={}", child);
// A child created via fork(2) inherits a copy of its parent's signal mask
child_first_thread->update_signal_mask(Thread::current()->signal_mask());
// A child process created via fork(2) inherits a copy of its parent's alternate signal stack settings.
child_first_thread->m_alternative_signal_stack = Thread::current()->m_alternative_signal_stack;
// Seed the child's first-thread registers from regs, per architecture. In every
// branch the register carrying the syscall return value is set to 0 instead of
// being copied.
auto& child_regs = child_first_thread->m_regs;
#if ARCH(X86_64)
child_regs.rax = 0; // fork() returns 0 in the child :^)
child_regs.rbx = regs.rbx;
child_regs.rcx = regs.rcx;
child_regs.rdx = regs.rdx;
child_regs.rbp = regs.rbp;
// The child's stack pointer comes from regs.userspace_rsp, not from a plain rsp field.
child_regs.rsp = regs.userspace_rsp;
child_regs.rsi = regs.rsi;
child_regs.rdi = regs.rdi;
child_regs.r8 = regs.r8;
child_regs.r9 = regs.r9;
child_regs.r10 = regs.r10;
child_regs.r11 = regs.r11;
child_regs.r12 = regs.r12;
child_regs.r13 = regs.r13;
child_regs.r14 = regs.r14;
child_regs.r15 = regs.r15;
child_regs.rflags = regs.rflags;
child_regs.rip = regs.rip;
child_regs.cs = regs.cs;
dbgln_if(FORK_DEBUG, "fork: child will begin executing at {:#04x}:{:p} with stack {:p}, kstack {:p}",
child_regs.cs, child_regs.rip, child_regs.rsp, child_regs.rsp0);
#elif ARCH(AARCH64)
child_regs.x[0] = 0; // fork() returns 0 in the child :^)
// x[0] was set above, so copying starts at index 1.
for (size_t i = 1; i < array_size(child_regs.x); ++i)
child_regs.x[i] = regs.x[i];
child_regs.spsr_el1 = regs.spsr_el1;
child_regs.elr_el1 = regs.elr_el1;
child_regs.sp_el0 = regs.sp_el0;
child_regs.tpidr_el0 = regs.tpidr_el0;
#elif ARCH(RISCV64)
// Copy every general-purpose register first, then overwrite the return-value slot.
for (size_t i = 0; i < array_size(child_regs.x); ++i)
child_regs.x[i] = regs.x[i];
// NOTE(review): index 9 presumably corresponds to a0 (x10) because the array
// does not store x0; confirm against the RISC-V RegisterState layout.
child_regs.x[9] = 0; // fork() returns 0 in the child :^)
child_regs.sstatus = regs.sstatus;
// The child's pc is taken from the parent's sepc.
child_regs.pc = regs.sepc;
dbgln_if(FORK_DEBUG, "fork: child will begin executing at {:p} with stack {:p}, kstack {:p}",
child_regs.pc, child_regs.sp(), child_regs.kernel_sp);
#else
# error Unknown architecture
#endif
// Clone the parent's address space into the child, region by region. The parent's
// address space is accessed first, then the child's.
TRY(address_space().with([&](auto& parent_space) {
return child->address_space().with([&](auto& child_space) -> ErrorOr<void> {
if (parent_space->enforces_syscall_regions())
child_space->set_enforces_syscall_regions();
for (auto& region : parent_space->region_tree().regions()) {
dbgln_if(FORK_DEBUG, "fork: cloning Region '{}' @ {}", region.name(), region.vaddr());
auto region_clone = TRY(region.try_clone());
// Map the clone into the child's page directory (no TLB flush requested), then
// place it in the child's region tree at the same range as the parent's region.
TRY(region_clone->map(child_space->page_directory(), Memory::ShouldFlushTLB::No));
TRY(child_space->region_tree().place_specifically(*region_clone, region.range()));
// Give up the owning pointer without destroying the region.
// NOTE(review): presumably the child's region tree now keeps the region alive; confirm.
(void)region_clone.leak_ptr();
}
return {};
});
}));
// No TRY() follows this point, so the failure cleanup is no longer needed.
thread_finalizer_guard.disarm();
// If the parent has a scoped process list, share the same list with the child and
// attach the child to it.
m_scoped_process_list.with([&](auto& list_ptr) {
if (list_ptr) {
child->m_scoped_process_list.with([&](auto& child_list_ptr) {
child_list_ptr = list_ptr;
});
list_ptr->attach(*child);
}
});
Process::register_new(*child);
PerformanceManager::add_process_created_event(*child);
// g_scheduler_lock is held from here until the function returns: the child's first
// thread inherits the current thread's affinity and is then marked Runnable.
SpinlockLocker lock(g_scheduler_lock);
child_first_thread->set_affinity(Thread::current()->affinity());
child_first_thread->set_state(Thread::State::Runnable);
// The caller (parent) gets the child's PID as the syscall result.
auto child_pid = child->pid().value();
return child_pid;
}
}