From a14e430320a9f1ef48f766a8f0a8fe89845aacca Mon Sep 17 00:00:00 2001 From: Hajime Tazaki Date: Wed, 9 Oct 2024 14:56:23 +0900 Subject: [PATCH] um: nommu: fix vfork stack currption Signed-off-by: Hajime Tazaki --- arch/um/kernel/process.c | 22 +++++++++++++++++----- arch/x86/um/do_syscall_64.c | 21 +++++++-------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 867eedf6d21bd0..b2e366c548d251 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -142,15 +142,27 @@ static void fork_handler(void) #ifndef CONFIG_MMU /* + * child of vfork(2) comes here. + * clone(2) also enters here but doesn't need to advance the %rsp. + * * This fork can only come from libc's vfork, which * does this: * popq %%rdx; - * call *%0; // vsyscall + * call *%rax; // zpoline => __kernel_vsyscall * pushq %%rdx; - * %rdx stores the return address which is stored - * at pt_regs[HOST_IP] at the moment. We still - * need to pop the pushed address by "call" though, - * so this is what this next line does. + * %rcx stores the return address which is stored + * at pt_regs[HOST_IP] at the moment. As child returns + * via userspace() with a jmp instruction (while parent + * does via ret instruction in __kernel_vsyscall), we + * need to pop (advance) the pushed address by "call" + * though, so this is what this next line does. + * + * As a result of vfork return in child, stack contents + * is overwritten by child (by pushq in vfork), which + * makes the parent puzzled after child returns. + * + * thus the contents should be restored before vfork/parent + * returns. this is done in do_syscall_64(). */ if (current->thread.regs.regs.gp[HOST_ORIG_AX] == __NR_vfork) current->thread.regs.regs.gp[REGS_SP_INDEX] += 8; diff --git a/arch/x86/um/do_syscall_64.c b/arch/x86/um/do_syscall_64.c index 770dd789d62703..d6ce6b5b9a1d0d 100644 --- a/arch/x86/um/do_syscall_64.c +++ b/arch/x86/um/do_syscall_64.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +//#define DEBUG 1 #include #include #include @@ -36,18 +37,10 @@ static int os_x86_arch_prctl(int pid, int option, unsigned long *arg2) } /* - * Make a copy of the stack, so the child does whatever it - * wants with it. This copy is restored before exiting from - * this function. + * save/restore the return address stored in the stack, as the child overwrites + * the contents after returning to userspace (i.e., by push %rdx). * - * (IIUC) the vfork(2) doesn't need to copy stack, and share all - * memory including stack between parent and child. so this function - * isn't needed, but some programs (in our case hush, a shell for - * nommu env of busybox) corrupts stack between vfork(2)=>execve(2). - * the hush implementation looks fine (calling immediate execve(2) - * after vfork(2)), but corrupted. - * - * so, this workaround exists. + * see the detail in fork_handler(). */ static void *vfork_save_stack(void) { @@ -59,8 +52,7 @@ static void *vfork_save_stack(void) return NULL; memcpy(stack_copy, - (void *)current->thread.regs.regs.gp[HOST_SP], - PAGE_SIZE << THREAD_SIZE_ORDER); + (void *)current->thread.regs.regs.gp[HOST_SP], 8); return stack_copy; } @@ -69,7 +61,7 @@ static void vfork_restore_stack(void *stack_copy) { WARN_ON_ONCE(!stack_copy); memcpy((void *)current->thread.regs.regs.gp[HOST_SP], - stack_copy, PAGE_SIZE << THREAD_SIZE_ORDER); + stack_copy, 8); } __visible void do_syscall_64(struct pt_regs *regs) @@ -114,6 +106,7 @@ __visible void do_syscall_64(struct pt_regs *regs) userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); } + /* only parents of vfork restores the contents of stack */ if (syscall == __NR_vfork && regs->regs.gp[HOST_AX] > 0) vfork_restore_stack(stack_copy); }