Skip to content

Commit

Permalink
userfaultfd: non-cooperative: add event for memory unmaps
Browse files Browse the repository at this point in the history
When a non-cooperative userfaultfd monitor copies pages in the
background, it may encounter regions that were already unmapped.
Addition of UFFD_EVENT_UNMAP allows the uffd monitor to track precisely
changes in the virtual memory layout.

Since there might be different uffd contexts for the affected VMAs, we
first should create a temporary representation for the unmap event for
each uffd context and then notify them one by one to the appropriate
userfault file descriptors.

The event notification occurs after the mmap_sem has been released.

[arnd@arndb.de: fix nommu build]
  Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de
[mhocko@suse.com: fix nommu build]
  Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz
Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
rppt authored and torvalds committed Feb 25, 2017
1 parent 846b1a0 commit 897ab3e
Show file tree
Hide file tree
Showing 15 changed files with 160 additions and 45 deletions.
2 changes: 1 addition & 1 deletion arch/mips/kernel/vdso.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ|VM_WRITE|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
0);
0, NULL);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
Expand Down
2 changes: 1 addition & 1 deletion arch/tile/mm/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
unsigned long addr = MEM_USER_INTRPT;
addr = mmap_region(NULL, addr, INTRPT_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL);
if (addr > (unsigned long) -PAGE_SIZE)
retval = (int) addr;
}
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/entry/vdso/vma.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)

if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size);
do_munmap(mm, text_start, image->size, NULL);
} else {
current->mm->context.vdso = (void __user *)text_start;
current->mm->context.vdso_image = image;
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/mm/mpx.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len)

down_write(&mm->mmap_sem);
addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate);
MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
up_write(&mm->mmap_sem);
if (populate)
mm_populate(addr, populate);
Expand Down Expand Up @@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm,
* avoid recursion, do_munmap() will check whether it comes
* from one bounds table through VM_MPX flag.
*/
return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
}

static int try_unmap_single_bt(struct mm_struct *mm,
Expand Down
2 changes: 1 addition & 1 deletion fs/aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx)

ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
MAP_SHARED, 0, &unused);
MAP_SHARED, 0, &unused, NULL);
up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
ctx->mmap_size = 0;
Expand Down
4 changes: 2 additions & 2 deletions fs/proc/vmcore.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
}
return 0;
fail:
do_munmap(vma->vm_mm, from, len);
do_munmap(vma->vm_mm, from, len, NULL);
return -EAGAIN;
}

Expand Down Expand Up @@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)

return 0;
fail:
do_munmap(vma->vm_mm, vma->vm_start, len);
do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
return -EAGAIN;
}
#else
Expand Down
65 changes: 65 additions & 0 deletions fs/userfaultfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx {
struct list_head list;
};

struct userfaultfd_unmap_ctx {
struct userfaultfd_ctx *ctx;
unsigned long start;
unsigned long end;
struct list_head list;
};

struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
Expand Down Expand Up @@ -709,6 +716,64 @@ void userfaultfd_remove(struct vm_area_struct *vma,
down_read(&mm->mmap_sem);
}

static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
unsigned long start, unsigned long end)
{
struct userfaultfd_unmap_ctx *unmap_ctx;

list_for_each_entry(unmap_ctx, unmaps, list)
if (unmap_ctx->ctx == ctx && unmap_ctx->start == start &&
unmap_ctx->end == end)
return true;

return false;
}

int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *unmaps)
{
for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
struct userfaultfd_unmap_ctx *unmap_ctx;
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;

if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
has_unmap_ctx(ctx, unmaps, start, end))
continue;

unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
if (!unmap_ctx)
return -ENOMEM;

userfaultfd_ctx_get(ctx);
unmap_ctx->ctx = ctx;
unmap_ctx->start = start;
unmap_ctx->end = end;
list_add_tail(&unmap_ctx->list, unmaps);
}

return 0;
}

void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf)
{
struct userfaultfd_unmap_ctx *ctx, *n;
struct userfaultfd_wait_queue ewq;

list_for_each_entry_safe(ctx, n, uf, list) {
msg_init(&ewq.msg);

ewq.msg.event = UFFD_EVENT_UNMAP;
ewq.msg.arg.remove.start = ctx->start;
ewq.msg.arg.remove.end = ctx->end;

userfaultfd_event_wait_completion(ctx->ctx, &ewq);

list_del(&ctx->list);
kfree(ctx);
}
}

static int userfaultfd_release(struct inode *inode, struct file *file)
{
struct userfaultfd_ctx *ctx = file->private_data;
Expand Down
14 changes: 9 additions & 5 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2090,18 +2090,22 @@ extern int install_special_mapping(struct mm_struct *mm,
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
struct list_head *uf);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);

static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate)
unsigned long pgoff, unsigned long *populate,
struct list_head *uf)
{
return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
}

#ifdef CONFIG_MMU
Expand Down
18 changes: 18 additions & 0 deletions include/linux/userfaultfd_k.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ extern void userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);

extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);

#else /* CONFIG_USERFAULTFD */

/* mm helpers */
Expand Down Expand Up @@ -118,6 +124,18 @@ static inline void userfaultfd_remove(struct vm_area_struct *vma,
unsigned long end)
{
}

static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf)
{
return 0;
}

static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf)
{
}
#endif /* CONFIG_USERFAULTFD */

#endif /* _LINUX_USERFAULTFD_K_H */
3 changes: 3 additions & 0 deletions include/uapi/linux/userfaultfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \
UFFD_FEATURE_EVENT_REMAP | \
UFFD_FEATURE_EVENT_REMOVE | \
UFFD_FEATURE_EVENT_UNMAP | \
UFFD_FEATURE_MISSING_HUGETLBFS | \
UFFD_FEATURE_MISSING_SHMEM)
#define UFFD_API_IOCTLS \
Expand Down Expand Up @@ -110,6 +111,7 @@ struct uffd_msg {
#define UFFD_EVENT_FORK 0x13
#define UFFD_EVENT_REMAP 0x14
#define UFFD_EVENT_REMOVE 0x15
#define UFFD_EVENT_UNMAP 0x16

/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
Expand Down Expand Up @@ -158,6 +160,7 @@ struct uffdio_api {
#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
__u64 features;

__u64 ioctls;
Expand Down
8 changes: 4 additions & 4 deletions ipc/shm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
goto invalid;
}

addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
*raddr = addr;
err = 0;
if (IS_ERR_VALUE(addr))
Expand Down Expand Up @@ -1329,7 +1329,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
*/
file = vma->vm_file;
size = i_size_read(file_inode(vma->vm_file));
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
/*
* We discovered the size of the shm segment, so
* break out of here and fall through to the next
Expand All @@ -1356,7 +1356,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
if ((vma->vm_ops == &shm_vm_ops) &&
((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
(vma->vm_file == file))
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
vma = next;
}

Expand All @@ -1365,7 +1365,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
* given
*/
if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
retval = 0;
}

Expand Down
Loading

0 comments on commit 897ab3e

Please sign in to comment.