From 465a454f254ee2ff7acc4aececbe31f8af046bc0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Jun 2009 12:31:37 +0200
Subject: [PATCH] x86, mm: Add __get_user_pages_fast()

Introduce a gup_fast() variant which is usable from IRQ/NMI context.

Signed-off-by: Peter Zijlstra
Cc: Nick Piggin
Cc: Mike Galbraith
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
LKML-Reference:
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/gup.c  | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  6 ++++++
 2 files changed, 62 insertions(+)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 6340cef6798a..697d5727c119 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed on x86.
+	 *
+	 * So long as we atomically load page table pointers versus teardown
+	 * (which we do on x86, with the above PAE exception), we can follow the
+	 * address down to the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad613ed66ab0..b457bc047ab1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -862,6 +862,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 
+/*
+ * doesn't attempt to fault and will return short.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages);
+
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
-- 
2.20.1
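
For illustration, here is the kind of caller this makes possible: copying
user memory from NMI context by pinning one page at a time with
__get_user_pages_fast() and returning short when a page cannot be pinned.
This sketch is modeled on the copy_from_user_nmi() helper the perf code
grew around the same time; the kmap slot handling (KM_NMI vs. KM_IRQ0)
and the exact signatures are assumptions about the 2.6.31-era APIs, not
part of the patch above.

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/string.h>

static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	int type = in_nmi() ? KM_NMI : KM_IRQ0;	/* assumed KM_* slots */
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		/* Pin exactly one page; a zero return means stop early. */
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map + offset, size);
		kunmap_atomic(map, type);
		put_page(page);		/* drop the reference GUP took */

		len  += size;
		to   += size;
		addr += size;
	} while (len < n);

	return len;		/* bytes copied; may be less than n */
}

Because __get_user_pages_fast() never falls back to the sleeping slow
path, a short return here is normal whenever a page is not present, which
is exactly the behaviour an NMI-context caller wants.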