mm: extended batches for generic mmu_gather
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 25 May 2011 00:12:01 +0000 (17:12 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 May 2011 15:39:16 +0000 (08:39 -0700)
Instead of using a single batch (the small on-stack array, or an allocated
page), try to extend the batch every time it runs out, and only flush once
either the extension fails or we're done.
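
To make the scheme concrete, below is a minimal, self-contained userspace
sketch of the same idea: pages are queued into page-sized batches chained
through ->next, the chain is extended when the active batch fills up, and
the whole chain is freed in a single pass only when extension fails or the
caller is finished.  All names here (struct batch, queue_page(), flush(),
BATCH_BYTES, ...) are invented for the illustration, and the on-stack
"local" first batch of the real patch is left out; this is a sketch of the
technique, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

#define BATCH_BYTES 4096        /* stand-in for PAGE_SIZE */

struct batch {
        struct batch *next;
        unsigned int nr;
        unsigned int max;
        void *pages[];          /* fills the rest of the allocation */
};

#define BATCH_MAX ((BATCH_BYTES - sizeof(struct batch)) / sizeof(void *))

struct gather {
        struct batch *first;
        struct batch *active;
};

static struct batch *alloc_batch(void)
{
        struct batch *b = malloc(BATCH_BYTES);

        if (b) {
                b->next = NULL;
                b->nr   = 0;
                b->max  = BATCH_MAX;
        }
        return b;
}

/* Chain a fresh batch after the active one; 0 means the extension failed. */
static int extend(struct gather *g)
{
        struct batch *b = alloc_batch();

        if (!b)
                return 0;
        g->active->next = b;
        g->active = b;
        return 1;
}

/* Queue one page; a return of 0 asks the caller to flush now. */
static int queue_page(struct gather *g, void *page)
{
        struct batch *b = g->active;

        b->pages[b->nr++] = page;
        if (b->nr == b->max && !extend(g))
                return 0;
        return g->active->max - g->active->nr;
}

/* Flush every batch in one go, then shrink back to a single empty batch. */
static void flush(struct gather *g)
{
        struct batch *b, *next;

        for (b = g->first; b; b = next) {
                next = b->next;
                /* the kernel frees the pages here (free_pages_and_swap_cache) */
                printf("flushing %u pages\n", b->nr);
                if (b != g->first)
                        free(b);
        }
        g->first->nr   = 0;
        g->first->next = NULL;
        g->active      = g->first;
}

int main(void)
{
        static char dummy;
        struct gather g;
        int i;

        g.first = g.active = alloc_batch();
        if (!g.first)
                return 1;

        for (i = 0; i < 3000; i++)
                if (!queue_page(&g, &dummy))
                        flush(&g);      /* only when the extension failed */
        flush(&g);                      /* ... or when we are done */

        free(g.first);
        return 0;
}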

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Requested-by: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/asm-generic/tlb.h
mm/memory.c

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 74f80f6b6cf109cad6cf3f2bb63c5ad3ce772334..5a946a08ff9de7f2556e8c82c70a94ef77621c05 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-/*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
- */
-#ifdef CONFIG_SMP
-  #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
-#else
-  #define tlb_fast_mode(tlb) 1
-#endif
-
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 /*
  * Semi RCU freeing of the page directories.
@@ -78,6 +68,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
  */
 #define MMU_GATHER_BUNDLE      8
 
+struct mmu_gather_batch {
+       struct mmu_gather_batch *next;
+       unsigned int            nr;
+       unsigned int            max;
+       struct page             *pages[0];
+};
+
+#define MAX_GATHER_BATCH       \
+       ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
+
 /* struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
  */
@@ -86,22 +86,48 @@ struct mmu_gather {
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        struct mmu_table_batch  *batch;
 #endif
-       unsigned int            nr;     /* set to ~0U means fast mode */
-       unsigned int            max;    /* nr < max */
-       unsigned int            need_flush;/* Really unmapped some ptes? */
-       unsigned int            fullmm; /* non-zero means full mm flush */
-       struct page             **pages;
-       struct page             *local[MMU_GATHER_BUNDLE];
+       unsigned int            need_flush : 1, /* Did free PTEs */
+                               fast_mode  : 1; /* No batching   */
+
+       unsigned int            fullmm;
+
+       struct mmu_gather_batch *active;
+       struct mmu_gather_batch local;
+       struct page             *__pages[MMU_GATHER_BUNDLE];
 };
 
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+/*
+ * For UP we don't need to worry about TLB flush
+ * and page free order so much..
+ */
+#ifdef CONFIG_SMP
+  #define tlb_fast_mode(tlb) (tlb->fast_mode)
+#else
+  #define tlb_fast_mode(tlb) 1
+#endif
+
+static inline int tlb_next_batch(struct mmu_gather *tlb)
 {
-       unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+       struct mmu_gather_batch *batch;
 
-       if (addr) {
-               tlb->pages = (void *)addr;
-               tlb->max = PAGE_SIZE / sizeof(struct page *);
+       batch = tlb->active;
+       if (batch->next) {
+               tlb->active = batch->next;
+               return 1;
        }
+
+       batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+       if (!batch)
+               return 0;
+
+       batch->next = NULL;
+       batch->nr   = 0;
+       batch->max  = MAX_GATHER_BATCH;
+
+       tlb->active->next = batch;
+       tlb->active = batch;
+
+       return 1;
 }
 
 /* tlb_gather_mmu
@@ -114,16 +140,13 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 {
        tlb->mm = mm;
 
-       tlb->max = ARRAY_SIZE(tlb->local);
-       tlb->pages = tlb->local;
-
-       if (num_online_cpus() > 1) {
-               tlb->nr = 0;
-               __tlb_alloc_page(tlb);
-       } else /* Use fast mode if only one CPU is online */
-               tlb->nr = ~0U;
-
-       tlb->fullmm = fullmm;
+       tlb->fullmm     = fullmm;
+       tlb->need_flush = 0;
+       tlb->fast_mode  = (num_possible_cpus() == 1);
+       tlb->local.next = NULL;
+       tlb->local.nr   = 0;
+       tlb->local.max  = ARRAY_SIZE(tlb->__pages);
+       tlb->active     = &tlb->local;
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb->batch = NULL;
@@ -133,6 +156,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 static inline void
 tlb_flush_mmu(struct mmu_gather *tlb)
 {
+       struct mmu_gather_batch *batch;
+
        if (!tlb->need_flush)
                return;
        tlb->need_flush = 0;
@@ -140,17 +165,15 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb_table_flush(tlb);
 #endif
-       if (!tlb_fast_mode(tlb)) {
-               free_pages_and_swap_cache(tlb->pages, tlb->nr);
-               tlb->nr = 0;
-               /*
-                * If we are using the local on-stack array of pages for MMU
-                * gather, try allocating an off-stack array again as we have
-                * recently freed pages.
-                */
-               if (tlb->pages == tlb->local)
-                       __tlb_alloc_page(tlb);
+
+       if (tlb_fast_mode(tlb))
+               return;
+
+       for (batch = &tlb->local; batch; batch = batch->next) {
+               free_pages_and_swap_cache(batch->pages, batch->nr);
+               batch->nr = 0;
        }
+       tlb->active = &tlb->local;
 }
 
 /* tlb_finish_mmu
@@ -160,13 +183,18 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
+       struct mmu_gather_batch *batch, *next;
+
        tlb_flush_mmu(tlb);
 
        /* keep the page table cache within bounds */
        check_pgt_cache();
 
-       if (tlb->pages != tlb->local)
-               free_pages((unsigned long)tlb->pages, 0);
+       for (batch = tlb->local.next; batch; batch = next) {
+               next = batch->next;
+               free_pages((unsigned long)batch, 0);
+       }
+       tlb->local.next = NULL;
 }
 
 /* __tlb_remove_page
@@ -177,15 +205,24 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  */
 static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+       struct mmu_gather_batch *batch;
+
        tlb->need_flush = 1;
+
        if (tlb_fast_mode(tlb)) {
                free_page_and_swap_cache(page);
                return 1; /* avoid calling tlb_flush_mmu() */
        }
-       tlb->pages[tlb->nr++] = page;
-       VM_BUG_ON(tlb->nr > tlb->max);
 
-       return tlb->max - tlb->nr;
+       batch = tlb->active;
+       batch->pages[batch->nr++] = page;
+       VM_BUG_ON(batch->nr > batch->max);
+       if (batch->nr == batch->max) {
+               if (!tlb_next_batch(tlb))
+                       return 0;
+       }
+
+       return batch->max - batch->nr;
 }
 
 /* tlb_remove_page
diff --git a/mm/memory.c b/mm/memory.c
index a77fd23ee68a7b0c625ef57df3180e9f48da8900..17193d74f30284a66269da7bb8ec65901abadfd4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -994,8 +994,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
        spinlock_t *ptl;
        int rss[NR_MM_COUNTERS];
 
-       init_rss_vec(rss);
 again:
+       init_rss_vec(rss);
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        arch_enter_lazy_mmu_mode();
        do {
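
The mm/memory.c hunk only moves init_rss_vec() below the "again:" label.
The likely reason (an assumption here, since the surrounding
zap_pte_range() code is not shown in the hunk) is that the preceding
mmu_gather rework lets the function fold rss[] into the mm counters and
then jump back to "again:" after a flush, so an accumulator initialised
only once before the label would be folded twice on retry.  A stand-alone
sketch of that pattern, with invented names:

#include <stdio.h>
#include <string.h>

#define NR_COUNTERS 4

static long mm_counters[NR_COUNTERS];   /* stand-in for the per-mm counters */

/* Fold one pass worth of deltas into the shared counters. */
static void fold(const int *rss)
{
        int i;

        for (i = 0; i < NR_COUNTERS; i++)
                mm_counters[i] += rss[i];
}

static void zap_range(int passes)
{
        int rss[NR_COUNTERS];

again:
        memset(rss, 0, sizeof(rss));    /* must be redone on every pass ... */
        rss[0] += 1;                    /* one pass of work */
        fold(rss);                      /* ... because rss[] is consumed here */
        if (--passes > 0)
                goto again;             /* ... and we may come back for more */
}

int main(void)
{
        zap_range(3);
        /* prints 3; with the memset hoisted above "again:" it would print 6 */
        printf("%ld\n", mm_counters[0]);
        return 0;
}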