mm: compaction: acquire the zone->lru_lock as late as possible

author    Mel Gorman <mgorman@suse.de>
          Mon, 8 Oct 2012 23:32:33 +0000 (16:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 9 Oct 2012 07:22:49 +0000 (16:22 +0900)

diff --git a/mm/compaction.c b/mm/compaction.c
index b16dd3822995ab4b0111b84dad13e9c2f60c70ec..832c4183dccce2e48c398f066f6e6cb2c2750f46 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,11 @@ static inline bool migrate_async_suitable(int migratetype)
         return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
  }
  
+static inline bool should_release_lock(spinlock_t *lock)
+{
+       return need_resched() || spin_is_contended(lock);
+}
+
  /*
   * Compaction requires the taking of some coarse locks that are potentially
   * very heavily contended. Check if the process needs to be scheduled or
@@ -62,7 +67,7 @@ static inline bool migrate_async_suitable(int migratetype)
  static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
                                       bool locked, struct compact_control *cc)
  {
-       if (need_resched() || spin_is_contended(lock)) {
+       if (should_release_lock(lock)) {
                 if (locked) {
                         spin_unlock_irqrestore(lock, *flags);
                         locked = false;
@@ -327,7 +332,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
         isolate_mode_t mode = 0;
         struct lruvec *lruvec;
         unsigned long flags;
-       bool locked;
+       bool locked = false;
  
         /*
          * Ensure that there are not too many pages isolated from the LRU
@@ -347,23 +352,17 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
  
         /* Time to isolate some pages for migration */
         cond_resched();
-       spin_lock_irqsave(&zone->lru_lock, flags);
-       locked = true;
         for (; low_pfn < end_pfn; low_pfn++) {
                 struct page *page;
  
                 /* give a chance to irqs before checking need_resched() */
-               if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-                       spin_unlock_irqrestore(&zone->lru_lock, flags);
-                       locked = false;
+               if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+                       if (should_release_lock(&zone->lru_lock)) {
+                               spin_unlock_irqrestore(&zone->lru_lock, flags);
+                               locked = false;
+                       }
                 }
  
-               /* Check if it is ok to still hold the lock */
-               locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
-                                                               locked, cc);
-               if (!locked || fatal_signal_pending(current))
-                       break;
-
                 /*
                  * migrate_pfn does not necessarily start aligned to a
                  * pageblock. Ensure that pfn_valid is called when moving
@@ -403,20 +402,39 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                 pageblock_nr = low_pfn >> pageblock_order;
                 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
                     !migrate_async_suitable(get_pageblock_migratetype(page))) {
-                       low_pfn += pageblock_nr_pages;
-                       low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
-                       last_pageblock_nr = pageblock_nr;
-                       continue;
+                       goto next_pageblock;
                 }
  
+               /* Check may be lockless but that's ok as we recheck later */
                 if (!PageLRU(page))
                         continue;
  
                 /*
-                * PageLRU is set, and lru_lock excludes isolation,
-                * splitting and collapsing (collapsing has already
-                * happened if PageLRU is set).
+                * PageLRU is set. lru_lock normally excludes isolation,
+                * splitting and collapsing (collapsing has already happened
+                * if PageLRU is set) but the lock is not necessarily taken
+                * here and it is wasteful to take it just to check transhuge.
+                * Check TransHuge without lock and skip the whole pageblock if
+                * it's either a transhuge or hugetlbfs page, as calling
+                * compound_order() without preventing THP from splitting the
+                * page underneath us may return surprising results.
                  */
+               if (PageTransHuge(page)) {
+                       if (!locked)
+                               goto next_pageblock;
+                       low_pfn += (1 << compound_order(page)) - 1;
+                       continue;
+               }
+
+               /* Check if it is ok to still hold the lock */
+               locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+                                                               locked, cc);
+               if (!locked || fatal_signal_pending(current))
+                       break;
+
+               /* Recheck PageLRU and PageTransHuge under lock */
+               if (!PageLRU(page))
+                       continue;
                 if (PageTransHuge(page)) {
                         low_pfn += (1 << compound_order(page)) - 1;
                         continue;
@@ -444,6 +462,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                         ++low_pfn;
                         break;
                 }
+
+               continue;
+
+next_pageblock:
+               low_pfn += pageblock_nr_pages;
+               low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+               last_pageblock_nr = pageblock_nr;
         }
  
         acct_isolated(zone, locked, cc);
author	Mel Gorman <mgorman@suse.de>
	Mon, 8 Oct 2012 23:32:33 +0000 (16:32 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 9 Oct 2012 07:22:49 +0000 (16:22 +0900)