thp: alter compound get_page/put_page
author Andrea Arcangeli <aarcange@redhat.com>
Thu, 13 Jan 2011 23:46:32 +0000 (15:46 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Jan 2011 01:32:39 +0000 (17:32 -0800)
Alter compound get_page/put_page to keep references on subpages too, in
order to allow __split_huge_page_refcount to split an hugepage even while
subpages have been pinned by one of the get_user_pages() variants.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/powerpc/mm/gup.c
arch/x86/mm/gup.c
include/linux/mm.h
mm/swap.c

index d7efdbf640c7d5e39cd8c3c55f79aa0b8ca64f10..fec13200868f8e6ab40fd834abe7ed4bf6dc89f8 100644 (file)
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 
+static inline void get_huge_page_tail(struct page *page)
+{
+       /*
+        * __split_huge_page_refcount() cannot run
+        * from under us.
+        */
+       VM_BUG_ON(atomic_read(&page->_count) < 0);
+       atomic_inc(&page->_count);
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -47,6 +57,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                        put_page(page);
                        return 0;
                }
+               if (PageTail(page))
+                       get_huge_page_tail(page);
                pages[*nr] = page;
                (*nr)++;
 
index 738e6593799dcce1973d5d082b6c0a732107cbcd..06f56fcf9a77ceea3bf3c3d62ff7ac698024457a 100644 (file)
@@ -105,6 +105,16 @@ static inline void get_head_page_multiple(struct page *page, int nr)
        atomic_add(nr, &page->_count);
 }
 
+static inline void get_huge_page_tail(struct page *page)
+{
+       /*
+        * __split_huge_page_refcount() cannot run
+        * from under us.
+        */
+       VM_BUG_ON(atomic_read(&page->_count) < 0);
+       atomic_inc(&page->_count);
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -128,6 +138,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
+               if (PageTail(page))
+                       get_huge_page_tail(page);
                (*nr)++;
                page++;
                refs++;
index 3b1754ad8785cb4dfe9910de1757927c06729203..a2718e1ed585eee46b3cfcada4ef8d4b196040ed 100644 (file)
@@ -353,9 +353,29 @@ static inline int page_count(struct page *page)
 
 static inline void get_page(struct page *page)
 {
-       page = compound_head(page);
-       VM_BUG_ON(atomic_read(&page->_count) == 0);
+       /*
+        * Getting a normal page or the head of a compound page
+        * requires to already have an elevated page->_count. Only if
+        * we're getting a tail page, the elevated page->_count is
+        * required only in the head page, so for tail pages the
+        * bugcheck only verifies that the page->_count isn't
+        * negative.
+        */
+       VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
        atomic_inc(&page->_count);
+       /*
+        * Getting a tail page will elevate both the head and tail
+        * page->_count(s).
+        */
+       if (unlikely(PageTail(page))) {
+               /*
+                * This is safe only because
+                * __split_huge_page_refcount can't run under
+                * get_page().
+                */
+               VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+               atomic_inc(&page->first_page->_count);
+       }
 }
 
 static inline struct page *virt_to_head_page(const void *x)
index 3f4854205b16ba0c39a8b815e00fe1b48f3472f1..33f5292fe1325c388ee9db9de64b1b758f00036a 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -56,17 +56,93 @@ static void __page_cache_release(struct page *page)
                del_page_from_lru(zone, page);
                spin_unlock_irqrestore(&zone->lru_lock, flags);
        }
+}
+
+static void __put_single_page(struct page *page)
+{
+       __page_cache_release(page);
        free_hot_cold_page(page, 0);
 }
 
-static void put_compound_page(struct page *page)
+static void __put_compound_page(struct page *page)
 {
-       page = compound_head(page);
-       if (put_page_testzero(page)) {
-               compound_page_dtor *dtor;
+       compound_page_dtor *dtor;
+
+       __page_cache_release(page);
+       dtor = get_compound_page_dtor(page);
+       (*dtor)(page);
+}
 
-               dtor = get_compound_page_dtor(page);
-               (*dtor)(page);
+static void put_compound_page(struct page *page)
+{
+       if (unlikely(PageTail(page))) {
+               /* __split_huge_page_refcount can run under us */
+               struct page *page_head = page->first_page;
+               smp_rmb();
+               /*
+                * If PageTail is still set after smp_rmb() we can be sure
+                * that the page->first_page we read wasn't a dangling pointer.
+                * See __split_huge_page_refcount() smp_wmb().
+                */
+               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+                       unsigned long flags;
+                       /*
+                        * Verify that our page_head wasn't converted
+                        * to a regular page before we got a
+                        * reference on it.
+                        */
+                       if (unlikely(!PageHead(page_head))) {
+                               /* PageHead is cleared after PageTail */
+                               smp_rmb();
+                               VM_BUG_ON(PageTail(page));
+                               goto out_put_head;
+                       }
+                       /*
+                        * Only run compound_lock on a valid PageHead,
+                        * after having it pinned with
+                        * get_page_unless_zero() above.
+                        */
+                       smp_mb();
+                       /* page_head wasn't a dangling pointer */
+                       flags = compound_lock_irqsave(page_head);
+                       if (unlikely(!PageTail(page))) {
+                               /* __split_huge_page_refcount run before us */
+                               compound_unlock_irqrestore(page_head, flags);
+                               VM_BUG_ON(PageHead(page_head));
+                       out_put_head:
+                               if (put_page_testzero(page_head))
+                                       __put_single_page(page_head);
+                       out_put_single:
+                               if (put_page_testzero(page))
+                                       __put_single_page(page);
+                               return;
+                       }
+                       VM_BUG_ON(page_head != page->first_page);
+                       /*
+                        * We can release the refcount taken by
+                        * get_page_unless_zero now that
+                        * split_huge_page_refcount is blocked on the
+                        * compound_lock.
+                        */
+                       if (put_page_testzero(page_head))
+                               VM_BUG_ON(1);
+                       /* __split_huge_page_refcount will wait now */
+                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
+                       atomic_dec(&page->_count);
+                       VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       compound_unlock_irqrestore(page_head, flags);
+                       if (put_page_testzero(page_head))
+                               __put_compound_page(page_head);
+               } else {
+                       /* page_head is a dangling pointer */
+                       VM_BUG_ON(PageTail(page));
+                       goto out_put_single;
+               }
+       } else if (put_page_testzero(page)) {
+               if (PageHead(page))
+                       __put_compound_page(page);
+               else
+                       __put_single_page(page);
        }
 }
 
@@ -75,7 +151,7 @@ void put_page(struct page *page)
        if (unlikely(PageCompound(page)))
                put_compound_page(page);
        else if (put_page_testzero(page))
-               __page_cache_release(page);
+               __put_single_page(page);
 }
 EXPORT_SYMBOL(put_page);