filemap: prepare find and delete operations for huge pages
author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
		Tue, 26 Jul 2016 22:26:04 +0000 (15:26 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Tue, 26 Jul 2016 23:19:19 +0000 (16:19 -0700)
For now, we will have HPAGE_PMD_NR entries in the radix tree for every
huge page.  That is suboptimal; it will be changed to use Matthew's
multi-order entries later.
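
As a rough sketch of what the per-subpage entries mean for the delete
path (names as in this patch; a sketch assuming the 4.8-era radix-tree
API, before multi-order entries):

	/*
	 * A PMD-sized THP at index I occupies the HPAGE_PMD_NR slots
	 * [I, I + HPAGE_PMD_NR) of mapping->page_tree, one per subpage,
	 * so each slot has to be cleared individually.  hugetlb pages
	 * keep a single entry, hence the PageHuge() check.
	 */
	int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);

	for (i = 0; i < nr; i++)
		radix_tree_replace_clear_tags(&mapping->page_tree,
				page->index + i, shadow);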

The 'add' operation is not changed, because we don't need it to implement
huge tmpfs: shmem uses its own implementation.
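
On the find side, every lookup path below gains the same guard: take the
speculative reference on the compound head, then re-check that the page
was neither split nor moved while no lock was held.  Condensed from
find_get_entry() and friends:

	struct page *head = compound_head(page);

	if (!page_cache_get_speculative(head))
		goto repeat;

	/* The page was split under us? */
	if (compound_head(page) != head) {
		put_page(head);
		goto repeat;
	}

	/* Has the page moved? */
	if (unlikely(page != *slot)) {
		put_page(head);
		goto repeat;
	}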

Link: http://lkml.kernel.org/r/1466021202-61880-25-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/filemap.c b/mm/filemap.c
index 1efd2994dccfaf9afe781c00a4f9eb761b7a3c0e..21508ea25717af8b6fb2bc64a5bb98f86a854935 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -114,14 +114,14 @@ static void page_cache_tree_delete(struct address_space *mapping,
                                   struct page *page, void *shadow)
 {
        struct radix_tree_node *node;
+       int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
 
-       VM_BUG_ON(!PageLocked(page));
-
-       node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index,
-                                                               shadow);
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(PageTail(page), page);
+       VM_BUG_ON_PAGE(nr != 1 && shadow, page);
 
        if (shadow) {
-               mapping->nrexceptional++;
+               mapping->nrexceptional += nr;
                /*
                 * Make sure the nrexceptional update is committed before
                 * the nrpages update so that final truncate racing
@@ -130,31 +130,38 @@ static void page_cache_tree_delete(struct address_space *mapping,
                 */
                smp_wmb();
        }
-       mapping->nrpages--;
-
-       if (!node)
-               return;
+       mapping->nrpages -= nr;
 
-       workingset_node_pages_dec(node);
-       if (shadow)
-               workingset_node_shadows_inc(node);
-       else
-               if (__radix_tree_delete_node(&mapping->page_tree, node))
+       for (i = 0; i < nr; i++) {
+               node = radix_tree_replace_clear_tags(&mapping->page_tree,
+                               page->index + i, shadow);
+               if (!node) {
+                       VM_BUG_ON_PAGE(nr != 1, page);
                        return;
+               }
 
-       /*
-        * Track node that only contains shadow entries. DAX mappings contain
-        * no shadow entries and may contain other exceptional entries so skip
-        * those.
-        *
-        * Avoid acquiring the list_lru lock if already tracked.  The
-        * list_empty() test is safe as node->private_list is
-        * protected by mapping->tree_lock.
-        */
-       if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
-           list_empty(&node->private_list)) {
-               node->private_data = mapping;
-               list_lru_add(&workingset_shadow_nodes, &node->private_list);
+               workingset_node_pages_dec(node);
+               if (shadow)
+                       workingset_node_shadows_inc(node);
+               else
+                       if (__radix_tree_delete_node(&mapping->page_tree, node))
+                               continue;
+
+               /*
+                * Track node that only contains shadow entries. DAX mappings
+                * contain no shadow entries and may contain other exceptional
+                * entries so skip those.
+                *
+                * Avoid acquiring the list_lru lock if already tracked.
+                * The list_empty() test is safe as node->private_list is
+                * protected by mapping->tree_lock.
+                */
+               if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
+                               list_empty(&node->private_list)) {
+                       node->private_data = mapping;
+                       list_lru_add(&workingset_shadow_nodes,
+                                       &node->private_list);
+               }
        }
 }
 
@@ -166,6 +173,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
        struct address_space *mapping = page->mapping;
+       int nr = hpage_nr_pages(page);
 
        trace_mm_filemap_delete_from_page_cache(page);
        /*
@@ -178,6 +186,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
        else
                cleancache_invalidate_page(mapping, page);
 
+       VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(page_mapped(page), page);
        if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
                int mapcount;
@@ -209,9 +218,9 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 
        /* hugetlb pages do not participate in page cache accounting. */
        if (!PageHuge(page))
-               __dec_zone_page_state(page, NR_FILE_PAGES);
+               __mod_zone_page_state(page_zone(page), NR_FILE_PAGES, -nr);
        if (PageSwapBacked(page))
-               __dec_zone_page_state(page, NR_SHMEM);
+               __mod_zone_page_state(page_zone(page), NR_SHMEM, -nr);
 
        /*
         * At this point page must be either written or cleaned by truncate.
@@ -235,9 +244,8 @@ void __delete_from_page_cache(struct page *page, void *shadow)
  */
 void delete_from_page_cache(struct page *page)
 {
-       struct address_space *mapping = page->mapping;
+       struct address_space *mapping = page_mapping(page);
        unsigned long flags;
-
        void (*freepage)(struct page *);
 
        BUG_ON(!PageLocked(page));
@@ -250,7 +258,13 @@ void delete_from_page_cache(struct page *page)
 
        if (freepage)
                freepage(page);
-       put_page(page);
+
+       if (PageTransHuge(page) && !PageHuge(page)) {
+               page_ref_sub(page, HPAGE_PMD_NR);
+               VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+       } else {
+               put_page(page);
+       }
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -1053,7 +1067,7 @@ EXPORT_SYMBOL(page_cache_prev_hole);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
        void **pagep;
-       struct page *page;
+       struct page *head, *page;
 
        rcu_read_lock();
 repeat:
@@ -1073,8 +1087,16 @@ repeat:
                         */
                        goto out;
                }
-               if (!page_cache_get_speculative(page))
+
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
+                       goto repeat;
+
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
                        goto repeat;
+               }
 
                /*
                 * Has the page moved?
@@ -1082,7 +1104,7 @@ repeat:
                 * include/linux/pagemap.h for details.
                 */
                if (unlikely(page != *pagep)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
        }
@@ -1118,12 +1140,12 @@ repeat:
        if (page && !radix_tree_exception(page)) {
                lock_page(page);
                /* Has the page been truncated? */
-               if (unlikely(page->mapping != mapping)) {
+               if (unlikely(page_mapping(page) != mapping)) {
                        unlock_page(page);
                        put_page(page);
                        goto repeat;
                }
-               VM_BUG_ON_PAGE(page->index != offset, page);
+               VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
        }
        return page;
 }
@@ -1255,7 +1277,7 @@ unsigned find_get_entries(struct address_space *mapping,
 
        rcu_read_lock();
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-               struct page *page;
+               struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
@@ -1272,12 +1294,20 @@ repeat:
                         */
                        goto export;
                }
-               if (!page_cache_get_speculative(page))
+
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
+                       goto repeat;
+
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
                        goto repeat;
+               }
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
 export:
@@ -1318,7 +1348,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 
        rcu_read_lock();
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-               struct page *page;
+               struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
@@ -1337,12 +1367,19 @@ repeat:
                        continue;
                }
 
-               if (!page_cache_get_speculative(page))
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
+                       goto repeat;
+
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
                        goto repeat;
+               }
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
 
@@ -1379,7 +1416,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
        rcu_read_lock();
        radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-               struct page *page;
+               struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
                /* The hole, there's no reason to continue */
@@ -1399,12 +1436,19 @@ repeat:
                        break;
                }
 
-               if (!page_cache_get_speculative(page))
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
+                       goto repeat;
+
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
                        goto repeat;
+               }
 
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
 
@@ -1413,7 +1457,7 @@ repeat:
                 * otherwise we can get both false positives and false
                 * negatives, which is just confusing to the caller.
                 */
-               if (page->mapping == NULL || page->index != iter.index) {
+               if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
                        put_page(page);
                        break;
                }
@@ -1451,7 +1495,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
        rcu_read_lock();
        radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                   &iter, *index, tag) {
-               struct page *page;
+               struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
@@ -1476,12 +1520,19 @@ repeat:
                        continue;
                }
 
-               if (!page_cache_get_speculative(page))
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
                        goto repeat;
 
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
+                       goto repeat;
+               }
+
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
 
@@ -1525,7 +1576,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
        rcu_read_lock();
        radix_tree_for_each_tagged(slot, &mapping->page_tree,
                                   &iter, start, tag) {
-               struct page *page;
+               struct page *head, *page;
 repeat:
                page = radix_tree_deref_slot(slot);
                if (unlikely(!page))
@@ -1543,12 +1594,20 @@ repeat:
                         */
                        goto export;
                }
-               if (!page_cache_get_speculative(page))
+
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
                        goto repeat;
 
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
+                       goto repeat;
+               }
+
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }
 export:
@@ -2137,7 +2196,7 @@ void filemap_map_pages(struct fault_env *fe,
        struct address_space *mapping = file->f_mapping;
        pgoff_t last_pgoff = start_pgoff;
        loff_t size;
-       struct page *page;
+       struct page *head, *page;
 
        rcu_read_lock();
        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
@@ -2156,12 +2215,19 @@ repeat:
                        goto next;
                }
 
-               if (!page_cache_get_speculative(page))
+               head = compound_head(page);
+               if (!page_cache_get_speculative(head))
                        goto repeat;
 
+               /* The page was split under us? */
+               if (compound_head(page) != head) {
+                       put_page(head);
+                       goto repeat;
+               }
+
                /* Has the page moved? */
                if (unlikely(page != *slot)) {
-                       put_page(page);
+                       put_page(head);
                        goto repeat;
                }