mm, swap: use offset of swap entry as key of swap cache
author Huang Ying <ying.huang@intel.com>
Sat, 8 Oct 2016 00:00:21 +0000 (17:00 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 8 Oct 2016 01:46:28 +0000 (18:46 -0700)
This patch improves the performance of swap cache operations when
the type of the swap device is not 0.  Originally, the whole swap entry
value is used as the key of the swap cache, even though there is one
radix tree for each swap device.  If the type of the swap device is not
0, the height of the radix tree of the swap cache is increased
unnecessarily, especially on 64-bit architectures.  For example, for a 1GB
swap device on the x86_64 architecture, the height of the radix tree of
the swap cache is 11.  But if the offset of the swap entry is used as
the key of the swap cache, the height of the radix tree of the swap
cache is 4.  The increased height causes unnecessary radix tree
descents and a larger cache footprint.

This patch reduces the height of the radix tree of the swap cache by
using the offset of the swap entry instead of the whole swap entry value
as the key of the swap cache.  In a 32-process sequential swap-out test
case on a Xeon E5 v3 system with a RAM disk as swap, the lock contention
for the spinlock of the swap cache is reduced from 20.15% to 12.19%
when the type of the swap device is 1.

With the whole swap entry as the key:

  perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.__add_to_swap_cache.add_to_swap_cache.add_to_swap.shrink_page_list: 10.37,
  perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__remove_mapping.shrink_page_list.shrink_inactive_list.shrink_node_memcg: 9.78,

With the swap offset as the key:

  perf-profile.calltrace.cycles-pp._raw_spin_lock_irq.__add_to_swap_cache.add_to_swap_cache.add_to_swap.shrink_page_list: 6.25,
  perf-profile.calltrace.cycles-pp._raw_spin_lock_irqsave.__remove_mapping.shrink_page_list.shrink_inactive_list.shrink_node_memcg: 5.94,

Link: http://lkml.kernel.org/r/1473270649-27229-1-git-send-email-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mm.h
mm/memcontrol.c
mm/mincore.c
mm/swap_state.c
mm/swapfile.c

index 046077b4209d65b82e3534b3338a6ea0500c78a3..028e84e2ab42832bda5b6698518c753a428bf2ca 100644 (file)
@@ -1048,19 +1048,19 @@ struct address_space *page_file_mapping(struct page *page)
        return page->mapping;
 }
 
+extern pgoff_t __page_file_index(struct page *page);
+
 /*
  * Return the pagecache index of the passed page.  Regular pagecache pages
- * use ->index whereas swapcache pages use ->private
+ * use ->index whereas swapcache pages use swp_offset(->private)
  */
 static inline pgoff_t page_index(struct page *page)
 {
        if (unlikely(PageSwapCache(page)))
-               return page_private(page);
+               return __page_file_index(page);
        return page->index;
 }
 
-extern pgoff_t __page_file_index(struct page *page);
-
 /*
  * Return the file index of the page. Regular pagecache pages use ->index
  * whereas swapcache pages use swp_offset(->private)
index 0739d4129a93aab464a1f1f88763c99362f675e8..60bb830abc345bee90d2a15a0c9d532baaec5aea 100644 (file)
@@ -4408,7 +4408,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
         * Because lookup_swap_cache() updates some statistics counter,
         * we call find_get_page() with swapper_space directly.
         */
-       page = find_get_page(swap_address_space(ent), ent.val);
+       page = find_get_page(swap_address_space(ent), swp_offset(ent));
        if (do_memsw_account())
                entry->val = ent.val;
 
@@ -4446,7 +4446,8 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                        swp_entry_t swp = radix_to_swp_entry(page);
                        if (do_memsw_account())
                                *entry = swp;
-                       page = find_get_page(swap_address_space(swp), swp.val);
+                       page = find_get_page(swap_address_space(swp),
+                                            swp_offset(swp));
                }
        } else
                page = find_get_page(mapping, pgoff);
index c0b5ba965200942741347500c0b6a739434b715d..bfb866435478b33dada2231b5f64553f7208c75e 100644 (file)
@@ -66,7 +66,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
                 */
                if (radix_tree_exceptional_entry(page)) {
                        swp_entry_t swp = radix_to_swp_entry(page);
-                       page = find_get_page(swap_address_space(swp), swp.val);
+                       page = find_get_page(swap_address_space(swp),
+                                            swp_offset(swp));
                }
        } else
                page = find_get_page(mapping, pgoff);
@@ -150,7 +151,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        } else {
 #ifdef CONFIG_SWAP
                                *vec = mincore_page(swap_address_space(entry),
-                                       entry.val);
+                                                   swp_offset(entry));
 #else
                                WARN_ON(1);
                                *vec = 1;
index 8679c997eab63677a0d01547f54508ca04e64bc2..35d7e0ee1c77c9fb94915905e171f82ff859b7da 100644 (file)
@@ -94,7 +94,7 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
        address_space = swap_address_space(entry);
        spin_lock_irq(&address_space->tree_lock);
        error = radix_tree_insert(&address_space->page_tree,
-                                       entry.val, page);
+                                 swp_offset(entry), page);
        if (likely(!error)) {
                address_space->nrpages++;
                __inc_node_page_state(page, NR_FILE_PAGES);
@@ -145,7 +145,7 @@ void __delete_from_swap_cache(struct page *page)
 
        entry.val = page_private(page);
        address_space = swap_address_space(entry);
-       radix_tree_delete(&address_space->page_tree, page_private(page));
+       radix_tree_delete(&address_space->page_tree, swp_offset(entry));
        set_page_private(page, 0);
        ClearPageSwapCache(page);
        address_space->nrpages--;
@@ -283,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 {
        struct page *page;
 
-       page = find_get_page(swap_address_space(entry), entry.val);
+       page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
        if (page) {
                INC_CACHE_INFO(find_success);
@@ -310,7 +310,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * called after lookup_swap_cache() failed, re-calling
                 * that would confuse statistics.
                 */
-               found_page = find_get_page(swapper_space, entry.val);
+               found_page = find_get_page(swapper_space, swp_offset(entry));
                if (found_page)
                        break;
 
index 134c085d0d7bb927e779b526eee028a304860e01..2210de290b54d160d31afc937471225077bf083a 100644 (file)
@@ -105,7 +105,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
        struct page *page;
        int ret = 0;
 
-       page = find_get_page(swap_address_space(entry), entry.val);
+       page = find_get_page(swap_address_space(entry), swp_offset(entry));
        if (!page)
                return 0;
        /*
@@ -1005,7 +1005,7 @@ int free_swap_and_cache(swp_entry_t entry)
        if (p) {
                if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
                        page = find_get_page(swap_address_space(entry),
-                                               entry.val);
+                                            swp_offset(entry));
                        if (page && !trylock_page(page)) {
                                put_page(page);
                                page = NULL;