mm: test code to write THP to swap device as a whole
author Huang Ying <ying.huang@intel.com>
Wed, 6 Sep 2017 23:22:30 +0000 (16:22 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Sep 2017 00:27:28 +0000 (17:27 -0700)
To support delaying the split of a THP (Transparent Huge Page) until after
it has been swapped out, we need to enhance the swap writing code so that it
can write a THP as a whole.  This will improve swap write IO performance.

As Ming Lei <ming.lei@redhat.com> pointed out, this should be based on
multipage bvec support, which hasn't been merged yet.  So this patch is
only for testing the functionality of the other patches in the series,
and it will be reimplemented after multipage bvec support is merged.

Link: http://lkml.kernel.org/r/20170724051840.2309-7-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Shaohua Li <shli@kernel.org>
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/bio.h
include/linux/page-flags.h
include/linux/vm_event_item.h
mm/page_io.c
mm/vmstat.c

index 7b1cf4ba090291af59e904dc1c59d8196ab88d3d..1f0720de89903e649c6ebe7f8dd73033e072b5d6 100644 (file)
 #define BIO_BUG_ON
 #endif
 
+#ifdef CONFIG_THP_SWAP
+#if HPAGE_PMD_NR > 256
+#define BIO_MAX_PAGES          HPAGE_PMD_NR
+#else
 #define BIO_MAX_PAGES          256
+#endif
+#else
+#define BIO_MAX_PAGES          256
+#endif
 
 #define bio_prio(bio)                  (bio)->bi_ioprio
 #define bio_set_prio(bio, prio)                ((bio)->bi_ioprio = prio)
index d33e3280c8adc3b73a5426ec3bfdb6d0729d62ff..ba2d470d2d0a325e9e9d19d39bc675cc311b6a83 100644 (file)
@@ -303,8 +303,8 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
  * Only test-and-set exist for PG_writeback.  The unconditional operators are
  * risky: they bypass page accounting.
  */
-TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND)
-       TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
+       TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
 PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
 
 /* PG_readahead is only used for reads; PG_reclaim is only for writes */
index 37e8d31a4632dbbdcb22b492153cb36897057687..c75024e80eed46d7f2db77a49e662be467dc3ff8 100644 (file)
@@ -85,6 +85,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
                THP_ZERO_PAGE_ALLOC,
                THP_ZERO_PAGE_ALLOC_FAILED,
+               THP_SWPOUT,
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
                BALLOON_INFLATE,
index 5f61b54ee1f38e0bb7ae7bee3efe3257f8967a44..20139b90125a8bda3bc9fae8775426db459f15ea 100644 (file)
 static struct bio *get_swap_bio(gfp_t gfp_flags,
                                struct page *page, bio_end_io_t end_io)
 {
+       int i, nr = hpage_nr_pages(page);
        struct bio *bio;
 
-       bio = bio_alloc(gfp_flags, 1);
+       bio = bio_alloc(gfp_flags, nr);
        if (bio) {
                bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
                bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
                bio->bi_end_io = end_io;
 
-               bio_add_page(bio, page, PAGE_SIZE, 0);
-               BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE);
+               for (i = 0; i < nr; i++)
+                       bio_add_page(bio, page + i, PAGE_SIZE, 0);
+               VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
        }
        return bio;
 }
@@ -262,6 +264,15 @@ static sector_t swap_page_sector(struct page *page)
        return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
 }
 
+static inline void count_swpout_vm_event(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (unlikely(PageTransHuge(page)))
+               count_vm_event(THP_SWPOUT);
+#endif
+       count_vm_events(PSWPOUT, hpage_nr_pages(page));
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
                bio_end_io_t end_write_func)
 {
@@ -313,7 +324,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
        ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
        if (!ret) {
-               count_vm_event(PSWPOUT);
+               count_swpout_vm_event(page);
                return 0;
        }
 
@@ -326,7 +337,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                goto out;
        }
        bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-       count_vm_event(PSWPOUT);
+       count_swpout_vm_event(page);
        set_page_writeback(page);
        unlock_page(page);
        submit_bio(bio);
index 9a4441bbeef26d7bf39fe8bb9148a7e8a4ef5632..bccf426453cdfce078055a37363bf688bd556122 100644 (file)
@@ -1071,6 +1071,7 @@ const char * const vmstat_text[] = {
 #endif
        "thp_zero_page_alloc",
        "thp_zero_page_alloc_failed",
+       "thp_swpout",
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
        "balloon_inflate",