Enable zram writeback
author huangzq2 <huangzq2@motorola.com>
Wed, 4 Sep 2019 04:58:12 +0000 (12:58 +0800)
committer xiest1 <xiest1@lenovo.com>
Tue, 5 Nov 2019 09:32:10 +0000 (17:32 +0800)
Porting zram changes from Google, and enable zram writeback

Change-Id: I1bcb545dd4cdeb7f456d2f609fdb43cd9a822816
Signed-off-by: huangzq2 <huangzq2@motorola.com>
Reviewed-on: https://gerrit.mot.com/1416294
SLTApproved: Slta Waiver
SME-Granted: SME Approvals Granted
Tested-by: Jira Key
Reviewed-by: Guolin Wang <wanggl3@mt.com>
Submit-Approved: Jira Key

Documentation/ABI/testing/sysfs-block-zram
Documentation/blockdev/zram.txt
arch/arm64/configs/ext_config/moto-erd9610.config
drivers/block/zram/Kconfig
drivers/block/zram/zcomp.c
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h

index c1513c756af125ce5adb76ee8e4522e25572a099..14b2bf2e5105ced6872477b0b85e6f24a76f1d08 100644 (file)
@@ -98,3 +98,42 @@ Description:
                The backing_dev file is read-write and set up backing
                device for zram to write incompressible pages.
                For using, user should enable CONFIG_ZRAM_WRITEBACK.
+
+What:          /sys/block/zram<id>/idle
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The idle file is write-only and marks zram slots as idle.
+               If system has mounted debugfs, user can see which slots
+               are idle via /sys/kernel/debug/zram/zram<id>/block_state
+
+What:          /sys/block/zram<id>/writeback
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The writeback file is write-only and triggers idle and/or
+               huge page writeback to backing device.
+
+What:          /sys/block/zram<id>/bd_stat
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The bd_stat file is read-only and represents backing device's
+               statistics (bd_count, bd_reads, bd_writes) in a format
+               similar to block layer statistics file format.
+
+What:          /sys/block/zram<id>/writeback_limit_enable
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The writeback_limit_enable file is read-write and specifies
+               enabling of the writeback_limit feature. "1" means enable the
+               feature. No limit "0" is the initial state.
+
+What:          /sys/block/zram<id>/writeback_limit
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The writeback_limit file is read-write and specifies the maximum
+               amount of writeback ZRAM can do. The limit could be changed
+               in run time.
index 257e65714c6a216f3fce9ebce7398eb821f96176..6e5c2bb222c30428483d8559298858233bc98273 100644 (file)
@@ -156,19 +156,23 @@ Per-device statistics are exported as various nodes under /sys/block/zram<id>/
 A brief description of exported device attributes. For more details please
 read Documentation/ABI/testing/sysfs-block-zram.
 
-Name            access            description
-----            ------            -----------
-disksize          RW    show and set the device's disk size
-initstate         RO    shows the initialization state of the device
-reset             WO    trigger device reset
-mem_used_max      WO    reset the `mem_used_max' counter (see later)
-mem_limit         WO    specifies the maximum amount of memory ZRAM can use
-                        to store the compressed data
-max_comp_streams  RW    the number of possible concurrent compress operations
-comp_algorithm    RW    show and change the compression algorithm
-compact           WO    trigger memory compaction
-debug_stat        RO    this file is used for zram debugging purposes
-backing_dev      RW    set up backend storage for zram to write out
+Name                   access            description
+----                   ------            -----------
+disksize               RW      show and set the device's disk size
+initstate              RO      shows the initialization state of the device
+reset                  WO      trigger device reset
+mem_used_max           WO      reset the `mem_used_max' counter (see later)
+mem_limit              WO      specifies the maximum amount of memory ZRAM can use
+                               to store the compressed data
+writeback_limit        RW      specifies the maximum amount of write IO zram can
+                               write out to backing device as 4KB unit
+writeback_limit_enable  RW     show and set writeback_limit feature
+max_comp_streams       RW      the number of possible concurrent compress operations
+comp_algorithm         RW      show and change the compression algorithm
+compact                WO      trigger memory compaction
+debug_stat             RO      this file is used for zram debugging purposes
+backing_dev            RW      set up backend storage for zram to write out
+idle                   WO      mark allocated slot as idle
 
 
 User space is advised to use the following files to read the device statistics.
@@ -218,6 +222,18 @@ line of text and contains the following stats separated by whitespace:
  same_pages       the number of same element filled pages written to this disk.
                   No memory is allocated for such pages.
  pages_compacted  the number of pages freed during compaction
+ huge_pages      the number of incompressible pages
+
+File /sys/block/zram<id>/bd_stat
+
+The stat file represents device's backing device statistics. It consists of
+a single line of text and contains the following stats separated by whitespace:
+ bd_count      size of data written in backing device.
+               Unit: 4K bytes
+ bd_reads      the number of reads from backing device
+               Unit: 4K bytes
+ bd_writes     the number of writes to backing device
+               Unit: 4K bytes
 
 9) Deactivate:
        swapoff /dev/zram0
@@ -236,11 +252,104 @@ line of text and contains the following stats separated by whitespace:
 
 = writeback
 
-With incompressible pages, there is no memory saving with zram.
-Instead, with CONFIG_ZRAM_WRITEBACK, zram can write incompressible page
+With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page
 to backing storage rather than keeping it in memory.
-User should set up backing device via /sys/block/zramX/backing_dev
-before disksize setting.
+To use the feature, admin should set up backing device via
+
+       "echo /dev/sda5 > /sys/block/zramX/backing_dev"
+
+before disksize setting. It supports only partition at this moment.
+If admin want to use incompressible page writeback, they could do via
+
+       "echo huge > /sys/block/zramX/write"
+
+To use idle page writeback, first, user need to declare zram pages
+as idle.
+
+       "echo all > /sys/block/zramX/idle"
+
+From now on, any pages on zram are idle pages. The idle mark
+will be removed when someone requests access of the block.
+IOW, unless there is an access request, those pages are still idle pages.
+
+Admin can request writeback of those idle pages at right timing via
+
+       "echo idle > /sys/block/zramX/writeback"
+
+With the command, zram writeback idle pages from memory to the storage.
+
+If there are lots of write IO with flash device, potentially, it has
+flash wearout problem so that admin needs to design write limitation
+to guarantee storage health for entire product life.
+
+To overcome the concern, zram supports "writeback_limit" feature.
+The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
+any writeback. IOW, if admin want to apply writeback budget, he should
+enable writeback_limit_enable via
+
+       $ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until admin set the budget via /sys/block/zramX/writeback_limit.
+
+(If admin doesn't enable writeback_limit_enable, writeback_limit's value
+assigned via /sys/block/zramX/writeback_limit is meaningless.)
+
+If admin want to limit writeback as per-day 400M, he could do it
+like below.
+
+       $ MB_SHIFT=20
+       $ 4K_SHIFT=12
+       $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+               /sys/block/zram0/writeback_limit
+       $ echo 1 > /sys/block/zram0/writeback_limit_enable
+
+If admin want to allow further write again once the budget is exhausted,
+he could do it like below
+
+       $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+               /sys/block/zram0/writeback_limit
+
+If admin want to see remaining writeback budget since he set,
+
+       $ cat /sys/block/zramX/writeback_limit
+
+If admin want to disable writeback limit, he could do
+
+       $ echo 0 > /sys/block/zramX/writeback_limit_enable
+
+The writeback_limit count will reset whenever you reset zram(e.g.,
+system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
+writeback happened until you reset the zram to allocate extra writeback
+budget in next setting is user's job.
+
+If admin want to measure writeback count in a certain period, he could
+know it via /sys/block/zram0/bd_stat's 3rd column.
+
+= memory tracking
+
+With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
+zram block. It could be useful to catch cold or incompressible
+pages of the process with *pagemap*.
+If you enable the feature, you could see block state via
+/sys/kernel/debug/zram/zram0/block_state. The output is as follows,
+
+         300    75.033841 .wh.
+         301    63.806904 s...
+         302    63.806919 ..hi
+
+First column is zram's block index.
+Second column is access time since the system was booted
+Third column is state of the block.
+(s: same page
+w: written page to backing store
+h: huge page
+i: idle page)
+
+First line of above example says 300th block is accessed at 75.033841sec
+and the block's state is huge so it is written back to the backing
+storage. It's a debugging feature so anyone shouldn't rely on it to work
+properly.
 
 Nitin Gupta
 ngupta@vflare.org
index 6d12da64f88a9d92cb0dc2dd624a710d4ecbb2e2..117f0b8dc6af479852e208850a4f28a651654795 100644 (file)
@@ -18,6 +18,7 @@ CONFIG_HZ_300=y
 CONFIG_SWAP=y
 CONFIG_ZSMALLOC=y
 CONFIG_ZRAM=y
+CONFIG_ZRAM_WRITEBACK=y
 CONFIG_CPUSETS=y
 CONFIG_CRYPTO_LZ4=y
 CONFIG_CRYPTO_GCM=y
index ac3a31d433b2e9f698d051bfcd06f76d0efad971..99a2c60c7c6f292eaf8ac50e30f2c4f2f43bc73d 100644 (file)
@@ -13,10 +13,10 @@ config ZRAM
          It has several use cases, for example: /tmp storage, use as swap
          disks and maybe many more.
 
-         See zram.txt for more information.
+         See Documentation/blockdev/zram.txt for more information.
 
 config ZRAM_WRITEBACK
-       bool "Write back incompressible page to backing device"
+       bool "Write back incompressible or idle page to backing device"
        depends on ZRAM
        default n
        help
@@ -25,4 +25,17 @@ config ZRAM_WRITEBACK
         For this feature, admin should set up backing device via
         /sys/block/zramX/backing_dev.
 
-        See zram.txt for more infomration.
+        With /sys/block/zramX/{idle,writeback}, application could ask
+        idle page's writeback to the backing device to save in memory.
+
+        See Documentation/blockdev/zram.txt for more information.
+
+config ZRAM_MEMORY_TRACKING
+       bool "Track zRam block status"
+       depends on ZRAM && DEBUG_FS
+       help
+         With this feature, admin can track the state of allocated blocks
+         of zRAM. Admin could see the information via
+         /sys/kernel/debug/zram/zramX/block_state.
+
+         See Documentation/blockdev/zram.txt for more information.
index 5b8992beffec865634897c078877cd15614db0aa..cc66daec7bbc38c445a040a8b9685d7f2c967417 100644 (file)
@@ -31,6 +31,9 @@ static const char * const backends[] = {
 #endif
 #if IS_ENABLED(CONFIG_CRYPTO_842)
        "842",
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_ZSTD)
+       "zstd",
 #endif
        NULL
 };
index a46776a84480cd642f46a4946a7828defe2bee1a..0edd2ddc449f7edcb84e033b09660be191641914 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/idr.h>
 #include <linux/sysfs.h>
+#include <linux/debugfs.h>
 #include <linux/cpuhotplug.h>
 
 #include "zram_drv.h"
@@ -44,8 +45,31 @@ static const char *default_compressor = "lzo";
 
 /* Module params (documentation at end) */
 static unsigned int num_devices = 1;
+/*
+ * Pages that compress to sizes equals or greater than this are stored
+ * uncompressed in memory.
+ */
+static size_t huge_class_size;
 
 static void zram_free_page(struct zram *zram, size_t index);
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+                               u32 index, int offset, struct bio *bio);
+
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+       return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+       bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+       bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+}
 
 static inline bool init_done(struct zram *zram)
 {
@@ -68,22 +92,22 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
 }
 
 /* flag operations require table entry bit_spin_lock() being held */
-static int zram_test_flag(struct zram *zram, u32 index,
+static bool zram_test_flag(struct zram *zram, u32 index,
                        enum zram_pageflags flag)
 {
-       return zram->table[index].value & BIT(flag);
+       return zram->table[index].flags & BIT(flag);
 }
 
 static void zram_set_flag(struct zram *zram, u32 index,
                        enum zram_pageflags flag)
 {
-       zram->table[index].value |= BIT(flag);
+       zram->table[index].flags |= BIT(flag);
 }
 
 static void zram_clear_flag(struct zram *zram, u32 index,
                        enum zram_pageflags flag)
 {
-       zram->table[index].value &= ~BIT(flag);
+       zram->table[index].flags &= ~BIT(flag);
 }
 
 static inline void zram_set_element(struct zram *zram, u32 index,
@@ -99,15 +123,22 @@ static unsigned long zram_get_element(struct zram *zram, u32 index)
 
 static size_t zram_get_obj_size(struct zram *zram, u32 index)
 {
-       return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+       return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
 }
 
 static void zram_set_obj_size(struct zram *zram,
                                        u32 index, size_t size)
 {
-       unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
+       unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
+
+       zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
+}
 
-       zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+static inline bool zram_allocated(struct zram *zram, u32 index)
+{
+       return zram_get_obj_size(zram, index) ||
+                       zram_test_flag(zram, index, ZRAM_SAME) ||
+                       zram_test_flag(zram, index, ZRAM_WB);
 }
 
 #if PAGE_SIZE != 4096
@@ -122,14 +153,6 @@ static inline bool is_partial_io(struct bio_vec *bvec)
 }
 #endif
 
-static void zram_revalidate_disk(struct zram *zram)
-{
-       revalidate_disk(zram->disk);
-       /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
-       zram->disk->queue->backing_dev_info->capabilities |=
-               BDI_CAP_STABLE_WRITES;
-}
-
 /*
  * Check if request is within bounds and aligned on zram logical blocks.
  */
@@ -261,17 +284,125 @@ static ssize_t mem_used_max_store(struct device *dev,
        return len;
 }
 
+static ssize_t idle_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct zram *zram = dev_to_zram(dev);
+       unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+       int index;
+       char mode_buf[8];
+       ssize_t sz;
+
+       sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+       if (sz <= 0)
+               return -EINVAL;
+
+       /* ignore trailing new line */
+       if (mode_buf[sz - 1] == '\n')
+               mode_buf[sz - 1] = 0x00;
+
+       if (strcmp(mode_buf, "all"))
+               return -EINVAL;
+
+       down_read(&zram->init_lock);
+       if (!init_done(zram)) {
+               up_read(&zram->init_lock);
+               return -EINVAL;
+       }
+
+       for (index = 0; index < nr_pages; index++) {
+               /*
+                * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
+                * See the comment in writeback_store.
+                */
+               zram_slot_lock(zram, index);
+               if (zram_allocated(zram, index) &&
+                               !zram_test_flag(zram, index, ZRAM_UNDER_WB))
+                       zram_set_flag(zram, index, ZRAM_IDLE);
+               zram_slot_unlock(zram, index);
+       }
+
+       up_read(&zram->init_lock);
+
+       return len;
+}
+
 #ifdef CONFIG_ZRAM_WRITEBACK
-static bool zram_wb_enabled(struct zram *zram)
+static ssize_t writeback_limit_enable_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct zram *zram = dev_to_zram(dev);
+       u64 val;
+       ssize_t ret = -EINVAL;
+
+       if (kstrtoull(buf, 10, &val))
+               return ret;
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       zram->wb_limit_enable = val;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+       ret = len;
+
+       return ret;
+}
+
+static ssize_t writeback_limit_enable_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       bool val;
+       struct zram *zram = dev_to_zram(dev);
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       val = zram->wb_limit_enable;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t writeback_limit_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
 {
-       return zram->backing_dev;
+       struct zram *zram = dev_to_zram(dev);
+       u64 val;
+       ssize_t ret = -EINVAL;
+
+       if (kstrtoull(buf, 10, &val))
+               return ret;
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       zram->bd_wb_limit = val;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+       ret = len;
+
+       return ret;
+}
+
+static ssize_t writeback_limit_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u64 val;
+       struct zram *zram = dev_to_zram(dev);
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       val = zram->bd_wb_limit;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+
+       return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
 }
 
 static void reset_bdev(struct zram *zram)
 {
        struct block_device *bdev;
 
-       if (!zram_wb_enabled(zram))
+       if (!zram->backing_dev)
                return;
 
        bdev = zram->bdev;
@@ -297,7 +428,7 @@ static ssize_t backing_dev_show(struct device *dev,
        ssize_t ret;
 
        down_read(&zram->init_lock);
-       if (!zram_wb_enabled(zram)) {
+       if (!zram->backing_dev) {
                memcpy(buf, "none\n", 5);
                up_read(&zram->init_lock);
                return 5;
@@ -385,7 +516,6 @@ static ssize_t backing_dev_store(struct device *dev,
                goto out;
 
        reset_bdev(zram);
-       spin_lock_init(&zram->bitmap_lock);
 
        zram->old_block_size = old_block_size;
        zram->bdev = bdev;
@@ -415,35 +545,32 @@ out:
        return err;
 }
 
-static unsigned long get_entry_bdev(struct zram *zram)
+static unsigned long alloc_block_bdev(struct zram *zram)
 {
-       unsigned long entry;
-
-       spin_lock(&zram->bitmap_lock);
+       unsigned long blk_idx = 1;
+retry:
        /* skip 0 bit to confuse zram.handle = 0 */
-       entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
-       if (entry == zram->nr_pages) {
-               spin_unlock(&zram->bitmap_lock);
+       blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
+       if (blk_idx == zram->nr_pages)
                return 0;
-       }
 
-       set_bit(entry, zram->bitmap);
-       spin_unlock(&zram->bitmap_lock);
+       if (test_and_set_bit(blk_idx, zram->bitmap))
+               goto retry;
 
-       return entry;
+       atomic64_inc(&zram->stats.bd_count);
+       return blk_idx;
 }
 
-static void put_entry_bdev(struct zram *zram, unsigned long entry)
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
 {
        int was_set;
 
-       spin_lock(&zram->bitmap_lock);
-       was_set = test_and_clear_bit(entry, zram->bitmap);
-       spin_unlock(&zram->bitmap_lock);
+       was_set = test_and_clear_bit(blk_idx, zram->bitmap);
        WARN_ON_ONCE(!was_set);
+       atomic64_dec(&zram->stats.bd_count);
 }
 
-void zram_page_end_io(struct bio *bio)
+static void zram_page_end_io(struct bio *bio)
 {
        struct page *page = bio->bi_io_vec[0].bv_page;
 
@@ -483,6 +610,172 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
        return 1;
 }
 
+#define HUGE_WRITEBACK 1
+#define IDLE_WRITEBACK 2
+
+static ssize_t writeback_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct zram *zram = dev_to_zram(dev);
+       unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+       unsigned long index;
+       struct bio bio;
+       struct bio_vec bio_vec;
+       struct page *page;
+       ssize_t ret, sz;
+       char mode_buf[8];
+       int mode = -1;
+       unsigned long blk_idx = 0;
+
+       sz = strscpy(mode_buf, buf, sizeof(mode_buf));
+       if (sz <= 0)
+               return -EINVAL;
+
+       /* ignore trailing newline */
+       if (mode_buf[sz - 1] == '\n')
+               mode_buf[sz - 1] = 0x00;
+
+       if (!strcmp(mode_buf, "idle"))
+               mode = IDLE_WRITEBACK;
+       else if (!strcmp(mode_buf, "huge"))
+               mode = HUGE_WRITEBACK;
+
+       if (mode == -1)
+               return -EINVAL;
+
+       down_read(&zram->init_lock);
+       if (!init_done(zram)) {
+               ret = -EINVAL;
+               goto release_init_lock;
+       }
+
+       if (!zram->backing_dev) {
+               ret = -ENODEV;
+               goto release_init_lock;
+       }
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page) {
+               ret = -ENOMEM;
+               goto release_init_lock;
+       }
+
+       for (index = 0; index < nr_pages; index++) {
+               struct bio_vec bvec;
+
+               bvec.bv_page = page;
+               bvec.bv_len = PAGE_SIZE;
+               bvec.bv_offset = 0;
+
+               spin_lock(&zram->wb_limit_lock);
+               if (zram->wb_limit_enable && !zram->bd_wb_limit) {
+                       spin_unlock(&zram->wb_limit_lock);
+                       ret = -EIO;
+                       break;
+               }
+               spin_unlock(&zram->wb_limit_lock);
+
+               if (!blk_idx) {
+                       blk_idx = alloc_block_bdev(zram);
+                       if (!blk_idx) {
+                               ret = -ENOSPC;
+                               break;
+                       }
+               }
+
+               zram_slot_lock(zram, index);
+               if (!zram_allocated(zram, index))
+                       goto next;
+
+               if (zram_test_flag(zram, index, ZRAM_WB) ||
+                               zram_test_flag(zram, index, ZRAM_SAME) ||
+                               zram_test_flag(zram, index, ZRAM_UNDER_WB))
+                       goto next;
+
+               if (mode == IDLE_WRITEBACK &&
+                         !zram_test_flag(zram, index, ZRAM_IDLE))
+                       goto next;
+               if (mode == HUGE_WRITEBACK &&
+                         !zram_test_flag(zram, index, ZRAM_HUGE))
+                       goto next;
+               /*
+                * Clearing ZRAM_UNDER_WB is duty of caller.
+                * IOW, zram_free_page never clear it.
+                */
+               zram_set_flag(zram, index, ZRAM_UNDER_WB);
+               /* Need for hugepage writeback racing */
+               zram_set_flag(zram, index, ZRAM_IDLE);
+               zram_slot_unlock(zram, index);
+               if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
+                       zram_slot_lock(zram, index);
+                       zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+                       zram_clear_flag(zram, index, ZRAM_IDLE);
+                       zram_slot_unlock(zram, index);
+                       continue;
+               }
+
+               bio_init(&bio, &bio_vec, 1);
+               bio_set_dev(&bio, zram->bdev);
+               bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
+               bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
+
+               bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
+                               bvec.bv_offset);
+               /*
+                * XXX: A single page IO would be inefficient for write
+                * but it would be not bad as starter.
+                */
+               ret = submit_bio_wait(&bio);
+               if (ret) {
+                       zram_slot_lock(zram, index);
+                       zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+                       zram_clear_flag(zram, index, ZRAM_IDLE);
+                       zram_slot_unlock(zram, index);
+                       continue;
+               }
+
+               atomic64_inc(&zram->stats.bd_writes);
+               /*
+                * We released zram_slot_lock so need to check if the slot was
+                * changed. If there is freeing for the slot, we can catch it
+                * easily by zram_allocated.
+                * A subtle case is the slot is freed/reallocated/marked as
+                * ZRAM_IDLE again. To close the race, idle_store doesn't
+                * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
+                * Thus, we could close the race by checking ZRAM_IDLE bit.
+                */
+               zram_slot_lock(zram, index);
+               if (!zram_allocated(zram, index) ||
+                         !zram_test_flag(zram, index, ZRAM_IDLE)) {
+                       zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+                       zram_clear_flag(zram, index, ZRAM_IDLE);
+                       goto next;
+               }
+
+               zram_free_page(zram, index);
+               zram_clear_flag(zram, index, ZRAM_UNDER_WB);
+               zram_set_flag(zram, index, ZRAM_WB);
+               zram_set_element(zram, index, blk_idx);
+               blk_idx = 0;
+               atomic64_inc(&zram->stats.pages_stored);
+               spin_lock(&zram->wb_limit_lock);
+               if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
+                       zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
+               spin_unlock(&zram->wb_limit_lock);
+next:
+               zram_slot_unlock(zram, index);
+       }
+
+       if (blk_idx)
+               free_block_bdev(zram, blk_idx);
+       ret = len;
+       __free_page(page);
+release_init_lock:
+       up_read(&zram->init_lock);
+
+       return ret;
+}
+
 struct zram_work {
        struct work_struct work;
        struct zram *zram;
@@ -535,82 +828,131 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
                        unsigned long entry, struct bio *parent, bool sync)
 {
+       atomic64_inc(&zram->stats.bd_reads);
        if (sync)
                return read_from_bdev_sync(zram, bvec, entry, parent);
        else
                return read_from_bdev_async(zram, bvec, entry, parent);
 }
+#else
+static inline void reset_bdev(struct zram *zram) {};
+static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
+                       unsigned long entry, struct bio *parent, bool sync)
+{
+       return -EIO;
+}
+
+static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
+#endif
+
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+
+static struct dentry *zram_debugfs_root;
+
+static void zram_debugfs_create(void)
+{
+       zram_debugfs_root = debugfs_create_dir("zram", NULL);
+}
 
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
-                                       u32 index, struct bio *parent,
-                                       unsigned long *pentry)
+static void zram_debugfs_destroy(void)
 {
-       struct bio *bio;
-       unsigned long entry;
+       debugfs_remove_recursive(zram_debugfs_root);
+}
 
-       bio = bio_alloc(GFP_ATOMIC, 1);
-       if (!bio)
+static void zram_accessed(struct zram *zram, u32 index)
+{
+       zram_clear_flag(zram, index, ZRAM_IDLE);
+       zram->table[index].ac_time = ktime_get_boottime();
+}
+
+static ssize_t read_block_state(struct file *file, char __user *buf,
+                               size_t count, loff_t *ppos)
+{
+       char *kbuf;
+       ssize_t index, written = 0;
+       struct zram *zram = file->private_data;
+       unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+       struct timespec64 ts;
+
+       kbuf = kvmalloc(count, GFP_KERNEL);
+       if (!kbuf)
                return -ENOMEM;
 
-       entry = get_entry_bdev(zram);
-       if (!entry) {
-               bio_put(bio);
-               return -ENOSPC;
+       down_read(&zram->init_lock);
+       if (!init_done(zram)) {
+               up_read(&zram->init_lock);
+               kvfree(kbuf);
+               return -EINVAL;
        }
 
-       bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-       bio_set_dev(bio, zram->bdev);
-       if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-                                       bvec->bv_offset)) {
-               bio_put(bio);
-               put_entry_bdev(zram, entry);
-               return -EIO;
-       }
+       for (index = *ppos; index < nr_pages; index++) {
+               int copied;
 
-       if (!parent) {
-               bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-               bio->bi_end_io = zram_page_end_io;
-       } else {
-               bio->bi_opf = parent->bi_opf;
-               bio_chain(bio, parent);
+               zram_slot_lock(zram, index);
+               if (!zram_allocated(zram, index))
+                       goto next;
+
+               ts = ktime_to_timespec64(zram->table[index].ac_time);
+               copied = snprintf(kbuf + written, count,
+                       "%12zd %12lld.%06lu %c%c%c%c\n",
+                       index, (s64)ts.tv_sec,
+                       ts.tv_nsec / NSEC_PER_USEC,
+                       zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
+                       zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
+                       zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
+                       zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+
+               if (count < copied) {
+                       zram_slot_unlock(zram, index);
+                       break;
+               }
+               written += copied;
+               count -= copied;
+next:
+               zram_slot_unlock(zram, index);
+               *ppos += 1;
        }
 
-       submit_bio(bio);
-       *pentry = entry;
+       up_read(&zram->init_lock);
+       if (copy_to_user(buf, kbuf, written))
+               written = -EFAULT;
+       kvfree(kbuf);
 
-       return 0;
+       return written;
 }
 
-static void zram_wb_clear(struct zram *zram, u32 index)
+static const struct file_operations proc_zram_block_state_op = {
+       .open = simple_open,
+       .read = read_block_state,
+       .llseek = default_llseek,
+};
+
+static void zram_debugfs_register(struct zram *zram)
 {
-       unsigned long entry;
+       if (!zram_debugfs_root)
+               return;
 
-       zram_clear_flag(zram, index, ZRAM_WB);
-       entry = zram_get_element(zram, index);
-       zram_set_element(zram, index, 0);
-       put_entry_bdev(zram, entry);
+       zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
+                                               zram_debugfs_root);
+       debugfs_create_file("block_state", 0400, zram->debugfs_dir,
+                               zram, &proc_zram_block_state_op);
 }
 
-#else
-static bool zram_wb_enabled(struct zram *zram) { return false; }
-static inline void reset_bdev(struct zram *zram) {};
-static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
-                                       u32 index, struct bio *parent,
-                                       unsigned long *pentry)
-
+static void zram_debugfs_unregister(struct zram *zram)
 {
-       return -EIO;
+       debugfs_remove_recursive(zram->debugfs_dir);
 }
-
-static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
-                       unsigned long entry, struct bio *parent, bool sync)
+#else
+static void zram_debugfs_create(void) {};
+static void zram_debugfs_destroy(void) {};
+static void zram_accessed(struct zram *zram, u32 index)
 {
-       return -EIO;
-}
-static void zram_wb_clear(struct zram *zram, u32 index) {}
+       zram_clear_flag(zram, index, ZRAM_IDLE);
+};
+static void zram_debugfs_register(struct zram *zram) {};
+static void zram_debugfs_unregister(struct zram *zram) {};
 #endif
 
-
 /*
  * We switched to per-cpu streams and this attr is not needed anymore.
  * However, we will keep it around for some time, because:
@@ -729,19 +1071,40 @@ static ssize_t mm_stat_show(struct device *dev,
        max_used = atomic_long_read(&zram->stats.max_used_pages);
 
        ret = scnprintf(buf, PAGE_SIZE,
-                       "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
+                       "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
                        orig_size << PAGE_SHIFT,
                        (u64)atomic64_read(&zram->stats.compr_data_size),
                        mem_used << PAGE_SHIFT,
                        zram->limit_pages << PAGE_SHIFT,
                        max_used << PAGE_SHIFT,
                        (u64)atomic64_read(&zram->stats.same_pages),
-                       pool_stats.pages_compacted);
+                       pool_stats.pages_compacted,
+                       (u64)atomic64_read(&zram->stats.huge_pages));
        up_read(&zram->init_lock);
 
        return ret;
 }
 
+#ifdef CONFIG_ZRAM_WRITEBACK
+#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
+static ssize_t bd_stat_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct zram *zram = dev_to_zram(dev);
+       ssize_t ret;
+
+       down_read(&zram->init_lock);
+       ret = scnprintf(buf, PAGE_SIZE,
+               "%8llu %8llu %8llu\n",
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
+                       FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
+       up_read(&zram->init_lock);
+
+       return ret;
+}
+#endif
+
 static ssize_t debug_stat_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -751,9 +1114,10 @@ static ssize_t debug_stat_show(struct device *dev,
 
        down_read(&zram->init_lock);
        ret = scnprintf(buf, PAGE_SIZE,
-                       "version: %d\n%8llu\n",
+                       "version: %d\n%8llu %8llu\n",
                        version,
-                       (u64)atomic64_read(&zram->stats.writestall));
+                       (u64)atomic64_read(&zram->stats.writestall),
+                       (u64)atomic64_read(&zram->stats.miss_free));
        up_read(&zram->init_lock);
 
        return ret;
@@ -761,18 +1125,11 @@ static ssize_t debug_stat_show(struct device *dev,
 
 static DEVICE_ATTR_RO(io_stat);
 static DEVICE_ATTR_RO(mm_stat);
+#ifdef CONFIG_ZRAM_WRITEBACK
+static DEVICE_ATTR_RO(bd_stat);
+#endif
 static DEVICE_ATTR_RO(debug_stat);
 
-static void zram_slot_lock(struct zram *zram, u32 index)
-{
-       bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
-}
-
-static void zram_slot_unlock(struct zram *zram, u32 index)
-{
-       bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
-}
-
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
        size_t num_pages = disksize >> PAGE_SHIFT;
@@ -801,6 +1158,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
                return false;
        }
 
+       if (!huge_class_size)
+               huge_class_size = zs_huge_class_size(zram->mem_pool);
        return true;
 }
 
@@ -813,10 +1172,21 @@ static void zram_free_page(struct zram *zram, size_t index)
 {
        unsigned long handle;
 
-       if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
-               zram_wb_clear(zram, index);
-               atomic64_dec(&zram->stats.pages_stored);
-               return;
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+       zram->table[index].ac_time = 0;
+#endif
+       if (zram_test_flag(zram, index, ZRAM_IDLE))
+               zram_clear_flag(zram, index, ZRAM_IDLE);
+
+       if (zram_test_flag(zram, index, ZRAM_HUGE)) {
+               zram_clear_flag(zram, index, ZRAM_HUGE);
+               atomic64_dec(&zram->stats.huge_pages);
+       }
+
+       if (zram_test_flag(zram, index, ZRAM_WB)) {
+               zram_clear_flag(zram, index, ZRAM_WB);
+               free_block_bdev(zram, zram_get_element(zram, index));
+               goto out;
        }
 
        /*
@@ -825,10 +1195,8 @@ static void zram_free_page(struct zram *zram, size_t index)
         */
        if (zram_test_flag(zram, index, ZRAM_SAME)) {
                zram_clear_flag(zram, index, ZRAM_SAME);
-               zram_set_element(zram, index, 0);
                atomic64_dec(&zram->stats.same_pages);
-               atomic64_dec(&zram->stats.pages_stored);
-               return;
+               goto out;
        }
 
        handle = zram_get_handle(zram, index);
@@ -839,10 +1207,12 @@ static void zram_free_page(struct zram *zram, size_t index)
 
        atomic64_sub(zram_get_obj_size(zram, index),
                        &zram->stats.compr_data_size);
+out:
        atomic64_dec(&zram->stats.pages_stored);
-
        zram_set_handle(zram, index, 0);
        zram_set_obj_size(zram, index, 0);
+       WARN_ON_ONCE(zram->table[index].flags &
+               ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }
 
 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
@@ -853,24 +1223,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
        unsigned int size;
        void *src, *dst;
 
-       if (zram_wb_enabled(zram)) {
-               zram_slot_lock(zram, index);
-               if (zram_test_flag(zram, index, ZRAM_WB)) {
-                       struct bio_vec bvec;
-
-                       zram_slot_unlock(zram, index);
+       zram_slot_lock(zram, index);
+       if (zram_test_flag(zram, index, ZRAM_WB)) {
+               struct bio_vec bvec;
 
-                       bvec.bv_page = page;
-                       bvec.bv_len = PAGE_SIZE;
-                       bvec.bv_offset = 0;
-                       return read_from_bdev(zram, &bvec,
-                                       zram_get_element(zram, index),
-                                       bio, partial_io);
-               }
                zram_slot_unlock(zram, index);
+
+               bvec.bv_page = page;
+               bvec.bv_len = PAGE_SIZE;
+               bvec.bv_offset = 0;
+               return read_from_bdev(zram, &bvec,
+                               zram_get_element(zram, index),
+                               bio, partial_io);
        }
 
-       zram_slot_lock(zram, index);
        handle = zram_get_handle(zram, index);
        if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
                unsigned long value;
@@ -955,7 +1321,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
        struct page *page = bvec->bv_page;
        unsigned long element = 0;
        enum zram_pageflags flags = 0;
-       bool allow_wb = true;
 
        mem = kmap_atomic(page);
        if (page_same_filled(mem, &element)) {
@@ -980,21 +1345,8 @@ compress_again:
                return ret;
        }
 
-       if (unlikely(comp_len > max_zpage_size)) {
-               if (zram_wb_enabled(zram) && allow_wb) {
-                       zcomp_stream_put(zram->comp);
-                       ret = write_to_bdev(zram, bvec, index, bio, &element);
-                       if (!ret) {
-                               flags = ZRAM_WB;
-                               ret = 1;
-                               goto out;
-                       }
-                       allow_wb = false;
-                       goto compress_again;
-               }
+       if (comp_len >= huge_class_size)
                comp_len = PAGE_SIZE;
-       }
-
        /*
         * handle allocation has 2 paths:
         * a) fast path is executed with preemption disabled (for
@@ -1054,6 +1406,11 @@ out:
        zram_slot_lock(zram, index);
        zram_free_page(zram, index);
 
+       if (comp_len == PAGE_SIZE) {
+               zram_set_flag(zram, index, ZRAM_HUGE);
+               atomic64_inc(&zram->stats.huge_pages);
+       }
+
        if (flags) {
                zram_set_flag(zram, index, flags);
                zram_set_element(zram, index, element);
@@ -1174,6 +1531,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 
        generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
 
+       zram_slot_lock(zram, index);
+       zram_accessed(zram, index);
+       zram_slot_unlock(zram, index);
+
        if (unlikely(ret < 0)) {
                if (!is_write)
                        atomic64_inc(&zram->stats.failed_reads);
@@ -1258,10 +1619,14 @@ static void zram_slot_free_notify(struct block_device *bdev,
 
        zram = bdev->bd_disk->private_data;
 
-       zram_slot_lock(zram, index);
+       atomic64_inc(&zram->stats.notify_free);
+       if (!zram_slot_trylock(zram, index)) {
+               atomic64_inc(&zram->stats.miss_free);
+               return;
+       }
+
        zram_free_page(zram, index);
        zram_slot_unlock(zram, index);
-       atomic64_inc(&zram->stats.notify_free);
 }
 
 static int zram_rw_page(struct block_device *bdev, sector_t sector,
@@ -1380,7 +1745,8 @@ static ssize_t disksize_store(struct device *dev,
        zram->comp = comp;
        zram->disksize = disksize;
        set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
-       zram_revalidate_disk(zram);
+
+       revalidate_disk(zram->disk);
        up_write(&zram->init_lock);
 
        return len;
@@ -1427,7 +1793,7 @@ static ssize_t reset_store(struct device *dev,
        /* Make sure all the pending I/O are finished */
        fsync_bdev(bdev);
        zram_reset_device(zram);
-       zram_revalidate_disk(zram);
+       revalidate_disk(zram->disk);
        bdput(bdev);
 
        mutex_lock(&bdev->bd_mutex);
@@ -1465,10 +1831,14 @@ static DEVICE_ATTR_RO(initstate);
 static DEVICE_ATTR_WO(reset);
 static DEVICE_ATTR_WO(mem_limit);
 static DEVICE_ATTR_WO(mem_used_max);
+static DEVICE_ATTR_WO(idle);
 static DEVICE_ATTR_RW(max_comp_streams);
 static DEVICE_ATTR_RW(comp_algorithm);
 #ifdef CONFIG_ZRAM_WRITEBACK
 static DEVICE_ATTR_RW(backing_dev);
+static DEVICE_ATTR_WO(writeback);
+static DEVICE_ATTR_RW(writeback_limit);
+static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
 
 static struct attribute *zram_disk_attrs[] = {
@@ -1478,13 +1848,20 @@ static struct attribute *zram_disk_attrs[] = {
        &dev_attr_compact.attr,
        &dev_attr_mem_limit.attr,
        &dev_attr_mem_used_max.attr,
+       &dev_attr_idle.attr,
        &dev_attr_max_comp_streams.attr,
        &dev_attr_comp_algorithm.attr,
 #ifdef CONFIG_ZRAM_WRITEBACK
        &dev_attr_backing_dev.attr,
+       &dev_attr_writeback.attr,
+       &dev_attr_writeback_limit.attr,
+       &dev_attr_writeback_limit_enable.attr,
 #endif
        &dev_attr_io_stat.attr,
        &dev_attr_mm_stat.attr,
+#ifdef CONFIG_ZRAM_WRITEBACK
+       &dev_attr_bd_stat.attr,
+#endif
        &dev_attr_debug_stat.attr,
        NULL,
 };
@@ -1518,7 +1895,9 @@ static int zram_add(void)
        device_id = ret;
 
        init_rwsem(&zram->init_lock);
-
+#ifdef CONFIG_ZRAM_WRITEBACK
+       spin_lock_init(&zram->wb_limit_lock);
+#endif
        queue = blk_alloc_queue(GFP_KERNEL);
        if (!queue) {
                pr_err("Error allocating disk queue for device %d\n",
@@ -1551,6 +1930,7 @@ static int zram_add(void)
        /* zram devices sort of resembles non-rotational disks */
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
+
        /*
         * To ensure that we always get PAGE_SIZE aligned
         * and n*PAGE_SIZED sized I/O requests.
@@ -1575,11 +1955,15 @@ static int zram_add(void)
        if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
                blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
 
+       zram->disk->queue->backing_dev_info->capabilities |=
+                                       BDI_CAP_STABLE_WRITES;
+
        disk_to_dev(zram->disk)->groups = zram_disk_attr_groups;
        add_disk(zram->disk);
 
        strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
 
+       zram_debugfs_register(zram);
        pr_info("Added device: %s\n", zram->disk->disk_name);
        return device_id;
 
@@ -1610,6 +1994,8 @@ static int zram_remove(struct zram *zram)
        zram->claim = true;
        mutex_unlock(&bdev->bd_mutex);
 
+       zram_debugfs_unregister(zram);
+
        /* Make sure all the pending I/O are finished */
        fsync_bdev(bdev);
        zram_reset_device(zram);
@@ -1617,8 +2003,8 @@ static int zram_remove(struct zram *zram)
 
        pr_info("Removed device: %s\n", zram->disk->disk_name);
 
-       blk_cleanup_queue(zram->disk->queue);
        del_gendisk(zram->disk);
+       blk_cleanup_queue(zram->disk->queue);
        put_disk(zram->disk);
        kfree(zram);
        return 0;
@@ -1702,6 +2088,7 @@ static void destroy_devices(void)
 {
        class_unregister(&zram_control_class);
        idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+       zram_debugfs_destroy();
        idr_destroy(&zram_index_idr);
        unregister_blkdev(zram_major, "zram");
        cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
@@ -1723,6 +2110,7 @@ static int __init zram_init(void)
                return ret;
        }
 
+       zram_debugfs_create();
        zram_major = register_blkdev(0, "zram");
        if (zram_major <= 0) {
                pr_err("Unable to get major number\n");
index 31762db861e38486a86c9ea060ec841f90f142df..29af8d024b471188805abbbdf2ef8963c2a182e9 100644 (file)
 
 #include "zcomp.h"
 
-/*-- Configurable parameters */
-
-/*
- * Pages that compress to size greater than this are stored
- * uncompressed in memory.
- */
-static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
-
-/*
- * NOTE: max_zpage_size must be less than or equal to:
- *   ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
- * always return failure.
- */
-
-/*-- End of configurable params */
-
 #define SECTOR_SHIFT           9
 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTORS_PER_PAGE       (1 << SECTORS_PER_PAGE_SHIFT)
@@ -47,7 +31,7 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
 
 
 /*
- * The lower ZRAM_FLAG_SHIFT bits of table.value is for
+ * The lower ZRAM_FLAG_SHIFT bits of table.flags is for
  * object size (excluding header), the higher bits is for
  * zram_pageflags.
  *
@@ -58,12 +42,15 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
  */
 #define ZRAM_FLAG_SHIFT 24
 
-/* Flags for zram pages (table[page_no].value) */
+/* Flags for zram pages (table[page_no].flags) */
 enum zram_pageflags {
-       /* Page consists the same element */
-       ZRAM_SAME = ZRAM_FLAG_SHIFT,
-       ZRAM_ACCESS,    /* page is now accessed */
+       /* zram slot is locked */
+       ZRAM_LOCK = ZRAM_FLAG_SHIFT,
+       ZRAM_SAME,      /* Page consists of the same element */
        ZRAM_WB,        /* page is stored on backing_device */
+       ZRAM_UNDER_WB,  /* page is under writeback */
+       ZRAM_HUGE,      /* Incompressible page */
+       ZRAM_IDLE,      /* page has not been accessed since last idle marking */
 
        __NR_ZRAM_PAGEFLAGS,
 };
@@ -76,7 +63,10 @@ struct zram_table_entry {
                unsigned long handle;
                unsigned long element;
        };
-       unsigned long value;
+       unsigned long flags;
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+       ktime_t ac_time;
+#endif
 };
 
 struct zram_stats {
@@ -88,9 +78,16 @@ struct zram_stats {
        atomic64_t invalid_io;  /* non-page-aligned I/O requests */
        atomic64_t notify_free; /* no. of swap slot free notifications */
        atomic64_t same_pages;          /* no. of same element filled pages */
+       atomic64_t huge_pages;          /* no. of huge pages */
        atomic64_t pages_stored;        /* no. of pages currently stored */
        atomic_long_t max_used_pages;   /* no. of maximum pages stored */
        atomic64_t writestall;          /* no. of write slow paths */
+       atomic64_t miss_free;           /* no. of missed free */
+#ifdef CONFIG_ZRAM_WRITEBACK
+       atomic64_t bd_count;            /* no. of pages in backing device */
+       atomic64_t bd_reads;            /* no. of reads from backing device */
+       atomic64_t bd_writes;           /* no. of writes to backing device */
+#endif
 };
 
 struct zram {
@@ -116,13 +113,18 @@ struct zram {
         * zram is claimed so open request will be failed
         */
        bool claim; /* Protected by bdev->bd_mutex */
-#ifdef CONFIG_ZRAM_WRITEBACK
        struct file *backing_dev;
+#ifdef CONFIG_ZRAM_WRITEBACK
+       spinlock_t wb_limit_lock;
+       bool wb_limit_enable;
+       u64 bd_wb_limit;
        struct block_device *bdev;
        unsigned int old_block_size;
        unsigned long *bitmap;
        unsigned long nr_pages;
-       spinlock_t bitmap_lock;
+#endif
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+       struct dentry *debugfs_dir;
 #endif
 };
 #endif