1/*
2 * zswap.c - zswap driver file
3 *
4 * zswap is a backend for frontswap that takes pages that are in the process
5 * of being swapped out and attempts to compress and store them in a
6 * RAM-based memory pool. This can result in a significant I/O reduction on
7 * the swap device and, in the case where decompressing from RAM is faster
8 * than reading from the swap device, can also improve workload performance.
9 *
10 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21*/
22
23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24
25#include <linux/module.h>
26#include <linux/cpu.h>
27#include <linux/highmem.h>
28#include <linux/slab.h>
29#include <linux/spinlock.h>
30#include <linux/types.h>
31#include <linux/atomic.h>
32#include <linux/frontswap.h>
33#include <linux/rbtree.h>
34#include <linux/swap.h>
35#include <linux/crypto.h>
36#include <linux/mempool.h>
37#include <linux/zpool.h>
38
39#include <linux/mm_types.h>
40#include <linux/page-flags.h>
41#include <linux/swapops.h>
42#include <linux/writeback.h>
43#include <linux/pagemap.h>
44
45/*********************************
46* statistics
47**********************************/
48/* Total bytes used by the compressed storage */
49static u64 zswap_pool_total_size;
50/* Number of memory pages used by the compressed pool */
51u64 zswap_pool_pages;
52/* The number of compressed pages currently stored in zswap */
53atomic_t zswap_stored_pages = ATOMIC_INIT(0);
54
55/*
56 * The statistics below are not protected from concurrent access for
57 * performance reasons, so they may not be 100% accurate. However,
58 * they do provide useful information on roughly how many times a
59 * certain event is occurring.
60*/
61
62/* Pool limit was hit (see zswap_max_pool_percent) */
63static u64 zswap_pool_limit_hit;
64/* Pages written back when pool limit was reached */
65static u64 zswap_written_back_pages;
66/* Store failed due to a reclaim failure after pool limit was reached */
67static u64 zswap_reject_reclaim_fail;
68/* Compressed page was too big for the allocator to (optimally) store */
69static u64 zswap_reject_compress_poor;
70/* Store failed because underlying allocator could not get memory */
71static u64 zswap_reject_alloc_fail;
72/* Store failed because the entry metadata could not be allocated (rare) */
73static u64 zswap_reject_kmemcache_fail;
74/* Duplicate store was encountered (rare) */
75static u64 zswap_duplicate_entry;
76
77/* The number of zero pages currently stored in zswap */
78static atomic_t zswap_zero_pages = ATOMIC_INIT(0);
79
80/*********************************
81* tunables
82**********************************/
83/* Enable/disable zswap (enabled by default, fixed at boot for now) */
84static bool zswap_enabled __read_mostly = true;
85module_param_named(enabled, zswap_enabled, bool, 0444);
86
87/* Compressor to be used by zswap (fixed at boot for now) */
88#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
89static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
90module_param_named(compressor, zswap_compressor, charp, 0444);
91
92/* The maximum percentage of memory that the compressed pool can occupy */
93static unsigned int zswap_max_pool_percent = 50;
94module_param_named(max_pool_percent,
95 zswap_max_pool_percent, uint, 0644);
96
97/* Compressed storage to use */
98#define ZSWAP_ZPOOL_DEFAULT "zsmalloc"
99static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
100module_param_named(zpool, zswap_zpool_type, charp, 0444);
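/*
 * All of the above are ordinary module parameters.  With zswap built into
 * the kernel they can be set on the kernel command line (e.g.
 * zswap.max_pool_percent=20) and read back under
 * /sys/module/zswap/parameters/; the 0444 parameters are read-only at
 * runtime, while max_pool_percent (0644) may also be changed later.
 */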
101
102/* the zpool is shared by the entire zswap backend */
103static struct zpool *zswap_pool;
104
105/*********************************
106* compression functions
107**********************************/
108/* per-cpu compression transforms */
109static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
110
111enum comp_op {
112 ZSWAP_COMPOP_COMPRESS,
113 ZSWAP_COMPOP_DECOMPRESS
114};
115
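/*
 * Run a compression or decompression operation using this CPU's transform.
 * get_cpu()/put_cpu() disable preemption so the per-cpu tfm cannot be
 * switched out from under us while the operation runs.
 */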
116static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
117 u8 *dst, unsigned int *dlen)
118{
119 struct crypto_comp *tfm;
120 int ret;
121
122 tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
123 switch (op) {
124 case ZSWAP_COMPOP_COMPRESS:
125 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
126 break;
127 case ZSWAP_COMPOP_DECOMPRESS:
128 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
129 break;
130 default:
131 ret = -EINVAL;
132 }
133
134 put_cpu();
135 return ret;
136}
137
138static int __init zswap_comp_init(void)
139{
140 if (!crypto_has_comp(zswap_compressor, 0, 0)) {
141 pr_info("%s compressor not available\n", zswap_compressor);
142 /* fall back to default compressor */
143 zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
144 if (!crypto_has_comp(zswap_compressor, 0, 0))
145 /* can't even load the default compressor */
146 return -ENODEV;
147 }
148 pr_info("using %s compressor\n", zswap_compressor);
149
150 /* alloc percpu transforms */
151 zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
152 if (!zswap_comp_pcpu_tfms)
153 return -ENOMEM;
154 return 0;
155}
156
157static void zswap_comp_exit(void)
158{
159 /* free percpu transforms */
160 if (zswap_comp_pcpu_tfms)
161 free_percpu(zswap_comp_pcpu_tfms);
162}
163
164/*********************************
165* data structures
166**********************************/
167/*
168 * struct zswap_entry
169 *
170 * This structure contains the metadata for tracking a single compressed
171 * page within zswap.
172 *
173 * rbnode - links the entry into the red-black tree for the appropriate swap type
174 * refcount - the number of outstanding references to the entry. This is needed
175 *            to protect against premature freeing of the entry by
176 *            concurrent calls to load, invalidate, and writeback. The lock
177 * for the zswap_tree structure that contains the entry must
178 * be held while changing the refcount. Since the lock must
179 * be held, there is no reason to also make refcount atomic.
180 * offset - the swap offset for the entry. Index into the red-black tree.
181 * handle - zpool allocation handle that stores the compressed page data
182 * length - the length in bytes of the compressed page data. Needed during
183 * decompression
184 * zero_flag - set when the page backing this zswap_entry is zero-filled.
185 *             Such pages are not compressed or stored at all; on load the
186 *             page is simply memset to 0.
187 */
188struct zswap_entry {
189 struct rb_node rbnode;
190 pgoff_t offset;
191 int refcount;
192 unsigned int length;
193 unsigned long handle;
194 unsigned char zero_flag;
195};
196
197struct zswap_header {
198 swp_entry_t swpentry;
199};
200
201/*
202 * The tree lock in the zswap_tree struct protects a few things:
203 * - the rbtree
204 * - the refcount field of each entry in the tree
205 */
206struct zswap_tree {
207 struct rb_root rbroot;
208 spinlock_t lock;
209};
210
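/* one tree per swap device, allocated lazily by zswap_frontswap_init() */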
211static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
212
213/*********************************
214* zswap entry functions
215**********************************/
216static struct kmem_cache *zswap_entry_cache;
217
218static int __init zswap_entry_cache_create(void)
219{
220 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
221 return zswap_entry_cache == NULL;
222}
223
224static void __init zswap_entry_cache_destroy(void)
225{
226 kmem_cache_destroy(zswap_entry_cache);
227}
228
229static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
230{
231 struct zswap_entry *entry;
232 entry = kmem_cache_alloc(zswap_entry_cache, gfp);
233 if (!entry)
234 return NULL;
235 entry->refcount = 1;
236 entry->zero_flag = 0;
237 RB_CLEAR_NODE(&entry->rbnode);
238 return entry;
239}
240
241static void zswap_entry_cache_free(struct zswap_entry *entry)
242{
243 kmem_cache_free(zswap_entry_cache, entry);
244}
245
246/*********************************
247* rbtree functions
248**********************************/
249static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
250{
251 struct rb_node *node = root->rb_node;
252 struct zswap_entry *entry;
253
254 while (node) {
255 entry = rb_entry(node, struct zswap_entry, rbnode);
256 if (entry->offset > offset)
257 node = node->rb_left;
258 else if (entry->offset < offset)
259 node = node->rb_right;
260 else
261 return entry;
262 }
263 return NULL;
264}
265
266/*
267 * In the case that an entry with the same offset is found, a pointer to
268 * the existing entry is stored in dupentry and the function returns -EEXIST
269 */
270static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
271 struct zswap_entry **dupentry)
272{
273 struct rb_node **link = &root->rb_node, *parent = NULL;
274 struct zswap_entry *myentry;
275
276 while (*link) {
277 parent = *link;
278 myentry = rb_entry(parent, struct zswap_entry, rbnode);
279 if (myentry->offset > entry->offset)
280 link = &(*link)->rb_left;
281 else if (myentry->offset < entry->offset)
282 link = &(*link)->rb_right;
283 else {
284 *dupentry = myentry;
285 return -EEXIST;
286 }
287 }
288 rb_link_node(&entry->rbnode, parent, link);
289 rb_insert_color(&entry->rbnode, root);
290 return 0;
291}
292
293static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
294{
295 if (!RB_EMPTY_NODE(&entry->rbnode)) {
296 rb_erase(&entry->rbnode, root);
297 RB_CLEAR_NODE(&entry->rbnode);
298 }
299}
300
301/*
302 * Carries out the common pattern of freeing an entry's zpool allocation,
303 * freeing the entry itself, and decrementing the number of stored pages.
304 */
305static void zswap_free_entry(struct zswap_entry *entry)
306{
307 if (entry->zero_flag == 1) {
308 atomic_dec(&zswap_zero_pages);
309 goto zeropage_out;
310 }
311 zpool_free(zswap_pool, entry->handle);
312zeropage_out:
313 zswap_entry_cache_free(entry);
314 atomic_dec(&zswap_stored_pages);
315 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
316 zswap_pool_pages = zswap_pool_total_size >> PAGE_SHIFT;
317}
318
319/* caller must hold the tree lock */
320static void zswap_entry_get(struct zswap_entry *entry)
321{
322 entry->refcount++;
323}
324
325/* caller must hold the tree lock
326 * remove the entry from the tree and free it if nobody references it
327 */
328static void zswap_entry_put(struct zswap_tree *tree,
329 struct zswap_entry *entry)
330{
331 int refcount = --entry->refcount;
332
333 BUG_ON(refcount < 0);
334 if (refcount == 0) {
335 zswap_rb_erase(&tree->rbroot, entry);
336 zswap_free_entry(entry);
337 }
338}
339
340/* caller must hold the tree lock */
341static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
342 pgoff_t offset)
343{
344 struct zswap_entry *entry = NULL;
345
346 entry = zswap_rb_search(root, offset);
347 if (entry)
348 zswap_entry_get(entry);
349
350 return entry;
351}
352
353/*********************************
354* per-cpu code
355**********************************/
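/*
 * Per-cpu destination buffer for compression.  It is sized at two pages
 * (see the kmalloc_node() below) so the compressor has room even when the
 * output for a poorly compressible page exceeds PAGE_SIZE.
 */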
356static DEFINE_PER_CPU(u8 *, zswap_dstmem);
357
358static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
359{
360 struct crypto_comp *tfm;
361 u8 *dst;
362
363 switch (action) {
364 case CPU_UP_PREPARE:
365 tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
366 if (IS_ERR(tfm)) {
367 pr_err("can't allocate compressor transform\n");
368 return NOTIFY_BAD;
369 }
370 *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
371 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
372 if (!dst) {
373 pr_err("can't allocate compressor buffer\n");
374 crypto_free_comp(tfm);
375 *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
376 return NOTIFY_BAD;
377 }
378 per_cpu(zswap_dstmem, cpu) = dst;
379 break;
380 case CPU_DEAD:
381 case CPU_UP_CANCELED:
382 tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
383 if (tfm) {
384 crypto_free_comp(tfm);
385 *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
386 }
387 dst = per_cpu(zswap_dstmem, cpu);
388 kfree(dst);
389 per_cpu(zswap_dstmem, cpu) = NULL;
390 break;
391 default:
392 break;
393 }
394 return NOTIFY_OK;
395}
396
397static int zswap_cpu_notifier(struct notifier_block *nb,
398 unsigned long action, void *pcpu)
399{
400 unsigned long cpu = (unsigned long)pcpu;
401 return __zswap_cpu_notifier(action, cpu);
402}
403
404static struct notifier_block zswap_cpu_notifier_block = {
405 .notifier_call = zswap_cpu_notifier
406};
407
408static int zswap_cpu_init(void)
409{
410 unsigned long cpu;
411
412 cpu_notifier_register_begin();
413 for_each_online_cpu(cpu)
414 if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
415 goto cleanup;
416 __register_cpu_notifier(&zswap_cpu_notifier_block);
417 cpu_notifier_register_done();
418 return 0;
419
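	/* roll back: free any transforms and buffers already set up above */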
420cleanup:
421 for_each_online_cpu(cpu)
422 __zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
423 cpu_notifier_register_done();
424 return -ENOMEM;
425}
426
427/*********************************
428* helpers
429**********************************/
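/* returns true once the compressed pool exceeds max_pool_percent of RAM */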
430static bool zswap_is_full(void)
431{
432 return totalram_pages * zswap_max_pool_percent / 100 <
433 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
434}
435
436/*********************************
437* writeback code
438**********************************/
439/* return values for zswap_get_swap_cache_page */
440enum zswap_get_swap_ret {
441 ZSWAP_SWAPCACHE_NEW,
442 ZSWAP_SWAPCACHE_EXIST,
443 ZSWAP_SWAPCACHE_FAIL,
444};
445
446/*
447 * zswap_get_swap_cache_page
448 *
449 * This is an adaptation of read_swap_cache_async()
450 *
451 * This function tries to find a page with the given swap entry
452 * in the swapper_space address space (the swap cache). If the page
453 * is found, it is returned in retpage. Otherwise, a page is allocated,
454 * added to the swap cache, and returned in retpage.
455 *
456 * On success, the swap cache page is returned in retpage.
457 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
458 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
459 * the new page is added to swapcache and locked
460 * Returns ZSWAP_SWAPCACHE_FAIL on error
461 */
462static int zswap_get_swap_cache_page(swp_entry_t entry,
463 struct page **retpage)
464{
465 struct page *found_page, *new_page = NULL;
466 struct address_space *swapper_space = swap_address_space(entry);
467 int err;
468
469 *retpage = NULL;
470 do {
471 /*
472 * First check the swap cache. Since this is normally
473 * called after lookup_swap_cache() failed, re-calling
474 * that would confuse statistics.
475 */
476 found_page = find_get_page(swapper_space, entry.val);
477 if (found_page)
478 break;
479
480 /*
481 * Get a new page to read into from swap.
482 */
483 if (!new_page) {
484 new_page = alloc_page(GFP_KERNEL);
485 if (!new_page)
486 break; /* Out of memory */
487 }
488
489 /*
490 * call radix_tree_preload() while we can wait.
491 */
492 err = radix_tree_preload(GFP_KERNEL);
493 if (err)
494 break;
495
496 /*
497 * Swap entry may have been freed since our caller observed it.
498 */
499 err = swapcache_prepare(entry);
500 if (err == -EEXIST) { /* seems racy */
501 radix_tree_preload_end();
502 continue;
503 }
504 if (err) { /* swp entry is obsolete ? */
505 radix_tree_preload_end();
506 break;
507 }
508
509 /* May fail (-ENOMEM) if radix-tree node allocation failed. */
510 __set_page_locked(new_page);
511 SetPageSwapBacked(new_page);
512 err = __add_to_swap_cache(new_page, entry);
513 if (likely(!err)) {
514 radix_tree_preload_end();
515 lru_cache_add_anon(new_page);
516 *retpage = new_page;
517 return ZSWAP_SWAPCACHE_NEW;
518 }
519 radix_tree_preload_end();
520 ClearPageSwapBacked(new_page);
521 __clear_page_locked(new_page);
522 /*
523 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
524 * clear SWAP_HAS_CACHE flag.
525 */
526 swapcache_free(entry, NULL);
527 } while (err != -ENOMEM);
528
529 if (new_page)
530 page_cache_release(new_page);
531 if (!found_page)
532 return ZSWAP_SWAPCACHE_FAIL;
533 *retpage = found_page;
534 return ZSWAP_SWAPCACHE_EXIST;
535}
536
537/*
538 * Attempts to free an entry by adding a page to the swap cache,
539 * decompressing the entry data into the page, and issuing a
540 * bio write to write the page back to the swap device.
541 *
542 * This can be thought of as a "resumed writeback" of the page
543 * to the swap device. We are basically resuming the same swap
544 * writeback path that was intercepted with the frontswap_store()
545 * in the first place. After the page has been decompressed into
546 * the swap cache, the compressed version stored by zswap can be
547 * freed.
548 */
549static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
550{
551 struct zswap_header *zhdr;
552 swp_entry_t swpentry;
553 struct zswap_tree *tree;
554 pgoff_t offset;
555 struct zswap_entry *entry;
556 struct page *page;
557 u8 *src, *dst;
558 unsigned int dlen;
559 int ret;
560 struct writeback_control wbc = {
561 .sync_mode = WB_SYNC_NONE,
562 };
563
564 /* extract swpentry from data */
565 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
566 swpentry = zhdr->swpentry; /* copy it out before unmapping */
567 zpool_unmap_handle(pool, handle);
568 tree = zswap_trees[swp_type(swpentry)];
569 offset = swp_offset(swpentry);
570
571 /* find and ref zswap entry */
572 spin_lock(&tree->lock);
573 entry = zswap_entry_find_get(&tree->rbroot, offset);
574 if (!entry) {
575 /* entry was invalidated */
576 spin_unlock(&tree->lock);
577 return 0;
578 }
579 spin_unlock(&tree->lock);
580 BUG_ON(offset != entry->offset);
581
582 /* try to allocate swap cache page */
583 switch (zswap_get_swap_cache_page(swpentry, &page)) {
584 case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
585 ret = -ENOMEM;
586 goto fail;
587
588 case ZSWAP_SWAPCACHE_EXIST:
589 /* page is already in the swap cache, ignore for now */
590 page_cache_release(page);
591 ret = -EEXIST;
592 goto fail;
593
594 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
595 /* decompress */
596 dlen = PAGE_SIZE;
597 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
598 ZPOOL_MM_RO) + sizeof(struct zswap_header);
599 dst = kmap_atomic(page);
600 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
601 entry->length, dst, &dlen);
602 kunmap_atomic(dst);
603 zpool_unmap_handle(zswap_pool, entry->handle);
604 BUG_ON(ret);
605 BUG_ON(dlen != PAGE_SIZE);
606
607 /* page is up to date */
608 SetPageUptodate(page);
609 }
610
611 /* move it to the tail of the inactive list after end_writeback */
612 SetPageReclaim(page);
613
614 /* start writeback */
615 __swap_writepage(page, &wbc, end_swap_bio_write);
616 page_cache_release(page);
617 zswap_written_back_pages++;
618
619 spin_lock(&tree->lock);
620 /* drop local reference */
621 zswap_entry_put(tree, entry);
622
623 /*
624 * There are two possible situations for the entry here:
625 * (1) refcount is 1 (normal case): the entry is valid and on the tree
626 * (2) refcount is 0: the entry was freed and removed from the tree
627 *     because an invalidate happened during writeback
628 * Search the tree; if the entry is still there, drop the tree's reference.
629 */
630 if (entry == zswap_rb_search(&tree->rbroot, offset))
631 zswap_entry_put(tree, entry);
632 spin_unlock(&tree->lock);
633
634 goto end;
635
636 /*
637 * If we get here because of ZSWAP_SWAPCACHE_EXIST, a load may be
638 * happening concurrently, so it is safe and okay not to free the
639 * entry here.
640 * If the entry does get freed by the following put, it is also
641 * okay to return a non-zero value.
642 */
643fail:
644 spin_lock(&tree->lock);
645 zswap_entry_put(tree, entry);
646 spin_unlock(&tree->lock);
647
648end:
649 return ret;
650}
651
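/* returns 1 if the page contains only zero bytes, scanning a long at a time */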
652static int page_zero_filled(void *ptr)
653{
654 unsigned int pos;
655 unsigned long *page;
656
657 page = (unsigned long *)ptr;
658
659 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
660 if (page[pos])
661 return 0;
662 }
663
664 return 1;
665}
666
667/*********************************
668* frontswap hooks
669**********************************/
670/* attempts to compress and store a single page */
671static int zswap_frontswap_store(unsigned type, pgoff_t offset,
672 struct page *page)
673{
674 struct zswap_tree *tree = zswap_trees[type];
675 struct zswap_entry *entry, *dupentry;
676 int ret;
677 unsigned int dlen = PAGE_SIZE, len;
678 unsigned long handle;
679 char *buf;
680 u8 *src, *dst;
681 struct zswap_header *zhdr;
682
683 if (!tree) {
684 ret = -ENODEV;
685 goto reject;
686 }
687
688 /* if this page got EIO on pageout before, give up immediately */
689 if (PageError(page)) {
690 ret = -ENOMEM;
691 goto reject;
692 }
693
694 /* reclaim space if needed */
695 if (zswap_is_full()) {
696 zswap_pool_limit_hit++;
697 if (zpool_shrink(zswap_pool, 1, NULL)) {
698 zswap_reject_reclaim_fail++;
699 ret = -ENOMEM;
700 goto reject;
701 }
702 }
703
704 /* allocate entry */
705 entry = zswap_entry_cache_alloc(GFP_KERNEL);
706 if (!entry) {
707 zswap_reject_kmemcache_fail++;
708 ret = -ENOMEM;
709 goto reject;
710 }
711
712 /* compress */
713 src = kmap_atomic(page);
714 if (page_zero_filled(src)) {
715 atomic_inc(&zswap_zero_pages);
716 entry->zero_flag = 1;
717 kunmap_atomic(src);
718
719 handle = 0;
720 dlen = PAGE_SIZE;
721 goto zeropage_out;
722 }
723 dst = get_cpu_var(zswap_dstmem);
724
725 ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
726 kunmap_atomic(src);
727 if (ret) {
728 ret = -EINVAL;
729 goto freepage;
730 }
731
732 /* store */
733 len = dlen + sizeof(struct zswap_header);
734 ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
735 &handle);
736 if (ret == -ENOSPC) {
737 zswap_reject_compress_poor++;
738 goto freepage;
739 }
740 if (ret) {
741 zswap_reject_alloc_fail++;
742 goto freepage;
743 }
744 zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
745 zhdr->swpentry = swp_entry(type, offset);
746 buf = (u8 *)(zhdr + 1);
747 memcpy(buf, dst, dlen);
748 zpool_unmap_handle(zswap_pool, handle);
749 put_cpu_var(zswap_dstmem);
750
751zeropage_out:
752 /* populate entry */
753 entry->offset = offset;
754 entry->handle = handle;
755 entry->length = dlen;
756
757 /* map */
758 spin_lock(&tree->lock);
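	/*
	 * If an entry with this offset already exists, erase it and drop
	 * its tree reference so the new entry replaces the stale one.
	 */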
759 do {
760 ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
761 if (ret == -EEXIST) {
762 zswap_duplicate_entry++;
763 /* remove from rbtree */
764 zswap_rb_erase(&tree->rbroot, dupentry);
765 zswap_entry_put(tree, dupentry);
766 }
767 } while (ret == -EEXIST);
768 spin_unlock(&tree->lock);
769
770 /* update stats */
771 atomic_inc(&zswap_stored_pages);
772 zswap_pool_total_size = zpool_get_total_size(zswap_pool);
773 zswap_pool_pages = zswap_pool_total_size >> PAGE_SHIFT;
774
775 return 0;
776
777freepage:
778 put_cpu_var(zswap_dstmem);
779 zswap_entry_cache_free(entry);
780reject:
781 return ret;
782}
783
784/*
785 * Returns 0 if the page was successfully decompressed.
786 * Returns -1 if the entry was not found or on error.
787 */
788static int zswap_frontswap_load(unsigned type, pgoff_t offset,
789 struct page *page)
790{
791 struct zswap_tree *tree = zswap_trees[type];
792 struct zswap_entry *entry;
793 u8 *src, *dst;
794 unsigned int dlen;
795 int ret;
796
797 /* find */
798 spin_lock(&tree->lock);
799 entry = zswap_entry_find_get(&tree->rbroot, offset);
800 if (!entry) {
801 /* entry was written back */
802 spin_unlock(&tree->lock);
803 return -1;
804 }
805 spin_unlock(&tree->lock);
806
807 if (entry->zero_flag == 1) {
808 dst = kmap_atomic(page);
809 memset(dst, 0, PAGE_SIZE);
810 kunmap_atomic(dst);
811 goto zeropage_out;
812 }
813
814 /* decompress */
815 dlen = PAGE_SIZE;
816 src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
817 ZPOOL_MM_RO) + sizeof(struct zswap_header);
818 dst = kmap_atomic(page);
819 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
820 dst, &dlen);
821 kunmap_atomic(dst);
822 zpool_unmap_handle(zswap_pool, entry->handle);
823 BUG_ON(ret);
824
825zeropage_out:
826 spin_lock(&tree->lock);
827 zswap_entry_put(tree, entry);
828 spin_unlock(&tree->lock);
829
830 return 0;
831}
832
833/* frees an entry in zswap */
834static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
835{
836 struct zswap_tree *tree = zswap_trees[type];
837 struct zswap_entry *entry;
838
839 /* find */
840 spin_lock(&tree->lock);
841 entry = zswap_rb_search(&tree->rbroot, offset);
842 if (!entry) {
843 /* entry was written back */
844 spin_unlock(&tree->lock);
845 return;
846 }
847
848 /* remove from rbtree */
849 zswap_rb_erase(&tree->rbroot, entry);
850
851 /* drop the initial reference from entry creation */
852 zswap_entry_put(tree, entry);
853
854 spin_unlock(&tree->lock);
855}
856
857/* frees all zswap entries for the given swap type */
858static void zswap_frontswap_invalidate_area(unsigned type)
859{
860 struct zswap_tree *tree = zswap_trees[type];
861 struct zswap_entry *entry, *n;
862
863 if (!tree)
864 return;
865
866 /* walk the tree and free everything */
867 spin_lock(&tree->lock);
868 rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
869 zswap_free_entry(entry);
870 tree->rbroot = RB_ROOT;
871 spin_unlock(&tree->lock);
872 kfree(tree);
873 zswap_trees[type] = NULL;
874}
875
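/*
 * When the pool is full, zpool_shrink() in zswap_frontswap_store() asks the
 * allocator to evict entries; the allocator calls back into
 * zswap_writeback_entry() through this ops table.
 */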
876static struct zpool_ops zswap_zpool_ops = {
877 .evict = zswap_writeback_entry
878};
879
880static void zswap_frontswap_init(unsigned type)
881{
882 struct zswap_tree *tree;
883
884 tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
885 if (!tree) {
886 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
887 return;
888 }
889
890 tree->rbroot = RB_ROOT;
891 spin_lock_init(&tree->lock);
892 zswap_trees[type] = tree;
893}
894
895static struct frontswap_ops zswap_frontswap_ops = {
896 .store = zswap_frontswap_store,
897 .load = zswap_frontswap_load,
898 .invalidate_page = zswap_frontswap_invalidate_page,
899 .invalidate_area = zswap_frontswap_invalidate_area,
900 .init = zswap_frontswap_init
901};
902
903/*********************************
904* debugfs functions
905**********************************/
906#ifdef CONFIG_DEBUG_FS
907#include <linux/debugfs.h>
908
909static struct dentry *zswap_debugfs_root;
910
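/*
 * Expose the statistics above under <debugfs>/zswap/ (typically
 * /sys/kernel/debug/zswap/) so they can be inspected at runtime.
 */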
911static int __init zswap_debugfs_init(void)
912{
913 if (!debugfs_initialized())
914 return -ENODEV;
915
916 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
917 if (!zswap_debugfs_root)
918 return -ENOMEM;
919
920 debugfs_create_u64("pool_limit_hit", S_IRUGO,
921 zswap_debugfs_root, &zswap_pool_limit_hit);
922 debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
923 zswap_debugfs_root, &zswap_reject_reclaim_fail);
924 debugfs_create_u64("reject_alloc_fail", S_IRUGO,
925 zswap_debugfs_root, &zswap_reject_alloc_fail);
926 debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
927 zswap_debugfs_root, &zswap_reject_kmemcache_fail);
928 debugfs_create_u64("reject_compress_poor", S_IRUGO,
929 zswap_debugfs_root, &zswap_reject_compress_poor);
930 debugfs_create_u64("written_back_pages", S_IRUGO,
931 zswap_debugfs_root, &zswap_written_back_pages);
932 debugfs_create_u64("duplicate_entry", S_IRUGO,
933 zswap_debugfs_root, &zswap_duplicate_entry);
934 debugfs_create_u64("pool_total_size", S_IRUGO,
935 zswap_debugfs_root, &zswap_pool_total_size);
936 debugfs_create_u64("pool_pages", S_IRUGO,
937 zswap_debugfs_root, &zswap_pool_pages);
938 debugfs_create_atomic_t("stored_pages", S_IRUGO,
939 zswap_debugfs_root, &zswap_stored_pages);
940 debugfs_create_atomic_t("zero_pages", S_IRUGO,
941 zswap_debugfs_root, &zswap_zero_pages);
942
943 return 0;
944}
945
946static void __exit zswap_debugfs_exit(void)
947{
948 debugfs_remove_recursive(zswap_debugfs_root);
949}
950#else
951static int __init zswap_debugfs_init(void)
952{
953 return 0;
954}
955
956static void __exit zswap_debugfs_exit(void) { }
957#endif
958
959/*********************************
960* module init and exit
961**********************************/
962static int __init init_zswap(void)
963{
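	/*
	 * Pool page allocations should fail fast and quietly: don't retry
	 * hard, don't warn on failure, and allow highmem pages.
	 */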
964 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_HIGHMEM;
965
966 if (!zswap_enabled)
967 return 0;
968
969 pr_info("loading zswap\n");
970
971 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
972 if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
973 pr_info("%s zpool not available\n", zswap_zpool_type);
974 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
975 zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
976 &zswap_zpool_ops);
977 }
978 if (!zswap_pool) {
979 pr_err("%s zpool not available\n", zswap_zpool_type);
980 pr_err("zpool creation failed\n");
981 goto error;
982 }
983 pr_info("using %s pool\n", zswap_zpool_type);
984
985 if (zswap_entry_cache_create()) {
986 pr_err("entry cache creation failed\n");
987 goto cachefail;
988 }
989 if (zswap_comp_init()) {
990 pr_err("compressor initialization failed\n");
991 goto compfail;
992 }
993 if (zswap_cpu_init()) {
994 pr_err("per-cpu initialization failed\n");
995 goto pcpufail;
996 }
997
998 frontswap_register_ops(&zswap_frontswap_ops);
999 if (zswap_debugfs_init())
1000 pr_warn("debugfs initialization failed\n");
1001 return 0;
1002pcpufail:
1003 zswap_comp_exit();
1004compfail:
1005 zswap_entry_cache_destroy();
1006cachefail:
1007 zpool_destroy_pool(zswap_pool);
1008error:
1009 return -ENOMEM;
1010}
1011/* must be late so crypto has time to come up */
1012late_initcall(init_zswap);
1013
1014MODULE_LICENSE("GPL");
1015MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
1016MODULE_DESCRIPTION("Compressed cache for swap pages");