*
* If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
*/
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
{
- /* called from prune_dcache() and shrink_dcache_parent() */
struct dentry *dentry;
LIST_HEAD(referenced);
LIST_HEAD(tmp);
- int cnt = *count;
relock:
spin_lock(&dcache_lru_lock);
} else {
list_move_tail(&dentry->d_lru, &tmp);
spin_unlock(&dentry->d_lock);
- if (!--cnt)
+ if (!--count)
break;
}
cond_resched_lock(&dcache_lru_lock);
spin_unlock(&dcache_lru_lock);
shrink_dentry_list(&tmp);
-
- *count = cnt;
}
/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try to free
+ * prune_dcache_sb - shrink the dcache
+ * @nr_to_scan: number of entries to try to free
*
- * Shrink the dcache. This is done when we need more memory, or simply when we
- * need to unmount something (at which point we need to unuse all dentries).
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
+ * done when we need more memory an called from the superblock shrinker
+ * function.
*
- * This function may fail to free any resources if all the dentries are in use.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
*/
-static void prune_dcache(int count)
+void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
{
- struct super_block *sb, *p = NULL;
- int w_count;
- int unused = dentry_stat.nr_unused;
- int prune_ratio;
- int pruned;
-
- if (unused == 0 || count == 0)
- return;
- if (count >= unused)
- prune_ratio = 1;
- else
- prune_ratio = unused / count;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (list_empty(&sb->s_instances))
- continue;
- if (sb->s_nr_dentry_unused == 0)
- continue;
- sb->s_count++;
- /* Now, we reclaim unused dentrins with fairness.
- * We reclaim them same percentage from each superblock.
- * We calculate number of dentries to scan on this sb
- * as follows, but the implementation is arranged to avoid
- * overflows:
- * number of dentries to scan on this sb =
- * count * (number of dentries on this sb /
- * number of dentries in the machine)
- */
- spin_unlock(&sb_lock);
- if (prune_ratio != 1)
- w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
- else
- w_count = sb->s_nr_dentry_unused;
- pruned = w_count;
- /*
- * We need to be sure this filesystem isn't being unmounted,
- * otherwise we could race with generic_shutdown_super(), and
- * end up holding a reference to an inode while the filesystem
- * is unmounted. So we try to get s_umount, and make sure
- * s_root isn't NULL.
- */
- if (down_read_trylock(&sb->s_umount)) {
- if ((sb->s_root != NULL) &&
- (!list_empty(&sb->s_dentry_lru))) {
- __shrink_dcache_sb(sb, &w_count,
- DCACHE_REFERENCED);
- pruned -= w_count;
- }
- up_read(&sb->s_umount);
- }
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- count -= pruned;
- p = sb;
- /* more work left to do? */
- if (count <= 0)
- break;
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
+ __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
}
/**
int found;
while ((found = select_parent(parent)) != 0)
- __shrink_dcache_sb(sb, &found, 0);
+ __shrink_dcache_sb(sb, found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);
-/*
- * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
- *
- * We need to avoid reentering the filesystem if the caller is performing a
- * GFP_NOFS allocation attempt. One example deadlock is:
- *
- * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
- * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
- * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
- *
- * In this case we return -1 to tell the caller that we baled.
- */
-static int shrink_dcache_memory(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int nr = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (nr) {
- if (!(gfp_mask & __GFP_FS))
- return -1;
- prune_dcache(nr);
- }
-
- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker dcache_shrinker = {
- .shrink = shrink_dcache_memory,
- .seeks = DEFAULT_SEEKS,
-};
-
/**
* __d_alloc - allocate a dcache entry
* @sb: filesystem it will belong to
*/
dentry_cache = KMEM_CACHE(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-
- register_shrinker(&dcache_shrinker);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
*
* We don't actually need it to protect anything in the umount path,
* but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
+ * prune_icache_sb took off the LRU list has been fully torn down by the
* time we are past evict_inodes.
*/
static DECLARE_RWSEM(iprune_sem);
dispose_list(&dispose);
/*
- * Cycle through iprune_sem to make sure any inode that prune_icache
+ * Cycle through iprune_sem to make sure any inode that prune_icache_sb
* moved off the list before we took the lock has been fully torn
* down.
*/
}
/*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside sb->s_inode_lru_lock by
- * dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. If the inode has metadata buffers attached to
* LRU does not have strict ordering. Hence we don't want to reclaim inodes
* with this flag set because they are the inodes that are out of order.
*/
-static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
{
LIST_HEAD(freeable);
int nr_scanned;
unsigned long reap = 0;
+ down_read(&iprune_sem);
spin_lock(&sb->s_inode_lru_lock);
- for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
+ for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
if (list_empty(&sb->s_inode_lru))
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&sb->s_inode_lru_lock);
- *nr_to_scan = nr_scanned;
dispose_list(&freeable);
-}
-
-static void prune_icache(int count)
-{
- struct super_block *sb, *p = NULL;
- int w_count;
- int unused = inodes_stat.nr_unused;
- int prune_ratio;
- int pruned;
-
- if (unused == 0 || count == 0)
- return;
- down_read(&iprune_sem);
- if (count >= unused)
- prune_ratio = 1;
- else
- prune_ratio = unused / count;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (list_empty(&sb->s_instances))
- continue;
- if (sb->s_nr_inodes_unused == 0)
- continue;
- sb->s_count++;
- /* Now, we reclaim unused dentrins with fairness.
- * We reclaim them same percentage from each superblock.
- * We calculate number of dentries to scan on this sb
- * as follows, but the implementation is arranged to avoid
- * overflows:
- * number of dentries to scan on this sb =
- * count * (number of dentries on this sb /
- * number of dentries in the machine)
- */
- spin_unlock(&sb_lock);
- if (prune_ratio != 1)
- w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
- else
- w_count = sb->s_nr_inodes_unused;
- pruned = w_count;
- /*
- * We need to be sure this filesystem isn't being unmounted,
- * otherwise we could race with generic_shutdown_super(), and
- * end up holding a reference to an inode while the filesystem
- * is unmounted. So we try to get s_umount, and make sure
- * s_root isn't NULL.
- */
- if (down_read_trylock(&sb->s_umount)) {
- if ((sb->s_root != NULL) &&
- (!list_empty(&sb->s_dentry_lru))) {
- shrink_icache_sb(sb, &w_count);
- pruned -= w_count;
- }
- up_read(&sb->s_umount);
- }
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- count -= pruned;
- p = sb;
- /* more work left to do? */
- if (count <= 0)
- break;
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
up_read(&iprune_sem);
}
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int nr = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (nr) {
- /*
- * Nasty deadlock avoidance. We may hold various FS locks,
- * and we don't want to recurse into the FS that called us
- * in clear_inode() and friends..
- */
- if (!(gfp_mask & __GFP_FS))
- return -1;
- prune_icache(nr);
- }
- return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker icache_shrinker = {
- .shrink = shrink_icache_memory,
- .seeks = DEFAULT_SEEKS,
-};
-
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
init_once);
- register_shrinker(&icache_shrinker);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);
+/*
+ * One thing we have to be careful of with a per-sb shrinker is that we don't
+ * drop the last active reference to the superblock from within the shrinker.
+ * If that happens we could trigger unregistering the shrinker from within the
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
+ * take a passive reference to the superblock to avoid this from occurring.
+ */
+static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct super_block *sb;
+ int count;
+
+ sb = container_of(shrink, struct super_block, s_shrink);
+
+ /*
+ * Deadlock avoidance. We may hold various FS locks, and we don't want
+ * to recurse into the FS that called us in clear_inode() and friends..
+ */
+ if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
+ return -1;
+
+ if (!grab_super_passive(sb))
+ return -1;
+
+ if (sc->nr_to_scan) {
+ /* proportion the scan between the two caches */
+ int total;
+
+ total = sb->s_nr_dentry_unused + sb->s_nr_inodes_unused + 1;
+ count = (sc->nr_to_scan * sb->s_nr_dentry_unused) / total;
+
+ /* prune dcache first as icache is pinned by it */
+ prune_dcache_sb(sb, count);
+ prune_icache_sb(sb, sc->nr_to_scan - count);
+ }
+
+ count = ((sb->s_nr_dentry_unused + sb->s_nr_inodes_unused) / 100)
+ * sysctl_vfs_cache_pressure;
+ drop_super(sb);
+ return count;
+}
+
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
s->s_op = &default_op;
s->s_time_gran = 1000000000;
s->cleancache_poolid = -1;
+
+ s->s_shrink.seeks = DEFAULT_SEEKS;
+ s->s_shrink.shrink = prune_super;
}
out:
return s;
if (atomic_dec_and_test(&s->s_active)) {
cleancache_flush_fs(s);
fs->kill_sb(s);
+
+ /* caches are now gone, we can safely kill the shrinker now */
+ unregister_shrinker(&s->s_shrink);
+
/*
* We need to call rcu_barrier so all the delayed rcu free
* inodes are flushed before we release the fs module.
{
const struct super_operations *sop = sb->s_op;
-
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
list_add(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
+ register_shrinker(&s->s_shrink);
return s;
}
#include <linux/semaphore.h>
#include <linux/fiemap.h>
#include <linux/rculist_bl.h>
+#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <asm/byteorder.h>
* Saved pool identifier for cleancache (-1 means none)
*/
int cleancache_poolid;
+
+ struct shrinker s_shrink; /* per-sb shrinker handle */
};
+/* superblock cache pruning functions */
+extern void prune_icache_sb(struct super_block *sb, int nr_to_scan);
+extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan);
+
extern struct timespec current_fs_time(struct super_block *sb);
/*
#include <linux/range.h>
#include <linux/pfn.h>
#include <linux/bit_spinlock.h>
+#include <linux/shrinker.h>
struct mempolicy;
struct anon_vma;
}
#endif
-/*
- * This struct is used to pass information from page reclaim to the shrinkers.
- * We consolidate the values for easier extention later.
- */
-struct shrink_control {
- gfp_t gfp_mask;
-
- /* How many slab objects shrinker() should scan and try to reclaim */
- unsigned long nr_to_scan;
-};
-
-/*
- * A callback you can register to apply pressure to ageable caches.
- *
- * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
- * and a 'gfpmask'. It should look through the least-recently-used
- * 'nr_to_scan' entries and attempt to free them up. It should return
- * the number of objects which remain in the cache. If it returns -1, it means
- * it cannot do any scanning at this time (eg. there is a risk of deadlock).
- *
- * The 'gfpmask' refers to the allocation we are currently trying to
- * fulfil.
- *
- * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
- * querying the cache size, so a fastpath for that case is appropriate.
- */
-struct shrinker {
- int (*shrink)(struct shrinker *, struct shrink_control *sc);
- int seeks; /* seeks to recreate an obj */
- long batch; /* reclaim batch size, 0 = default */
-
- /* These are for internal use */
- struct list_head list;
- long nr; /* objs pending delete */
-};
-#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
-extern void register_shrinker(struct shrinker *);
-extern void unregister_shrinker(struct shrinker *);
-
int vma_wants_writenotify(struct vm_area_struct *vma);
extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
--- /dev/null
+#ifndef _LINUX_SHRINKER_H
+#define _LINUX_SHRINKER_H
+
+/*
+ * This struct is used to pass information from page reclaim to the shrinkers.
+ * We consolidate the values for easier extention later.
+ */
+struct shrink_control {
+ gfp_t gfp_mask;
+
+ /* How many slab objects shrinker() should scan and try to reclaim */
+ unsigned long nr_to_scan;
+};
+
+/*
+ * A callback you can register to apply pressure to ageable caches.
+ *
+ * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
+ * and a 'gfpmask'. It should look through the least-recently-used
+ * 'nr_to_scan' entries and attempt to free them up. It should return
+ * the number of objects which remain in the cache. If it returns -1, it means
+ * it cannot do any scanning at this time (eg. there is a risk of deadlock).
+ *
+ * The 'gfpmask' refers to the allocation we are currently trying to
+ * fulfil.
+ *
+ * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
+ * querying the cache size, so a fastpath for that case is appropriate.
+ */
+struct shrinker {
+ int (*shrink)(struct shrinker *, struct shrink_control *sc);
+ int seeks; /* seeks to recreate an obj */
+ long batch; /* reclaim batch size, 0 = default */
+
+ /* These are for internal use */
+ struct list_head list;
+ long nr; /* objs pending delete */
+};
+#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
+extern void register_shrinker(struct shrinker *);
+extern void unregister_shrinker(struct shrinker *);
+#endif