fs: inode split IO and LRU lists
author Nick Piggin <npiggin@suse.de>
Thu, 21 Oct 2010 00:49:30 +0000 (11:49 +1100)
committer Al Viro <viro@zeniv.linux.org.uk>
Tue, 26 Oct 2010 01:26:15 +0000 (21:26 -0400)
The use of the same inode list structure (inode->i_list) for two
different list constructs with different lifecycles and purposes
makes it impossible to separate the locking of the different
operations. Therefore, to enable the separation of the locking of
the writeback and reclaim lists, split inode->i_list into two
separate lists dedicated to their specific tracking functions:
inode->i_wb_list for the backing device writeback (IO) lists and
inode->i_lru for the inode LRU (reclaim) list.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/block_dev.c
fs/fs-writeback.c
fs/inode.c
include/linux/fs.h
mm/backing-dev.c

index 4e847e53051f2fe000d9af44773886660e7e3a3c..dea3b628a6ce9a9058c24d2b13c364b7c6adb3c3 100644 (file)
@@ -59,7 +59,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
        spin_lock(&inode_lock);
        inode->i_data.backing_dev_info = dst;
        if (inode->i_state & I_DIRTY)
-               list_move(&inode->i_list, &dst->wb.b_dirty);
+               list_move(&inode->i_wb_list, &dst->wb.b_dirty);
        spin_unlock(&inode_lock);
 }
 
index e8f65290e83666bd1a32c49880d502a698a22904..7a24cc957f05a873aa050f631bfa1c801616bb39 100644 (file)
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
        return sb->s_bdi;
 }
 
+static inline struct inode *wb_inode(struct list_head *head)
+{
+       return list_entry(head, struct inode, i_wb_list);
+}
+
 static void bdi_queue_work(struct backing_dev_info *bdi,
                struct wb_writeback_work *work)
 {
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
        if (!list_empty(&wb->b_dirty)) {
                struct inode *tail;
 
-               tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+               tail = wb_inode(wb->b_dirty.next);
                if (time_before(inode->dirtied_when, tail->dirtied_when))
                        inode->dirtied_when = jiffies;
        }
-       list_move(&inode->i_list, &wb->b_dirty);
+       list_move(&inode->i_wb_list, &wb->b_dirty);
 }
 
 /*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
 {
        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
-       list_move(&inode->i_list, &wb->b_more_io);
+       list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
        int do_sb_sort = 0;
 
        while (!list_empty(delaying_queue)) {
-               inode = list_entry(delaying_queue->prev, struct inode, i_list);
+               inode = wb_inode(delaying_queue->prev);
                if (older_than_this &&
                    inode_dirtied_after(inode, *older_than_this))
                        break;
                if (sb && sb != inode->i_sb)
                        do_sb_sort = 1;
                sb = inode->i_sb;
-               list_move(&inode->i_list, &tmp);
+               list_move(&inode->i_wb_list, &tmp);
        }
 
        /* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 
        /* Move inodes from one superblock together */
        while (!list_empty(&tmp)) {
-               inode = list_entry(tmp.prev, struct inode, i_list);
-               sb = inode->i_sb;
+               sb = wb_inode(tmp.prev)->i_sb;
                list_for_each_prev_safe(pos, node, &tmp) {
-                       inode = list_entry(pos, struct inode, i_list);
+                       inode = wb_inode(pos);
                        if (inode->i_sb == sb)
-                               list_move(&inode->i_list, dispatch_queue);
+                               list_move(&inode->i_wb_list, dispatch_queue);
                }
        }
 }
@@ -414,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                         * a reference to the inode or it's on it's way out.
                         * No need to add it back to the LRU.
                         */
-                       list_del_init(&inode->i_list);
+                       list_del_init(&inode->i_wb_list);
                }
        }
        inode_sync_complete(inode);
@@ -462,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 {
        while (!list_empty(&wb->b_io)) {
                long pages_skipped;
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                                struct inode, i_list);
+               struct inode *inode = wb_inode(wb->b_io.prev);
 
                if (inode->i_sb != sb) {
                        if (only_this_sb) {
@@ -533,8 +536,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
                queue_io(wb, wbc->older_than_this);
 
        while (!list_empty(&wb->b_io)) {
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                                struct inode, i_list);
+               struct inode *inode = wb_inode(wb->b_io.prev);
                struct super_block *sb = inode->i_sb;
 
                if (!pin_sb_for_writeback(sb)) {
@@ -672,8 +674,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                 */
                spin_lock(&inode_lock);
                if (!list_empty(&wb->b_more_io))  {
-                       inode = list_entry(wb->b_more_io.prev,
-                                               struct inode, i_list);
+                       inode = wb_inode(wb->b_more_io.prev);
                        trace_wbc_writeback_wait(&wbc, wb->bdi);
                        inode_wait_for_writeback(inode);
                }
@@ -987,7 +988,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }
 
                        inode->dirtied_when = jiffies;
-                       list_move(&inode->i_list, &bdi->wb.b_dirty);
+                       list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
                }
        }
 out:
index 4bedac32154fcb2a8632e710f615ef38a3c998f5..09e2d7a5f1d2f0b22e21d7e9ef63f7a947d1b049 100644 (file)
@@ -71,7 +71,7 @@ static unsigned int i_hash_shift __read_mostly;
  * allowing for low-overhead inode sync() operations.
  */
 
-static LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
 static struct hlist_head *inode_hashtable __read_mostly;
 
 /*
@@ -271,6 +271,7 @@ EXPORT_SYMBOL(__destroy_inode);
 
 static void destroy_inode(struct inode *inode)
 {
+       BUG_ON(!list_empty(&inode->i_lru));
        __destroy_inode(inode);
        if (inode->i_sb->s_op->destroy_inode)
                inode->i_sb->s_op->destroy_inode(inode);
@@ -289,7 +290,8 @@ void inode_init_once(struct inode *inode)
        INIT_HLIST_NODE(&inode->i_hash);
        INIT_LIST_HEAD(&inode->i_dentry);
        INIT_LIST_HEAD(&inode->i_devices);
-       INIT_LIST_HEAD(&inode->i_list);
+       INIT_LIST_HEAD(&inode->i_wb_list);
+       INIT_LIST_HEAD(&inode->i_lru);
        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
        spin_lock_init(&inode->i_data.tree_lock);
        spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -330,16 +332,16 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
-       if (list_empty(&inode->i_list)) {
-               list_add(&inode->i_list, &inode_unused);
+       if (list_empty(&inode->i_lru)) {
+               list_add(&inode->i_lru, &inode_lru);
                percpu_counter_inc(&nr_inodes_unused);
        }
 }
 
 static void inode_lru_list_del(struct inode *inode)
 {
-       if (!list_empty(&inode->i_list)) {
-               list_del_init(&inode->i_list);
+       if (!list_empty(&inode->i_lru)) {
+               list_del_init(&inode->i_lru);
                percpu_counter_dec(&nr_inodes_unused);
        }
 }
@@ -460,8 +462,8 @@ static void dispose_list(struct list_head *head)
        while (!list_empty(head)) {
                struct inode *inode;
 
-               inode = list_first_entry(head, struct inode, i_list);
-               list_del_init(&inode->i_list);
+               inode = list_first_entry(head, struct inode, i_lru);
+               list_del_init(&inode->i_lru);
 
                evict(inode);
 
@@ -507,8 +509,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
                        continue;
                }
 
-               list_move(&inode->i_list, dispose);
                inode->i_state |= I_FREEING;
+
+               /*
+                * Move the inode off the IO lists and LRU once I_FREEING is
+                * set so that it won't get moved back on there if it is dirty.
+                */
+               list_move(&inode->i_lru, dispose);
+               list_del_init(&inode->i_wb_list);
                if (!(inode->i_state & (I_DIRTY | I_SYNC)))
                        percpu_counter_dec(&nr_inodes_unused);
        }
@@ -580,10 +588,10 @@ static void prune_icache(int nr_to_scan)
        for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
                struct inode *inode;
 
-               if (list_empty(&inode_unused))
+               if (list_empty(&inode_lru))
                        break;
 
-               inode = list_entry(inode_unused.prev, struct inode, i_list);
+               inode = list_entry(inode_lru.prev, struct inode, i_lru);
 
                /*
                 * Referenced or dirty inodes are still in use. Give them
@@ -591,14 +599,14 @@ static void prune_icache(int nr_to_scan)
                 */
                if (atomic_read(&inode->i_count) ||
                    (inode->i_state & ~I_REFERENCED)) {
-                       list_del_init(&inode->i_list);
+                       list_del_init(&inode->i_lru);
                        percpu_counter_dec(&nr_inodes_unused);
                        continue;
                }
 
                /* recently referenced inodes get one more pass */
                if (inode->i_state & I_REFERENCED) {
-                       list_move(&inode->i_list, &inode_unused);
+                       list_move(&inode->i_lru, &inode_lru);
                        inode->i_state &= ~I_REFERENCED;
                        continue;
                }
@@ -611,15 +619,21 @@ static void prune_icache(int nr_to_scan)
                        iput(inode);
                        spin_lock(&inode_lock);
 
-                       if (inode != list_entry(inode_unused.next,
-                                               struct inode, i_list))
+                       if (inode != list_entry(inode_lru.next,
+                                               struct inode, i_lru))
                                continue;       /* wrong inode or list_empty */
                        if (!can_unuse(inode))
                                continue;
                }
-               list_move(&inode->i_list, &freeable);
                WARN_ON(inode->i_state & I_NEW);
                inode->i_state |= I_FREEING;
+
+               /*
+                * Move the inode off the IO lists and LRU once I_FREEING is
+                * set so that it won't get moved back on there if it is dirty.
+                */
+               list_move(&inode->i_lru, &freeable);
+               list_del_init(&inode->i_wb_list);
                percpu_counter_dec(&nr_inodes_unused);
        }
        if (current_is_kswapd())
@@ -1340,15 +1354,16 @@ static void iput_final(struct inode *inode)
                inode->i_state &= ~I_WILL_FREE;
                __remove_inode_hash(inode);
        }
+
        WARN_ON(inode->i_state & I_NEW);
        inode->i_state |= I_FREEING;
 
        /*
-        * After we delete the inode from the LRU here, we avoid moving dirty
-        * inodes back onto the LRU now because I_FREEING is set and hence
-        * writeback_single_inode() won't move the inode around.
+        * Move the inode off the IO lists and LRU once I_FREEING is
+        * set so that it won't get moved back on there if it is dirty.
         */
        inode_lru_list_del(inode);
+       list_del_init(&inode->i_wb_list);
 
        __inode_sb_list_del(inode);
        spin_unlock(&inode_lock);
index d580599448012e69d1956ae14d12a41e30313612..f300a650881814da8f60402d9d933624445c90c5 100644 (file)
@@ -723,7 +723,8 @@ struct posix_acl;
 
 struct inode {
        struct hlist_node       i_hash;
-       struct list_head        i_list;         /* backing dev IO list */
+       struct list_head        i_wb_list;      /* backing dev IO list */
+       struct list_head        i_lru;          /* inode LRU list */
        struct list_head        i_sb_list;
        struct list_head        i_dentry;
        unsigned long           i_ino;
index 65d420499a615bf68a3be8313c3b0d8b1b330178..15d5097de821bbaa3c12ba12e59d32392d6e6a8f 100644 (file)
@@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 
        nr_wb = nr_dirty = nr_io = nr_more_io = 0;
        spin_lock(&inode_lock);
-       list_for_each_entry(inode, &wb->b_dirty, i_list)
+       list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
                nr_dirty++;
-       list_for_each_entry(inode, &wb->b_io, i_list)
+       list_for_each_entry(inode, &wb->b_io, i_wb_list)
                nr_io++;
-       list_for_each_entry(inode, &wb->b_more_io, i_list)
+       list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
                nr_more_io++;
        spin_unlock(&inode_lock);