jbd2: Fix I/O hang in jbd2_journal_release_jbd_inode
authorBrian King <brking@linux.vnet.ibm.com>
Thu, 28 Oct 2010 01:25:12 +0000 (21:25 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 28 Oct 2010 01:25:12 +0000 (21:25 -0400)
This fixes a hang seen in jbd2_journal_release_jbd_inode
on a lot of Power 6 systems running with ext4. When we get
in the hung state, all I/O to the disk in question gets blocked
where we stay indefinitely. Looking at the task list, I can see
we are stuck in jbd2_journal_release_jbd_inode waiting on a
wake up. I added some debug code to detect this scenario and
dump additional data if we were stuck in jbd2_journal_release_jbd_inode
for longer than 30 minutes. When it hit, I was able to see that
i_flags was 0, suggesting we missed the wake up.

This patch changes i_flags to be an unsigned long, uses bit operators
to access it, and adds barriers around the accesses. Prior to applying
this patch, we were regularly hitting this hang on numerous systems
in our test environment. After applying the patch, the hangs no longer
occur.

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/jbd2/commit.c
fs/jbd2/journal.c
include/linux/jbd2.h

index 7c068c189d80d713d56705e63c5b5e0bf6982ab9..6494c81e3b0a9cb3d07d2eec52d585e6e3a46117 100644 (file)
@@ -26,7 +26,9 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/bitops.h>
 #include <trace/events/jbd2.h>
+#include <asm/system.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -236,7 +238,7 @@ static int journal_submit_data_buffers(journal_t *journal,
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
                mapping = jinode->i_vfs_inode->i_mapping;
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
                spin_unlock(&journal->j_list_lock);
                /*
                 * submit the inode data buffers. We use writepage
@@ -251,7 +253,8 @@ static int journal_submit_data_buffers(journal_t *journal,
                spin_lock(&journal->j_list_lock);
                J_ASSERT(jinode->i_transaction == commit_transaction);
                commit_transaction->t_flushed_data_blocks = 1;
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+               smp_mb__after_clear_bit();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
        spin_unlock(&journal->j_list_lock);
@@ -272,7 +275,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
        /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
-               jinode->i_flags |= JI_COMMIT_RUNNING;
+               set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
                spin_unlock(&journal->j_list_lock);
                err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
                if (err) {
@@ -288,7 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                                ret = err;
                }
                spin_lock(&journal->j_list_lock);
-               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+               smp_mb__after_clear_bit();
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
 
index 0e8014ea6b94ad8985f1b0b842f2cea550578e67..75e1b5a0bc2defa4aac837354dcb95cd4dedd328 100644 (file)
 #include <linux/log2.h>
 #include <linux/vmalloc.h>
 #include <linux/backing-dev.h>
+#include <linux/bitops.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
+#include <asm/system.h>
 
 EXPORT_SYMBOL(jbd2_journal_extend);
 EXPORT_SYMBOL(jbd2_journal_stop);
@@ -2206,7 +2208,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
 restart:
        spin_lock(&journal->j_list_lock);
        /* Is commit writing out inode - we have to wait */
-       if (jinode->i_flags & JI_COMMIT_RUNNING) {
+       if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
                wait_queue_head_t *wq;
                DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
                wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
index 0b52924a0cb6ac5acda48540cb93d6b45a43beaf..2ae86aa21fcee12e169489a0e1dfdb5821ba48f2 100644 (file)
@@ -395,7 +395,7 @@ struct jbd2_inode {
        struct inode *i_vfs_inode;
 
        /* Flags of inode [j_list_lock] */
-       unsigned int i_flags;
+       unsigned long i_flags;
 };
 
 struct jbd2_revoke_table_s;