Btrfs: fix deadlock in btrfs_commit_transaction
author: Sage Weil <sage@newdream.net>
Fri, 29 Oct 2010 19:37:34 +0000 (15:37 -0400)
committer: Chris Mason <chris.mason@oracle.com>
Fri, 29 Oct 2010 19:37:34 +0000 (15:37 -0400)
We calculate timeout (either 1 or MAX_SCHEDULE_TIMEOUT) at the top of the
loop, based on whether num_writers > 1 or should_grow is set.  Then, much
later, we wait for that timeout if either num_writers > 1 or should_grow
is true.  However, it's possible for a racing process (calling
btrfs_end_transaction()) to decrement num_writers in between, such that we
wait forever (MAX_SCHEDULE_TIMEOUT) instead of for 1 jiffy.

Fix this by deciding how long to wait at the point where we actually wait.
Include an smp_mb() before checking whether the waitqueue is active, to
ensure the num_writers decrement is visible.

Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/transaction.c

index 325d9a5f0128d90573fb41c799765dc6c1c22d61..700dc4b34ada51fc26ec2de74a7b01bf2e3319a0 100644 (file)
@@ -402,6 +402,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;
 
+       smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
@@ -1010,7 +1011,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
        unsigned long joined = 0;
-       unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        DEFINE_WAIT(wait);
@@ -1081,11 +1081,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        snap_pending = 1;
 
                WARN_ON(cur_trans != trans->transaction);
-               if (cur_trans->num_writers > 1)
-                       timeout = MAX_SCHEDULE_TIMEOUT;
-               else if (should_grow)
-                       timeout = 1;
-
                mutex_unlock(&root->fs_info->trans_mutex);
 
                if (flush_on_commit || snap_pending) {
@@ -1107,8 +1102,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
 
                smp_mb();
-               if (cur_trans->num_writers > 1 || should_grow)
-                       schedule_timeout(timeout);
+               if (cur_trans->num_writers > 1)
+                       schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+               else if (should_grow)
+                       schedule_timeout(1);
 
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);