Btrfs: fix deadlocks with trylock on tree nodes
authorChris Mason <clm@fb.com>
Thu, 19 Jun 2014 21:16:52 +0000 (14:16 -0700)
committerChris Mason <clm@fb.com>
Thu, 19 Jun 2014 21:19:55 +0000 (14:19 -0700)
The Btrfs tree trylock function is poorly named.  It always takes
the spinlock and backs off if the blocking lock is held.  This
can lead to surprising lockups because people expect it to really be a
trylock.

This commit makes it a pure trylock, both for the spinlock and the
blocking lock.  It also reworks the nested lock handling slightly to
avoid taking the read lock while a spinning write lock might be held.

Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/locking.c

index 01277b8f2373c20615fea792d2001b9c30e406d2..5665d2149249d1d83a260c74f21889e1d394a6c3 100644 (file)
@@ -33,14 +33,14 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
  */
 void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (eb->lock_nested) {
-               read_lock(&eb->lock);
-               if (eb->lock_nested && current->pid == eb->lock_owner) {
-                       read_unlock(&eb->lock);
-                       return;
-               }
-               read_unlock(&eb->lock);
-       }
+       /*
+        * no lock is required.  The lock owner may change if
+        * we have a read lock, but it won't change to or away
+        * from us.  If we have the write lock, we are the owner
+        * and it'll never change.
+        */
+       if (eb->lock_nested && current->pid == eb->lock_owner)
+               return;
        if (rw == BTRFS_WRITE_LOCK) {
                if (atomic_read(&eb->blocking_writers) == 0) {
                        WARN_ON(atomic_read(&eb->spinning_writers) != 1);
@@ -65,14 +65,15 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
  */
 void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 {
-       if (eb->lock_nested) {
-               read_lock(&eb->lock);
-               if (eb->lock_nested && current->pid == eb->lock_owner) {
-                       read_unlock(&eb->lock);
-                       return;
-               }
-               read_unlock(&eb->lock);
-       }
+       /*
+        * no lock is required.  The lock owner may change if
+        * we have a read lock, but it won't change to or away
+        * from us.  If we have the write lock, we are the owner
+        * and it'll never change.
+        */
+       if (eb->lock_nested && current->pid == eb->lock_owner)
+               return;
+
        if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
                BUG_ON(atomic_read(&eb->blocking_writers) != 1);
                write_lock(&eb->lock);
@@ -99,6 +100,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
 void btrfs_tree_read_lock(struct extent_buffer *eb)
 {
 again:
+       BUG_ON(!atomic_read(&eb->blocking_writers) &&
+              current->pid == eb->lock_owner);
+
        read_lock(&eb->lock);
        if (atomic_read(&eb->blocking_writers) &&
            current->pid == eb->lock_owner) {
@@ -132,7 +136,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
        if (atomic_read(&eb->blocking_writers))
                return 0;
 
-       read_lock(&eb->lock);
+       if (!read_trylock(&eb->lock))
+               return 0;
+
        if (atomic_read(&eb->blocking_writers)) {
                read_unlock(&eb->lock);
                return 0;
@@ -151,7 +157,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
        if (atomic_read(&eb->blocking_writers) ||
            atomic_read(&eb->blocking_readers))
                return 0;
-       write_lock(&eb->lock);
+
+       if (!write_trylock(&eb->lock))
+               return 0;
+
        if (atomic_read(&eb->blocking_writers) ||
            atomic_read(&eb->blocking_readers)) {
                write_unlock(&eb->lock);
@@ -168,14 +177,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock(struct extent_buffer *eb)
 {
-       if (eb->lock_nested) {
-               read_lock(&eb->lock);
-               if (eb->lock_nested && current->pid == eb->lock_owner) {
-                       eb->lock_nested = 0;
-                       read_unlock(&eb->lock);
-                       return;
-               }
-               read_unlock(&eb->lock);
+       /*
+        * if we're nested, we have the write lock.  No new locking
+        * is needed as long as we are the lock owner.
+        * The write unlock will do a barrier for us, and the lock_nested
+        * field only matters to the lock owner.
+        */
+       if (eb->lock_nested && current->pid == eb->lock_owner) {
+               eb->lock_nested = 0;
+               return;
        }
        btrfs_assert_tree_read_locked(eb);
        WARN_ON(atomic_read(&eb->spinning_readers) == 0);
@@ -189,14 +199,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
  */
 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
 {
-       if (eb->lock_nested) {
-               read_lock(&eb->lock);
-               if (eb->lock_nested && current->pid == eb->lock_owner) {
-                       eb->lock_nested = 0;
-                       read_unlock(&eb->lock);
-                       return;
-               }
-               read_unlock(&eb->lock);
+       /*
+        * if we're nested, we have the write lock.  No new locking
+        * is needed as long as we are the lock owner.
+        * The write unlock will do a barrier for us, and the lock_nested
+        * field only matters to the lock owner.
+        */
+       if (eb->lock_nested && current->pid == eb->lock_owner) {
+               eb->lock_nested = 0;
+               return;
        }
        btrfs_assert_tree_read_locked(eb);
        WARN_ON(atomic_read(&eb->blocking_readers) == 0);
@@ -244,6 +255,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
        BUG_ON(blockers > 1);
 
        btrfs_assert_tree_locked(eb);
+       eb->lock_owner = 0;
        atomic_dec(&eb->write_locks);
 
        if (blockers) {