btrfs: fix lockups from btrfs_clear_path_blocking
author     Chris Mason <clm@fb.com>
           Wed, 19 Nov 2014 18:25:09 +0000 (10:25 -0800)
committer  Chris Mason <clm@fb.com>
           Wed, 19 Nov 2014 18:34:35 +0000 (10:34 -0800)
The fair reader/writer locks mean that btrfs_clear_path_blocking needs
to strictly follow lock ordering rules even when we already have
blocking locks on a given path.

Before we can clear a blocking lock on the path, we need to make sure
all of the locks have been converted to blocking.  This will remove lock
inversions against anyone spinning in write_lock() against the buffers
we're trying to get read locks on.  These inversions didn't exist before
the fair reader/writer locks, but now we need to be more careful.
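
To see the hazard outside the kernel, consider a small pthreads sketch
(illustrative only; it depends on glibc's writer-preferring rwlock kind,
and none of these names are kernel APIs).  Once a writer is queued on a
fair rwlock, new read attempts block behind it, so re-taking a read lock
while other spinning locks are still held can end up waiting on a writer
that can never make progress:

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t lk;

static void *writer(void *arg)
{
        (void)arg;
        pthread_rwlock_wrlock(&lk);     /* queues behind the held read lock */
        pthread_rwlock_unlock(&lk);
        return NULL;
}

int main(void)
{
        pthread_rwlockattr_t attr;
        pthread_t t;

        pthread_rwlockattr_init(&attr);
        /* glibc-specific: make queued writers block incoming readers */
        pthread_rwlockattr_setkind_np(&attr,
                        PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
        pthread_rwlock_init(&lk, &attr);

        pthread_rwlock_rdlock(&lk);             /* we hold a read lock */
        pthread_create(&t, NULL, writer, NULL);
        sleep(1);                               /* let the writer queue up */

        /* A second read lock would now block behind the queued writer;
         * trylock lets us observe that instead of deadlocking. */
        if (pthread_rwlock_tryrdlock(&lk) != 0)
                printf("second read lock would block behind the writer\n");
        else
                pthread_rwlock_unlock(&lk);

        pthread_rwlock_unlock(&lk);
        pthread_join(t, NULL);
        return 0;
}

Converting every lock on the path to blocking before re-taking any
spinning lock closes that window.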

We papered over this deadlock in the past by changing
btrfs_try_tree_read_lock() to be a true trylock against both the
spinlock and the blocking lock.  This was slower, and not sufficient to
fix all the deadlocks.  This patch adds btrfs_tree_read_lock_atomic(),
which takes the spinlock unconditionally but only trylocks against the
blocking lock: if a blocking writer holds the extent buffer, we return
failure instead of waiting for it.
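
In userspace terms the new primitive behaves like the sketch below, with
pthread_rwlock_t standing in for the eb->lock spinlock and an atomic
counter for eb->blocking_writers (the names are illustrative; the real
implementation is in the locking.c hunk further down):

#include <pthread.h>
#include <stdatomic.h>

struct buf_lock {
        pthread_rwlock_t lock;          /* stands in for eb->lock */
        atomic_int blocking_writers;    /* stands in for eb->blocking_writers */
};

/*
 * Take the "spinning" read lock unconditionally, but only trylock
 * against the blocking lock: if a blocking writer is registered,
 * back off and report failure instead of waiting for it.
 */
int read_lock_atomic(struct buf_lock *b)
{
        if (atomic_load(&b->blocking_writers))
                return 0;

        pthread_rwlock_rdlock(&b->lock);
        if (atomic_load(&b->blocking_writers)) {
                pthread_rwlock_unlock(&b->lock);
                return 0;
        }
        return 1;
}

On success the caller holds a spinning read lock; on failure it holds
nothing and can fall back to the blocking path.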

Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reported-by: Patrick Schmid <schmid@phys.ethz.ch>
cc: stable@vger.kernel.org #v3.15+

fs/btrfs/ctree.c
fs/btrfs/locking.c
fs/btrfs/locking.h

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc6162fb8e899cc51bd3d777fcbddf91589aa6..150822ee0a0b9f9668f071885c4f018b1c9f6bf3 100644
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 {
        int i;
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       /* lockdep really cares that we take all of these spinlocks
-        * in the right order.  If any of the locks in the path are not
-        * currently blocking, it is going to complain.  So, make really
-        * really sure by forcing the path to blocking before we clear
-        * the path blocking.
-        */
        if (held) {
                btrfs_set_lock_blocking_rw(held, held_rw);
                if (held_rw == BTRFS_WRITE_LOCK)
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
                        held_rw = BTRFS_READ_LOCK_BLOCKING;
        }
        btrfs_set_path_blocking(p);
-#endif
 
        for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
                if (p->nodes[i] && p->locks[i]) {
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
                }
        }
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (held)
                btrfs_clear_lock_blocking_rw(held, held_rw);
-#endif
 }
 
 /* this also releases the path */
@@ -2893,7 +2883,7 @@ cow_done:
                                        }
                                        p->locks[level] = BTRFS_WRITE_LOCK;
                                } else {
-                                       err = btrfs_try_tree_read_lock(b);
+                                       err = btrfs_tree_read_lock_atomic(b);
                                        if (!err) {
                                                btrfs_set_path_blocking(p);
                                                btrfs_tree_read_lock(b);
@@ -3025,7 +3015,7 @@ again:
                        }
 
                        level = btrfs_header_level(b);
-                       err = btrfs_try_tree_read_lock(b);
+                       err = btrfs_tree_read_lock_atomic(b);
                        if (!err) {
                                btrfs_set_path_blocking(p);
                                btrfs_tree_read_lock(b);
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d2149249d1d83a260c74f21889e1d394a6c3..f8229ef1b46df098a3b04260c4f943db3e924d49 100644
@@ -127,6 +127,26 @@ again:
        atomic_inc(&eb->spinning_readers);
 }
 
+/*
+ * take a spinning read lock.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
+ */
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
+{
+       if (atomic_read(&eb->blocking_writers))
+               return 0;
+
+       read_lock(&eb->lock);
+       if (atomic_read(&eb->blocking_writers)) {
+               read_unlock(&eb->lock);
+               return 0;
+       }
+       atomic_inc(&eb->read_locks);
+       atomic_inc(&eb->spinning_readers);
+       return 1;
+}
+
 /*
  * returns 1 if we get the read lock and 0 if we don't
  * this won't wait for blocking writers
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
            atomic_read(&eb->blocking_readers))
                return 0;
 
-       if (!write_trylock(&eb->lock))
-               return 0;
-
+       write_lock(&eb->lock);
        if (atomic_read(&eb->blocking_writers) ||
            atomic_read(&eb->blocking_readers)) {
                write_unlock(&eb->lock);
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9a48941891681eef385d10d071f6cbe51b..c44a9d5f5362b0dcb1c525eca16b57aead8a58ae 100644
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
 int btrfs_try_tree_read_lock(struct extent_buffer *eb);
 int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
+
 
 static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
 {
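
For reference, the call sites changed in ctree.c above all share the
same fallback shape.  Continuing the userspace sketch from the commit
message (struct buf_lock and read_lock_atomic() as defined there; still
illustrative, not kernel code):

/* Try the cheap atomic read lock first; only when a blocking writer
 * is in the way do we pay for a full, sleeping acquisition.  In the
 * kernel, btrfs_set_path_blocking(p) runs before the blocking
 * btrfs_tree_read_lock(b), so no spinning locks are held while
 * we wait. */
void read_lock_with_fallback(struct buf_lock *b)
{
        if (!read_lock_atomic(b))
                pthread_rwlock_rdlock(&b->lock);        /* blocking fallback */
}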