shrink_dentry_list(): take parent's ->d_lock earlier
authorAl Viro <viro@zeniv.linux.org.uk>
Thu, 29 May 2014 12:54:52 +0000 (08:54 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Fri, 30 May 2014 15:03:21 +0000 (11:03 -0400)
The cause of livelocks there is that we are taking ->d_lock on
dentry and its parent in the wrong order, forcing us to use
trylock on the parent's one.  d_walk() takes them in the right
order, and unfortunately it's not hard to create a situation
when shrink_dentry_list() can't make progress since trylock
keeps failing, and shrink_dcache_parent() or check_submounts_and_drop()
keeps calling d_walk() disrupting the very shrink_dentry_list() it's
waiting for.

Solution is straightforward - if that trylock fails, let's unlock
the dentry itself and take locks in the right order.  We need to
stabilize ->d_parent without holding ->d_lock, but that's doable
using RCU.  And we'd better do that in the very beginning of the
loop in shrink_dentry_list(), since the checks on refcount, etc.
would need to be redone anyway.

That deals with a half of the problem - killing dentries on the
shrink list itself.  Another one (dropping their parents) is
in the next commit.

locking parent is interesting - it would be easy to do rcu_read_lock(),
lock whatever we think is a parent, lock dentry itself and check
if the parent is still the right one.  Except that we need to check
that *before* locking the dentry, or we are risking taking ->d_lock
out of order.  Fortunately, once the D1 is locked, we can check if
D2->d_parent is equal to D1 without the need to lock D2; D2->d_parent
can start or stop pointing to D1 only under D1->d_lock, so taking
D1->d_lock is enough.  In other words, the right solution is
rcu_read_lock/lock what looks like parent right now/check if it's
still our parent/rcu_read_unlock/lock the child.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/dcache.c

index c23f78a9d156cc0c7165e624870d45f178ab800f..d54a99baf4f314d43bf1d79abf814da0ff3bef01 100644 (file)
@@ -530,6 +530,38 @@ failed:
        return dentry; /* try again with same dentry */
 }
 
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+       struct dentry *parent = dentry->d_parent;
+       if (IS_ROOT(dentry))
+               return NULL;
+       if (likely(spin_trylock(&parent->d_lock)))
+               return parent;
+       spin_unlock(&dentry->d_lock);
+       rcu_read_lock();
+again:
+       parent = ACCESS_ONCE(dentry->d_parent);
+       spin_lock(&parent->d_lock);
+       /*
+        * We can't blindly lock dentry until we are sure
+        * that we won't violate the locking order.
+        * Any changes of dentry->d_parent must have
+        * been done with parent->d_lock held, so
+        * spin_lock() above is enough of a barrier
+        * for checking if it's still our child.
+        */
+       if (unlikely(parent != dentry->d_parent)) {
+               spin_unlock(&parent->d_lock);
+               goto again;
+       }
+       rcu_read_unlock();
+       if (parent != dentry)
+               spin_lock(&dentry->d_lock);
+       else
+               parent = NULL;
+       return parent;
+}
+
 /* 
  * This is dput
  *
@@ -804,6 +836,8 @@ static void shrink_dentry_list(struct list_head *list)
                struct inode *inode;
                dentry = list_entry(list->prev, struct dentry, d_lru);
                spin_lock(&dentry->d_lock);
+               parent = lock_parent(dentry);
+
                /*
                 * The dispose list is isolated and dentries are not accounted
                 * to the LRU here, so we can simply remove it from the list
@@ -817,6 +851,8 @@ static void shrink_dentry_list(struct list_head *list)
                 */
                if ((int)dentry->d_lockref.count > 0) {
                        spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
                        continue;
                }
 
@@ -824,6 +860,8 @@ static void shrink_dentry_list(struct list_head *list)
                if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
                        bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
                        spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
                        if (can_free)
                                dentry_free(dentry);
                        continue;
@@ -833,22 +871,13 @@ static void shrink_dentry_list(struct list_head *list)
                if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
                        d_shrink_add(dentry, list);
                        spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
                        continue;
                }
 
-               parent = NULL;
-               if (!IS_ROOT(dentry)) {
-                       parent = dentry->d_parent;
-                       if (unlikely(!spin_trylock(&parent->d_lock))) {
-                               if (inode)
-                                       spin_unlock(&inode->i_lock);
-                               d_shrink_add(dentry, list);
-                               spin_unlock(&dentry->d_lock);
-                               continue;
-                       }
-               }
-
                __dentry_kill(dentry);
+
                /*
                 * We need to prune ancestors too. This is necessary to prevent
                 * quadratic behavior of shrink_dcache_parent(), but is also