[XFS] Fix race when looking up reclaimable inodes
authorDavid Chinner <david@fromorbit.com>
Thu, 30 Oct 2008 07:32:43 +0000 (18:32 +1100)
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>
Thu, 30 Oct 2008 07:32:43 +0000 (18:32 +1100)
If we get a race looking up a reclaimable inode, we can end up with the
winner proceeding to use the inode before it has been completely
re-initialised. This is a Bad Thing.

Fix the race by checking whether we are still initialising the inod eonce
we have a reference to it, and if so wait for the initialisation to
complete before continuing.

While there, fix a leaked reference count in the same code when
encountering an unlinked inode and we are not doing a lookup for a create
operation.

SGI-PV: 987246

SGI-Modid: xfs-linux-melb:xfs-kern:32429a

Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
fs/xfs/linux-2.6/xfs_linux.h
fs/xfs/xfs_iget.c

index 214717650b236c0e384b79c22a7127499a7ee0fb..77d6ddcaf54735b991c7ea10bedffdfdea4bee95 100644 (file)
@@ -77,6 +77,7 @@
 #include <linux/spinlock.h>
 #include <linux/random.h>
 #include <linux/ctype.h>
+#include <linux/writeback.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
index 837cae781536c4716b92286a008fe70a24e38aa4..bf4dc5eb4cfc77c673c6843ec1ad1c12fb808182 100644 (file)
@@ -52,7 +52,7 @@ xfs_iget_cache_hit(
        int                     lock_flags) __releases(pag->pag_ici_lock)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       int                     error = 0;
+       int                     error = EAGAIN;
 
        /*
         * If INEW is set this inode is being set up
@@ -60,7 +60,6 @@ xfs_iget_cache_hit(
         * Pause and try again.
         */
        if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
-               error = EAGAIN;
                XFS_STATS_INC(xs_ig_frecycle);
                goto out_error;
        }
@@ -73,7 +72,6 @@ xfs_iget_cache_hit(
                 * error immediately so we don't remove it from the reclaim
                 * list and potentially leak the inode.
                 */
-
                if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
                        error = ENOENT;
                        goto out_error;
@@ -91,27 +89,42 @@ xfs_iget_cache_hit(
                        error = ENOMEM;
                        goto out_error;
                }
+
+               /*
+                * We must set the XFS_INEW flag before clearing the
+                * XFS_IRECLAIMABLE flag so that if a racing lookup does
+                * not find the XFS_IRECLAIMABLE above but has the igrab()
+                * below succeed we can safely check XFS_INEW to detect
+                * that this inode is still being initialised.
+                */
                xfs_iflags_set(ip, XFS_INEW);
                xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 
                /* clear the radix tree reclaim flag as well. */
                __xfs_inode_clear_reclaim_tag(mp, pag, ip);
-               read_unlock(&pag->pag_ici_lock);
        } else if (!igrab(VFS_I(ip))) {
                /* If the VFS inode is being torn down, pause and try again. */
-               error = EAGAIN;
                XFS_STATS_INC(xs_ig_frecycle);
                goto out_error;
-       } else {
-               /* we've got a live one */
-               read_unlock(&pag->pag_ici_lock);
+       } else if (xfs_iflags_test(ip, XFS_INEW)) {
+               /*
+                * We are racing with another cache hit that is
+                * currently recycling this inode out of the XFS_IRECLAIMABLE
+                * state. Wait for the initialisation to complete before
+                * continuing.
+                */
+               wait_on_inode(VFS_I(ip));
        }
 
        if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
                error = ENOENT;
-               goto out;
+               iput(VFS_I(ip));
+               goto out_error;
        }
 
+       /* We've got a live one. */
+       read_unlock(&pag->pag_ici_lock);
+
        if (lock_flags != 0)
                xfs_ilock(ip, lock_flags);
 
@@ -122,7 +135,6 @@ xfs_iget_cache_hit(
 
 out_error:
        read_unlock(&pag->pag_ici_lock);
-out:
        return error;
 }