xfs: rework the inline directory verifiers
authorDarrick J. Wong <darrick.wong@oracle.com>
Tue, 28 Mar 2017 21:51:10 +0000 (14:51 -0700)
committerDarrick J. Wong <darrick.wong@oracle.com>
Tue, 28 Mar 2017 21:51:10 +0000 (14:51 -0700)
The inline directory verifiers should be called on the inode fork data,
which means after iformat_local on the read side, and prior to
ifork_flush on the write side.  This makes the fork verifier more
consistent with the way buffer verifiers work -- i.e. they will operate
on the memory buffer that the code will be reading and writing directly.

Furthermore, revise the verifier function to return -EFSCORRUPTED so
that we don't flood the logs with corruption messages and assert
notices.  This has been a particular problem with xfs/348, which
triggers the XFS_WANT_CORRUPTED_RETURN assertions, which halts the
kernel when CONFIG_XFS_DEBUG=y.  Disk corruption isn't supposed to do
that, at least not in a verifier.

Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: get the inode d_ops the proper way
v3: describe the bug that this patch fixes; no code changes

fs/xfs/libxfs/xfs_dir2_priv.h
fs/xfs/libxfs/xfs_dir2_sf.c
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_inode_fork.h
fs/xfs/xfs_inode.c

index eb00bc133bca673c556eb85a18385bbc3748dfcf..39f8604f764e13147fd576e95f5ad30f98dc4d1e 100644 (file)
@@ -125,8 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
-               int size);
+extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
index 96b45cd6c63f0686d3c1cce5c41b232f0ab82080..e84af093b2ab99e5d7a75467bc6d7a490682611a 100644 (file)
@@ -632,36 +632,49 @@ xfs_dir2_sf_check(
 /* Verify the consistency of an inline directory. */
 int
 xfs_dir2_sf_verify(
-       struct xfs_mount                *mp,
-       struct xfs_dir2_sf_hdr          *sfp,
-       int                             size)
+       struct xfs_inode                *ip)
 {
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_dir2_sf_hdr          *sfp;
        struct xfs_dir2_sf_entry        *sfep;
        struct xfs_dir2_sf_entry        *next_sfep;
        char                            *endp;
        const struct xfs_dir_ops        *dops;
+       struct xfs_ifork                *ifp;
        xfs_ino_t                       ino;
        int                             i;
        int                             i8count;
        int                             offset;
+       int                             size;
+       int                             error;
        __uint8_t                       filetype;
 
+       ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+       /*
+        * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
+        * so we can only trust the mountpoint to have the right pointer.
+        */
        dops = xfs_dir_get_ops(mp, NULL);
 
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+       size = ifp->if_bytes;
+
        /*
         * Give up if the directory is way too short.
         */
-       XFS_WANT_CORRUPTED_RETURN(mp, size >
-                       offsetof(struct xfs_dir2_sf_hdr, parent));
-       XFS_WANT_CORRUPTED_RETURN(mp, size >=
-                       xfs_dir2_sf_hdr_size(sfp->i8count));
+       if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
+           size < xfs_dir2_sf_hdr_size(sfp->i8count))
+               return -EFSCORRUPTED;
 
        endp = (char *)sfp + size;
 
        /* Check .. entry */
        ino = dops->sf_get_parent_ino(sfp);
        i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
-       XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+       error = xfs_dir_ino_validate(mp, ino);
+       if (error)
+               return error;
        offset = dops->data_first_offset;
 
        /* Check all reported entries */
@@ -672,12 +685,12 @@ xfs_dir2_sf_verify(
                 * Check the fixed-offset parts of the structure are
                 * within the data buffer.
                 */
-               XFS_WANT_CORRUPTED_RETURN(mp,
-                               ((char *)sfep + sizeof(*sfep)) < endp);
+               if (((char *)sfep + sizeof(*sfep)) >= endp)
+                       return -EFSCORRUPTED;
 
                /* Don't allow names with known bad length. */
-               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
-               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+               if (sfep->namelen == 0)
+                       return -EFSCORRUPTED;
 
                /*
                 * Check that the variable-length part of the structure is
@@ -685,33 +698,39 @@ xfs_dir2_sf_verify(
                 * name component, so nextentry is an acceptable test.
                 */
                next_sfep = dops->sf_nextentry(sfp, sfep);
-               XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+               if (endp < (char *)next_sfep)
+                       return -EFSCORRUPTED;
 
                /* Check that the offsets always increase. */
-               XFS_WANT_CORRUPTED_RETURN(mp,
-                               xfs_dir2_sf_get_offset(sfep) >= offset);
+               if (xfs_dir2_sf_get_offset(sfep) < offset)
+                       return -EFSCORRUPTED;
 
                /* Check the inode number. */
                ino = dops->sf_get_ino(sfp, sfep);
                i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
-               XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+               error = xfs_dir_ino_validate(mp, ino);
+               if (error)
+                       return error;
 
                /* Check the file type. */
                filetype = dops->sf_get_ftype(sfep);
-               XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+               if (filetype >= XFS_DIR3_FT_MAX)
+                       return -EFSCORRUPTED;
 
                offset = xfs_dir2_sf_get_offset(sfep) +
                                dops->data_entsize(sfep->namelen);
 
                sfep = next_sfep;
        }
-       XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
-       XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+       if (i8count != sfp->i8count)
+               return -EFSCORRUPTED;
+       if ((void *)sfep != (void *)endp)
+               return -EFSCORRUPTED;
 
        /* Make sure this whole thing ought to be in local format. */
-       XFS_WANT_CORRUPTED_RETURN(mp, offset +
-              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-              (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+       if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+           (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
+               return -EFSCORRUPTED;
 
        return 0;
 }
index 9653e964eda4f99ca611bb2cb6449a470be45d48..8a37efe04de3235d72357914647e5f480cbbd512 100644 (file)
@@ -212,6 +212,16 @@ xfs_iformat_fork(
        if (error)
                return error;
 
+       /* Check inline dir contents. */
+       if (S_ISDIR(VFS_I(ip)->i_mode) &&
+           dip->di_format == XFS_DINODE_FMT_LOCAL) {
+               error = xfs_dir2_sf_verify(ip);
+               if (error) {
+                       xfs_idestroy_fork(ip, XFS_DATA_FORK);
+                       return error;
+               }
+       }
+
        if (xfs_is_reflink_inode(ip)) {
                ASSERT(ip->i_cowfp == NULL);
                xfs_ifork_init_cow(ip);
@@ -322,8 +332,6 @@ xfs_iformat_local(
        int             whichfork,
        int             size)
 {
-       int             error;
-
        /*
         * If the size is unreasonable, then something
         * is wrong and we just bail out rather than crash in
@@ -339,14 +347,6 @@ xfs_iformat_local(
                return -EFSCORRUPTED;
        }
 
-       if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
-               error = xfs_dir2_sf_verify(ip->i_mount,
-                               (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
-                               size);
-               if (error)
-                       return error;
-       }
-
        xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
        return 0;
 }
@@ -867,7 +867,7 @@ xfs_iextents_copy(
  * In these cases, the format always takes precedence, because the
  * format indicates the current state of the fork.
  */
-int
+void
 xfs_iflush_fork(
        xfs_inode_t             *ip,
        xfs_dinode_t            *dip,
@@ -877,7 +877,6 @@ xfs_iflush_fork(
        char                    *cp;
        xfs_ifork_t             *ifp;
        xfs_mount_t             *mp;
-       int                     error;
        static const short      brootflag[2] =
                { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
        static const short      dataflag[2] =
@@ -886,7 +885,7 @@ xfs_iflush_fork(
                { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 
        if (!iip)
-               return 0;
+               return;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        /*
         * This can happen if we gave up in iformat in an error path,
@@ -894,19 +893,12 @@ xfs_iflush_fork(
         */
        if (!ifp) {
                ASSERT(whichfork == XFS_ATTR_FORK);
-               return 0;
+               return;
        }
        cp = XFS_DFORK_PTR(dip, whichfork);
        mp = ip->i_mount;
        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_LOCAL:
-               if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
-                       error = xfs_dir2_sf_verify(mp,
-                                       (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
-                                       ifp->if_bytes);
-                       if (error)
-                               return error;
-               }
                if ((iip->ili_fields & dataflag[whichfork]) &&
                    (ifp->if_bytes > 0)) {
                        ASSERT(ifp->if_u1.if_data != NULL);
@@ -959,7 +951,6 @@ xfs_iflush_fork(
                ASSERT(0);
                break;
        }
-       return 0;
 }
 
 /*
index 132dc59fdde6942cd22fca4ae11b8adbc193f051..7fb8365326d1a745583c4f133bc5a63668316b33 100644 (file)
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int            xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-int            xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+void           xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
                                struct xfs_inode_log_item *, int);
 void           xfs_idestroy_fork(struct xfs_inode *, int);
 void           xfs_idata_realloc(struct xfs_inode *, int, int);
index c7fe2c2123ab8375caf0e0349a454ed8b2762095..7605d83965963566e1d6acef679591cf9787d161 100644 (file)
@@ -50,6 +50,7 @@
 #include "xfs_log.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_inode_zone;
 
@@ -3475,7 +3476,6 @@ xfs_iflush_int(
        struct xfs_inode_log_item *iip = ip->i_itemp;
        struct xfs_dinode       *dip;
        struct xfs_mount        *mp = ip->i_mount;
-       int                     error;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(xfs_isiflocked(ip));
@@ -3547,6 +3547,12 @@ xfs_iflush_int(
        if (ip->i_d.di_version < 3)
                ip->i_d.di_flushiter++;
 
+       /* Check the inline directory data. */
+       if (S_ISDIR(VFS_I(ip)->i_mode) &&
+           ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
+           xfs_dir2_sf_verify(ip))
+               goto corrupt_out;
+
        /*
         * Copy the dirty parts of the inode into the on-disk inode.  We always
         * copy out the core of the inode, because if the inode is dirty at all
@@ -3558,14 +3564,9 @@ xfs_iflush_int(
        if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
                ip->i_d.di_flushiter = 0;
 
-       error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-       if (error)
-               return error;
-       if (XFS_IFORK_Q(ip)) {
-               error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
-               if (error)
-                       return error;
-       }
+       xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+       if (XFS_IFORK_Q(ip))
+               xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
        xfs_inobp_check(mp, bp);
 
        /*