xfs: allow unwritten extents in the CoW fork
authorDarrick J. Wong <darrick.wong@oracle.com>
Thu, 2 Feb 2017 23:14:01 +0000 (15:14 -0800)
committerDarrick J. Wong <darrick.wong@oracle.com>
Thu, 2 Feb 2017 23:14:01 +0000 (15:14 -0800)
In the data fork, we only allow extents to perform the following state
transitions:

delay -> real <-> unwritten

There's no way to move directly from a delalloc reservation to an
/unwritten/ allocated extent.  However, for the CoW fork we want to be
able to do the following to each extent:

delalloc -> unwritten -> written -> remapped to data fork

This will help us to avoid a race in the speculative CoW preallocation
code between a first thread that is allocating a CoW extent and a second
thread that is remapping part of a file after a write.  In order to do
this, however, we need two things: first, we have to be able to
transition from da to unwritten, and second the function that converts
between real and unwritten has to be made aware of the cow fork.  Do
both of those things.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/libxfs/xfs_bmap.c

index 2e91eb66d32f666d42b0dd5902ed7f2d5f66d643..dcffbb09444e3f1bc9573d469bb494a90dd598e3 100644 (file)
@@ -1850,6 +1850,7 @@ xfs_bmap_add_extent_delay_real(
                 */
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_startblock(ep, new->br_startblock);
+               xfs_bmbt_set_state(ep, new->br_state);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
                (*nextents)++;
@@ -2188,6 +2189,7 @@ STATIC int                                /* error */
 xfs_bmap_add_extent_unwritten_real(
        struct xfs_trans        *tp,
        xfs_inode_t             *ip,    /* incore inode pointer */
+       int                     whichfork,
        xfs_extnum_t            *idx,   /* extent number to update/insert */
        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
@@ -2207,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real(
                                        /* left is 0, right is 1, prev is 2 */
        int                     rval=0; /* return value (logging flags) */
        int                     state = 0;/* state bits, accessed thru macros */
-       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_mount        *mp = ip->i_mount;
 
        *logflagsp = 0;
 
        cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
 
        ASSERT(*idx >= 0);
        ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2271,7 +2275,7 @@ xfs_bmap_add_extent_unwritten_real(
         * Don't set contiguous if the combined extent would be too large.
         * Also check for all-three-contiguous being too large.
         */
-       if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+       if (*idx < xfs_iext_count(ifp) - 1) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
                if (isnullstartblock(RIGHT.br_startblock))
@@ -2311,7 +2315,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_remove(ip, *idx + 1, 2, state);
-               ip->i_d.di_nextents -= 2;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2354,7 +2359,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2389,7 +2395,8 @@ xfs_bmap_add_extent_unwritten_real(
                xfs_bmbt_set_state(ep, newext);
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2501,7 +2508,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_insert(ip, *idx, 1, new, state);
-               ip->i_d.di_nextents++;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2579,7 +2587,8 @@ xfs_bmap_add_extent_unwritten_real(
                ++*idx;
                xfs_iext_insert(ip, *idx, 1, new, state);
 
-               ip->i_d.di_nextents++;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2627,7 +2636,8 @@ xfs_bmap_add_extent_unwritten_real(
                ++*idx;
                xfs_iext_insert(ip, *idx, 2, &r[0], state);
 
-               ip->i_d.di_nextents += 2;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2681,17 +2691,17 @@ xfs_bmap_add_extent_unwritten_real(
        }
 
        /* update reverse mappings */
-       error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+       error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
        if (error)
                goto done;
 
        /* convert to a btree if necessary */
-       if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
                int     tmp_logflags;   /* partial log flag return val */
 
                ASSERT(cur == NULL);
                error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
-                               0, &tmp_logflags, XFS_DATA_FORK);
+                               0, &tmp_logflags, whichfork);
                *logflagsp |= tmp_logflags;
                if (error)
                        goto done;
@@ -2703,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real(
                *curp = cur;
        }
 
-       xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+       xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
 done:
        *logflagsp |= rval;
        return error;
@@ -4354,10 +4364,16 @@ xfs_bmapi_allocate(
        bma->got.br_state = XFS_EXT_NORM;
 
        /*
-        * A wasdelay extent has been initialized, so shouldn't be flagged
-        * as unwritten.
+        * In the data fork, a wasdelay extent has been initialized, so
+        * shouldn't be flagged as unwritten.
+        *
+        * For the cow fork, however, we convert delalloc reservations
+        * (extents allocated for speculative preallocation) to
+        * allocated unwritten extents, and only convert the unwritten
+        * extents to real extents when we're about to write the data.
         */
-       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+       if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+           (bma->flags & XFS_BMAPI_PREALLOC) &&
            xfs_sb_version_hasextflgbit(&mp->m_sb))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
 
@@ -4408,8 +4424,6 @@ xfs_bmapi_convert_unwritten(
                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
                return 0;
 
-       ASSERT(whichfork != XFS_COW_FORK);
-
        /*
         * Modify (by adding) the state flag, if writing.
         */
@@ -4434,8 +4448,8 @@ xfs_bmapi_convert_unwritten(
                        return error;
        }
 
-       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
-                       &bma->cur, mval, bma->firstblock, bma->dfops,
+       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+                       &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
                        &tmp_logflags);
        /*
         * Log the inode core unconditionally in the unwritten extent conversion
@@ -4444,8 +4458,12 @@ xfs_bmapi_convert_unwritten(
         * in the transaction for the sake of fsync(), even if nothing has
         * changed, because fsync() will not force the log for this transaction
         * unless it sees the inode pinned.
+        *
+        * Note: If we're only converting cow fork extents, there aren't
+        * any on-disk updates to make, so we don't need to log anything.
         */
-       bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+       if (whichfork != XFS_COW_FORK)
+               bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
        if (error)
                return error;
 
@@ -4519,15 +4537,15 @@ xfs_bmapi_write(
        ASSERT(*nmap >= 1);
        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
        ASSERT(!(flags & XFS_BMAPI_IGSTATE));
-       ASSERT(tp != NULL);
+       ASSERT(tp != NULL ||
+              (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+                       (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
        ASSERT(len > 0);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
        ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
        ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
-       ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
-       ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
 
        /* zeroing is for currently only for data extents, not metadata */
        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -5542,8 +5560,8 @@ __xfs_bunmapi(
                        }
                        del.br_state = XFS_EXT_UNWRITTEN;
                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-                                       &lastx, &cur, &del, firstblock, dfops,
-                                       &logflags);
+                                       whichfork, &lastx, &cur, &del,
+                                       firstblock, dfops, &logflags);
                        if (error)
                                goto error0;
                        goto nodelete;
@@ -5596,8 +5614,9 @@ __xfs_bunmapi(
                                prev.br_state = XFS_EXT_UNWRITTEN;
                                lastx--;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &prev,
-                                               firstblock, dfops, &logflags);
+                                               ip, whichfork, &lastx, &cur,
+                                               &prev, firstblock, dfops,
+                                               &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;
@@ -5605,8 +5624,9 @@ __xfs_bunmapi(
                                ASSERT(del.br_state == XFS_EXT_NORM);
                                del.br_state = XFS_EXT_UNWRITTEN;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &del,
-                                               firstblock, dfops, &logflags);
+                                               ip, whichfork, &lastx, &cur,
+                                               &del, firstblock, dfops,
+                                               &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;