xfs: implement swapext for rmap filesystems
author Darrick J. Wong <darrick.wong@oracle.com>
Mon, 3 Oct 2016 16:11:53 +0000 (09:11 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Wed, 5 Oct 2016 23:26:32 +0000 (16:26 -0700)
Implement swapext for filesystems that have reverse mapping.  Back in
the reflink patches, we augmented the bmap code with a 'REMAP' flag
that updates only the bmbt and doesn't touch the allocator, and
implemented log redo items for the resulting map and unmap operations.
Now we can rewrite extent swapping as a (looong) series of remap
operations.

This is far less efficient than the fork swapping method implemented
in the past, so we only switch this on for rmap.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/libxfs/xfs_trans_space.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_trace.h

index 41e0428d8175a2ab7ea4be8d7f67ce932a7e3a3f..7917f6e44286a4591e9109138a7be07dc1a18eb1 100644 (file)
@@ -21,6 +21,8 @@
 /*
  * Components of space reservations.
  */
+#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)    \
+               (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
 #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)    \
                (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
 #define        XFS_EXTENTADD_SPACE_RES(mp,w)   (XFS_BM_MAXLEVELS(mp,w) - 1)
        (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
          XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
          XFS_EXTENTADD_SPACE_RES(mp,w))
+/*
+ * Block reservation for swapping @b extents of fork @w on an rmap
+ * filesystem.  The first term scales XFS_EXTENTADD_SPACE_RES() by the
+ * number of XFS_MAX_CONTIG_EXTENTS_PER_BLOCK()-sized groups needed to
+ * hold @b; the second scales m_rmap_maxlevels by the number of
+ * XFS_MAX_CONTIG_RMAPS_PER_BLOCK()-sized groups.  Both counts round up.
+ * @b is parenthesized so that any expression can be passed safely.
+ */
+#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
+       ((((b) + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
+         XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
+         XFS_EXTENTADD_SPACE_RES(mp,w) + \
+        (((b) + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
+         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
+         (mp)->m_rmap_maxlevels)
 #define        XFS_DAENTER_1B(mp,w)    \
        ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
 #define        XFS_DAENTER_DBS(mp,w)   \
index b278d62b7152f9dafebf96253f9209ef508e754b..552465e011ecb0df23aaeae57b94108dcdd24360 100644 (file)
@@ -1580,6 +1580,13 @@ xfs_swap_extents_check_format(
        if (ip->i_d.di_nextents < tip->i_d.di_nextents)
                return -EINVAL;
 
+       /*
+        * If we have to use the (expensive) rmap swap method, we can
+        * handle any number of extents and any format.
+        */
+       if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
+               return 0;
+
        /*
         * if the target inode is in extent form and the temp inode is in btree
         * form then we will end up with the target inode in the wrong format
@@ -1649,6 +1656,130 @@ xfs_swap_extent_flush(
        return 0;
 }
 
+/*
+ * Move extents from one file to another, when rmap is enabled.
+ *
+ * Walks the file range from block 0 up to XFS_B_TO_FSB() of ip's size,
+ * one donor (tip) mapping at a time.  Each donor mapping is processed
+ * in pieces; for every piece, both files' mappings are unmapped and
+ * then mapped into the opposite file through operations queued on a
+ * xfs_defer_ops, which is completed with xfs_defer_finish() before the
+ * next piece is started.
+ *
+ * @tpp: in/out transaction pointer, handed to xfs_defer_finish()
+ *       (NOTE(review): presumably it can be replaced when the
+ *       transaction rolls -- confirm against xfs_defer_finish).
+ * @ip:  source file; its size bounds the range that is swapped.
+ * @tip: donor file whose mappings are exchanged with ip's.
+ *
+ * Returns 0 on success, or the error from the first failing call.  On
+ * error the pending deferred ops are cancelled where applicable, but
+ * this function does not itself undo pieces that already finished.
+ */
+STATIC int
+xfs_swap_extent_rmap(
+       struct xfs_trans                **tpp,
+       struct xfs_inode                *ip,
+       struct xfs_inode                *tip)
+{
+       struct xfs_bmbt_irec            irec;
+       struct xfs_bmbt_irec            uirec;
+       struct xfs_bmbt_irec            tirec;
+       xfs_fileoff_t                   offset_fsb;
+       xfs_fileoff_t                   end_fsb;
+       xfs_filblks_t                   count_fsb;
+       xfs_fsblock_t                   firstfsb;
+       struct xfs_defer_ops            dfops;
+       int                             error;
+       xfs_filblks_t                   ilen;
+       xfs_filblks_t                   rlen;
+       int                             nimaps;
+       __uint64_t                      tip_flags2;
+
+       /*
+        * If the source file has shared blocks, we must flag the donor
+        * file as having shared blocks so that we get the shared-block
+        * rmap functions when we go to fix up the rmaps.  The flags
+        * will be switched for real later; the donor's original flags2
+        * value is saved in tip_flags2 and put back before returning,
+        * on both the success and the error path.
+        */
+       tip_flags2 = tip->i_d.di_flags2;
+       if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)
+               tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
+
+       offset_fsb = 0;
+       end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
+       count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
+
+       while (count_fsb) {
+               /* Read the next mapping of the donor file at offset_fsb */
+               nimaps = 1;
+               error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
+                               &nimaps, 0);
+               if (error)
+                       goto out;
+               ASSERT(nimaps == 1);
+               ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
+
+               trace_xfs_swap_extent_rmap_remap(tip, &tirec);
+               ilen = tirec.br_blockcount;
+
+               /*
+                * Swap this donor extent with the source file's mappings
+                * over the same range, one piece at a time.  tirec is
+                * advanced past each finished piece until nothing of it
+                * is left; each piece gets a freshly initialized dfops.
+                */
+               while (tirec.br_blockcount) {
+                       xfs_defer_init(&dfops, &firstfsb);
+                       trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
+
+                       /*
+                        * Read the source file's mapping at the start of
+                        * what is left of the donor extent.
+                        */
+                       nimaps = 1;
+                       error = xfs_bmapi_read(ip, tirec.br_startoff,
+                                       tirec.br_blockcount, &irec,
+                                       &nimaps, 0);
+                       if (error)
+                               goto out_defer;
+                       ASSERT(nimaps == 1);
+                       ASSERT(tirec.br_startoff == irec.br_startoff);
+                       trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
+
+                       /*
+                        * Trim the donor piece (uirec) to the shorter of
+                        * the remaining donor extent and the source
+                        * mapping; rlen is the length of this piece.
+                        */
+                       uirec = tirec;
+                       uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
+                                       tirec.br_blockcount,
+                                       irec.br_blockcount);
+                       trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
+
+                       /* Remove the mapping from the donor file. */
+                       error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
+                                       tip, &uirec);
+                       if (error)
+                               goto out_defer;
+
+                       /* Remove the mapping from the source file. */
+                       error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops,
+                                       ip, &irec);
+                       if (error)
+                               goto out_defer;
+
+                       /* Map the donor file's blocks into the source file. */
+                       error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
+                                       ip, &uirec);
+                       if (error)
+                               goto out_defer;
+
+                       /* Map the source file's blocks into the donor file. */
+                       error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
+                                       tip, &irec);
+                       if (error)
+                               goto out_defer;
+
+                       error = xfs_defer_finish(tpp, &dfops, ip);
+                       if (error)
+                               goto out_defer;
+
+                       tirec.br_startoff += rlen;
+                       if (tirec.br_startblock != HOLESTARTBLOCK &&
+                           tirec.br_startblock != DELAYSTARTBLOCK)
+                               tirec.br_startblock += rlen;
+                       tirec.br_blockcount -= rlen;
+               }
+
+               /*
+                * Roll on...  ilen is the donor extent's full length,
+                * captured before the inner loop consumed tirec.
+                */
+               count_fsb -= ilen;
+               offset_fsb += ilen;
+       }
+
+       tip->i_d.di_flags2 = tip_flags2;
+       return 0;
+
+out_defer:
+       xfs_defer_cancel(&dfops);
+out:
+       trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
+       tip->i_d.di_flags2 = tip_flags2;
+       return error;
+}
+
 /* Swap the extents of two files by swapping data forks. */
 STATIC int
 xfs_swap_extent_forks(
@@ -1799,6 +1930,7 @@ xfs_swap_extents(
        int                     lock_flags;
        struct xfs_ifork        *cowfp;
        __uint64_t              f;
+       int                     resblks;
 
        /*
         * Lock the inodes against other IO, page faults and truncate to
@@ -1829,7 +1961,28 @@ xfs_swap_extents(
        if (error)
                goto out_unlock;
 
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       /*
+        * Extent "swapping" with rmap requires a permanent reservation and
+        * a block reservation because it's really just a remap operation
+        * performed with log redo items!
+        */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+               /*
+                * Conceptually this shouldn't affect the shape of either
+                * bmbt, but since we atomically move extents one by one,
+                * we reserve enough space to rebuild both trees.
+                */
+               resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
+                               XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
+                               XFS_DATA_FORK) +
+                         XFS_SWAP_RMAP_SPACE_RES(mp,
+                               XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
+                               XFS_DATA_FORK);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
+                               0, 0, &tp);
+       } else
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
+                               0, 0, &tp);
        if (error)
                goto out_unlock;
 
@@ -1888,8 +2041,11 @@ xfs_swap_extents(
        src_log_flags = XFS_ILOG_CORE;
        target_log_flags = XFS_ILOG_CORE;
 
-       error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
-                       &target_log_flags);
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               error = xfs_swap_extent_rmap(&tp, ip, tip);
+       else
+               error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
+                               &target_log_flags);
        if (error)
                goto out_trans_cancel;
 
index 75bf18bc275b785d125666b22a6f56fe36037f78..2586c9c9cd9169f5967bb7c6692d90986bb2b65d 100644 (file)
@@ -3373,6 +3373,11 @@ DEFINE_INODE_EVENT(xfs_reflink_cancel_pending_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_pending_cow_error);
 
+/* rmap swapext tracepoints */
+DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
+DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
+DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH