xfs: add online discard support
authorChristoph Hellwig <hch@infradead.org>
Fri, 20 May 2011 13:45:32 +0000 (13:45 +0000)
committerAlex Elder <aelder@sgi.com>
Tue, 24 May 2011 16:17:13 +0000 (11:17 -0500)
Now that we have reliable tracking of deleted extents in a
transaction we can easily implement "online" discard support
which calls blkdev_issue_discard once a transaction commits.

The actual discard is a two stage operation as we first have
to mark the busy extent as not available for reuse before we
can start the actual discard.  Note that we don't bother
supporting discard for the non-delaylog mode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
Documentation/filesystems/xfs.txt
fs/xfs/linux-2.6/xfs_discard.c
fs/xfs/linux-2.6/xfs_discard.h
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_trans.c

index 7bff3e4f35df84a9def7507badb42ccdeffef419..3fc0c31a6f5dc5f8ee1220d9bcf8062038297aa5 100644 (file)
@@ -39,6 +39,12 @@ When mounting an XFS filesystem, the following options are accepted.
        drive level write caching to be enabled, for devices that
        support write barriers.
 
+  discard
+       Issue commands to let the block device reclaim space freed by the
+       filesystem.  This is useful for SSD devices, thinly provisioned
+       LUNs and virtual machine images, but may have a performance
+       impact.  This option is incompatible with the nodelaylog option.
+
   dmapi
        Enable the DMAPI (Data Management API) event callouts.
        Use with the "mtpt" option.
index d61611c88012c8daaab1840f9534983fac62bd2d..244e797dae327a7da95992e23697489397c4bbc1 100644 (file)
@@ -191,3 +191,32 @@ xfs_ioc_trim(
                return -XFS_ERROR(EFAULT);
        return 0;
 }
+
+int                                    /* 0, or negated blkdev_issue_discard() result */
+xfs_discard_extents(                   /* issue a discard for every extent on list */
+       struct xfs_mount        *mp,    /* mount whose data device is discarded */
+       struct list_head        *list)  /* xfs_busy_extent entries linked via ->list */
+{
+       struct xfs_busy_extent  *busyp;
+       int                     error = 0;
+
+       list_for_each_entry(busyp, list, list) {
+               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+                                        busyp->length);
+
+               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+                               XFS_FSB_TO_BB(mp, busyp->length),
+                               GFP_NOFS, 0);
+               if (error && error != EOPNOTSUPP) { /* unsupported is OK */
+                       xfs_info(mp,
+        "discard failed for extent [0x%llx,%u], error %d",
+                                (unsigned long long)busyp->bno,
+                                busyp->length,
+                                error);
+                       return error;   /* give up on the first hard failure */
+               }
+       }
+
+       return 0;
+}
index e82b6dd3e127707ed0a8e154ab1617da23444a90..344879aea646cfbaf3ac1c57916f9fce5e6a50c2 100644 (file)
@@ -2,7 +2,9 @@
 #define XFS_DISCARD_H 1
 
 struct fstrim_range;
+struct list_head;
 
 extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
 
 #endif /* XFS_DISCARD_H */
index b0aa59e51fd066377d29f92da999dff54b1a1620..98b9c91fcdf1d9d101339aadaea2740e74cb6506 100644 (file)
@@ -110,8 +110,10 @@ mempool_t *xfs_ioend_pool;
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
 #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG   "delaylog"   /* Delayed loging enabled */
-#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
+#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
+#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
+#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
+#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
 
 /*
  * Table driven mount option parser.
@@ -355,6 +357,10 @@ xfs_parseargs(
                        mp->m_flags |= XFS_MOUNT_DELAYLOG;
                } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
                        mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       mp->m_flags |= XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
                } else if (!strcmp(this_char, "ihashsize")) {
                        xfs_warn(mp,
        "ihashsize no longer used, option is deprecated.");
@@ -388,6 +394,13 @@ xfs_parseargs(
                return EINVAL;
        }
 
+       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
+           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
+               xfs_warn(mp,
+       "the discard option is incompatible with the nodelaylog option");
+               return EINVAL;
+       }
+
 #ifndef CONFIG_XFS_QUOTA
        if (XFS_IS_QUOTA_RUNNING(mp)) {
                xfs_warn(mp, "quota support not available in this kernel.");
@@ -488,6 +501,7 @@ xfs_showargs(
                { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
                { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
                { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
+               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
                { 0, NULL }
        };
        static struct proc_xfs_info xfs_info_unset[] = {
index da0a561ffba2abc2ed277992f21f44c1a38dbc54..8d52ba4c87e54314f5106bd19bc92a3626a3f2da 100644 (file)
@@ -187,6 +187,8 @@ struct xfs_busy_extent {
        xfs_agnumber_t  agno;
        xfs_agblock_t   bno;
        xfs_extlen_t    length;
+       unsigned int    flags;
+#define XFS_ALLOC_BUSY_DISCARDED       0x01    /* undergoing a discard op. */
 };
 
 /*
index acdced86413ce0d90e5fb07ac7ea08005727a2e4..721db22c6ec9e29bd4448d01eb6c0caf2486bb3e 100644 (file)
@@ -2608,6 +2608,18 @@ xfs_alloc_busy_update_extent(
        xfs_agblock_t           bbno = busyp->bno;
        xfs_agblock_t           bend = bbno + busyp->length;
 
+       /*
+        * This extent is currently being discarded.  Give the thread
+        * performing the discard a chance to mark the extent unbusy
+        * and retry.
+        */
+       if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
+               spin_unlock(&pag->pagb_lock);
+               delay(1);
+               spin_lock(&pag->pagb_lock);
+               return false;
+       }
+
        /*
         * If there is a busy extent overlapping a user allocation, we have
         * no choice but to force the log and retry the search.
@@ -2813,7 +2825,8 @@ restart:
                 * If this is a metadata allocation, try to reuse the busy
                 * extent instead of trimming the allocation.
                 */
-               if (!args->userdata) {
+               if (!args->userdata &&
+                   !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
                        if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
                                                          busyp, fbno, flen,
                                                          false))
@@ -2979,10 +2992,16 @@ xfs_alloc_busy_clear_one(
        kmem_free(busyp);
 }
 
+/*
+ * Remove all extents on the passed in list from the busy extents tree.
+ * If do_discard is set skip extents that need to be discarded, and mark
+ * these as undergoing a discard operation instead.
+ */
 void
 xfs_alloc_busy_clear(
        struct xfs_mount        *mp,
-       struct list_head        *list)
+       struct list_head        *list,
+       bool                    do_discard)
 {
        struct xfs_busy_extent  *busyp, *n;
        struct xfs_perag        *pag = NULL;
@@ -2999,7 +3018,10 @@ xfs_alloc_busy_clear(
                        agno = busyp->agno;
                }
 
-               xfs_alloc_busy_clear_one(mp, pag, busyp);
+               if (do_discard && busyp->length)
+                       busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
+               else
+                       xfs_alloc_busy_clear_one(mp, pag, busyp);
        }
 
        if (pag) {
index 240ad288f2f99d7b6805591460846ce6a4c905b8..06aa8217452b511811b631fa50be303bba6dc56b 100644 (file)
@@ -140,7 +140,8 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
        xfs_agblock_t bno, xfs_extlen_t len);
 
 void
-xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
+xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list,
+       bool do_discard);
 
 int
 xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
index 7d56e88a3f0eb6c671e863c4977d2f47a500b830..c7755d5a5fbe967ed58bb04ffcc0d47d082a87e4 100644 (file)
@@ -29,6 +29,7 @@
 #include "xfs_mount.h"
 #include "xfs_error.h"
 #include "xfs_alloc.h"
+#include "xfs_discard.h"
 
 /*
  * Perform initial CIL structure initialisation. If the CIL is not
@@ -361,18 +362,28 @@ xlog_cil_committed(
        int     abort)
 {
        struct xfs_cil_ctx      *ctx = args;
+       struct xfs_mount        *mp = ctx->cil->xc_log->l_mp;
 
        xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
                                        ctx->start_lsn, abort);
 
        xfs_alloc_busy_sort(&ctx->busy_extents);
-       xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
+       xfs_alloc_busy_clear(mp, &ctx->busy_extents,
+                            (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
        spin_lock(&ctx->cil->xc_cil_lock);
        list_del(&ctx->committing);
        spin_unlock(&ctx->cil->xc_cil_lock);
 
        xlog_cil_free_logvec(ctx->lv_chain);
+
+       if (!list_empty(&ctx->busy_extents)) {
+               ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
+
+               xfs_discard_extents(mp, &ctx->busy_extents);
+               xfs_alloc_busy_clear(mp, &ctx->busy_extents, false);
+       }
+
        kmem_free(ctx);
 }
 
index 19af0ab0d0c6c6c1cab862016dbe258263e39d31..3d68bb267c5fc064279c5545cf2039974c89fa7b 100644 (file)
@@ -224,6 +224,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_FS_SHUTDOWN  (1ULL << 4)     /* atomic stop of all filesystem
                                                   operations, typically for
                                                   disk errors in metadata */
+#define XFS_MOUNT_DISCARD      (1ULL << 5)     /* discard unused blocks */
 #define XFS_MOUNT_RETERR       (1ULL << 6)     /* return alignment errors to
                                                   user */
 #define XFS_MOUNT_NOALIGN      (1ULL << 7)     /* turn off stripe alignment
index d1f24858ccc4d365db467f8bdbe222ef4511d354..7c7bc2b786bd47d6ec89e31bcf966f6dc5121ec1 100644 (file)
@@ -609,7 +609,7 @@ xfs_trans_free(
        struct xfs_trans        *tp)
 {
        xfs_alloc_busy_sort(&tp->t_busy);
-       xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
+       xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
        atomic_dec(&tp->t_mountp->m_active_trans);
        xfs_trans_free_dqinfo(tp);