xfs: prevent unwritten extent conversion from blocking I/O completion
authorDave Chinner <david@fromorbit.com>
Mon, 6 Apr 2009 16:42:11 +0000 (18:42 +0200)
committerChristoph Hellwig <hch@brick.lst.de>
Mon, 6 Apr 2009 16:42:11 +0000 (18:42 +0200)
Unwritten extent conversion can recurse back into the filesystem due
to memory allocation. Memory reclaim requires I/O completions to be
processed to allow the callers to make progress. If the I/O
completion workqueue thread is doing the recursion, then we have a
deadlock situation.

Move unwritten extent completion into it's own workqueue so it
doesn't block I/O completions for normal delayed allocation or
overwrite data.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_aops.h
fs/xfs/linux-2.6/xfs_buf.c

index c13f67300fe76aa0a40ee64b0690dd2c8161a8ea..7ec89fc05b2b66d9ed5ca462d2d7f05d44c83dbc 100644 (file)
@@ -152,23 +152,6 @@ xfs_find_bdev_for_inode(
                return mp->m_ddev_targp->bt_bdev;
 }
 
-/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
-       xfs_ioend_t     *ioend,
-       int             wait)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               queue_work(xfsdatad_workqueue, &ioend->io_work);
-               if (wait)
-                       flush_workqueue(xfsdatad_workqueue);
-       }
-}
-
 /*
  * We're now finished for good with this ioend structure.
  * Update the page state via the associated buffer_heads,
@@ -309,6 +292,27 @@ xfs_end_bio_read(
        xfs_destroy_ioend(ioend);
 }
 
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+       xfs_ioend_t     *ioend,
+       int             wait)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining)) {
+               struct workqueue_struct *wq = xfsdatad_workqueue;
+               if (ioend->io_work.func == xfs_end_bio_unwritten)
+                       wq = xfsconvertd_workqueue;
+
+               queue_work(wq, &ioend->io_work);
+               if (wait)
+                       flush_workqueue(wq);
+       }
+}
+
 /*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
index 1dd528849755149a8e159bd2b7fb31dd72b848c5..221b3e66ceef3dd3e42db058991334eeb09d1d7e 100644 (file)
@@ -19,6 +19,7 @@
 #define __XFS_AOPS_H__
 
 extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
 extern mempool_t *xfs_ioend_pool;
 
 /*
index aa1016bb913405e4e4f77c5bee28e7f38db2b44c..e28800a9f2b53890a1357a5db5d44da4943337f9 100644 (file)
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
 
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
 
 #ifdef XFS_BUF_TRACE
 void
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
        xfs_buf_t       *bp, *n;
        int             pincount = 0;
 
+       xfs_buf_runall_queues(xfsconvertd_workqueue);
        xfs_buf_runall_queues(xfsdatad_workqueue);
        xfs_buf_runall_queues(xfslogd_workqueue);
 
@@ -1831,9 +1833,15 @@ xfs_buf_init(void)
        if (!xfsdatad_workqueue)
                goto out_destroy_xfslogd_workqueue;
 
+       xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+       if (!xfsconvertd_workqueue)
+               goto out_destroy_xfsdatad_workqueue;
+
        register_shrinker(&xfs_buf_shake);
        return 0;
 
+ out_destroy_xfsdatad_workqueue:
+       destroy_workqueue(xfsdatad_workqueue);
  out_destroy_xfslogd_workqueue:
        destroy_workqueue(xfslogd_workqueue);
  out_free_buf_zone:
@@ -1849,6 +1857,7 @@ void
 xfs_buf_terminate(void)
 {
        unregister_shrinker(&xfs_buf_shake);
+       destroy_workqueue(xfsconvertd_workqueue);
        destroy_workqueue(xfsdatad_workqueue);
        destroy_workqueue(xfslogd_workqueue);
        kmem_zone_destroy(xfs_buf_zone);