dm: enhance internal suspend and resume interface
authorMike Snitzer <snitzer@redhat.com>
Tue, 28 Oct 2014 22:34:52 +0000 (18:34 -0400)
committerMike Snitzer <snitzer@redhat.com>
Wed, 19 Nov 2014 17:31:17 +0000 (12:31 -0500)
Rename dm_internal_{suspend,resume} to dm_internal_{suspend,resume}_fast
-- dm-stats will continue using these methods to avoid all the extra
suspend/resume logic that is not needed in order to quickly flush IO.

Introduce dm_internal_suspend_noflush() variant that actually calls the
mapped_device's target callbacks -- otherwise target-specific hooks are
avoided (e.g. dm-thin's thin_presuspend and thin_postsuspend).  Common
code between dm_internal_{suspend_noflush,resume} and
dm_{suspend,resume} was factored out as __dm_{suspend,resume}.

Update dm_internal_{suspend_noflush,resume} to always take and release
the mapped_device's suspend_lock.  Also update dm_{suspend,resume} to be
aware of potential for DM_INTERNAL_SUSPEND_FLAG to be set and respond
accordingly by interruptibly waiting for the DM_INTERNAL_SUSPEND_FLAG to
be cleared.  Add lockdep annotation to dm_suspend() and dm_resume().

The existing DM_SUSPEND_FLAG remains unchanged.
DM_INTERNAL_SUSPEND_FLAG is set by dm_internal_suspend_noflush() and
cleared by dm_internal_resume().

Both DM_SUSPEND_FLAG and DM_INTERNAL_SUSPEND_FLAG may be set if a device
was already suspended when dm_internal_suspend_noflush() was called --
this can be thought of as a "nested suspend".  A "nested suspend" can
occur with legacy userspace dm-thin code that might suspend all active
thin volumes before suspending the pool for resize.

But otherwise, in the normal dm-thin-pool suspend case moving forward:
the thin-pool will have DM_SUSPEND_FLAG set and all active thins from
that thin-pool will have DM_INTERNAL_SUSPEND_FLAG set.

Also add DM_INTERNAL_SUSPEND_FLAG to status report.  This new
DM_INTERNAL_SUSPEND_FLAG state is being reported to assist with
debugging (e.g. 'dmsetup info' will report an internally suspended
device accordingly).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Joe Thornber <ejt@redhat.com>
drivers/md/dm-ioctl.c
drivers/md/dm-stats.c
drivers/md/dm.c
drivers/md/dm.h
include/uapi/linux/dm-ioctl.h

index 0be9381365d7a7fb6b7672d91246b88677ff489e..73f791bb9ea4f06bd6c45da8356ba476a3ef76c3 100644 (file)
@@ -684,11 +684,14 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
        int srcu_idx;
 
        param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
-                         DM_ACTIVE_PRESENT_FLAG);
+                         DM_ACTIVE_PRESENT_FLAG | DM_INTERNAL_SUSPEND_FLAG);
 
        if (dm_suspended_md(md))
                param->flags |= DM_SUSPEND_FLAG;
 
+       if (dm_suspended_internally_md(md))
+               param->flags |= DM_INTERNAL_SUSPEND_FLAG;
+
        if (dm_test_deferred_remove_flag(md))
                param->flags |= DM_DEFERRED_REMOVE;
 
index 28a90122a5a89272f5873ed0911d2de19125c15b..42a057aa3811ac863a1db066d948a6e60d01b67d 100644 (file)
@@ -824,7 +824,7 @@ static int message_stats_create(struct mapped_device *md,
                return 1;
 
        id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
-                            dm_internal_suspend, dm_internal_resume, md);
+                            dm_internal_suspend_fast, dm_internal_resume_fast, md);
        if (id < 0)
                return id;
 
index f84de3215982f883165835f9b966d6f061747bba..a0ece87ad4268d1e8c8b6825339d036c291f3e5f 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/delay.h>
+#include <linux/wait.h>
 
 #include <trace/events/block.h>
 
@@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_MERGE_IS_OPTIONAL 6
 #define DMF_DEFERRED_REMOVE 7
+#define DMF_SUSPENDED_INTERNALLY 8
 
 /*
  * A dummy definition to make RCU happy.
@@ -2718,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md)
 }
 
 /*
- * We need to be able to change a mapping table under a mounted
- * filesystem.  For example we might want to move some data in
- * the background.  Before the table can be swapped with
- * dm_bind_table, dm_suspend must be called to flush any in
- * flight bios and ensure that any further io gets deferred.
- */
-/*
- * Suspend mechanism in request-based dm.
- *
- * 1. Flush all I/Os by lock_fs() if needed.
- * 2. Stop dispatching any I/O by stopping the request_queue.
- * 3. Wait for all in-flight I/Os to be completed or requeued.
+ * If __dm_suspend returns 0, the device is completely quiescent
+ * now. There is no request-processing activity. All new requests
+ * are being added to md->deferred list.
  *
- * To abort suspend, start the request_queue.
+ * Caller must hold md->suspend_lock
  */
-int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
+                       unsigned suspend_flags, int interruptible)
 {
-       struct dm_table *map = NULL;
-       int r = 0;
-       int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
-       int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
-
-       mutex_lock(&md->suspend_lock);
-
-       if (dm_suspended_md(md)) {
-               r = -EINVAL;
-               goto out_unlock;
-       }
-
-       map = rcu_dereference(md->map);
+       bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
+       bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
+       int r;
 
        /*
         * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
@@ -2772,7 +2756,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
                r = lock_fs(md);
                if (r) {
                        dm_table_presuspend_undo_targets(map);
-                       goto out_unlock;
+                       return r;
                }
        }
 
@@ -2806,7 +2790,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
         * We call dm_wait_for_completion to wait for all existing requests
         * to finish.
         */
-       r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
+       r = dm_wait_for_completion(md, interruptible);
 
        if (noflush)
                clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2822,14 +2806,55 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
 
                unlock_fs(md);
                dm_table_presuspend_undo_targets(map);
-               goto out_unlock; /* pushback list is already flushed, so skip flush */
+               /* pushback list is already flushed, so skip flush */
        }
 
-       /*
-        * If dm_wait_for_completion returned 0, the device is completely
-        * quiescent now. There is no request-processing activity. All new
-        * requests are being added to md->deferred list.
-        */
+       return r;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem.  For example we might want to move some data in
+ * the background.  Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight bios and ensure that any further io gets deferred.
+ */
+/*
+ * Suspend mechanism in request-based dm.
+ *
+ * 1. Flush all I/Os by lock_fs() if needed.
+ * 2. Stop dispatching any I/O by stopping the request_queue.
+ * 3. Wait for all in-flight I/Os to be completed or requeued.
+ *
+ * To abort suspend, start the request_queue.
+ */
+int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
+{
+       struct dm_table *map = NULL;
+       int r = 0;
+
+retry:
+       mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
+       if (dm_suspended_md(md)) {
+               r = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (dm_suspended_internally_md(md)) {
+               /* already internally suspended, wait for internal resume */
+               mutex_unlock(&md->suspend_lock);
+               r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+               if (r)
+                       return r;
+               goto retry;
+       }
+
+       map = rcu_dereference(md->map);
+
+       r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE);
+       if (r)
+               goto out_unlock;
 
        set_bit(DMF_SUSPENDED, &md->flags);
 
@@ -2840,35 +2865,57 @@ out_unlock:
        return r;
 }
 
+static int __dm_resume(struct mapped_device *md, struct dm_table *map)
+{
+       if (map) {
+               int r = dm_table_resume_targets(map);
+               if (r)
+                       return r;
+       }
+
+       dm_queue_flush(md);
+
+       /*
+        * Flushing deferred I/Os must be done after targets are resumed
+        * so that mapping of targets can work correctly.
+        * Request-based dm is queueing the deferred I/Os in its request_queue.
+        */
+       if (dm_request_based(md))
+               start_queue(md->queue);
+
+       unlock_fs(md);
+
+       return 0;
+}
+
 int dm_resume(struct mapped_device *md)
 {
        int r = -EINVAL;
        struct dm_table *map = NULL;
 
-       mutex_lock(&md->suspend_lock);
+retry:
+       mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
+
        if (!dm_suspended_md(md))
                goto out;
 
+       if (dm_suspended_internally_md(md)) {
+               /* already internally suspended, wait for internal resume */
+               mutex_unlock(&md->suspend_lock);
+               r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
+               if (r)
+                       return r;
+               goto retry;
+       }
+
        map = rcu_dereference(md->map);
        if (!map || !dm_table_get_size(map))
                goto out;
 
-       r = dm_table_resume_targets(map);
+       r = __dm_resume(md, map);
        if (r)
                goto out;
 
-       dm_queue_flush(md);
-
-       /*
-        * Flushing deferred I/Os must be done after targets are resumed
-        * so that mapping of targets can work correctly.
-        * Request-based dm is queueing the deferred I/Os in its request_queue.
-        */
-       if (dm_request_based(md))
-               start_queue(md->queue);
-
-       unlock_fs(md);
-
        clear_bit(DMF_SUSPENDED, &md->flags);
 
        r = 0;
@@ -2882,15 +2929,80 @@ out:
  * Internal suspend/resume works like userspace-driven suspend. It waits
  * until all bios finish and prevents issuing new bios to the target drivers.
  * It may be used only from the kernel.
- *
- * Internal suspend holds md->suspend_lock, which prevents interaction with
- * userspace-driven suspend.
  */
 
-void dm_internal_suspend(struct mapped_device *md)
+static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
 {
-       mutex_lock(&md->suspend_lock);
+       struct dm_table *map = NULL;
+
+       if (dm_suspended_internally_md(md))
+               return; /* nested internal suspend */
+
+       if (dm_suspended_md(md)) {
+               set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+               return; /* nest suspend */
+       }
+
+       map = rcu_dereference(md->map);
+
+       /*
+        * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
+        * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
+        * would require changing .presuspend to return an error -- avoid this
+        * until there is a need for more elaborate variants of internal suspend.
+        */
+       (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE);
+
+       set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+
+       dm_table_postsuspend_targets(map);
+}
+
+static void __dm_internal_resume(struct mapped_device *md)
+{
+       if (!dm_suspended_internally_md(md))
+               return; /* resume from nested internal suspend */
+
        if (dm_suspended_md(md))
+               goto done; /* resume from nested suspend */
+
+       /*
+        * NOTE: existing callers don't need to call dm_table_resume_targets
+        * (which may fail -- so best to avoid it for now by passing NULL map)
+        */
+       (void) __dm_resume(md, NULL);
+
+done:
+       clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+       smp_mb__after_atomic();
+       wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
+}
+
+void dm_internal_suspend_noflush(struct mapped_device *md)
+{
+       mutex_lock(&md->suspend_lock);
+       __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
+       mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
+
+void dm_internal_resume(struct mapped_device *md)
+{
+       mutex_lock(&md->suspend_lock);
+       __dm_internal_resume(md);
+       mutex_unlock(&md->suspend_lock);
+}
+EXPORT_SYMBOL_GPL(dm_internal_resume);
+
+/*
+ * Fast variants of internal suspend/resume hold md->suspend_lock,
+ * which prevents interaction with userspace-driven suspend.
+ */
+
+void dm_internal_suspend_fast(struct mapped_device *md)
+{
+       mutex_lock(&md->suspend_lock);
+       if (dm_suspended_md(md) || dm_suspended_internally_md(md))
                return;
 
        set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
@@ -2899,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md)
        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 }
 
-void dm_internal_resume(struct mapped_device *md)
+void dm_internal_resume_fast(struct mapped_device *md)
 {
-       if (dm_suspended_md(md))
+       if (dm_suspended_md(md) || dm_suspended_internally_md(md))
                goto done;
 
        dm_queue_flush(md);
@@ -2987,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md)
        return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_suspended_internally_md(struct mapped_device *md)
+{
+       return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
+}
+
 int dm_test_deferred_remove_flag(struct mapped_device *md)
 {
        return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
index 781994093bf59a26da8d009337fdb4b0e652b42a..84b0f9e4ba6ca14aa4885935ddad8a7a7f8255cc 100644 (file)
@@ -129,6 +129,15 @@ int dm_deleting_md(struct mapped_device *md);
  */
 int dm_suspended_md(struct mapped_device *md);
 
+/*
+ * Internal suspend and resume methods.
+ */
+int dm_suspended_internally_md(struct mapped_device *md);
+void dm_internal_suspend_fast(struct mapped_device *md);
+void dm_internal_resume_fast(struct mapped_device *md);
+void dm_internal_suspend_noflush(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
 /*
  * Test if the device is scheduled for deferred remove.
  */
index 2be66f4be2f9292e82d30467ec6cc13b9873a4d6..a570d7b5796c58950ad3528a0f1b7eded09f234c 100644 (file)
@@ -352,4 +352,9 @@ enum {
  */
 #define DM_DEFERRED_REMOVE             (1 << 17) /* In/Out */
 
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG       (1 << 18) /* Out */
+
 #endif                         /* _LINUX_DM_IOCTL_H */