blkcg: shoot down blkio_groups on elevator switch

author Tejun Heo <tj@kernel.org>

Mon, 5 Mar 2012 21:15:00 +0000 (13:15 -0800)

committer Jens Axboe <axboe@kernel.dk>

Tue, 6 Mar 2012 20:27:22 +0000 (21:27 +0100)
author Tejun Heo <tj@kernel.org>
Mon, 5 Mar 2012 21:15:00 +0000 (13:15 -0800)
committer Jens Axboe <axboe@kernel.dk>
Tue, 6 Mar 2012 20:27:22 +0000 (21:27 +0100)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index 970a717a056f93a6b8d2fc899303bcee4a13fe8d..159aef59589fa1b4197e5900035d6bb5bb17ae3e 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -17,8 +17,9 @@
  #include <linux/err.h>
  #include <linux/blkdev.h>
  #include <linux/slab.h>
-#include "blk-cgroup.h"
  #include <linux/genhd.h>
+#include <linux/delay.h>
+#include "blk-cgroup.h"
  
  #define MAX_KEY_LEN 100
  
@@ -546,6 +547,37 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
  }
  EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
  
+void blkg_destroy_all(struct request_queue *q)
+{
+       struct blkio_policy_type *pol;
+
+       while (true) {
+               bool done = true;
+
+               spin_lock(&blkio_list_lock);
+               spin_lock_irq(q->queue_lock);
+
+               /*
+                * clear_queue_fn() might return with non-empty group list
+                * if it raced cgroup removal and lost.  cgroup removal is
+                * guaranteed to make forward progress and retrying after a
+                * while is enough.  This ugliness is scheduled to be
+                * removed after locking update.
+                */
+               list_for_each_entry(pol, &blkio_list, list)
+                       if (!pol->ops.blkio_clear_queue_fn(q))
+                               done = false;
+
+               spin_unlock_irq(q->queue_lock);
+               spin_unlock(&blkio_list_lock);
+
+               if (done)
+                       break;
+
+               msleep(10);     /* just some random duration I like */
+       }
+}
+
  static void blkio_reset_stats_cpu(struct blkio_group *blkg)
  {
         struct blkio_group_stats_cpu *stats_cpu;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h

index 355168772f51b2521815086498642a77c287eb58..e5cfcbd4d2f49a4a62fd9791a98bc699323a4c8d 100644 (file)
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -203,7 +203,7 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
                                      dev_t dev);
  
  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
-
+typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
  typedef void (blkio_update_group_weight_fn) (void *key,
                         struct blkio_group *blkg, unsigned int weight);
  typedef void (blkio_update_group_read_bps_fn) (void * key,
@@ -217,6 +217,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key,
  
  struct blkio_policy_ops {
         blkio_unlink_group_fn *blkio_unlink_group_fn;
+       blkio_clear_queue_fn *blkio_clear_queue_fn;
         blkio_update_group_weight_fn *blkio_update_group_weight_fn;
         blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
         blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
@@ -233,6 +234,7 @@ struct blkio_policy_type {
  /* Blkio controller policy registration */
  extern void blkio_policy_register(struct blkio_policy_type *);
  extern void blkio_policy_unregister(struct blkio_policy_type *);
+extern void blkg_destroy_all(struct request_queue *q);
  
  static inline char *blkg_path(struct blkio_group *blkg)
  {
@@ -249,6 +251,7 @@ struct blkio_policy_type {
  
  static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
  static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
+static inline void blkg_destroy_all(struct request_queue *q) { }
  
  static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
  
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index 702c0e64e09fd00921b68006fc6cf5e6da9bf2e8..3699ab40d494cea08cff0913c18612bd3a5f249f 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -989,12 +989,17 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
         td->nr_undestroyed_grps--;
  }
  
-static void throtl_release_tgs(struct throtl_data *td)
+static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
  {
         struct hlist_node *pos, *n;
         struct throtl_grp *tg;
+       bool empty = true;
  
         hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
+               /* skip root? */
+               if (!release_root && tg == td->root_tg)
+                       continue;
+
                 /*
                  * If cgroup removal path got to blk_group first and removed
                  * it from cgroup list, then it will take care of destroying
@@ -1002,7 +1007,10 @@ static void throtl_release_tgs(struct throtl_data *td)
                  */
                 if (!blkiocg_del_blkio_group(&tg->blkg))
                         throtl_destroy_tg(td, tg);
+               else
+                       empty = false;
         }
+       return empty;
  }
  
  /*
@@ -1029,6 +1037,20 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
         spin_unlock_irqrestore(td->queue->queue_lock, flags);
  }
  
+static bool throtl_clear_queue(struct request_queue *q)
+{
+       lockdep_assert_held(q->queue_lock);
+
+       /*
+        * Clear tgs but leave the root one alone.  This is necessary
+        * because root_tg is expected to be persistent and safe because
+        * blk-throtl can never be disabled while @q is alive.  This is a
+        * kludge to prepare for unified blkg.  This whole function will be
+        * removed soon.
+        */
+       return throtl_release_tgs(q->td, false);
+}
+
  static void throtl_update_blkio_group_common(struct throtl_data *td,
                                 struct throtl_grp *tg)
  {
@@ -1097,6 +1119,7 @@ static void throtl_shutdown_wq(struct request_queue *q)
  static struct blkio_policy_type blkio_policy_throtl = {
         .ops = {
                 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
+               .blkio_clear_queue_fn = throtl_clear_queue,
                 .blkio_update_group_read_bps_fn =
                                         throtl_update_blkio_group_read_bps,
                 .blkio_update_group_write_bps_fn =
@@ -1282,7 +1305,7 @@ void blk_throtl_exit(struct request_queue *q)
         throtl_shutdown_wq(q);
  
         spin_lock_irq(q->queue_lock);
-       throtl_release_tgs(td);
+       throtl_release_tgs(td, true);
  
         /* If there are other groups */
         if (td->nr_undestroyed_grps > 0)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 72680a6715fc03bd8d2af4f195e3cfaab298d2d4..61693d3404d081613423a0658ff4851c88ddb443 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1225,10 +1225,11 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
         cfq_put_cfqg(cfqg);
  }
  
-static void cfq_release_cfq_groups(struct cfq_data *cfqd)
+static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
  {
         struct hlist_node *pos, *n;
         struct cfq_group *cfqg;
+       bool empty = true;
  
         hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
                 /*
@@ -1238,7 +1239,10 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
                  */
                 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
                         cfq_destroy_cfqg(cfqd, cfqg);
+               else
+                       empty = false;
         }
+       return empty;
  }
  
  /*
@@ -1265,6 +1269,19 @@ static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
         spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
  }
  
+static struct elevator_type iosched_cfq;
+
+static bool cfq_clear_queue(struct request_queue *q)
+{
+       lockdep_assert_held(q->queue_lock);
+
+       /* shoot down blkgs iff the current elevator is cfq */
+       if (!q->elevator || q->elevator->type != &iosched_cfq)
+               return true;
+
+       return cfq_release_cfq_groups(q->elevator->elevator_data);
+}
+
  #else /* GROUP_IOSCHED */
  static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
  {
@@ -3875,6 +3892,7 @@ static struct elevator_type iosched_cfq = {
  static struct blkio_policy_type blkio_policy_cfq = {
         .ops = {
                 .blkio_unlink_group_fn =        cfq_unlink_blkio_group,
+               .blkio_clear_queue_fn = cfq_clear_queue,
                 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
         },
         .plid = BLKIO_POLICY_PROP,
diff --git a/block/elevator.c b/block/elevator.c

index 0bdea0ed03a3a37d82fe93fe67d4a92295887d83..8c7561fd2c7983f5934d84113401646c445a37e2 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -38,6 +38,7 @@
  #include <trace/events/block.h>
  
  #include "blk.h"
+#include "blk-cgroup.h"
  
  static DEFINE_SPINLOCK(elv_list_lock);
  static LIST_HEAD(elv_list);
@@ -894,6 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
         ioc_clear_queue(q);
         spin_unlock_irq(q->queue_lock);
  
+       blkg_destroy_all(q);
+
         /* allocate, init and register new elevator */
         err = -ENOMEM;
         q->elevator = elevator_alloc(q, new_e);
author	Tejun Heo <tj@kernel.org>
	Mon, 5 Mar 2012 21:15:00 +0000 (13:15 -0800)
committer	Jens Axboe <axboe@kernel.dk>
	Tue, 6 Mar 2012 20:27:22 +0000 (21:27 +0100)
block/blk-cgroup.c		patch \| blob \| blame \| history
block/blk-cgroup.h		patch \| blob \| blame \| history
block/blk-throttle.c		patch \| blob \| blame \| history
block/cfq-iosched.c		patch \| blob \| blame \| history
block/elevator.c		patch \| blob \| blame \| history