bcache: Convert try_wait to wait_queue_head_t

author Kent Overstreet <kmo@daterainc.com>

Thu, 25 Jul 2013 00:27:07 +0000 (17:27 -0700)

committer Kent Overstreet <kmo@daterainc.com>

Mon, 11 Nov 2013 05:56:03 +0000 (21:56 -0800)
author Kent Overstreet <kmo@daterainc.com>
Thu, 25 Jul 2013 00:27:07 +0000 (17:27 -0700)
committer Kent Overstreet <kmo@daterainc.com>
Mon, 11 Nov 2013 05:56:03 +0000 (21:56 -0800)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h

index 10ce0c825fcebd5908349cbd65f810bd43fcb995..c1c44191afb1a633fb42be7f23efaa083b92570e 100644 (file)
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -735,8 +735,8 @@ struct cache_set {
          * basically a lock for this that we can wait on asynchronously. The
          * btree_root() macro releases the lock when it returns.
          */
-       struct closure          *try_harder;
-       struct closure_waitlist try_wait;
+       struct task_struct      *try_harder;
+       wait_queue_head_t       try_wait;
         uint64_t                try_harder_start;
  
         /*
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c

index 731cd8e3fe902773d70729017f774d6efc7f74d1..4d50f1e7006ef011e12587366f54a1a448afb2ae 100644 (file)
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -437,7 +437,7 @@ static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
  
         set_btree_node_dirty(b);
  
-       if (op && op->journal) {
+       if (op->journal) {
                 if (w->journal &&
                     journal_pin_cmp(b->c, w, op)) {
                         atomic_dec_bug(w->journal);
@@ -574,34 +574,35 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
         return b;
  }
  
-static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
+static int mca_reap(struct btree *b, unsigned min_order, bool flush)
  {
+       struct closure cl;
+
+       closure_init_stack(&cl);
         lockdep_assert_held(&b->c->bucket_lock);
  
         if (!down_write_trylock(&b->lock))
                 return -ENOMEM;
  
-       if (b->page_order < min_order) {
+       BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
+
+       if (b->page_order < min_order ||
+           (!flush &&
+            (btree_node_dirty(b) ||
+             atomic_read(&b->io.cl.remaining) != -1))) {
                 rw_unlock(true, b);
                 return -ENOMEM;
         }
  
-       BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
-
-       if (cl && btree_node_dirty(b))
-               bch_btree_node_write(b, NULL);
-
-       if (cl)
-               closure_wait_event_async(&b->io.wait, cl,
-                        atomic_read(&b->io.cl.remaining) == -1);
-
-       if (btree_node_dirty(b) ||
-           !closure_is_unlocked(&b->io.cl) ||
-           work_pending(&b->work.work)) {
-               rw_unlock(true, b);
-               return -EAGAIN;
+       if (btree_node_dirty(b)) {
+               bch_btree_node_write(b, &cl);
+               closure_sync(&cl);
         }
  
+       /* wait for any in flight btree write */
+       closure_wait_event_sync(&b->io.wait, &cl,
+               atomic_read(&b->io.cl.remaining) == -1);
+
         return 0;
  }
  
@@ -641,7 +642,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
                         break;
  
                 if (++i > 3 &&
-                   !mca_reap(b, NULL, 0)) {
+                   !mca_reap(b, 0, false)) {
                         mca_data_free(b);
                         rw_unlock(true, b);
                         freed++;
@@ -660,7 +661,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
                 list_rotate_left(&c->btree_cache);
  
                 if (!b->accessed &&
-                   !mca_reap(b, NULL, 0)) {
+                   !mca_reap(b, 0, false)) {
                         mca_bucket_free(b);
                         mca_data_free(b);
                         rw_unlock(true, b);
@@ -783,52 +784,27 @@ out:
         return b;
  }
  
-static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
-                                    int level, struct closure *cl)
+static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k)
  {
-       int ret = -ENOMEM;
-       struct btree *i;
+       struct btree *b;
  
         trace_bcache_btree_cache_cannibalize(c);
  
-       if (!cl)
-               return ERR_PTR(-ENOMEM);
-
-       /*
-        * Trying to free up some memory - i.e. reuse some btree nodes - may
-        * require initiating IO to flush the dirty part of the node. If we're
-        * running under generic_make_request(), that IO will never finish and
-        * we would deadlock. Returning -EAGAIN causes the cache lookup code to
-        * punt to workqueue and retry.
-        */
-       if (current->bio_list)
-               return ERR_PTR(-EAGAIN);
-
-       if (c->try_harder && c->try_harder != cl) {
-               closure_wait_event_async(&c->try_wait, cl, !c->try_harder);
-               return ERR_PTR(-EAGAIN);
-       }
+       if (!c->try_harder) {
+               c->try_harder = current;
+               c->try_harder_start = local_clock();
+       } else if (c->try_harder != current)
+               return ERR_PTR(-ENOSPC);
  
-       c->try_harder = cl;
-       c->try_harder_start = local_clock();
-retry:
-       list_for_each_entry_reverse(i, &c->btree_cache, list) {
-               int r = mca_reap(i, cl, btree_order(k));
-               if (!r)
-                       return i;
-               if (r != -ENOMEM)
-                       ret = r;
-       }
+       list_for_each_entry_reverse(b, &c->btree_cache, list)
+               if (!mca_reap(b, btree_order(k), false))
+                       return b;
  
-       if (ret == -EAGAIN &&
-           closure_blocking(cl)) {
-               mutex_unlock(&c->bucket_lock);
-               closure_sync(cl);
-               mutex_lock(&c->bucket_lock);
-               goto retry;
-       }
+       list_for_each_entry_reverse(b, &c->btree_cache, list)
+               if (!mca_reap(b, btree_order(k), true))
+                       return b;
  
-       return ERR_PTR(ret);
+       return ERR_PTR(-ENOMEM);
  }
  
  /*
@@ -839,18 +815,19 @@ retry:
   */
  void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl)
  {
-       if (c->try_harder == cl) {
+       if (c->try_harder == current) {
                 bch_time_stats_update(&c->try_harder_time, c->try_harder_start);
                 c->try_harder = NULL;
-               __closure_wake_up(&c->try_wait);
+               wake_up(&c->try_wait);
         }
  }
  
-static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
-                              int level, struct closure *cl)
+static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level)
  {
         struct btree *b;
  
+       BUG_ON(current->bio_list);
+
         lockdep_assert_held(&c->bucket_lock);
  
         if (mca_find(c, k))
@@ -860,14 +837,14 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k,
          * the list. Check if there's any freed nodes there:
          */
         list_for_each_entry(b, &c->btree_cache_freeable, list)
-               if (!mca_reap(b, NULL, btree_order(k)))
+               if (!mca_reap(b, btree_order(k), false))
                         goto out;
  
         /* We never free struct btree itself, just the memory that holds the on
          * disk node. Check the freed list before allocating a new one:
          */
         list_for_each_entry(b, &c->btree_cache_freed, list)
-               if (!mca_reap(b, NULL, 0)) {
+               if (!mca_reap(b, 0, false)) {
                         mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO);
                         if (!b->sets[0].data)
                                 goto err;
@@ -901,7 +878,7 @@ err:
         if (b)
                 rw_unlock(true, b);
  
-       b = mca_cannibalize(c, k, level, cl);
+       b = mca_cannibalize(c, k);
         if (!IS_ERR(b))
                 goto out;
  
@@ -919,10 +896,9 @@ err:
   * level and op->lock.
   */
  struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
-                                int level, struct btree_op *op)
+                                int level, bool write)
  {
         int i = 0;
-       bool write = level <= op->lock;
         struct btree *b;
  
         BUG_ON(level < 0);
@@ -934,7 +910,7 @@ retry:
                         return ERR_PTR(-EAGAIN);
  
                 mutex_lock(&c->bucket_lock);
-               b = mca_alloc(c, k, level, &op->cl);
+               b = mca_alloc(c, k, level);
                 mutex_unlock(&c->bucket_lock);
  
                 if (!b)
@@ -980,7 +956,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
         struct btree *b;
  
         mutex_lock(&c->bucket_lock);
-       b = mca_alloc(c, k, level, NULL);
+       b = mca_alloc(c, k, level);
         mutex_unlock(&c->bucket_lock);
  
         if (!IS_ERR_OR_NULL(b)) {
@@ -991,17 +967,12 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
  
  /* Btree alloc */
  
-static void btree_node_free(struct btree *b, struct btree_op *op)
+static void btree_node_free(struct btree *b)
  {
         unsigned i;
  
         trace_bcache_btree_node_free(b);
  
-       /*
-        * The BUG_ON() in btree_node_get() implies that we must have a write
-        * lock on parent to free or even invalidate a node
-        */
-       BUG_ON(op->lock <= b->level);
         BUG_ON(b == b->c->root);
  
         if (btree_node_dirty(b))
@@ -1037,7 +1008,7 @@ retry:
  
         SET_KEY_SIZE(&k.key, c->btree_pages * PAGE_SECTORS);
  
-       b = mca_alloc(c, &k.key, level, cl);
+       b = mca_alloc(c, &k.key, level);
         if (IS_ERR(b))
                 goto err_free;
  
@@ -1173,8 +1144,7 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
         return stale;
  }
  
-static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
-                                   struct btree_op *op)
+static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k)
  {
         /*
          * We block priorities from being written for the duration of garbage
@@ -1191,7 +1161,7 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
                 memcpy(k->ptr, b->key.ptr,
                        sizeof(uint64_t) * KEY_PTRS(&b->key));
  
-               btree_node_free(n, op);
+               btree_node_free(n);
                 up_write(&n->lock);
         }
  
@@ -1211,8 +1181,8 @@ struct gc_merge_info {
         unsigned        keys;
  };
  
-static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
-                             struct gc_stat *gc, struct gc_merge_info *r)
+static void btree_gc_coalesce(struct btree *b, struct gc_stat *gc,
+                             struct gc_merge_info *r)
  {
         unsigned nodes = 0, keys = 0, blocks;
         int i;
@@ -1228,7 +1198,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
  
         for (i = nodes - 1; i >= 0; --i) {
                 if (r[i].b->written)
-                       r[i].b = btree_gc_alloc(r[i].b, r[i].k, op);
+                       r[i].b = btree_gc_alloc(r[i].b, r[i].k);
  
                 if (r[i].b->written)
                         return;
@@ -1292,7 +1262,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
                 r[i - 1].keys   = n2->keys;
         }
  
-       btree_node_free(r->b, op);
+       btree_node_free(r->b);
         up_write(&r->b->lock);
  
         trace_bcache_btree_gc_coalesce(nodes);
@@ -1324,7 +1294,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
         memset(r, 0, sizeof(r));
  
         while ((r->k = bch_next_recurse_key(b, &b->c->gc_done))) {
-               r->b = bch_btree_node_get(b->c, r->k, b->level - 1, op);
+               r->b = bch_btree_node_get(b->c, r->k, b->level - 1, true);
  
                 if (IS_ERR(r->b)) {
                         ret = PTR_ERR(r->b);
@@ -1337,7 +1307,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                 if (!b->written &&
                     (r->b->level || stale > 10 ||
                      b->c->gc_always_rewrite))
-                       r->b = btree_gc_alloc(r->b, r->k, op);
+                       r->b = btree_gc_alloc(r->b, r->k);
  
                 if (r->b->level)
                         ret = btree_gc_recurse(r->b, op, writes, gc);
@@ -1350,7 +1320,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                 bkey_copy_key(&b->c->gc_done, r->k);
  
                 if (!b->written)
-                       btree_gc_coalesce(b, op, gc, r);
+                       btree_gc_coalesce(b, gc, r);
  
                 if (r[GC_MERGE_NODES - 1].b)
                         write(r[GC_MERGE_NODES - 1].b);
@@ -1404,7 +1374,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
         if (!IS_ERR_OR_NULL(n)) {
                 closure_sync(&op->cl);
                 bch_btree_set_root(b);
-               btree_node_free(n, op);
+               btree_node_free(n);
                 rw_unlock(true, b);
         }
  
@@ -2004,18 +1974,18 @@ static int btree_split(struct btree *b, struct btree_op *op,
         }
  
         rw_unlock(true, n1);
-       btree_node_free(b, op);
+       btree_node_free(b);
  
         bch_time_stats_update(&b->c->btree_split_time, start_time);
  
         return 0;
  err_free2:
         __bkey_put(n2->c, &n2->key);
-       btree_node_free(n2, op);
+       btree_node_free(n2);
         rw_unlock(true, n2);
  err_free1:
         __bkey_put(n1->c, &n1->key);
-       btree_node_free(n1, op);
+       btree_node_free(n1);
         rw_unlock(true, n1);
  err:
         if (n3 == ERR_PTR(-EAGAIN) ||
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h

index 17b7a4e39c7e07df5b9966a663b3e9834619191c..72794ab8e8e5038a40faa2ade443a64915269699 100644 (file)
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -326,7 +326,7 @@ static inline void rw_unlock(bool w, struct btree *b)
  ({                                                                     \
         int _r, l = (b)->level - 1;                                     \
         bool _w = l <= (op)->lock;                                      \
-       struct btree *_child = bch_btree_node_get((b)->c, key, l, op);  \
+       struct btree *_child = bch_btree_node_get((b)->c, key, l, _w);  \
         if (!IS_ERR(_child)) {                                          \
                 _child->parent = (b);                                   \
                 _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);       \
@@ -356,6 +356,11 @@ static inline void rw_unlock(bool w, struct btree *b)
                 }                                                       \
                 rw_unlock(_w, _b);                                      \
                 bch_cannibalize_unlock(c, &(op)->cl);                   \
+               if (_r == -ENOSPC) {                                    \
+                       wait_event((c)->try_wait,                       \
+                                  !(c)->try_harder);                   \
+                       _r = -EINTR;                                    \
+               }                                                       \
         } while (_r == -EINTR);                                         \
                                                                         \
         _r;                                                             \
@@ -375,8 +380,7 @@ void bch_btree_node_write(struct btree *, struct closure *);
  void bch_cannibalize_unlock(struct cache_set *, struct closure *);
  void bch_btree_set_root(struct btree *);
  struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
-struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
-                               int, struct btree_op *);
+struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
  
  int bch_btree_insert_check_key(struct btree *, struct btree_op *,
                                struct bkey *);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c

index d3169c0652f8cd67e1ae1a478eb687c157ed8ae3..9a164cd4058c94925e9955e863eaef66e791f1e4 100644 (file)
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1436,12 +1436,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
  
         c->sort_crit_factor = int_sqrt(c->btree_pages);
  
-       mutex_init(&c->bucket_lock);
-       mutex_init(&c->sort_lock);
-       spin_lock_init(&c->sort_time_lock);
         closure_init_unlocked(&c->sb_write);
+       mutex_init(&c->bucket_lock);
+       init_waitqueue_head(&c->try_wait);
         closure_init_unlocked(&c->uuid_write);
+       spin_lock_init(&c->sort_time_lock);
+       mutex_init(&c->sort_lock);
         spin_lock_init(&c->btree_read_time_lock);
+
         bch_moving_init_cache_set(c);
  
         INIT_LIST_HEAD(&c->list);
@@ -1529,7 +1531,7 @@ static void run_cache_set(struct cache_set *c)
                         goto err;
  
                 err = "error reading btree root";
-               c->root = bch_btree_node_get(c, k, j->btree_level, &op);
+               c->root = bch_btree_node_get(c, k, j->btree_level, true);
                 if (IS_ERR_OR_NULL(c->root))
                         goto err;
author	Kent Overstreet <kmo@daterainc.com>
	Thu, 25 Jul 2013 00:27:07 +0000 (17:27 -0700)
committer	Kent Overstreet <kmo@daterainc.com>
	Mon, 11 Nov 2013 05:56:03 +0000 (21:56 -0800)
drivers/md/bcache/bcache.h		patch \| blob \| blame \| history
drivers/md/bcache/btree.c		patch \| blob \| blame \| history
drivers/md/bcache/btree.h		patch \| blob \| blame \| history
drivers/md/bcache/super.c		patch \| blob \| blame \| history