crush: use breadth-first search for indep mode
authorIlya Dryomov <ilya.dryomov@inktank.com>
Tue, 24 Dec 2013 19:19:25 +0000 (21:19 +0200)
committerIlya Dryomov <ilya.dryomov@inktank.com>
Tue, 31 Dec 2013 18:32:16 +0000 (20:32 +0200)
Reflects ceph.git commit 86e978036a4ecbac4c875e7c00f6c5bbe37282d3.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
include/linux/crush/crush.h
net/ceph/crush/mapper.c

index 3d6a12928560166db3ba9dec65bbb073f9221881..4023b1b52296a7546e62fce49f9ef6d102d1d67e 100644 (file)
@@ -22,7 +22,8 @@
 #define CRUSH_MAX_DEPTH 10  /* max crush hierarchy depth */
 
 
-#define CRUSH_ITEM_UNDEF  0x7fffffff  /* undefined result */
+#define CRUSH_ITEM_UNDEF  0x7ffffffe  /* undefined result (internal use only) */
+#define CRUSH_ITEM_NONE   0x7fffffff  /* no result */
 
 /*
  * CRUSH uses user-defined "rules" to describe how inputs should be
index a8605245d1905408e40bebef19dcc5e6663287da..caeb1066bea392a47b2314db2d3a63d49cf681ed 100644 (file)
@@ -473,6 +473,148 @@ reject:
 }
 
 
+/**
+ * choose indep: alternative breadth-first positionally stable mapping
+ *
+ */
+static void crush_choose_indep(const struct crush_map *map,
+                             struct crush_bucket *bucket,
+                             const __u32 *weight, int weight_max,
+                             int x, int numrep, int type,
+                             int *out, int outpos,
+                             int recurse_to_leaf,
+                             int *out2)
+{
+       struct crush_bucket *in = bucket;
+       int left = numrep - outpos;
+       int rep;
+       unsigned int ftotal;
+       int r;
+       int i;
+       int item = 0;
+       int itemtype;
+       int collide;
+
+       dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+               bucket->id, x, outpos, numrep);
+
+       /* initially my result is undefined */
+       for (rep = outpos; rep < numrep; rep++) {
+               out[rep] = CRUSH_ITEM_UNDEF;
+               if (out2)
+                       out2[rep] = CRUSH_ITEM_UNDEF;
+       }
+
+       for (ftotal = 0; left > 0 && ftotal < map->choose_total_tries; ftotal++) {
+               for (rep = outpos; rep < numrep; rep++) {
+                       if (out[rep] != CRUSH_ITEM_UNDEF)
+                               continue;
+
+                       in = bucket;  /* initial bucket */
+
+                       /* choose through intervening buckets */
+                       for (;;) {
+                               r = rep;
+
+                               /* be careful */
+                               if (in->alg == CRUSH_BUCKET_UNIFORM &&
+                                   in->size % numrep == 0)
+                                       /* r'=r+(n+1)*f_total */
+                                       r += (numrep+1) * ftotal;
+                               else
+                                       /* r' = r + n*f_total */
+                                       r += numrep * ftotal;
+
+                               /* bucket choose */
+                               if (in->size == 0) {
+                                       dprintk("   empty bucket\n");
+                                       break;
+                               }
+
+                               item = crush_bucket_choose(in, x, r);
+                               if (item >= map->max_devices) {
+                                       dprintk("   bad item %d\n", item);
+                                       out[rep] = CRUSH_ITEM_NONE;
+                                       if (out2)
+                                               out2[rep] = CRUSH_ITEM_NONE;
+                                       left--;
+                                       break;
+                               }
+
+                               /* desired type? */
+                               if (item < 0)
+                                       itemtype = map->buckets[-1-item]->type;
+                               else
+                                       itemtype = 0;
+                               dprintk("  item %d type %d\n", item, itemtype);
+
+                               /* keep going? */
+                               if (itemtype != type) {
+                                       if (item >= 0 ||
+                                           (-1-item) >= map->max_buckets) {
+                                               dprintk("   bad item type %d\n", type);
+                                               out[rep] = CRUSH_ITEM_NONE;
+                                               if (out2)
+                                                       out2[rep] =
+                                                               CRUSH_ITEM_NONE;
+                                               left--;
+                                               break;
+                                       }
+                                       in = map->buckets[-1-item];
+                                       continue;
+                               }
+
+                               /* collision? */
+                               collide = 0;
+                               for (i = outpos; i < numrep; i++) {
+                                       if (out[i] == item) {
+                                               collide = 1;
+                                               break;
+                                       }
+                               }
+                               if (collide)
+                                       break;
+
+                               if (recurse_to_leaf) {
+                                       if (item < 0) {
+                                               crush_choose_indep(map,
+                                                                  map->buckets[-1-item],
+                                                                  weight, weight_max,
+                                                                  x, rep+1, 0,
+                                                                  out2, rep,
+                                                                  0, NULL);
+                                               if (out2[rep] == CRUSH_ITEM_NONE) {
+                                                       /* placed nothing; no leaf */
+                                                       break;
+                                               }
+                                       } else {
+                                               /* we already have a leaf! */
+                                               out2[rep] = item;
+                                       }
+                               }
+
+                               /* out? */
+                               if (itemtype == 0 &&
+                                   is_out(map, weight, weight_max, item, x))
+                                       break;
+
+                               /* yay! */
+                               out[rep] = item;
+                               left--;
+                               break;
+                       }
+               }
+       }
+       for (rep = outpos; rep < numrep; rep++) {
+               if (out[rep] == CRUSH_ITEM_UNDEF) {
+                       out[rep] = CRUSH_ITEM_NONE;
+               }
+               if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
+                       out2[rep] = CRUSH_ITEM_NONE;
+               }
+       }
+}
+
 /**
  * crush_do_rule - calculate a mapping with the given input and rule
  * @map: the crush_map
@@ -556,15 +698,27 @@ int crush_do_rule(const struct crush_map *map,
                                                continue;
                                }
                                j = 0;
-                               osize += crush_choose(map,
-                                                     map->buckets[-1-w[i]],
-                                                     weight, weight_max,
-                                                     x, numrep,
-                                                     curstep->arg2,
-                                                     o+osize, j,
-                                                     firstn,
-                                                     recurse_to_leaf,
-                                                     descend_once, c+osize);
+                               if (firstn) {
+                                       osize += crush_choose(map,
+                                                             map->buckets[-1-w[i]],
+                                                             weight, weight_max,
+                                                             x, numrep,
+                                                             curstep->arg2,
+                                                             o+osize, j,
+                                                             firstn,
+                                                             recurse_to_leaf,
+                                                             descend_once, c+osize);
+                               } else {
+                                       crush_choose_indep(map,
+                                                          map->buckets[-1-w[i]],
+                                                          weight, weight_max,
+                                                          x, numrep,
+                                                          curstep->arg2,
+                                                          o+osize, j,
+                                                          recurse_to_leaf,
+                                                          c+osize);
+                                       osize += numrep;
+                               }
                        }
 
                        if (recurse_to_leaf)