GFS2: Set of distributed preferences for rgrps
author Bob Peterson <rpeterso@redhat.com>
Wed, 29 Oct 2014 13:02:28 +0000 (08:02 -0500)
committer Steven Whitehouse <swhiteho@redhat.com>
Mon, 3 Nov 2014 19:24:49 +0000 (19:24 +0000)
This patch tries to use the journal numbers to evenly distribute
which node prefers which resource group for block allocations. This
is to help performance.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/incore.h
fs/gfs2/rgrp.c

index 39e7e9959b7462f99a7d04dff4759445f581d9df..1b899187be5aae15719bec953a37d555618b285c 100644 (file)
@@ -97,6 +97,7 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_CHECK         0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE      0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR         0x40000000 /* error in rg */
+#define GFS2_RDF_PREFERRED     0x80000000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK          0xf0000000 /* mask for internal flags */
        spinlock_t rd_rsspin;           /* protects reservation related vars */
        struct rb_root rd_rstree;       /* multi-block reservation tree */
index 7474c413ffd1e2c8da3d4396f3bbc31a0175805d..f4e4a0c5babe4c239af5f4b0111a65ec2d98af99 100644 (file)
@@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
        rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
        rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
-       rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+       rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
        if (rgd->rd_data > sdp->sd_max_rg_data)
                sdp->sd_max_rg_data = rgd->rd_data;
        spin_lock(&sdp->sd_rindex_spin);
@@ -954,6 +954,36 @@ fail:
        return error;
 }
 
+/**
+ * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
+ * @sdp: the GFS2 superblock
+ *
+ * The purpose of this function is to select a subset of the resource groups
+ * and mark them as PREFERRED. We do it in such a way that each node prefers
+ * to use a unique set of rgrps to minimize glock contention.
+ *
+ * Starting ls_jid rgrps into the list, every sd_journals'th rgrp is flagged
+ * with GFS2_RDF_PREFERRED until the walk arrives back at its starting point.
+ * The walk also stops as soon as no further rgrp can be obtained, so an empty
+ * or very short rgrp list cannot cause a NULL dereference or an endless loop.
+ */
+static void set_rgrp_preferences(struct gfs2_sbd *sdp)
+{
+       struct gfs2_rgrpd *rgd, *next, *first;
+       int i;
+
+       rgd = gfs2_rgrpd_get_first(sdp);
+       if (!rgd)
+               return; /* no rgrps at all: nothing to mark */
+
+       /* Skip an initial number of rgrps, based on this node's journal ID.
+          That should start each node out on its own set. */
+       for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++) {
+               next = gfs2_rgrpd_get_next(rgd);
+               if (!next)
+                       break; /* no successor: stay on the last valid rgrp */
+               rgd = next;
+       }
+       first = rgd;
+
+       do {
+               rgd->rd_flags |= GFS2_RDF_PREFERRED;
+               for (i = 0; i < sdp->sd_journals; i++) {
+                       rgd = gfs2_rgrpd_get_next(rgd);
+                       /* Treat a NULL successor like reaching the start */
+                       if (!rgd || rgd == first)
+                               break;
+               }
+       } while (rgd && rgd != first);
+}
+
 /**
  * gfs2_ri_update - Pull in a new resource index from the disk
  * @ip: pointer to the rindex inode
@@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        if (error < 0)
                return error;
 
+       set_rgrp_preferences(sdp);
+
        sdp->sd_rindex_uptodate = 1;
        return 0;
 }
@@ -1890,6 +1922,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
        return false;
 }
 
+/**
+ * fast_to_acquire - determine if a resource group will be fast to acquire
+ * @rgd: the resource group to examine
+ *
+ * An rgrp flagged GFS2_RDF_PREFERRED is always treated as fast: it should be
+ * quicker to acquire, because we tried to set ourselves up as dlm lock master.
+ * Any other rgrp is treated as fast only while its glock is in a state other
+ * than LM_ST_UNLOCKED, has an empty holders list, and has neither GLF_DEMOTE
+ * nor GLF_DEMOTE_IN_PROGRESS set.
+ *
+ * Returns: 1 if the rgrp is expected to be fast to acquire, 0 otherwise
+ */
+static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
+{
+       struct gfs2_glock *glock = rgd->rd_gl;
+
+       /* One of our preferred rgrps: no need to look at the glock */
+       if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+               return 1;
+
+       if (glock->gl_state == LM_ST_UNLOCKED)
+               return 0;
+       if (!list_empty(&glock->gl_holders))
+               return 0;
+       if (test_bit(GLF_DEMOTE_IN_PROGRESS, &glock->gl_flags) ||
+           test_bit(GLF_DEMOTE, &glock->gl_flags))
+               return 0;
+
+       return 1;
+}
+
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
                        rg_locked = 0;
                        if (skip && skip--)
                                goto next_rgrp;
-                       if (!gfs2_rs_active(rs) && (loops < 2) &&
-                            gfs2_rgrp_used_recently(rs, 1000) &&
-                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
-                               goto next_rgrp;
+                       if (!gfs2_rs_active(rs)) {
+                               if (loops == 0 &&
+                                   !fast_to_acquire(rs->rs_rbm.rgd))
+                                       goto next_rgrp;
+                               if ((loops < 2) &&
+                                   gfs2_rgrp_used_recently(rs, 1000) &&
+                                   gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+                                       goto next_rgrp;
+                       }
                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
                                                   LM_ST_EXCLUSIVE, flags,
                                                   &rs->rs_rgd_gh);