[GFS2] Invalidate cache at correct point
authorBenjamin Marzinski <bmarzins@redhat.com>
Fri, 14 Mar 2008 18:52:52 +0000 (13:52 -0500)
committerSteven Whitehouse <swhiteho@redhat.com>
Mon, 31 Mar 2008 09:41:44 +0000 (10:41 +0100)
GFS2 wasn't invalidating its cache before it called into the lock manager
with a request that could potentially drop a lock.  This was leaving a
window where the lock could be actually be held by another node, but the
file's page cache would still appear valid, causing coherency problems.
This patch moves the cache invalidation to before the lock manager call
when dropping a lock. It also adds the option to the lock_dlm lock
manager to not use conversion mode deadlock avoidance, which, on a
conversion from shared to exclusive, could internally drop the lock, and
then reacquire in. GFS2 now asks lock_dlm to not do this.  Instead, GFS2
manually drops the lock and reacquires it.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/glock.c
fs/gfs2/incore.h
fs/gfs2/locking/dlm/lock.c
fs/gfs2/locking/dlm/thread.c
fs/gfs2/ops_fstype.c
include/linux/lm_interface.h

index 63981e2fb835f3c21a404e1eb71ca1181c1b20f7..d636b3e80f5d26dda0a0f799011da225c5ef24ba 100644 (file)
@@ -764,7 +764,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
 static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 {
        struct gfs2_sbd *sdp = gl->gl_sbd;
-       const struct gfs2_glock_operations *glops = gl->gl_ops;
+       struct gfs2_holder *gh = gl->gl_req_gh;
 
        gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
        gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -772,8 +772,14 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 
        state_change(gl, LM_ST_UNLOCKED);
 
-       if (glops->go_inval)
-               glops->go_inval(gl, DIO_METADATA);
+       if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) {
+               spin_lock(&gl->gl_spin);
+               gh->gh_error = 0;
+               spin_unlock(&gl->gl_spin);
+               gfs2_glock_xmote_th(gl, gl->gl_req_gh);
+               gfs2_glock_put(gl);
+               return;
+       }
 
        spin_lock(&gl->gl_spin);
        gfs2_demote_wake(gl);
@@ -794,7 +800,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
        struct gfs2_sbd *sdp = gl->gl_sbd;
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        struct gfs2_holder *gh = gl->gl_req_gh;
-       int prev_state = gl->gl_state;
        int op_done = 1;
 
        if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
@@ -808,16 +813,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 
        state_change(gl, ret & LM_OUT_ST_MASK);
 
-       if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
-               if (glops->go_inval)
-                       glops->go_inval(gl, DIO_METADATA);
-       } else if (gl->gl_state == LM_ST_DEFERRED) {
-               /* We might not want to do this here.
-                  Look at moving to the inode glops. */
-               if (glops->go_inval)
-                       glops->go_inval(gl, 0);
-       }
-
        /*  Deal with each possible exit condition  */
 
        if (!gh) {
@@ -837,6 +832,14 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
                }
        } else {
                spin_lock(&gl->gl_spin);
+               if (ret & LM_OUT_CONV_DEADLK) {
+                       gh->gh_error = 0;
+                       set_bit(GLF_CONV_DEADLK, &gl->gl_flags);
+                       spin_unlock(&gl->gl_spin);
+                       gfs2_glock_drop_th(gl);
+                       gfs2_glock_put(gl);
+                       return;
+               }
                list_del_init(&gh->gh_list);
                gh->gh_error = -EIO;
                if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 
@@ -910,6 +913,8 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
 
        if (glops->go_xmote_th)
                glops->go_xmote_th(gl);
+       if (state == LM_ST_DEFERRED && glops->go_inval)
+               glops->go_inval(gl, DIO_METADATA);
 
        gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
        gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
@@ -952,6 +957,8 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl)
 
        if (glops->go_xmote_th)
                glops->go_xmote_th(gl);
+       if (glops->go_inval)
+               glops->go_inval(gl, DIO_METADATA);
 
        gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
        gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
index 4ba2ea63119dbafa391333dd44c3d6efb0a66618..9c2c0b90b22a2e1c041d745a7386752a0399529b 100644 (file)
@@ -167,6 +167,7 @@ enum {
        GLF_DEMOTE_IN_PROGRESS  = 6,
        GLF_LFLUSH              = 7,
        GLF_WAITERS2            = 8,
+       GLF_CONV_DEADLK         = 9,
 };
 
 struct gfs2_glock {
index 542a797ac89a453f8cf9d691ad56c8888ac092cb..53a6ab3c09191ab588298d98c5ae676852750f35 100644 (file)
@@ -137,7 +137,8 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
 
                /* Conversion deadlock avoidance by DLM */
 
-               if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
+               if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) &&
+                   !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
                    !(lkf & DLM_LKF_NOQUEUE) &&
                    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
                        lkf |= DLM_LKF_CONVDEADLK;
index 521694fc19d672c010fe6afdb4455fd389598a8e..e53db6fd28ab62f140e44557c993d24f1036d1c1 100644 (file)
@@ -135,7 +135,15 @@ static void process_complete(struct gdlm_lock *lp)
                         lp->lksb.sb_status, lp->lockname.ln_type,
                         (unsigned long long)lp->lockname.ln_number,
                         lp->flags);
-               return;
+               if (lp->lksb.sb_status == -EDEADLOCK &&
+                   lp->ls->fsflags & LM_MFLAG_CONV_NODROP) {
+                       lp->req = lp->cur;
+                       acb.lc_ret |= LM_OUT_CONV_DEADLK;
+                       if (lp->cur == DLM_LOCK_IV)
+                               lp->lksb.sb_lkid = 0;
+                       goto out;
+               } else
+                       return;
        }
 
        /*
index 5b518f73497a64ba15a7f180b1279e0b0cb4b08a..ef9c6c4f80f6cb420ba539e26a601240bbb49183 100644 (file)
@@ -723,7 +723,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
 {
        char *proto = sdp->sd_proto_name;
        char *table = sdp->sd_table_name;
-       int flags = 0;
+       int flags = LM_MFLAG_CONV_NODROP;
        int error;
 
        if (sdp->sd_args.ar_spectator)
index 1418fdc9ac0261b4bccb38e146cf94a895de4983..f274997bc2832fb9d8cf0c4e6d0ee2d5858d61e9 100644 (file)
@@ -21,9 +21,15 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
  * modify the filesystem.  The lock module shouldn't assign a journal to the FS
  * mount.  It shouldn't send recovery callbacks to the FS mount.  If the node
  * dies or withdraws, all locks can be wiped immediately.
+ *
+ * LM_MFLAG_CONV_NODROP
+ * Do not allow the dlm to internally resolve conversion deadlocks by demoting
+ * the lock to unlocked and then reacquiring it in the requested mode. Instead,
+ * it should cancel the request and return LM_OUT_CONV_DEADLK.
  */
 
 #define LM_MFLAG_SPECTATOR     0x00000001
+#define LM_MFLAG_CONV_NODROP   0x00000002
 
 /*
  * lm_lockstruct flags
@@ -110,6 +116,9 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
  *
  * LM_OUT_ASYNC
  * The result of the request will be returned in an LM_CB_ASYNC callback.
+ *
+ * LM_OUT_CONV_DEADLK
+ * The lock request was canceled do to a conversion deadlock.
  */
 
 #define LM_OUT_ST_MASK         0x00000003
@@ -117,6 +126,7 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
 #define LM_OUT_CANCELED                0x00000008
 #define LM_OUT_ASYNC           0x00000080
 #define LM_OUT_ERROR           0x00000100
+#define LM_OUT_CONV_DEADLK     0x00000200
 
 /*
  * lm_callback_t types