GFS2: Fix race in glock lru glock disposal
author Steven Whitehouse <swhiteho@redhat.com>
Mon, 23 Jun 2014 13:43:32 +0000 (14:43 +0100)
committer Steven Whitehouse <swhiteho@redhat.com>
Fri, 18 Jul 2014 10:12:51 +0000 (11:12 +0100)
We must not leave items on the LRU list with GLF_LOCK set, since
they can be removed from the list if the glock is brought back into
use, which may then result in a hang while waiting for GLF_LOCK to
clear.

It doesn't happen very often, since it requires a glock that has
not been used for a long time to be brought back into use at the
same moment that the shrinker is part way through disposing of
glocks.

The fix is to set GLF_LOCK at a later time, when we already know
that the other locks can be obtained. Also, we now only release
the lru_lock in case a resched is needed, rather than on every
iteration.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/glock.c

index 278fae5b6982f81ca3ee8c17d830ad28d2ab47c6..c1e5b126d2ca999dc9e667087d2305d21160935e 100644 (file)
@@ -1406,12 +1406,16 @@ __acquires(&lru_lock)
                gl = list_entry(list->next, struct gfs2_glock, gl_lru);
                list_del_init(&gl->gl_lru);
                if (!spin_trylock(&gl->gl_spin)) {
+add_back_to_lru:
                        list_add(&gl->gl_lru, &lru_list);
                        atomic_inc(&lru_count);
                        continue;
                }
+               if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+                       spin_unlock(&gl->gl_spin);
+                       goto add_back_to_lru;
+               }
                clear_bit(GLF_LRU, &gl->gl_flags);
-               spin_unlock(&lru_lock);
                gl->gl_lockref.count++;
                if (demote_ok(gl))
                        handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1419,7 +1423,7 @@ __acquires(&lru_lock)
                if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                        gl->gl_lockref.count--;
                spin_unlock(&gl->gl_spin);
-               spin_lock(&lru_lock);
+               cond_resched_lock(&lru_lock);
        }
 }
 
@@ -1444,7 +1448,7 @@ static long gfs2_scan_glock_lru(int nr)
                gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
 
                /* Test for being demotable */
-               if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+               if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
                        list_move(&gl->gl_lru, &dispose);
                        atomic_dec(&lru_count);
                        freed++;