NFS: Cache state owners after files are closed
authorChuck Lever <chuck.lever@oracle.com>
Tue, 6 Dec 2011 21:13:48 +0000 (16:13 -0500)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Thu, 5 Jan 2012 16:59:18 +0000 (11:59 -0500)
Servers have a finite amount of memory to store NFSv4 open and lock
owners.  Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification.  Thus clients should be careful to reuse
state owners when possible.

Currently Linux is not too careful.  When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released.  The next OPEN allocates a new one.  A
workload that serially opens and closes files can run through a large
number of open owners this way.

When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time.  Garbage collect before
looking for a state owner.  This makes state owners for active users
available for re-use.

Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed.  Also be
sure not to reclaim unused state owners during state recovery.

This change has benefits for the client as well.  For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one.  This reduces wire traffic
and thus open(2) latency.  Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test.  With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.

As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.

[ At some point we should rationalize the use of the nfs_server
->destroy method. ]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/client.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4state.c
include/linux/nfs_fs_sb.h

index 32ea37198e93b647e0b134b6c17db8fe51bfbedf..41bd67f80d31ba730ac759818d45195f45ae7fbf 100644 (file)
@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server)
        rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
 }
 
+static void nfs4_destroy_server(struct nfs_server *server)
+{
+       nfs4_purge_state_owners(server);
+}
+
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void)
        INIT_LIST_HEAD(&server->master_link);
        INIT_LIST_HEAD(&server->delegations);
        INIT_LIST_HEAD(&server->layouts);
+       INIT_LIST_HEAD(&server->state_owners_lru);
 
        atomic_set(&server->active, 0);
 
@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 
        nfs_server_insert_lists(server);
        server->mount_time = jiffies;
+       server->destroy = nfs4_destroy_server;
 out:
        nfs_free_fattr(fattr);
        return error;
@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 
        /* Copy data from the source */
        server->nfs_client = source->nfs_client;
+       server->destroy = source->destroy;
        atomic_inc(&server->nfs_client->cl_count);
        nfs_server_copy_userdata(server, source);
 
index 693ae22f873194d0c36adc384d7cb05fefbc8b6c..4d7d0aedc101831ecb3b10cf345f0e18e7ca56ad 100644 (file)
@@ -94,6 +94,8 @@ struct nfs_unique_id {
 struct nfs4_state_owner {
        struct nfs_unique_id so_owner_id;
        struct nfs_server    *so_server;
+       struct list_head     so_lru;
+       unsigned long        so_expires;
        struct rb_node       so_server_node;
 
        struct rpc_cred      *so_cred;   /* Associated cred */
@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_purge_state_owners(struct nfs_server *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, fmode_t);
index 6354e4fcc8299d2974f43aefa7ab0aae43bd59eb..a53f33b4ac3a5cf333afe80b358f27fe4130289f 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
+#include <linux/jiffies.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
                else if (cred > sp->so_cred)
                        p = &parent->rb_right;
                else {
+                       if (!list_empty(&sp->so_lru))
+                               list_del_init(&sp->so_lru);
                        atomic_inc(&sp->so_count);
                        return sp;
                }
@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
                else if (new->so_cred > sp->so_cred)
                        p = &parent->rb_right;
                else {
+                       if (!list_empty(&sp->so_lru))
+                               list_del_init(&sp->so_lru);
                        atomic_inc(&sp->so_count);
                        return sp;
                }
@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void)
        spin_lock_init(&sp->so_sequence.lock);
        INIT_LIST_HEAD(&sp->so_sequence.list);
        atomic_set(&sp->so_count, 1);
+       INIT_LIST_HEAD(&sp->so_lru);
        return sp;
 }
 
@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
        }
 }
 
+static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
+{
+       rpc_destroy_wait_queue(&sp->so_sequence.wait);
+       put_rpccred(sp->so_cred);
+       kfree(sp);
+}
+
+static void nfs4_gc_state_owners(struct nfs_server *server)
+{
+       struct nfs_client *clp = server->nfs_client;
+       struct nfs4_state_owner *sp, *tmp;
+       unsigned long time_min, time_max;
+       LIST_HEAD(doomed);
+
+       spin_lock(&clp->cl_lock);
+       time_max = jiffies;
+       time_min = (long)time_max - (long)clp->cl_lease_time;
+       list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+               /* NB: LRU is sorted so that oldest is at the head */
+               if (time_in_range(sp->so_expires, time_min, time_max))
+                       break;
+               list_move(&sp->so_lru, &doomed);
+               nfs4_remove_state_owner_locked(sp);
+       }
+       spin_unlock(&clp->cl_lock);
+
+       list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+               list_del(&sp->so_lru);
+               nfs4_free_state_owner(sp);
+       }
+}
+
 /**
  * nfs4_get_state_owner - Look up a state owner given a credential
  * @server: nfs_server to search
@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
        sp = nfs4_find_state_owner_locked(server, cred);
        spin_unlock(&clp->cl_lock);
        if (sp != NULL)
-               return sp;
+               goto out;
        new = nfs4_alloc_state_owner();
        if (new == NULL)
-               return NULL;
+               goto out;
        new->so_server = server;
        new->so_cred = cred;
        spin_lock(&clp->cl_lock);
@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
                rpc_destroy_wait_queue(&new->so_sequence.wait);
                kfree(new);
        }
+out:
+       nfs4_gc_state_owners(server);
        return sp;
 }
 
 /**
  * nfs4_put_state_owner - Release a nfs4_state_owner
  * @sp: state owner data to release
- *
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-       struct nfs_client *clp = sp->so_server->nfs_client;
-       struct rpc_cred *cred = sp->so_cred;
+       struct nfs_server *server = sp->so_server;
+       struct nfs_client *clp = server->nfs_client;
 
        if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
                return;
-       nfs4_remove_state_owner_locked(sp);
+
+       if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+               sp->so_expires = jiffies;
+               list_add_tail(&sp->so_lru, &server->state_owners_lru);
+               spin_unlock(&clp->cl_lock);
+       } else {
+               nfs4_remove_state_owner_locked(sp);
+               spin_unlock(&clp->cl_lock);
+               nfs4_free_state_owner(sp);
+       }
+}
+
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
+ *
+ * Called at umount time.  Remaining state owners will be on
+ * the LRU with ref count of zero.
+ */
+void nfs4_purge_state_owners(struct nfs_server *server)
+{
+       struct nfs_client *clp = server->nfs_client;
+       struct nfs4_state_owner *sp, *tmp;
+       LIST_HEAD(doomed);
+
+       spin_lock(&clp->cl_lock);
+       list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+               list_move(&sp->so_lru, &doomed);
+               nfs4_remove_state_owner_locked(sp);
+       }
        spin_unlock(&clp->cl_lock);
-       rpc_destroy_wait_queue(&sp->so_sequence.wait);
-       put_rpccred(cred);
-       kfree(sp);
+
+       list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+               list_del(&sp->so_lru);
+               nfs4_free_state_owner(sp);
+       }
 }
 
 static struct nfs4_state *
@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 restart:
        rcu_read_lock();
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               nfs4_purge_state_owners(server);
                spin_lock(&clp->cl_lock);
                for (pos = rb_first(&server->state_owners);
                     pos != NULL;
index b5479df8378d2fe3981ffab93bb5ff39980e9474..ba4d7656ecfde15c98188afee4a086b19746772f 100644 (file)
@@ -153,6 +153,7 @@ struct nfs_server {
        struct rb_root          openowner_id;
        struct rb_root          lockowner_id;
 #endif
+       struct list_head        state_owners_lru;
        struct list_head        layouts;
        struct list_head        delegations;
        void (*destroy)(struct nfs_server *);