NFS: fix subtle change in COMMIT behavior
author: Weston Andros Adamson <dros@primarydata.com>
Wed, 12 Nov 2014 17:08:00 +0000 (12:08 -0500)
committer: Trond Myklebust <trond.myklebust@primarydata.com>
Mon, 24 Nov 2014 22:00:42 +0000 (17:00 -0500)
Recent work in the pgio layer made it possible for there to be more than one
request per page. This caused a subtle change in commit behavior, because
write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for
writeback against the number of requests on a commit list to choose when to
send a COMMIT in a non-blocking flush.

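This is probably hard to hit in normal operation - you have to be using
rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page
aligned to have a noticeable change in behavior.

Fix this by counting requests instead of pages: rename nfs_inode->npages to
nrequests and also bump it for each subrequest added to a page group whose
head request holds PG_INODE_REF, so that nfs_commit_unstable_pages compares
requests against requests.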

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
fs/nfs/callback_proc.c
fs/nfs/inode.c
fs/nfs/pagelist.c
fs/nfs/write.c
include/linux/nfs_fs.h

index 73466b934090667c3469947e536e80804b6e8cb7..e36a9d78ea49adc63329253a0a94cb22d6be933a 100644 (file)
@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
                goto out_iput;
        res->size = i_size_read(inode);
        res->change_attr = delegation->change_attr;
-       if (nfsi->npages != 0)
+       if (nfsi->nrequests != 0)
                res->change_attr++;
        res->ctime = inode->i_ctime;
        res->mtime = inode->i_mtime;
index 00689a8a85e44677d9a23c092b209ab6600928e4..2b48ce58a584ef0a3afa2ad218b4d9f4d2a84ca8 100644 (file)
@@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
        if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
                        && (fattr->valid & NFS_ATTR_FATTR_SIZE)
                        && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
-                       && nfsi->npages == 0) {
+                       && nfsi->nrequests == 0) {
                i_size_write(inode, nfs_size_to_loff_t(fattr->size));
                ret |= NFS_INO_INVALID_ATTR;
        }
@@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
        if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
                cur_size = i_size_read(inode);
                new_isize = nfs_size_to_loff_t(fattr->size);
-               if (cur_size != new_isize && nfsi->npages == 0)
+               if (cur_size != new_isize && nfsi->nrequests == 0)
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
        }
 
@@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                if (new_isize != cur_isize) {
                        /* Do we perhaps have any outstanding writes, or has
                         * the file grown beyond our last write? */
-                       if ((nfsi->npages == 0) || new_isize > cur_isize) {
+                       if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
                                i_size_write(inode, new_isize);
                                invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
                                invalid &= ~NFS_INO_REVAL_PAGECACHE;
@@ -1784,7 +1784,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
        INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
        INIT_LIST_HEAD(&nfsi->commit_info.list);
-       nfsi->npages = 0;
+       nfsi->nrequests = 0;
        nfsi->commit_info.ncommit = 0;
        atomic_set(&nfsi->commit_info.rpcs_out, 0);
        atomic_set(&nfsi->silly_count, 1);
index ed0db61f8543f0ee4d68c3b457510cedad2187ca..2b5e769beb16b4a3236d39a1abe60cb3fcbc2279 100644 (file)
@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
 static inline void
 nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
 {
+       struct inode *inode;
        WARN_ON_ONCE(prev == req);
 
        if (!prev) {
@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
                 * nfs_page_group_destroy is called */
                kref_get(&req->wb_head->wb_kref);
 
-               /* grab extra ref if head request has extra ref from
-                * the write/commit path to handle handoff between write
-                * and commit lists */
+               /* grab extra ref and bump the request count if head request
+                * has extra ref from the write/commit path to handle handoff
+                * between write and commit lists. */
                if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+                       inode = page_file_mapping(req->wb_page)->host;
                        set_bit(PG_INODE_REF, &req->wb_flags);
                        kref_get(&req->wb_kref);
+                       spin_lock(&inode->i_lock);
+                       NFS_I(inode)->nrequests++;
+                       spin_unlock(&inode->i_lock);
                }
        }
 }
index f83b02dc9166d1aaf48b76f816f96a1b908bcec1..d489ff3f438f86177b34f110e084e1fca71659bc 100644 (file)
@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
        nfs_lock_request(req);
 
        spin_lock(&inode->i_lock);
-       if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+       if (!nfsi->nrequests &&
+           NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
                inode->i_version++;
        /*
         * Swap-space should not get truncated. Hence no need to plug the race
@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
                SetPagePrivate(req->wb_page);
                set_page_private(req->wb_page, (unsigned long)req);
        }
-       nfsi->npages++;
+       nfsi->nrequests++;
        /* this a head request for a page group - mark it as having an
-        * extra reference so sub groups can follow suit */
+        * extra reference so sub groups can follow suit.
+        * This flag also informs pgio layer when to bump nrequests when
+        * adding subrequests. */
        WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
        kref_get(&req->wb_kref);
        spin_unlock(&inode->i_lock);
@@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
                        wake_up_page(head->wb_page, PG_private);
                        clear_bit(PG_MAPPED, &head->wb_flags);
                }
-               nfsi->npages--;
+               nfsi->nrequests--;
+               spin_unlock(&inode->i_lock);
+       } else {
+               spin_lock(&inode->i_lock);
+               nfsi->nrequests--;
                spin_unlock(&inode->i_lock);
        }
 
@@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
                /* Don't commit yet if this is a non-blocking flush and there
                 * are a lot of outstanding writes for this mapping.
                 */
-               if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
+               if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
                        goto out_mark_dirty;
 
                /* don't wait for the COMMIT response */
index c72d1ad41ad48506e1cec621f6f4fd8ba9ec15e9..6d627b92df537ada3886af5850c0fabbac53b66a 100644 (file)
@@ -163,7 +163,7 @@ struct nfs_inode {
         */
        __be32                  cookieverf[2];
 
-       unsigned long           npages;
+       unsigned long           nrequests;
        struct nfs_mds_commit_info commit_info;
 
        /* Open contexts for shared mmap writes */
@@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data);
 static inline int
 nfs_have_writebacks(struct inode *inode)
 {
-       return NFS_I(inode)->npages != 0;
+       return NFS_I(inode)->nrequests != 0;
 }
 
 /*