ceph: use vfs __set_page_dirty_nobuffers interface instead of doing it inside filesystem
author Sha Zhengju <handai.szj@taobao.com>
Wed, 21 Aug 2013 08:27:34 +0000 (16:27 +0800)
committer Sage Weil <sage@inktank.com>
Tue, 27 Aug 2013 23:29:44 +0000 (16:29 -0700)
In following patches we will begin to add memcg dirty page accounting around
__set_page_dirty_{buffers,nobuffers} in the vfs layer, so we had better use the vfs
interface to avoid exporting those details to filesystems.

Since vfs set_page_dirty() should be called under the page lock, we no longer need
elaborate code here to handle the race with truncate, and two WARN_ON()s are added to
detect such exceptions. Many thanks to Sage and Yan Zheng for their coaching!

I tested it in a two-server ceph environment, in which one server is the client and the
other is the mds/osd/mon, and ran the following fsx tests from xfstests:

  ./fsx   1MB -N 50000 -p 10000 -l 1048576
  ./fsx  10MB -N 50000 -p 10000 -l 10485760
  ./fsx 100MB -N 50000 -p 10000 -l 104857600

fsx does lots of mmap-read/mmap-write/truncate operations, and the tests completed
successfully without triggering any of the WARN_ON()s.

Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Reviewed-by: Sage Weil <sage@inktank.com>
fs/ceph/addr.c

index cb78ce81d6a6b2acbcd109dbb47d1ea7adfa689a..3bed7da383263a3ba967c6158b2ff6bd1bf5c4af 100644 (file)
@@ -70,15 +70,16 @@ static int ceph_set_page_dirty(struct page *page)
        struct address_space *mapping = page->mapping;
        struct inode *inode;
        struct ceph_inode_info *ci;
-       int undo = 0;
        struct ceph_snap_context *snapc;
+       int ret;
 
        if (unlikely(!mapping))
                return !TestSetPageDirty(page);
 
-       if (TestSetPageDirty(page)) {
+       if (PageDirty(page)) {
                dout("%p set_page_dirty %p idx %lu -- already dirty\n",
                     mapping->host, page, page->index);
+               BUG_ON(!PagePrivate(page));
                return 0;
        }
 
@@ -107,35 +108,19 @@ static int ceph_set_page_dirty(struct page *page)
             snapc, snapc->seq, snapc->num_snaps);
        spin_unlock(&ci->i_ceph_lock);
 
-       /* now adjust page */
-       spin_lock_irq(&mapping->tree_lock);
-       if (page->mapping) {    /* Race with truncate? */
-               WARN_ON_ONCE(!PageUptodate(page));
-               account_page_dirtied(page, page->mapping);
-               radix_tree_tag_set(&mapping->page_tree,
-                               page_index(page), PAGECACHE_TAG_DIRTY);
-
-               /*
-                * Reference snap context in page->private.  Also set
-                * PagePrivate so that we get invalidatepage callback.
-                */
-               page->private = (unsigned long)snapc;
-               SetPagePrivate(page);
-       } else {
-               dout("ANON set_page_dirty %p (raced truncate?)\n", page);
-               undo = 1;
-       }
-
-       spin_unlock_irq(&mapping->tree_lock);
-
-       if (undo)
-               /* whoops, we failed to dirty the page */
-               ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+       /*
+        * Reference snap context in page->private.  Also set
+        * PagePrivate so that we get invalidatepage callback.
+        */
+       BUG_ON(PagePrivate(page));
+       page->private = (unsigned long)snapc;
+       SetPagePrivate(page);
 
-       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       ret = __set_page_dirty_nobuffers(page);
+       WARN_ON(!PageLocked(page));
+       WARN_ON(!page->mapping);
 
-       BUG_ON(!PageDirty(page));
-       return 1;
+       return ret;
 }
 
 /*