ceph: make fsync() wait for unsafe requests that created/modified inode
author: Yan, Zheng <zyan@redhat.com>
Tue, 27 Oct 2015 10:36:06 +0000 (18:36 +0800)
committer: Ilya Dryomov <idryomov@gmail.com>
Mon, 2 Nov 2015 22:36:48 +0000 (23:36 +0100)
If we get an unsafe reply for a request that created/modified an inode,
add the unsafe request to a list in the newly created/modified
inode, so that fsync() can wait for these unsafe requests.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
fs/ceph/caps.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/super.h

index 349315332040c5e12630e1dd2712b04342ac33be..c69e1253b47bfbefb2eb2cb1564a5c4b9d87f9a3 100644 (file)
@@ -1970,49 +1970,46 @@ out:
 }
 
 /*
- * wait for any uncommitted directory operations to commit.
+ * wait for any unsafe requests to complete.
  */
-static int unsafe_dirop_wait(struct inode *inode)
+static int unsafe_request_wait(struct inode *inode)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct list_head *head = &ci->i_unsafe_dirops;
-       struct ceph_mds_request *req;
-       u64 last_tid;
-       int ret = 0;
-
-       if (!S_ISDIR(inode->i_mode))
-               return 0;
+       struct ceph_mds_request *req1 = NULL, *req2 = NULL;
+       int ret, err = 0;
 
        spin_lock(&ci->i_unsafe_lock);
-       if (list_empty(head))
-               goto out;
-
-       req = list_last_entry(head, struct ceph_mds_request,
-                             r_unsafe_dir_item);
-       last_tid = req->r_tid;
-
-       do {
-               ceph_mdsc_get_request(req);
-               spin_unlock(&ci->i_unsafe_lock);
+       if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
+               req1 = list_last_entry(&ci->i_unsafe_dirops,
+                                       struct ceph_mds_request,
+                                       r_unsafe_dir_item);
+               ceph_mdsc_get_request(req1);
+       }
+       if (!list_empty(&ci->i_unsafe_iops)) {
+               req2 = list_last_entry(&ci->i_unsafe_iops,
+                                       struct ceph_mds_request,
+                                       r_unsafe_target_item);
+               ceph_mdsc_get_request(req2);
+       }
+       spin_unlock(&ci->i_unsafe_lock);
 
-               dout("unsafe_dirop_wait %p wait on tid %llu (until %llu)\n",
-                    inode, req->r_tid, last_tid);
-               ret = !wait_for_completion_timeout(&req->r_safe_completion,
-                                       ceph_timeout_jiffies(req->r_timeout));
+       dout("unsafe_requeset_wait %p wait on tid %llu %llu\n",
+            inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
+       if (req1) {
+               ret = !wait_for_completion_timeout(&req1->r_safe_completion,
+                                       ceph_timeout_jiffies(req1->r_timeout));
                if (ret)
-                       ret = -EIO;  /* timed out */
-
-               ceph_mdsc_put_request(req);
-
-               spin_lock(&ci->i_unsafe_lock);
-               if (ret || list_empty(head))
-                       break;
-               req = list_first_entry(head, struct ceph_mds_request,
-                                      r_unsafe_dir_item);
-       } while (req->r_tid < last_tid);
-out:
-       spin_unlock(&ci->i_unsafe_lock);
-       return ret;
+                       err = -EIO;
+               ceph_mdsc_put_request(req1);
+       }
+       if (req2) {
+               ret = !wait_for_completion_timeout(&req2->r_safe_completion,
+                                       ceph_timeout_jiffies(req2->r_timeout));
+               if (ret)
+                       err = -EIO;
+               ceph_mdsc_put_request(req2);
+       }
+       return err;
 }
 
 int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
@@ -2038,7 +2035,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
 
-       ret = unsafe_dirop_wait(inode);
+       ret = unsafe_request_wait(inode);
 
        /*
         * only wait on non-file metadata writeback (the mds
index 96d2bd8299022e554c8bfdc0b7f1c759be8fc8cd..498dcfa2dcdbedf393ae26fc9f7f68cf90bceb90 100644 (file)
@@ -452,6 +452,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        INIT_LIST_HEAD(&ci->i_unsafe_writes);
        INIT_LIST_HEAD(&ci->i_unsafe_dirops);
+       INIT_LIST_HEAD(&ci->i_unsafe_iops);
        spin_lock_init(&ci->i_unsafe_lock);
 
        ci->i_snap_realm = NULL;
index 1e47a3d1d12f9f4e2cf92bec547042d80e64c9e5..89838a226fe91896972d945785e490214783dea6 100644 (file)
@@ -666,6 +666,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
                list_del_init(&req->r_unsafe_dir_item);
                spin_unlock(&ci->i_unsafe_lock);
        }
+       if (req->r_target_inode && req->r_got_unsafe) {
+               struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
+               spin_lock(&ci->i_unsafe_lock);
+               list_del_init(&req->r_unsafe_target_item);
+               spin_unlock(&ci->i_unsafe_lock);
+       }
 
        if (req->r_unsafe_dir) {
                iput(req->r_unsafe_dir);
@@ -1707,6 +1713,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
        req->r_started = jiffies;
        req->r_resend_mds = -1;
        INIT_LIST_HEAD(&req->r_unsafe_dir_item);
+       INIT_LIST_HEAD(&req->r_unsafe_target_item);
        req->r_fmode = -1;
        kref_init(&req->r_kref);
        INIT_LIST_HEAD(&req->r_wait);
@@ -2529,6 +2536,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        up_read(&mdsc->snap_rwsem);
        if (realm)
                ceph_put_snap_realm(mdsc, realm);
+
+       if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
+               struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
+               spin_lock(&ci->i_unsafe_lock);
+               list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
+               spin_unlock(&ci->i_unsafe_lock);
+       }
 out_err:
        mutex_lock(&mdsc->mutex);
        if (!req->r_aborted) {
index f575eafe2261cbd5974d8d4f072879e9d5bd7a39..ccf11ef0ca8717f79e6bfc9d6cf806ec359b5e8c 100644 (file)
@@ -236,6 +236,9 @@ struct ceph_mds_request {
        struct inode    *r_unsafe_dir;
        struct list_head r_unsafe_dir_item;
 
+       /* unsafe requests that modify the target inode */
+       struct list_head r_unsafe_target_item;
+
        struct ceph_mds_session *r_session;
 
        int               r_attempts;   /* resend attempts */
index 2f2460d23a0600f8f9bf2e1cc4fe3b2286684356..75b7d125ce668a9b3f4f851d2be1c688d6d5f964 100644 (file)
@@ -342,6 +342,7 @@ struct ceph_inode_info {
 
        struct list_head i_unsafe_writes; /* uncommitted sync writes */
        struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
+       struct list_head i_unsafe_iops;   /* uncommitted mds inode ops */
        spinlock_t i_unsafe_lock;
 
        struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */