ceph: preserve I_COMPLETE across rename
author Sage Weil <sage@newdream.net>
Sat, 5 Feb 2011 05:38:47 +0000 (21:38 -0800)
committer Sage Weil <sage@newdream.net>
Tue, 15 Mar 2011 16:14:03 +0000 (09:14 -0700)
d_move puts the renamed dentry at the end of d_subdirs, screwing with our
cached dentry directory offsets.  We were just clearing I_COMPLETE to avoid
any possibility of trouble.  However, assigning the renamed dentry an
offset at the end of the directory (to match its new d_subdirs position)
is sufficient to maintain correct behavior and hold onto I_COMPLETE.

This is especially important for workloads like rsync, which renames files
into place.  Before, we would lose I_COMPLETE and do MDS lookups for each
file.  With this patch we only talk to the MDS on create and rename.

Signed-off-by: Sage Weil <sage@newdream.net>
fs/ceph/inode.c

index 193bfa5e9cbd7943bcb2a8124b5fa9ed6717cac4..60456361e07d31ff751f33fb54e14693c077bd39 100644 (file)
@@ -1030,9 +1030,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                        dout("fill_trace doing d_move %p -> %p\n",
                             req->r_old_dentry, dn);
 
-                       /* d_move screws up d_subdirs order */
-                       ceph_i_clear(dir, CEPH_I_COMPLETE);
-
                        d_move(req->r_old_dentry, dn);
                        dout(" src %p '%.*s' dst %p '%.*s'\n",
                             req->r_old_dentry,
@@ -1044,12 +1041,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                           rehashing bug in vfs_rename_dir */
                        ceph_invalidate_dentry_lease(dn);
 
-                       /* take overwritten dentry's readdir offset */
-                       dout("dn %p gets %p offset %lld (old offset %lld)\n",
-                            req->r_old_dentry, dn, ceph_dentry(dn)->offset,
+                       /*
+                        * d_move() puts the renamed dentry at the end of
+                        * d_subdirs.  We need to assign it an appropriate
+                        * directory offset so we can behave when holding
+                        * I_COMPLETE.
+                        */
+                       ceph_set_dentry_offset(req->r_old_dentry);
+                       dout("dn %p gets new offset %lld\n", req->r_old_dentry, 
                             ceph_dentry(req->r_old_dentry)->offset);
-                       ceph_dentry(req->r_old_dentry)->offset =
-                               ceph_dentry(dn)->offset;
 
                        dn = req->r_old_dentry;  /* use old_dentry */
                        in = dn->d_inode;