ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock
authorDarrick J. Wong <darrick.wong@oracle.com>
Fri, 29 Mar 2019 03:43:38 +0000 (20:43 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 3 Apr 2019 04:25:17 +0000 (06:25 +0200)
commit e6a9467ea14bae8691b0f72c500510c42ea8edb8 upstream.

ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that
we always grab cluster locks in order of increasing inode number.

Unfortunately, we forget to swap the inode record buffer head pointers
when we've done this, which leads to incorrect bookkeepping when we're
trying to make the two inodes have the same refcount tree.

This has the effect of causing filesystem shutdowns if you're trying to
reflink data from inode 100 into inode 97, where inode 100 already has a
refcount tree attached and inode 97 doesn't.  The reflink code decides
to copy the refcount tree pointer from 100 to 97, but uses inode 97's
inode record to open the tree root (which it doesn't have) and blows up.
This issue causes filesystem shutdowns and metadata corruption!

Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia
Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features")
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/ocfs2/refcounttree.c

index 824f407df1db1da34776afaed39ab4b20b68531b..3a4e1bca5e319ff6f96196aab97ced76ff74b0c2 100644 (file)
@@ -4716,22 +4716,23 @@ out:
 
 /* Lock an inode and grab a bh pointing to the inode. */
 static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
-                                    struct buffer_head **bh1,
+                                    struct buffer_head **bh_s,
                                     struct inode *t_inode,
-                                    struct buffer_head **bh2)
+                                    struct buffer_head **bh_t)
 {
-       struct inode *inode1;
-       struct inode *inode2;
+       struct inode *inode1 = s_inode;
+       struct inode *inode2 = t_inode;
        struct ocfs2_inode_info *oi1;
        struct ocfs2_inode_info *oi2;
+       struct buffer_head *bh1 = NULL;
+       struct buffer_head *bh2 = NULL;
        bool same_inode = (s_inode == t_inode);
+       bool need_swap = (inode1->i_ino > inode2->i_ino);
        int status;
 
        /* First grab the VFS and rw locks. */
        lock_two_nondirectories(s_inode, t_inode);
-       inode1 = s_inode;
-       inode2 = t_inode;
-       if (inode1->i_ino > inode2->i_ino)
+       if (need_swap)
                swap(inode1, inode2);
 
        status = ocfs2_rw_lock(inode1, 1);
@@ -4754,17 +4755,13 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
        trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
                                (unsigned long long)oi2->ip_blkno);
 
-       if (*bh1)
-               *bh1 = NULL;
-       if (*bh2)
-               *bh2 = NULL;
-
        /* We always want to lock the one with the lower lockid first. */
        if (oi1->ip_blkno > oi2->ip_blkno)
                mlog_errno(-ENOLCK);
 
        /* lock id1 */
-       status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+       status = ocfs2_inode_lock_nested(inode1, &bh1, 1,
+                                        OI_LS_REFLINK_TARGET);
        if (status < 0) {
                if (status != -ENOENT)
                        mlog_errno(status);
@@ -4773,15 +4770,25 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
 
        /* lock id2 */
        if (!same_inode) {
-               status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+               status = ocfs2_inode_lock_nested(inode2, &bh2, 1,
                                                 OI_LS_REFLINK_TARGET);
                if (status < 0) {
                        if (status != -ENOENT)
                                mlog_errno(status);
                        goto out_cl1;
                }
-       } else
-               *bh2 = *bh1;
+       } else {
+               bh2 = bh1;
+       }
+
+       /*
+        * If we swapped inode order above, we have to swap the buffer heads
+        * before passing them back to the caller.
+        */
+       if (need_swap)
+               swap(bh1, bh2);
+       *bh_s = bh1;
+       *bh_t = bh2;
 
        trace_ocfs2_double_lock_end(
                        (unsigned long long)OCFS2_I(inode1)->ip_blkno,
@@ -4791,8 +4798,7 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
 
 out_cl1:
        ocfs2_inode_unlock(inode1, 1);
-       brelse(*bh1);
-       *bh1 = NULL;
+       brelse(bh1);
 out_rw2:
        ocfs2_rw_unlock(inode2, 1);
 out_i2: