ocfs2: ip_alloc_sem should be taken in ocfs2_get_block()
author alex chen <alex.chen@huawei.com>
Thu, 16 Nov 2017 01:31:44 +0000 (17:31 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 Jul 2018 12:27:35 +0000 (14:27 +0200)
commit 3e4c56d41eef5595035872a2ec5a483f42e8917f upstream.

ip_alloc_sem should be taken in ocfs2_get_block() when reading a file in
DIRECT mode to prevent concurrent access to the extent tree with
ocfs2_dio_end_io_write(), which may trigger a BUG_ON in the following
situation:

read file 'A'                                  end_io of writing file 'A'
vfs_read
 __vfs_read
  ocfs2_file_read_iter
   generic_file_read_iter
    ocfs2_direct_IO
     __blockdev_direct_IO
      do_blockdev_direct_IO
       do_direct_IO
        get_more_blocks
         ocfs2_get_block
          ocfs2_extent_map_get_blocks
           ocfs2_get_clusters
            ocfs2_get_clusters_nocache()
             ocfs2_search_extent_list
              return the index of record which
              contains the v_cluster, that is
              v_cluster > rec[i]->e_cpos.
                                                ocfs2_dio_end_io
                                                 ocfs2_dio_end_io_write
                                                  down_write(&oi->ip_alloc_sem);
                                                  ocfs2_mark_extent_written
                                                   ocfs2_change_extent_flag
                                                    ocfs2_split_extent
                                                     ...
                                                 --> modify the rec[i]->e_cpos, resulting
                                                     in v_cluster < rec[i]->e_cpos.
             BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))

[alex.chen@huawei.com: v3]
Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com
Link: http://lkml.kernel.org/r/59EF3614.6050008@huawei.com
Fixes: c15471f79506 ("ocfs2: fix sparse file & data ordering issue in direct io")
Signed-off-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Gang He <ghe@suse.com>
Acked-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/ocfs2/aops.c

index f2961b13e8c51cf05c2a87147d18296276fc90b4..c26d046adaaace43d9ffdfeae82a404819f0ec1f 100644 (file)
@@ -134,6 +134,19 @@ bail:
        return err;
 }
 
+static int ocfs2_lock_get_block(struct inode *inode, sector_t iblock,
+                   struct buffer_head *bh_result, int create)
+{
+       int ret = 0;
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+       down_read(&oi->ip_alloc_sem);
+       ret = ocfs2_get_block(inode, iblock, bh_result, create);
+       up_read(&oi->ip_alloc_sem);
+
+       return ret;
+}
+
 int ocfs2_get_block(struct inode *inode, sector_t iblock,
                    struct buffer_head *bh_result, int create)
 {
@@ -2120,7 +2133,7 @@ static void ocfs2_dio_free_write_ctx(struct inode *inode,
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  *                                     fs_count, map_bh, dio->rw == WRITE);
  */
-static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
                               struct buffer_head *bh_result, int create)
 {
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2146,12 +2159,9 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
         * while file size will be changed.
         */
        if (pos + total_len <= i_size_read(inode)) {
-               down_read(&oi->ip_alloc_sem);
-               /* This is the fast path for re-write. */
-               ret = ocfs2_get_block(inode, iblock, bh_result, create);
-
-               up_read(&oi->ip_alloc_sem);
 
+               /* This is the fast path for re-write. */
+               ret = ocfs2_lock_get_block(inode, iblock, bh_result, create);
                if (buffer_mapped(bh_result) &&
                    !buffer_new(bh_result) &&
                    ret == 0)
@@ -2416,9 +2426,9 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                return 0;
 
        if (iov_iter_rw(iter) == READ)
-               get_block = ocfs2_get_block;
+               get_block = ocfs2_lock_get_block;
        else
-               get_block = ocfs2_dio_get_block;
+               get_block = ocfs2_dio_wr_get_block;
 
        return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
                                    iter, get_block,