ext4: fix race when setting bitmap_uptodate flag
author Theodore Ts'o <tytso@mit.edu>
Mon, 20 Feb 2012 22:52:46 +0000 (17:52 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Mon, 20 Feb 2012 22:52:46 +0000 (17:52 -0500)
In ext4_read_{inode,block}_bitmap() we were setting bitmap_uptodate()
before submitting the buffer for read.  This is bad, since we check
bitmap_uptodate() without locking the buffer, and so if another
process is racing with us, it's possible that they will think the
bitmap is uptodate even though the read has not completed yet,
resulting in inodes and blocks potentially getting allocated more than
once if we get really unlucky.

Addresses-Google-Bug: 2828254

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/ialloc.c
fs/ext4/mballoc.c

index f9e2cd8cf711d2f43a74f5a5f4cc830604cd82d9..a2cff2b9d5b549f60ba3cc581bcdbffe738aa27d 100644 (file)
@@ -336,10 +336,10 @@ err_out:
  * Return buffer_head on success or NULL in case of failure.
  */
 struct buffer_head *
-ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc *desc;
-       struct buffer_head *bh = NULL;
+       struct buffer_head *bh;
        ext4_fsblk_t bitmap_blk;
 
        desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
        bitmap_blk = ext4_block_bitmap(sb, desc);
        bh = sb_getblk(sb, bitmap_blk);
        if (unlikely(!bh)) {
-               ext4_error(sb, "Cannot read block bitmap - "
-                           "block_group = %u, block_bitmap = %llu",
-                           block_group, bitmap_blk);
+               ext4_error(sb, "Cannot get buffer for block bitmap - "
+                          "block_group = %u, block_bitmap = %llu",
+                          block_group, bitmap_blk);
                return NULL;
        }
 
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
                return bh;
        }
        /*
-        * submit the buffer_head for read. We can
-        * safely mark the bitmap as uptodate now.
-        * We do it here so the bitmap uptodate bit
-        * get set with buffer lock held.
+        * submit the buffer_head for reading
         */
+       set_buffer_new(bh);
        trace_ext4_read_block_bitmap_load(sb, block_group);
-       set_bitmap_uptodate(bh);
-       if (bh_submit_read(bh) < 0) {
-               put_bh(bh);
+       bh->b_end_io = ext4_end_bitmap_read;
+       get_bh(bh);
+       submit_bh(READ, bh);
+       return bh;
+}
+
+/* Returns 0 on success, 1 on error */
+int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
+                          struct buffer_head *bh)
+{
+       struct ext4_group_desc *desc;
+
+       if (!buffer_new(bh))
+               return 0;
+       desc = ext4_get_group_desc(sb, block_group, NULL);
+       if (!desc)
+               return 1;
+       wait_on_buffer(bh);
+       if (!buffer_uptodate(bh)) {
                ext4_error(sb, "Cannot read block bitmap - "
-                           "block_group = %u, block_bitmap = %llu",
-                           block_group, bitmap_blk);
-               return NULL;
+                          "block_group = %u, block_bitmap = %llu",
+                          block_group, bh->b_blocknr);
+               return 1;
        }
+       clear_buffer_new(bh);
+       /* Panic or remount fs read-only if block bitmap is invalid */
        ext4_valid_block_bitmap(sb, desc, block_group, bh);
-       /*
-        * file system mounted not to panic on error,
-        * continue with corrupt bitmap
-        */
+       return 0;
+}
+
+struct buffer_head *
+ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
+{
+       struct buffer_head *bh;
+
+       bh = ext4_read_block_bitmap_nowait(sb, block_group);
+       if (ext4_wait_block_bitmap(sb, block_group, bh)) {
+               put_bh(bh);
+               return NULL;
+       }
        return bh;
 }
 
index 513004fc3d840ee03586a4fedcdb133d8031c642..315f01be73e6d559930255cc93472955a0e3b5a2 100644 (file)
@@ -1794,8 +1794,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
                                                    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-                                     ext4_group_t block_group);
+
+extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+                                               ext4_group_t block_group);
+extern int ext4_wait_block_bitmap(struct super_block *sb,
+                                 ext4_group_t block_group,
+                                 struct buffer_head *bh);
+extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+                                                 ext4_group_t block_group);
 extern void ext4_init_block_bitmap(struct super_block *sb,
                                   struct buffer_head *bh,
                                   ext4_group_t group,
@@ -1841,6 +1847,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
 extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 extern int ext4_init_inode_table(struct super_block *sb,
                                 ext4_group_t group, int barrier);
+extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
index 84e6e9a3986b65ea0f7d65882ab9acced4c69613..b2685992fb2d2a54cdb8e8b6967a5fe604d7afa0 100644 (file)
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
        return EXT4_INODES_PER_GROUP(sb);
 }
 
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+       if (uptodate) {
+               set_buffer_uptodate(bh);
+               set_bitmap_uptodate(bh);
+       }
+       unlock_buffer(bh);
+       put_bh(bh);
+}
+
 /*
  * Read the inode allocation bitmap for a given block_group, reading
  * into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                return bh;
        }
        /*
-        * submit the buffer_head for read. We can
-        * safely mark the bitmap as uptodate now.
-        * We do it here so the bitmap uptodate bit
-        * get set with buffer lock held.
+        * submit the buffer_head for reading
         */
        trace_ext4_load_inode_bitmap(sb, block_group);
-       set_bitmap_uptodate(bh);
-       if (bh_submit_read(bh) < 0) {
+       bh->b_end_io = ext4_end_bitmap_read;
+       get_bh(bh);
+       submit_bh(READ, bh);
+       wait_on_buffer(bh);
+       if (!buffer_uptodate(bh)) {
                put_bh(bh);
                ext4_error(sb, "Cannot read inode bitmap - "
-                           "block_group = %u, inode_bitmap = %llu",
-                           block_group, bitmap_blk);
+                          "block_group = %u, inode_bitmap = %llu",
+                          block_group, bitmap_blk);
                return NULL;
        }
        return bh;
index cb990b21c698bd9dd1ec0e4bb8488f6e82bbe2f7..545fa0256606fec5e94998cf3331108b7d3258b5 100644 (file)
@@ -782,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
        int groups_per_page;
        int err = 0;
        int i;
-       ext4_group_t first_group;
+       ext4_group_t first_group, group;
        int first_block;
        struct super_block *sb;
        struct buffer_head *bhs;
@@ -806,24 +806,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
        /* allocate buffer_heads to read bitmaps */
        if (groups_per_page > 1) {
-               err = -ENOMEM;
                i = sizeof(struct buffer_head *) * groups_per_page;
                bh = kzalloc(i, GFP_NOFS);
-               if (bh == NULL)
+               if (bh == NULL) {
+                       err = -ENOMEM;
                        goto out;
+               }
        } else
                bh = &bhs;
 
        first_group = page->index * blocks_per_page / 2;
 
        /* read all groups the page covers into the cache */
-       for (i = 0; i < groups_per_page; i++) {
-               struct ext4_group_desc *desc;
-
-               if (first_group + i >= ngroups)
+       for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+               if (group >= ngroups)
                        break;
 
-               grinfo = ext4_get_group_info(sb, first_group + i);
+               grinfo = ext4_get_group_info(sb, group);
                /*
                 * If page is uptodate then we came here after online resize
                 * which added some new uninitialized group info structs, so
@@ -834,69 +833,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        bh[i] = NULL;
                        continue;
                }
-
-               err = -EIO;
-               desc = ext4_get_group_desc(sb, first_group + i, NULL);
-               if (desc == NULL)
-                       goto out;
-
-               err = -ENOMEM;
-               bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
-               if (bh[i] == NULL)
+               if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
+                       err = -ENOMEM;
                        goto out;
-
-               if (bitmap_uptodate(bh[i]))
-                       continue;
-
-               lock_buffer(bh[i]);
-               if (bitmap_uptodate(bh[i])) {
-                       unlock_buffer(bh[i]);
-                       continue;
                }
-               ext4_lock_group(sb, first_group + i);
-               if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-                       ext4_init_block_bitmap(sb, bh[i],
-                                               first_group + i, desc);
-                       set_bitmap_uptodate(bh[i]);
-                       set_buffer_uptodate(bh[i]);
-                       ext4_unlock_group(sb, first_group + i);
-                       unlock_buffer(bh[i]);
-                       continue;
-               }
-               ext4_unlock_group(sb, first_group + i);
-               if (buffer_uptodate(bh[i])) {
-                       /*
-                        * if not uninit if bh is uptodate,
-                        * bitmap is also uptodate
-                        */
-                       set_bitmap_uptodate(bh[i]);
-                       unlock_buffer(bh[i]);
-                       continue;
-               }
-               get_bh(bh[i]);
-               /*
-                * submit the buffer_head for read. We can
-                * safely mark the bitmap as uptodate now.
-                * We do it here so the bitmap uptodate bit
-                * get set with buffer lock held.
-                */
-               set_bitmap_uptodate(bh[i]);
-               bh[i]->b_end_io = end_buffer_read_sync;
-               submit_bh(READ, bh[i]);
-               mb_debug(1, "read bitmap for group %u\n", first_group + i);
+               mb_debug(1, "read bitmap for group %u\n", group);
        }
 
        /* wait for I/O completion */
-       for (i = 0; i < groups_per_page; i++)
-               if (bh[i])
-                       wait_on_buffer(bh[i]);
-
-       err = -EIO;
-       for (i = 0; i < groups_per_page; i++)
-               if (bh[i] && !buffer_uptodate(bh[i]))
+       for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+               if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
+                       err = -EIO;
                        goto out;
+               }
+       }
 
-       err = 0;
        first_block = page->index * blocks_per_page;
        for (i = 0; i < blocks_per_page; i++) {
                int group;