[PATCH] __block_write_full_page race fix
author Nick Piggin <nickpiggin@yahoo.com.au>
Thu, 5 May 2005 23:15:46 +0000 (16:15 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Thu, 5 May 2005 23:36:40 +0000 (16:36 -0700)
When running
    fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2
on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte
page size over loopback to an image file on a tmpfs filesystem, I would
very quickly hit
    BUG_ON(!buffer_async_write(bh));
in fs/buffer.c:end_buffer_async_write.

It seems that more than one request would be submitted for a given bh
at a time.

What would happen is the following:
2 threads are doing __mpage_writepages on the same page.
Thread 1 - locks the page first, and enters __block_write_full_page.
Thread 1 - (e.g.) calls mark_buffer_async_write on the first 2 buffers.
Thread 1 - sets page writeback, unlocks the page.
Thread 2 - locks the page, waits on page writeback.
Thread 1 - calls submit_bh on the first 2 buffers.
=> both requests complete; none of the page's buffers are async_write, so
   end_page_writeback is called.
Thread 2 - wakes up, enters __block_write_full_page.
Thread 2 - calls mark_buffer_async_write on (e.g.) the last buffer.
Thread 1 - finds the last buffer has async_write set, calls submit_bh on it.
Thread 2 - calls submit_bh on the last buffer.
=> oops.

So change __block_write_full_page to explicitly keep track of the last bh
we need to issue, so we don't touch anything after issuing the last
request.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
fs/buffer.c

index af7c51ded2e121ac023872c0b8d46d01244f32ce..bc75f2e7b274e09f395417124fdc7958b1d162f6 100644 (file)
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
        int err;
        sector_t block;
        sector_t last_block;
-       struct buffer_head *bh, *head;
+       struct buffer_head *bh, *head, *last_bh = NULL;
        int nr_underway = 0;
 
        BUG_ON(!PageLocked(page));
@@ -1809,7 +1809,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
        } while (bh != head);
 
        do {
-               get_bh(bh);
                if (!buffer_mapped(bh))
                        continue;
                /*
@@ -1827,6 +1826,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                }
                if (test_clear_buffer_dirty(bh)) {
                        mark_buffer_async_write(bh);
+                       get_bh(bh);
+                       last_bh = bh;
                } else {
                        unlock_buffer(bh);
                }
@@ -1845,10 +1846,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                if (buffer_async_write(bh)) {
                        submit_bh(WRITE, bh);
                        nr_underway++;
+                       put_bh(bh);
+                       if (bh == last_bh)
+                               break;
                }
-               put_bh(bh);
                bh = next;
        } while (bh != head);
+       bh = head;
 
        err = 0;
 done:
@@ -1887,10 +1891,11 @@ recover:
        bh = head;
        /* Recovery: lock and submit the mapped buffers */
        do {
-               get_bh(bh);
                if (buffer_mapped(bh) && buffer_dirty(bh)) {
                        lock_buffer(bh);
                        mark_buffer_async_write(bh);
+                       get_bh(bh);
+                       last_bh = bh;
                } else {
                        /*
                         * The buffer may have been set dirty during
@@ -1909,10 +1914,13 @@ recover:
                        clear_buffer_dirty(bh);
                        submit_bh(WRITE, bh);
                        nr_underway++;
+                       put_bh(bh);
+                       if (bh == last_bh)
+                               break;
                }
-               put_bh(bh);
                bh = next;
        } while (bh != head);
+       bh = head;
        goto done;
 }