ext4: use private version of page_zero_new_buffers() for data=journal mode
authorTheodore Ts'o <tytso@mit.edu>
Thu, 15 Oct 2015 14:29:05 +0000 (10:29 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 15 Oct 2015 14:29:05 +0000 (10:29 -0400)
If there is a error while copying data from userspace into the page
cache during a write(2) system call, in data=journal mode, in
ext4_journalled_write_end() were using page_zero_new_buffers() from
fs/buffer.c.  Unfortunately, this sets the buffer dirty flag, which is
no good if journalling is enabled.  This is a long-standing bug that
goes back for years and years in ext3, but a combination of (a)
data=journal not being very common, (b) in many case it only results
in a warning message. and (c) only very rarely causes the kernel hang,
means that we only really noticed this as a problem when commit
998ef75ddb caused this failure to happen frequently enough to cause
generic/208 to fail when run in data=journal mode.

The fix is to have our own version of this function that doesn't call
mark_dirty_buffer(), since we will end up calling
ext4_handle_dirty_metadata() on the buffer head(s) in questions very
shortly afterwards in ext4_journalled_write_end().

Thanks to Dave Hansen and Linus Torvalds for helping to identify the
root cause of the problem.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.com>
fs/ext4/inode.c

index ae52e329e986855e1a15dc74583336190ec3b1b4..f205ac3c4e410995a0396d80776db5c1baf58bb7 100644 (file)
@@ -1181,6 +1181,38 @@ errout:
        return ret ? ret : copied;
 }
 
+/*
+ * This is a private version of page_zero_new_buffers() which doesn't
+ * set the buffer to be dirty, since in data=journalled mode we need
+ * to call ext4_handle_dirty_metadata() instead.
+ */
+static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+{
+       unsigned int block_start = 0, block_end;
+       struct buffer_head *head, *bh;
+
+       bh = head = page_buffers(page);
+       do {
+               block_end = block_start + bh->b_size;
+               if (buffer_new(bh)) {
+                       if (block_end > from && block_start < to) {
+                               if (!PageUptodate(page)) {
+                                       unsigned start, size;
+
+                                       start = max(from, block_start);
+                                       size = min(to, block_end) - start;
+
+                                       zero_user(page, start, size);
+                                       set_buffer_uptodate(bh);
+                               }
+                               clear_buffer_new(bh);
+                       }
+               }
+               block_start = block_end;
+               bh = bh->b_this_page;
+       } while (bh != head);
+}
+
 static int ext4_journalled_write_end(struct file *file,
                                     struct address_space *mapping,
                                     loff_t pos, unsigned len, unsigned copied,
@@ -1207,7 +1239,7 @@ static int ext4_journalled_write_end(struct file *file,
                if (copied < len) {
                        if (!PageUptodate(page))
                                copied = 0;
-                       page_zero_new_buffers(page, from+copied, to);
+                       zero_new_buffers(page, from+copied, to);
                }
 
                ret = ext4_walk_page_buffers(handle, page_buffers(page), from,