jbd2: add support for avoiding data writes during transaction commits
author Jan Kara <jack@suse.cz>
Sun, 24 Apr 2016 04:56:07 +0000 (00:56 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Sun, 24 Apr 2016 04:56:07 +0000 (00:56 -0400)
Currently, when a filesystem needs to make sure data is on permanent
storage before committing a transaction, it adds the inode to the
transaction's inode list. During transaction commit, jbd2 writes back all
dirty buffers that have allocated underlying blocks and waits for the IO
to finish. However, when doing writeback for delayed allocated data, we
allocate blocks and immediately submit the data. Thus asking jbd2 to
write dirty pages just unnecessarily adds more work to jbd2, possibly
writing back other redirtied blocks.

Add support to jbd2 to allow a filesystem to ask jbd2 to only wait for
outstanding data writes before committing a transaction, and thus avoid
unnecessary writes.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4_jbd2.h
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/ocfs2/journal.h
include/linux/jbd2.h

index 5f58462110953dc61c9bd85101acd69c33a51331..f1c940b38b30cce172c4aad4bf288ebaa4d4bb37 100644 (file)
@@ -362,7 +362,8 @@ static inline int ext4_journal_force_commit(journal_t *journal)
 static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
 {
        if (ext4_handle_valid(handle))
-               return jbd2_journal_file_inode(handle, EXT4_I(inode)->jinode);
+               return jbd2_journal_inode_add_write(handle,
+                                                   EXT4_I(inode)->jinode);
        return 0;
 }
 
index 2ad98d6e19f43c369d9eb1f65640374e1e133b80..70078096117d3e956e86be7ba358b724286989d9 100644 (file)
@@ -219,6 +219,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               if (!(jinode->i_flags & JI_WRITE_DATA))
+                       continue;
                mapping = jinode->i_vfs_inode->i_mapping;
                jinode->i_flags |= JI_COMMIT_RUNNING;
                spin_unlock(&journal->j_list_lock);
@@ -256,6 +258,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
        /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
        list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               if (!(jinode->i_flags & JI_WAIT_DATA))
+                       continue;
                jinode->i_flags |= JI_COMMIT_RUNNING;
                spin_unlock(&journal->j_list_lock);
                err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
index 435f0b26ac2038e4f8037b5b1f2e4d15dbc9b2d2..b31852f76f46585137021df6266a18ba743b8528 100644 (file)
@@ -94,7 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_file_inode);
+EXPORT_SYMBOL(jbd2_journal_inode_add_write);
+EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
 EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
index 67c103867bf8d5d27e84a4eb2ad2c8a09b22d328..be56c8ca34c292e5156f840cac2ddcce6b562145 100644 (file)
@@ -2462,7 +2462,8 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 /*
  * File inode in the inode list of the handle's transaction
  */
-int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
+static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
+                                  unsigned long flags)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal;
@@ -2487,12 +2488,14 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
         * and if jinode->i_next_transaction == transaction, commit code
         * will only file the inode where we want it.
         */
-       if (jinode->i_transaction == transaction ||
-           jinode->i_next_transaction == transaction)
+       if ((jinode->i_transaction == transaction ||
+           jinode->i_next_transaction == transaction) &&
+           (jinode->i_flags & flags) == flags)
                return 0;
 
        spin_lock(&journal->j_list_lock);
-
+       jinode->i_flags |= flags;
+       /* Is inode already attached where we need it? */
        if (jinode->i_transaction == transaction ||
            jinode->i_next_transaction == transaction)
                goto done;
@@ -2523,6 +2526,17 @@ done:
        return 0;
 }
 
+int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
+{
+       return jbd2_journal_file_inode(handle, jinode,
+                                      JI_WRITE_DATA | JI_WAIT_DATA);
+}
+
+int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
+{
+       return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA);
+}
+
 /*
  * File truncate and transaction commit interact with each other in a
  * non-trivial way.  If a transaction writing data block A is
index f4cd3c3e9fb70d708d57a3d8dc15f92492e4ea12..497a4171ef61f6209a32303530b79c72439962cf 100644 (file)
@@ -619,7 +619,7 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
 
 static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
 {
-       return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode);
+       return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
 }
 
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
index fd1083c46c61f0f2163287f4b873968d4778aa07..39511484ad10f4764290af1f514c9f697a3be897 100644 (file)
@@ -403,11 +403,19 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 
 /* Flags in jbd_inode->i_flags */
 #define __JI_COMMIT_RUNNING 0
-/* Commit of the inode data in progress. We use this flag to protect us from
+#define __JI_WRITE_DATA 1
+#define __JI_WAIT_DATA 2
+
+/*
+ * Commit of the inode data in progress. We use this flag to protect us from
  * concurrent deletion of inode. We cannot use reference to inode for this
  * since we cannot afford doing last iput() on behalf of kjournald
  */
 #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+/* Write allocated dirty buffers in this inode before commit */
+#define JI_WRITE_DATA (1 << __JI_WRITE_DATA)
+/* Wait for outstanding data writes for this inode before commit */
+#define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
 
 /**
  * struct jbd_inode is the structure linking inodes in ordered mode
@@ -1270,7 +1278,8 @@ extern int           jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
 extern int        jbd2_journal_force_commit_nested(journal_t *);
-extern int        jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
 extern int        jbd2_journal_begin_ordered_truncate(journal_t *journal,
                                struct jbd2_inode *inode, loff_t new_size);
 extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);