Btrfs: cleanup the read failure record after write or when the inode is freeing
author Miao Xie <miaox@cn.fujitsu.com>
Fri, 12 Sep 2014 10:44:04 +0000 (18:44 +0800)
committer Chris Mason <clm@fb.com>
Wed, 17 Sep 2014 20:39:02 +0000 (13:39 -0700)
After the data is written successfully, we should clean up the read failure
record in that range because
- If we set data COW for the file, the range that the failure record pointed to is
  mapped to a new place, so it is invalid.
- If we set no data COW for the file, and if there is no error during writing,
  the corrupted data is corrected, so the failure record can be removed. And if
  some errors happen on the mirrors, we needn't worry about it either, because
  the failure record will be recreated if we read the same place again.

Sometimes we may fail to correct the data, so the failure records will be left
in the tree; we need to free them when we free the inode, otherwise a memory
leak happens.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c

index 9e2ef27672e55e572c0ea099d200494d8be2274c..78229007f99e9304b9589062b237957a27b8a008 100644 (file)
@@ -2138,6 +2138,40 @@ out:
        return 0;
 }
 
+/*
+ * Free the read failure records recorded in the inode's io_failure_tree for
+ * the byte range [@start, @end] (both ends inclusive).
+ *
+ * The walk begins at the state returned by find_first_extent_bit_state() for
+ * @start with EXTENT_DIRTY and stops at the first state whose start lies
+ * beyond @end, or when there are no more states.  For every state visited,
+ * the extent state is released with free_extent_state() and the
+ * io_failure_record stored in state->private is kfree()d.  The successor is
+ * fetched with next_state() before the current state is released, so the
+ * iteration never touches a state after it has been freed.
+ *
+ * A record is expected to end at or before @end; a record straddling @end
+ * trips the ASSERT().
+ *
+ * The empty-tree check is done before failure_tree->lock is taken; the walk
+ * itself runs entirely under that spinlock.
+ *
+ * Can be called when
+ * - the extent lock is held
+ * - under an ordered extent
+ * - the inode is being freed
+ *
+ * NOTE(review): free_extent_state() is not defined in this hunk; confirm
+ * that it also unlinks the state from the failure tree rather than only
+ * dropping a reference.
+ */
+void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
+{
+       struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
+       struct io_failure_record *failrec;
+       struct extent_state *state, *next;
+
+       if (RB_EMPTY_ROOT(&failure_tree->state))
+               return;
+
+       spin_lock(&failure_tree->lock);
+       state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
+       while (state) {
+               if (state->start > end)
+                       break;
+
+               ASSERT(state->end <= end);
+
+               next = next_state(state);
+
+               failrec = (struct io_failure_record *)state->private;
+               free_extent_state(state);
+               kfree(failrec);
+
+               state = next;
+       }
+       spin_unlock(&failure_tree->lock);
+}
+
+
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                                struct io_failure_record **failrec_ret)
 {
index 176a4b1ed52080e015956ec9e22ae62448fe26a7..5e91fb9d17646475d01f44811b1e65fc33cd3436 100644 (file)
@@ -366,6 +366,7 @@ struct io_failure_record {
        int in_validation;
 };
 
+void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end);
 int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
                                struct io_failure_record **failrec_ret);
 int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
index c3c3269a9e080b152ef723ef56ba4fe3735ef614..3c16a1493e22f2a1df4e40857e892658f7e4fb0d 100644 (file)
@@ -2703,6 +2703,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
+       btrfs_free_io_failure_record(inode, ordered_extent->file_offset,
+                                    ordered_extent->file_offset +
+                                    ordered_extent->len - 1);
+
        if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
                truncated = true;
                logical_len = ordered_extent->truncated_len;
@@ -4799,6 +4803,8 @@ void btrfs_evict_inode(struct inode *inode)
        /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
+       btrfs_free_io_failure_record(inode, 0, (u64)-1);
+
        if (root->fs_info->log_root_recovering) {
                BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                                 &BTRFS_I(inode)->runtime_flags));