f2fs: handle EIO not to break fs consistency
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d3702584818a897ea7600bf170591abf..c9c08d52ecfd64b8d469c7dafd2f03669728ad4f 100644
@@ -22,7 +22,7 @@
 #include "segment.h"
 #include <trace/events/f2fs.h>
 
-static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *ino_entry_slab;
 static struct kmem_cache *inode_entry_slab;
 
 /*
@@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page,
                goto redirty_out;
        if (wbc->for_reclaim)
                goto redirty_out;
-
-       /* Should not write any meta pages, if any IO error was occurred */
-       if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
-               goto no_write;
+       if (unlikely(f2fs_cp_error(sbi)))
+               goto redirty_out;
 
        f2fs_wait_on_page_writeback(page, META);
        write_meta_page(sbi, page);
-no_write:
        dec_page_count(sbi, F2FS_DIRTY_META);
        unlock_page(page);
        return 0;
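
The f2fs_cp_error() test used above replaces the old open-coded flag check,
and redirtying the page keeps it (and the F2FS_DIRTY_META count) around for a
later retry instead of silently dropping the write as the old no_write path
did. A minimal sketch of the helper, assuming it lives in f2fs.h as in other
trees of this era:

	static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
	{
		/* true once CP_ERROR_FLAG was raised by a failed checkpoint */
		return is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG);
	}
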
@@ -282,72 +279,120 @@ const struct address_space_operations f2fs_meta_aops = {
        .set_page_dirty = f2fs_set_meta_page_dirty,
 };
 
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+       struct ino_entry *e;
+retry:
+       spin_lock(&sbi->ino_lock[type]);
+
+       e = radix_tree_lookup(&sbi->ino_root[type], ino);
+       if (!e) {
+               e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+               if (!e) {
+                       spin_unlock(&sbi->ino_lock[type]);
+                       goto retry;
+               }
+               if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
+                       spin_unlock(&sbi->ino_lock[type]);
+                       kmem_cache_free(ino_entry_slab, e);
+                       goto retry;
+               }
+               memset(e, 0, sizeof(struct ino_entry));
+               e->ino = ino;
+
+               list_add_tail(&e->list, &sbi->ino_list[type]);
+       }
+       spin_unlock(&sbi->ino_lock[type]);
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+       struct ino_entry *e;
+
+       spin_lock(&sbi->ino_lock[type]);
+       e = radix_tree_lookup(&sbi->ino_root[type], ino);
+       if (e) {
+               list_del(&e->list);
+               radix_tree_delete(&sbi->ino_root[type], ino);
+               if (type == ORPHAN_INO)
+                       sbi->n_orphans--;
+               spin_unlock(&sbi->ino_lock[type]);
+               kmem_cache_free(ino_entry_slab, e);
+               return;
+       }
+       spin_unlock(&sbi->ino_lock[type]);
+}
+
+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+       /* add new dirty ino entry into list */
+       __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+       /* remove dirty ino entry from list */
+       __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+       struct ino_entry *e;
+       spin_lock(&sbi->ino_lock[mode]);
+       e = radix_tree_lookup(&sbi->ino_root[mode], ino);
+       spin_unlock(&sbi->ino_lock[mode]);
+       return e ? true : false;
+}
+
+void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+       struct ino_entry *e, *tmp;
+       int i;
+
+       for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+               spin_lock(&sbi->ino_lock[i]);
+               list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+                       list_del(&e->list);
+                       radix_tree_delete(&sbi->ino_root[i], e->ino);
+                       kmem_cache_free(ino_entry_slab, e);
+               }
+               spin_unlock(&sbi->ino_lock[i]);
+       }
+}
+
 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 {
        int err = 0;
 
-       spin_lock(&sbi->orphan_inode_lock);
+       spin_lock(&sbi->ino_lock[ORPHAN_INO]);
        if (unlikely(sbi->n_orphans >= sbi->max_orphans))
                err = -ENOSPC;
        else
                sbi->n_orphans++;
-       spin_unlock(&sbi->orphan_inode_lock);
+       spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
 
        return err;
 }
 
 void release_orphan_inode(struct f2fs_sb_info *sbi)
 {
-       spin_lock(&sbi->orphan_inode_lock);
+       spin_lock(&sbi->ino_lock[ORPHAN_INO]);
        f2fs_bug_on(sbi->n_orphans == 0);
        sbi->n_orphans--;
-       spin_unlock(&sbi->orphan_inode_lock);
+       spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
 }
 
 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       struct list_head *head;
-       struct orphan_inode_entry *new, *orphan;
-
-       new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
-       new->ino = ino;
-
-       spin_lock(&sbi->orphan_inode_lock);
-       head = &sbi->orphan_inode_list;
-       list_for_each_entry(orphan, head, list) {
-               if (orphan->ino == ino) {
-                       spin_unlock(&sbi->orphan_inode_lock);
-                       kmem_cache_free(orphan_entry_slab, new);
-                       return;
-               }
-
-               if (orphan->ino > ino)
-                       break;
-       }
-
-       /* add new orphan entry into list which is sorted by inode number */
-       list_add_tail(&new->list, &orphan->list);
-       spin_unlock(&sbi->orphan_inode_lock);
+       /* add new orphan ino entry into list */
+       __add_ino_entry(sbi, ino, ORPHAN_INO);
 }
 
 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       struct list_head *head;
-       struct orphan_inode_entry *orphan;
-
-       spin_lock(&sbi->orphan_inode_lock);
-       head = &sbi->orphan_inode_list;
-       list_for_each_entry(orphan, head, list) {
-               if (orphan->ino == ino) {
-                       list_del(&orphan->list);
-                       f2fs_bug_on(sbi->n_orphans == 0);
-                       sbi->n_orphans--;
-                       spin_unlock(&sbi->orphan_inode_lock);
-                       kmem_cache_free(orphan_entry_slab, orphan);
-                       return;
-               }
-       }
-       spin_unlock(&sbi->orphan_inode_lock);
+       /* remove orphan entry from orphan list */
+       __remove_ino_entry(sbi, ino, ORPHAN_INO);
 }
 
 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
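
The ino-entry scheme introduced here pairs a radix tree (fast lookup by inode
number) with a linked list (cheap in-order traversal at checkpoint time),
guarded by one spinlock per entry type. Since the allocation in
__add_ino_entry() happens under that spinlock, GFP_ATOMIC is required, and the
retry label simply loops until both the allocation and the radix-tree insert
succeed. A sketch of the declarations this code relies on, assumed to sit in
f2fs.h next to the other checkpoint structures:

	/* ino entry classes kept in sbi->ino_root[] / ino_list[] */
	enum {
		ORPHAN_INO,	/* orphan inodes awaiting deletion */
		APPEND_INO,	/* inodes whose appended data reached disk */
		UPDATE_INO,	/* inodes whose in-place data reached disk */
		MAX_INO_ENTRY,	/* number of entry types */
	};

	struct ino_entry {
		struct list_head list;	/* linked into sbi->ino_list[type] */
		nid_t ino;		/* inode number */
	};

The APPEND_INO and UPDATE_INO sets exist so that exist_written_data() can tell
callers (presumably on the fsync path) whether an inode's data already reached
the disk since the last checkpoint.
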
@@ -401,14 +446,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
        unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
                (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
        struct page *page = NULL;
-       struct orphan_inode_entry *orphan = NULL;
+       struct ino_entry *orphan = NULL;
 
        for (index = 0; index < orphan_blocks; index++)
                grab_meta_page(sbi, start_blk + index);
 
        index = 1;
-       spin_lock(&sbi->orphan_inode_lock);
-       head = &sbi->orphan_inode_list;
+       spin_lock(&sbi->ino_lock[ORPHAN_INO]);
+       head = &sbi->ino_list[ORPHAN_INO];
 
        /* loop for each orphan inode entry and write them into the journal block */
        list_for_each_entry(orphan, head, list) {
@@ -448,7 +493,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
                f2fs_put_page(page, 1);
        }
 
-       spin_unlock(&sbi->orphan_inode_lock);
+       spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
 }
 
 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -689,7 +734,7 @@ retry:
 /*
  * Freeze all the FS-operations for checkpoint.
  */
-static void block_operations(struct f2fs_sb_info *sbi)
+static int block_operations(struct f2fs_sb_info *sbi)
 {
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
@@ -697,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi)
                .for_reclaim = 0,
        };
        struct blk_plug plug;
+       int err = 0;
 
        blk_start_plug(&plug);
 
@@ -706,27 +752,38 @@ retry_flush_dents:
        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                f2fs_unlock_all(sbi);
                sync_dirty_dir_inodes(sbi);
+               if (unlikely(f2fs_cp_error(sbi))) {
+                       err = -EIO;
+                       goto out;
+               }
                goto retry_flush_dents;
        }
 
        /*
-        * POR: we should ensure that there is no dirty node pages
+        * POR: we should ensure that there are no dirty node pages
         * until finishing nat/sit flush.
         */
 retry_flush_nodes:
-       mutex_lock(&sbi->node_write);
+       down_write(&sbi->node_write);
 
        if (get_pages(sbi, F2FS_DIRTY_NODES)) {
-               mutex_unlock(&sbi->node_write);
+               up_write(&sbi->node_write);
                sync_node_pages(sbi, 0, &wbc);
+               if (unlikely(f2fs_cp_error(sbi))) {
+                       f2fs_unlock_all(sbi);
+                       err = -EIO;
+                       goto out;
+               }
                goto retry_flush_nodes;
        }
+out:
        blk_finish_plug(&plug);
+       return err;
 }
 
 static void unblock_operations(struct f2fs_sb_info *sbi)
 {
-       mutex_unlock(&sbi->node_write);
+       up_write(&sbi->node_write);
        f2fs_unlock_all(sbi);
 }
 
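Two things change in block_operations(): it can now fail with -EIO instead of
looping forever on a dead device, and node_write becomes an rwsem so node
page writers can share the lock while the checkpoint takes it exclusively.
The f2fs_lock_all()/f2fs_unlock_all() calls fencing all other filesystem
operations are, presumably as in f2fs.h of this era, thin wrappers:

	static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
	{
		down_write(&sbi->cp_rwsem);	/* stop all FS operations */
	}

	static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
	{
		up_write(&sbi->cp_rwsem);
	}
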
@@ -748,6 +805,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
 static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+       struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
        nid_t last_nid = 0;
        block_t start_blk;
        struct page *cp_page;
@@ -761,11 +819,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
         * This avoids conducting wrong roll-forward operations and uses
         * meta pages, so it should be called prior to sync_meta_pages below.
         */
-       discard_next_dnode(sbi);
+       discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
 
        /* Flush all the NAT/SIT pages */
-       while (get_pages(sbi, F2FS_DIRTY_META))
+       while (get_pages(sbi, F2FS_DIRTY_META)) {
                sync_meta_pages(sbi, META, LONG_MAX);
+               if (unlikely(f2fs_cp_error(sbi)))
+                       return;
+       }
 
        next_free_nid(sbi, &last_nid);
 
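discard_next_dnode() now takes the target block address from the caller
rather than deriving it internally; NEXT_FREE_BLKADDR() is the segment.h
macro for the next block to be written in the current segment, presumably:

	/* next free block address within the current segment */
	#define NEXT_FREE_BLKADDR(sbi, curseg)				\
		(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)

The f2fs_cp_error() check inside the flush loop matters for the same reason
as in block_operations(): once meta writes fail, sync_meta_pages() can no
longer drain F2FS_DIRTY_META, and without the check the loop would never
terminate.
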
@@ -875,6 +936,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
        /* wait for previous submitted node/meta pages writeback */
        wait_on_all_pages_writeback(sbi);
 
+       if (unlikely(f2fs_cp_error(sbi)))
+               return;
+
        filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
        filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
 
@@ -885,14 +949,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
        /* Here, we only have one bio having CP pack */
        sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
 
-       if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
-               clear_prefree_segments(sbi);
-               F2FS_RESET_SB_DIRT(sbi);
-       }
+       release_dirty_inode(sbi);
+
+       if (unlikely(f2fs_cp_error(sbi)))
+               return;
+
+       clear_prefree_segments(sbi);
+       F2FS_RESET_SB_DIRT(sbi);
 }
 
 /*
- * We guarantee that this checkpoint procedure should not fail.
+ * We guarantee that this checkpoint procedure will not fail.
  */
 void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 {
@@ -902,7 +969,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
 
        mutex_lock(&sbi->cp_mutex);
-       block_operations(sbi);
+
+       if (!sbi->s_dirty)
+               goto out;
+       if (unlikely(f2fs_cp_error(sbi)))
+               goto out;
+       if (block_operations(sbi))
+               goto out;
 
        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
 
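With the new early-outs, a clean filesystem (!s_dirty) or one that already
hit a checkpoint error skips straight to the unlock, so callers may request
a checkpoint unconditionally. A hypothetical caller, modeled on the
f2fs_sync_fs() found in super.c of the same era:

	static int f2fs_sync_fs(struct super_block *sb, int sync)
	{
		struct f2fs_sb_info *sbi = F2FS_SB(sb);

		if (sync) {
			mutex_lock(&sbi->gc_mutex);
			/* no-op when !s_dirty or after a cp error */
			write_checkpoint(sbi, false);
			mutex_unlock(&sbi->gc_mutex);
		} else {
			f2fs_balance_fs(sbi);
		}
		return 0;
	}
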
@@ -926,37 +999,43 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
        do_checkpoint(sbi, is_umount);
 
        unblock_operations(sbi);
-       mutex_unlock(&sbi->cp_mutex);
-
        stat_inc_cp_count(sbi->stat_info);
+out:
+       mutex_unlock(&sbi->cp_mutex);
        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
 }
 
-void init_orphan_info(struct f2fs_sb_info *sbi)
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
 {
-       spin_lock_init(&sbi->orphan_inode_lock);
-       INIT_LIST_HEAD(&sbi->orphan_inode_list);
-       sbi->n_orphans = 0;
+       int i;
+
+       for (i = 0; i < MAX_INO_ENTRY; i++) {
+               INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
+               spin_lock_init(&sbi->ino_lock[i]);
+               INIT_LIST_HEAD(&sbi->ino_list[i]);
+       }
+
        /*
         * Considering 512 blocks in a segment, 8 blocks are needed for
         * the cp and log segment summaries. The remaining blocks are
         * used to keep orphan entries; with one reserved segment for
         * the cp pack, we can have at most 1020*504 orphan entries.
+       sbi->n_orphans = 0;
        sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
                                * F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init create_checkpoint_caches(void)
 {
-       orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
-                       sizeof(struct orphan_inode_entry));
-       if (!orphan_entry_slab)
+       ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+                       sizeof(struct ino_entry));
+       if (!ino_entry_slab)
                return -ENOMEM;
        inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
                        sizeof(struct dir_inode_entry));
        if (!inode_entry_slab) {
-               kmem_cache_destroy(orphan_entry_slab);
+               kmem_cache_destroy(ino_entry_slab);
                return -ENOMEM;
        }
        return 0;
@@ -964,6 +1043,6 @@ int __init create_checkpoint_caches(void)
 
 void destroy_checkpoint_caches(void)
 {
-       kmem_cache_destroy(orphan_entry_slab);
+       kmem_cache_destroy(ino_entry_slab);
        kmem_cache_destroy(inode_entry_slab);
 }
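
The renamed slab reflects the wider role of the cache: it now backs orphan,
append, and update tracking alike. f2fs_kmem_cache_create() is assumed to be
the usual f2fs.h wrapper around kmem_cache_create(), roughly:

	static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
								size_t size)
	{
		return kmem_cache_create(name, size, 0,
					SLAB_RECLAIM_ACCOUNT, NULL);
	}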