f2fs: fix to account total free nid correctly
author Chao Yu <yuchao0@huawei.com>
Thu, 17 Nov 2016 12:53:11 +0000 (20:53 +0800)
committer Jaegeuk Kim <jaegeuk@google.com>
Mon, 25 Sep 2017 22:36:56 +0000 (15:36 -0700)
commit 04d47e673863c637a2b44ad34a558aeb5d0a727e upstream.

Thread A                Thread B                Thread C
- f2fs_create
 - f2fs_new_inode
  - f2fs_lock_op
   - alloc_nid
    alloc last nid
  - f2fs_unlock_op
                        - f2fs_create
                         - f2fs_new_inode
                          - f2fs_lock_op
                           - alloc_nid
                            as node count still not
                            be increased, we will
                            loop in alloc_nid
                                                - f2fs_write_node_pages
                                                 - f2fs_balance_fs_bg
                                                  - f2fs_sync_fs
                                                   - write_checkpoint
                                                    - block_operations
                                                     - f2fs_lock_all
 - f2fs_lock_op

While creating a new inode, we do not allocate and account the nid atomically,
so when there are almost no free nids left, we may encounter a dead loop like
the one in the stack above.

In order to avoid that, reuse nm_i::available_nids to account free nids, and
make nid allocation and accounting atomic during node creation.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/f2fs.h
fs/f2fs/node.c

index d6119ea3b86dcbe7dda3c7589393e303d9aee911..973ca74404dea53428f186b76dc7a7fdb9891e1d 100644 (file)
@@ -560,7 +560,7 @@ enum nid_list {
 struct f2fs_nm_info {
        block_t nat_blkaddr;            /* base disk address of NAT */
        nid_t max_nid;                  /* maximum possible node ids */
-       nid_t available_nids;           /* maximum available node ids */
+       nid_t available_nids;           /* # of available node ids */
        nid_t next_scan_nid;            /* the next nid to be scanned */
        unsigned int ram_thresh;        /* control the memory footprint */
        unsigned int ra_nid_pages;      /* # of nid pages to be readaheaded */
index 59cc29e6b73c892cef99da9abc1ed66e84ebd098..edacbabb92cf0f15b7e284d974c608f1a56feb0a 100644 (file)
@@ -1885,11 +1885,13 @@ retry:
        if (time_to_inject(sbi, FAULT_ALLOC_NID))
                return false;
 #endif
-       if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
-               return false;
-
        spin_lock(&nm_i->nid_list_lock);
 
+       if (unlikely(nm_i->available_nids == 0)) {
+               spin_unlock(&nm_i->nid_list_lock);
+               return false;
+       }
+
        /* We should not use stale free nids created by build_free_nids */
        if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
                f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
@@ -1900,6 +1902,7 @@ retry:
                __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
                i->state = NID_ALLOC;
                __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
+               nm_i->available_nids--;
                spin_unlock(&nm_i->nid_list_lock);
                return true;
        }
@@ -1951,6 +1954,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
                i->state = NID_NEW;
                __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
        }
+
+       nm_i->available_nids++;
+
        spin_unlock(&nm_i->nid_list_lock);
 
        if (need_free)
@@ -2150,6 +2156,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
                        ne = grab_nat_entry(nm_i, nid);
                        node_info_from_raw_nat(&ne->ni, &raw_ne);
                }
+
+               /*
+                * if a free nat in journal has not been used after last
+                * checkpoint, we should remove it from available nids,
+                * since later we will add it again.
+                */
+               if (!get_nat_flag(ne, IS_DIRTY) &&
+                               le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
+                       spin_lock(&nm_i->nid_list_lock);
+                       nm_i->available_nids--;
+                       spin_unlock(&nm_i->nid_list_lock);
+               }
+
                __set_nat_cache_dirty(nm_i, ne);
        }
        update_nats_in_cursum(journal, -i);
@@ -2222,8 +2241,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                raw_nat_from_node_info(raw_ne, &ne->ni);
                nat_reset_flag(ne);
                __clear_nat_cache_dirty(NM_I(sbi), ne);
-               if (nat_get_blkaddr(ne) == NULL_ADDR)
+               if (nat_get_blkaddr(ne) == NULL_ADDR) {
                        add_free_nid(sbi, nid, false);
+                       spin_lock(&NM_I(sbi)->nid_list_lock);
+                       NM_I(sbi)->available_nids++;
+                       spin_unlock(&NM_I(sbi)->nid_list_lock);
+               }
        }
 
        if (to_journal)
@@ -2298,7 +2321,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
 
        /* not used nids: 0, node, meta, (and root counted as valid node) */
-       nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
+       nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
+                                                       F2FS_RESERVED_NODE_NUM;
        nm_i->nid_cnt[FREE_NID_LIST] = 0;
        nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
        nm_i->nat_cnt = 0;