f2fs: make background threads of f2fs being aware of freezing
authorChao Yu <yuchao0@huawei.com>
Sat, 22 Jul 2017 00:52:23 +0000 (08:52 +0800)
committerJaegeuk Kim <jaegeuk@kernel.org>
Mon, 31 Jul 2017 23:47:27 +0000 (16:47 -0700)
When ->freeze_fs is called from lvm for doing snapshot, it needs to
make sure there will be no more changes in filesystem's data, however,
previously, background threads like GC thread wasn't aware of freezing,
so in environment with active background threads, data of snapshot
becomes unstable.

This patch fixes this issue by adding sb_{start,end}_intwrite in
below background threads:
- GC thread
- flush thread
- discard thread

Note that, don't use sb_start_intwrite() in gc_thread_func() due to:

generic/241 reports below bug:

 ======================================================
 WARNING: possible circular locking dependency detected
 4.13.0-rc1+ #32 Tainted: G           O
 ------------------------------------------------------
 f2fs_gc-250:0/22186 is trying to acquire lock:
  (&sbi->gc_mutex){+.+...}, at: [<f8fa7f0b>] f2fs_sync_fs+0x7b/0x1b0 [f2fs]

 but task is already holding lock:
  (sb_internal#2){++++.-}, at: [<f8fb5609>] gc_thread_func+0x159/0x4a0 [f2fs]

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #2 (sb_internal#2){++++.-}:
        __lock_acquire+0x405/0x7b0
        lock_acquire+0xae/0x220
        __sb_start_write+0x11d/0x1f0
        f2fs_evict_inode+0x2d6/0x4e0 [f2fs]
        evict+0xa8/0x170
        iput+0x1fb/0x2c0
        f2fs_sync_inode_meta+0x3f/0xf0 [f2fs]
        write_checkpoint+0x1b1/0x750 [f2fs]
        f2fs_sync_fs+0x85/0x1b0 [f2fs]
        f2fs_do_sync_file.isra.24+0x137/0xa30 [f2fs]
        f2fs_sync_file+0x34/0x40 [f2fs]
        vfs_fsync_range+0x4a/0xa0
        do_fsync+0x3c/0x60
        SyS_fdatasync+0x15/0x20
        do_fast_syscall_32+0xa1/0x1b0
        entry_SYSENTER_32+0x4c/0x7b

 -> #1 (&sbi->cp_mutex){+.+...}:
        __lock_acquire+0x405/0x7b0
        lock_acquire+0xae/0x220
        __mutex_lock+0x4f/0x830
        mutex_lock_nested+0x25/0x30
        write_checkpoint+0x2f/0x750 [f2fs]
        f2fs_sync_fs+0x85/0x1b0 [f2fs]
        sync_filesystem+0x67/0x80
        generic_shutdown_super+0x27/0x100
        kill_block_super+0x22/0x50
        kill_f2fs_super+0x3a/0x40 [f2fs]
        deactivate_locked_super+0x3d/0x70
        deactivate_super+0x40/0x60
        cleanup_mnt+0x39/0x70
        __cleanup_mnt+0x10/0x20
        task_work_run+0x69/0x80
        exit_to_usermode_loop+0x57/0x92
        do_fast_syscall_32+0x18c/0x1b0
        entry_SYSENTER_32+0x4c/0x7b

 -> #0 (&sbi->gc_mutex){+.+...}:
        validate_chain.isra.36+0xc50/0xdb0
        __lock_acquire+0x405/0x7b0
        lock_acquire+0xae/0x220
        __mutex_lock+0x4f/0x830
        mutex_lock_nested+0x25/0x30
        f2fs_sync_fs+0x7b/0x1b0 [f2fs]
        f2fs_balance_fs_bg+0xb9/0x200 [f2fs]
        gc_thread_func+0x302/0x4a0 [f2fs]
        kthread+0xe9/0x120
        ret_from_fork+0x19/0x24

 other info that might help us debug this:

 Chain exists of:
   &sbi->gc_mutex --> &sbi->cp_mutex --> sb_internal#2

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(sb_internal#2);
                                lock(&sbi->cp_mutex);
                                lock(sb_internal#2);
   lock(&sbi->gc_mutex);

  *** DEADLOCK ***

 1 lock held by f2fs_gc-250:0/22186:
  #0:  (sb_internal#2){++++.-}, at: [<f8fb5609>] gc_thread_func+0x159/0x4a0 [f2fs]

 stack backtrace:
 CPU: 2 PID: 22186 Comm: f2fs_gc-250:0 Tainted: G           O    4.13.0-rc1+ #32
 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
 Call Trace:
  dump_stack+0x5f/0x92
  print_circular_bug+0x1b3/0x1bd
  validate_chain.isra.36+0xc50/0xdb0
  ? __this_cpu_preempt_check+0xf/0x20
  __lock_acquire+0x405/0x7b0
  lock_acquire+0xae/0x220
  ? f2fs_sync_fs+0x7b/0x1b0 [f2fs]
  __mutex_lock+0x4f/0x830
  ? f2fs_sync_fs+0x7b/0x1b0 [f2fs]
  mutex_lock_nested+0x25/0x30
  ? f2fs_sync_fs+0x7b/0x1b0 [f2fs]
  f2fs_sync_fs+0x7b/0x1b0 [f2fs]
  f2fs_balance_fs_bg+0xb9/0x200 [f2fs]
  gc_thread_func+0x302/0x4a0 [f2fs]
  ? preempt_schedule_common+0x2f/0x4d
  ? f2fs_gc+0x540/0x540 [f2fs]
  kthread+0xe9/0x120
  ? f2fs_gc+0x540/0x540 [f2fs]
  ? kthread_create_on_node+0x30/0x30
  ret_from_fork+0x19/0x24

The deadlock occurs in below condition:
GC Thread Thread B
- sb_start_intwrite
- f2fs_sync_file
 - f2fs_sync_fs
  - mutex_lock(&sbi->gc_mutex)
   - write_checkpoint
    - block_operations
     - f2fs_sync_inode_meta
      - iput
       - sb_start_intwrite
 - mutex_lock(&sbi->gc_mutex)

Fix this by altering sb_start_intwrite to sb_start_write_trylock.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/gc.c
fs/f2fs/segment.c

index fa3d2e2df8e70e883011afc5e46c9989e0e834ff..41c649c9c55c4d6eef992f5b906329c7636b89e7 100644 (file)
@@ -55,6 +55,9 @@ static int gc_thread_func(void *data)
                }
 #endif
 
+               if (!sb_start_write_trylock(sbi->sb))
+                       continue;
+
                /*
                 * [GC triggering condition]
                 * 0. GC is not conducted currently.
@@ -69,12 +72,12 @@ static int gc_thread_func(void *data)
                 * So, I'd like to wait some time to collect dirty segments.
                 */
                if (!mutex_trylock(&sbi->gc_mutex))
-                       continue;
+                       goto next;
 
                if (!is_idle(sbi)) {
                        increase_sleep_time(gc_th, &wait_ms);
                        mutex_unlock(&sbi->gc_mutex);
-                       continue;
+                       goto next;
                }
 
                if (has_enough_invalid_blocks(sbi))
@@ -93,6 +96,8 @@ static int gc_thread_func(void *data)
 
                /* balancing f2fs's metadata periodically */
                f2fs_balance_fs_bg(sbi);
+next:
+               sb_end_write(sbi->sb);
 
        } while (!kthread_should_stop());
        return 0;
index f5d139f897dc9a73ba254ad956677b2e6442ea3f..2a5672a20a6253fabe83b549c9693cb0047e872a 100644 (file)
@@ -485,6 +485,8 @@ repeat:
        if (kthread_should_stop())
                return 0;
 
+       sb_start_intwrite(sbi->sb);
+
        if (!llist_empty(&fcc->issue_list)) {
                struct flush_cmd *cmd, *next;
                int ret;
@@ -503,6 +505,8 @@ repeat:
                fcc->dispatch_list = NULL;
        }
 
+       sb_end_intwrite(sbi->sb);
+
        wait_event_interruptible(*q,
                kthread_should_stop() || !llist_empty(&fcc->issue_list));
        goto repeat;
@@ -1130,9 +1134,13 @@ static int issue_discard_thread(void *data)
                if (kthread_should_stop())
                        return 0;
 
+               sb_start_intwrite(sbi->sb);
+
                __issue_discard_cmd(sbi, true);
                __wait_discard_cmd(sbi, true);
 
+               sb_end_intwrite(sbi->sb);
+
                congestion_wait(BLK_RW_SYNC, HZ/50);
        } while (!kthread_should_stop());
        return 0;