From 83c8266acc1d19debbf353a16aabbd892ef99462 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 1 Mar 2013 15:03:00 +0000 Subject: [PATCH] btrfs: try harder to allocate raid56 stripe cache The stripe hash table is large: it starts at allocation order 4 and can go as high as order 7 when lock debugging is turned on and structure padding happens. Try harder to allocate it and fall back to vmalloc to lower the chance of a failing mount. Observed mount failure: mount: page allocation failure: order:7, mode:0x200050 Pid: 8234, comm: mount Tainted: G W 3.8.0-default+ #267 Call Trace: [] warn_alloc_failed+0xf3/0x140 [] ? __alloc_pages_direct_compact+0x92/0x250 [] __alloc_pages_nodemask+0x733/0x9d0 [] ? cache_alloc_refill+0x3f8/0x840 [] cache_alloc_refill+0x43c/0x840 [] ? is_kernel_percpu_address+0x4b/0x90 [] ? btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs] [] kmem_cache_alloc_trace+0x247/0x270 [] btrfs_alloc_stripe_hash_table+0x5c/0x130 [btrfs] [] open_ctree+0xb2f/0x1f90 [btrfs] [] ? string+0x49/0xe0 [] ? vsnprintf+0x443/0x5d0 [] btrfs_mount+0x526/0x600 [btrfs] [] ? cache_alloc_debugcheck_after+0x4c/0x200 [] mount_fs+0x20/0xe0 [] vfs_kern_mount+0x76/0x120 [] do_mount+0x386/0x980 [] ? 
strndup_user+0x5b/0x80 [] sys_mount+0x90/0xe0 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/raid56.c | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5031e6dd5938..02369a3c162e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2197,7 +2197,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_alloc_stripe_hash_table(fs_info); if (ret) { - err = -ENOMEM; + err = ret; goto fail_alloc; } diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index e34e568534d9..07222053c7d8 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -188,13 +188,25 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) struct btrfs_stripe_hash *h; int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS; int i; + int table_size; if (info->stripe_hash_table) return 0; - table = kzalloc(sizeof(*table) + sizeof(*h) * num_entries, GFP_NOFS); - if (!table) - return -ENOMEM; + /* + * The table is large, starting with order 4 and can go as high as + * order 7 in case lock debugging is turned on. + * + * Try harder to allocate and fallback to vmalloc to lower the chance + * of a failing mount. 
+ */ + table_size = sizeof(*table) + sizeof(*h) * num_entries; + table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!table) { + table = vzalloc(table_size); + if (!table) + return -ENOMEM; + } spin_lock_init(&table->cache_lock); INIT_LIST_HEAD(&table->stripe_cache); @@ -209,8 +221,12 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) } x = cmpxchg(&info->stripe_hash_table, NULL, table); - if (x) - kfree(x); + if (x) { + if (is_vmalloc_addr(x)) + vfree(x); + else + kfree(x); + } return 0; } @@ -420,7 +436,10 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) if (!info->stripe_hash_table) return; btrfs_clear_rbio_cache(info); - kfree(info->stripe_hash_table); + if (is_vmalloc_addr(info->stripe_hash_table)) + vfree(info->stripe_hash_table); + else + kfree(info->stripe_hash_table); info->stripe_hash_table = NULL; } -- 2.20.1