X-Git-Url: https://git.stricted.de/?a=blobdiff_plain;f=fs%2Fext4%2Fmballoc.c;h=61cc29fef63eb3d4e35978ad7a49a40ab2a5d5ea;hb=47e5ca72da0f3bc71b728cffcbf42bbdda5aa006;hp=def84082a9a9b73deadc0d369c63a7ef981d8ea9;hpb=8d7a8fe2ce2f242953aef46226eaa8a4a1a2c380;p=GitHub%2Fmt8127%2Fandroid_kernel_alcatel_ttab.git diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..61cc29fef63e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -668,7 +668,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); @@ -808,7 +808,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b) * for this page; do not hold this lock when calling this routine! */ -static int ext4_mb_init_cache(struct page *page, char *incore) +static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) { ext4_group_t ngroups; int blocksize; @@ -841,7 +841,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) /* allocate buffer_heads to read bitmaps */ if (groups_per_page > 1) { i = sizeof(struct buffer_head *) * groups_per_page; - bh = kzalloc(i, GFP_NOFS); + bh = kzalloc(i, gfp); if (bh == NULL) { err = -ENOMEM; goto out; @@ -966,7 +966,7 @@ out: * are on the same page e4b->bd_buddy_page is NULL and return value is 0. */ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp) { struct inode *inode = EXT4_SB(sb)->s_buddy_cache; int block, pnum, poff; @@ -985,7 +985,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, block = group * 2; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, pnum, gfp); if (!page) return -EIO; BUG_ON(page->mapping != inode->i_mapping); @@ -999,7 +999,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, block++; pnum = block / blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, pnum, gfp); if (!page) return -EIO; BUG_ON(page->mapping != inode->i_mapping); @@ -1025,7 +1025,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) * calling this routine! */ static noinline_for_stack -int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) +int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) { struct ext4_group_info *this_grp; @@ -1043,7 +1043,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy page to page cache. */ - ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); + ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { /* * somebody initialized the group @@ -1053,7 +1053,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) } page = e4b.bd_bitmap_page; - ret = ext4_mb_init_cache(page, NULL); + ret = ext4_mb_init_cache(page, NULL, gfp); if (ret) goto err; if (!PageUptodate(page)) { @@ -1073,7 +1073,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) } /* init buddy cache */ page = e4b.bd_buddy_page; - ret = ext4_mb_init_cache(page, e4b.bd_bitmap); + ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp); if (ret) goto err; if (!PageUptodate(page)) { @@ -1092,8 +1092,8 @@ err: * calling this routine! */ static noinline_for_stack int -ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, - struct ext4_buddy *e4b) +ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b, gfp_t gfp) { int blocks_per_page; int block; @@ -1123,7 +1123,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, * we need full data about the group * to make a good selection */ - ret = ext4_mb_init_group(sb, group); + ret = ext4_mb_init_group(sb, group, gfp); if (ret) return ret; } @@ -1151,11 +1151,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, * wait for it to initialize. */ page_cache_release(page); - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); if (!PageUptodate(page)) { - ret = ext4_mb_init_cache(page, NULL); + ret = ext4_mb_init_cache(page, NULL, gfp); if (ret) { unlock_page(page); goto err; @@ -1182,11 +1182,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, if (page == NULL || !PageUptodate(page)) { if (page) page_cache_release(page); - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, pnum, gfp); if (page) { BUG_ON(page->mapping != inode->i_mapping); if (!PageUptodate(page)) { - ret = ext4_mb_init_cache(page, e4b->bd_bitmap); + ret = ext4_mb_init_cache(page, e4b->bd_bitmap, + gfp); if (ret) { unlock_page(page); goto err; @@ -1220,6 +1221,12 @@ err: return ret; } +static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, + struct ext4_buddy *e4b) +{ + return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS); +} + static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_page) @@ -1232,6 +1239,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) { int order = 1; + int bb_incr = 1 << (e4b->bd_blkbits - 1); void *bb; BUG_ON(e4b->bd_bitmap == e4b->bd_buddy); @@ -1244,7 +1252,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) /* this block is part of buddy of order 'order' */ return order; } - bb += 1 << (e4b->bd_blkbits - order); + bb += bb_incr; + bb_incr >>= 1; order++; } return 0; @@ -1396,6 +1405,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, int last = first + count - 1; struct super_block *sb = e4b->bd_sb; + if (WARN_ON(count == 0)) + return; BUG_ON(last >= (sb->s_blocksize << 3)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); mb_check_buddy(e4b); @@ -1989,7 +2000,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { - int ret = ext4_mb_init_group(ac->ac_sb, group); + int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS); if (ret) return 0; } @@ -2211,7 +2222,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; @@ -2512,7 +2523,7 @@ int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; - unsigned offset; + unsigned offset, offset_incr; unsigned max; int ret; @@ -2541,11 +2552,13 @@ int ext4_mb_init(struct super_block *sb) i = 1; offset = 0; + offset_incr = 1 << (sb->s_blocksize_bits - 1); max = sb->s_blocksize << 2; do { sbi->s_mb_offsets[i] = offset; sbi->s_mb_maxs[i] = max; - offset += 1 << (sb->s_blocksize_bits - i); + offset += offset_incr; + offset_incr = offset_incr >> 1; max = max >> 1; i++; } while (i <= sb->s_blocksize_bits + 1); @@ -2705,7 +2718,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + unsigned long flags) { ext4_fsblk_t discard_block; @@ -2714,7 +2728,7 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags); } /* @@ -2736,7 +2750,7 @@ static void ext4_free_data_callback(struct super_block *sb, if (test_opt(sb, DISCARD)) { err = ext4_issue_discard(sb, entry->efd_group, entry->efd_start_cluster, - entry->efd_count); + entry->efd_count, 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%d failed" @@ -2870,7 +2884,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error - * Fix the bitmap and repeat the block allocation + * Fix the bitmap and return EUCLEAN * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); @@ -2879,7 +2893,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) - err = -EAGAIN; + err = -EUCLEAN; goto out_err; } @@ -3116,7 +3130,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); + BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ @@ -3177,8 +3191,30 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; + struct ext4_buddy e4b; + int err; - if (pa && pa->pa_type == MB_INODE_PA) + if (pa == NULL) { + if (ac->ac_f_ex.fe_len == 0) + return; + err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); + if (err) { + /* + * This should never happen since we pin the + * pages in the ext4_allocation_context so + * ext4_mb_load_buddy() should never fail. + */ + WARN(1, "mb_load_buddy failed (%d)", err); + return; + } + ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, + ac->ac_f_ex.fe_len); + ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + ext4_mb_unload_buddy(&e4b); + return; + } + if (pa->pa_type == MB_INODE_PA) pa->pa_free += ac->ac_b_ex.fe_len; } @@ -3423,6 +3459,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3436,11 +3475,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -4102,7 +4143,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4419,18 +4460,7 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); - if (*errp == -EAGAIN) { - /* - * drop the reference that we took - * in ext4_mb_use_best_found - */ - ext4_mb_release_context(ac); - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; - ac->ac_status = AC_STATUS_CONTINUE; - goto repeat; - } else if (*errp) { + if (*errp) { ext4_discard_allocated_blocks(ac); goto errout; } else { @@ -4581,6 +4611,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *gd_bh; ext4_group_t block_group; struct ext4_sb_info *sbi; + struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_buddy e4b; unsigned int count_clusters; int err = 0; @@ -4639,7 +4670,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4653,7 +4684,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) @@ -4725,7 +4756,9 @@ do_more: #endif trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); - err = ext4_mb_load_buddy(sb, block_group, &e4b); + /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */ + err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) goto error_return; @@ -4734,13 +4767,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; @@ -4755,14 +4787,15 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count); + err = ext4_issue_discard(sb, block_group, bit, count, + 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } - + } else + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); @@ -4774,7 +4807,6 @@ do_more: ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh); ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); - percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); @@ -4782,10 +4814,23 @@ do_more: &sbi->s_flex_groups[flex_group].free_clusters); } - ext4_mb_unload_buddy(&e4b); - - if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) + if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) { + percpu_counter_add(&sbi->s_dirtyclusters_counter, + count_clusters); + spin_lock(&ei->i_block_reservation_lock); + if (flags & EXT4_FREE_BLOCKS_METADATA) + ei->i_reserved_meta_blocks += count_clusters; + else + ei->i_reserved_data_blocks += count_clusters; + spin_unlock(&ei->i_block_reservation_lock); + if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) + dquot_reclaim_block(inode, + EXT4_C2B(sbi, count_clusters)); + } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); + percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); + + ext4_mb_unload_buddy(&e4b); /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); @@ -4950,13 +4995,15 @@ error_return: * @count: number of blocks to TRIM * @group: alloc. group we are working with * @e4b: ext4 buddy for the group + * @blkdev_flags: flags for the block device * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, + unsigned long blkdev_flags) { struct ext4_free_extent ex; int ret = 0; @@ -4975,7 +5022,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ret = ext4_issue_discard(sb, group, start, count); + ret = ext4_issue_discard(sb, group, start, count, blkdev_flags); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; @@ -4988,6 +5035,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @blkdev_flags: flags for the block device * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5002,7 +5050,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, unsigned long blkdev_flags) { void *bitmap; ext4_grpblk_t next, count = 0, free_count = 0; @@ -5035,7 +5083,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((next - start) >= minblocks) { ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); + next - start, group, &e4b, + blkdev_flags); if (ret && ret != -EOPNOTSUPP) break; ret = 0; @@ -5077,6 +5126,7 @@ out: * ext4_trim_fs() -- trim ioctl handle function * @sb: superblock for filesystem * @range: fstrim_range structure + * @blkdev_flags: flags for the block device * * start: First Byte to trim * len: number of Bytes to trim from start @@ -5085,7 +5135,8 @@ out: * start to start+len. For each such a group ext4_trim_all_free function * is invoked to trim all free space. */ -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range, + unsigned long blkdev_flags) { struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; @@ -5125,7 +5176,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) grp = ext4_get_group_info(sb, group); /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { - ret = ext4_mb_init_group(sb, group); + ret = ext4_mb_init_group(sb, group, GFP_NOFS); if (ret) break; } @@ -5141,7 +5192,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, blkdev_flags); if (cnt < 0) { ret = cnt; break;