f2fs: updates on v4.16-rc1
author Jaegeuk Kim <jaegeuk@google.com>
Thu, 16 Nov 2017 08:59:14 +0000 (16:59 +0800)
committer Jaegeuk Kim <jaegeuk@google.com>
Thu, 22 Feb 2018 19:18:40 +0000 (19:18 +0000)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've followed up to support some generic features such
  as cgroup, block reservation, linking fscrypt_ops, delivering
  write_hints, and some ioctls. And, we could fix some corner cases in
  terms of power-cut recovery and subtle deadlocks.

  Enhancements:
   - bitmap operations to handle NAT blocks
   - readahead to improve readdir speed
   - switch to use fscrypt_*
   - apply write hints for direct IO
   - add reserve_root=%u,resuid=%u,resgid=%u to reserve blocks for root/uid/gid
   - modify b_avail and b_free to consider root reserved blocks
   - support cgroup writeback
   - support FIEMAP_FLAG_XATTR for fiemap
   - add F2FS_IOC_PRECACHE_EXTENTS to pre-cache extents
   - add F2FS_IOC_{GET/SET}_PIN_FILE to pin LBAs for data blocks
   - support inode creation time

  Bug fixes:
   - sysfile-based quota operations
   - memory footprint accounting
   - allow to write data on partial preallocation case
   - fix deadlock case on fallocate
   - fix to handle fill_super errors
   - fix missing inode updates of fsync'ed file
   - recover renamed file which was fsync'ed before
   - drop inmemory pages in corner error case
   - keep last_disk_size correctly
   - recover missing i_inline flags during roll-forward

  Various clean-up patches were added as well"

Cherry-pick from origin/upstream-f2fs-stable-linux-4.14.y:

00f0eaaadfe0 f2fs: support inode creation time
3e7444adf953 f2fs: rebuild sit page from sit info in mem
76688790c3bf f2fs: stop issuing discard if fs is readonly
ae93dca26413 f2fs: clean up duplicated assignment in init_discard_policy
0052bba1313e f2fs: use GFP_F2FS_ZERO for cleanup
6271336cfa80 f2fs: allow to recover node blocks given updated checkpoint
e003a2d15352 f2fs: recover some i_inline flags
3cafae53f3ef f2fs: correct removexattr behavior for null valued extended attribute
78d7fa9ac21f f2fs: drop page cache after fs shutdown
150b61cee574 f2fs: stop gc/discard thread after fs shutdown
cf27ccc41e86 f2fs: hanlde error case in f2fs_ioc_shutdown
1526117cdaa6 f2fs: split need_inplace_update
cd576d7b7a95 f2fs: fix to update last_disk_size correctly
7a57bd3313c2 f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup
95eb6a6ceb04 f2fs: clean up error path of fill_super
63c949c97384 f2fs: avoid hungtask when GC encrypted block if io_bits is set
583d13d10c8a f2fs: allow quota to use reserved blocks
fbe371d3cdb2 f2fs: fix to drop all inmem pages correctly
7e08ce43562d f2fs: speed up defragment on sparse file
0f914cab8ce3 f2fs: support F2FS_IOC_PRECACHE_EXTENTS
ed1311e58555 f2fs: add an ioctl to disable GC for specific file
b08974ab5e0a f2fs: prevent newly created inode from being dirtied incorrectly
e8a8acf602a3 f2fs: support FIEMAP_FLAG_XATTR
042aeed690a3 f2fs: fix to cover f2fs_inline_data_fiemap with inode_lock
9cf9c37ebe90 f2fs: check node page again in write end io
b9eedb48132e f2fs: fix to caclulate required free section correctly
75ae50cf1539 f2fs: handle newly created page when revoking inmem pages
871b97493627 f2fs: add resgid and resuid to reserve root blocks
0cf361acdb47 f2fs: implement cgroup writeback support
196d52cf4ebe f2fs: remove unused pend_list_tag
6e899a83f5e0 f2fs: avoid high cpu usage in discard thread
bb1af976c2a2 f2fs: make local functions static
ad658936ea9d f2fs: add reserved blocks for root user
c6e64f1ff11c f2fs: check segment type in __f2fs_replace_block
88cdc60b7308 f2fs: update inode info to inode page for new file
4203e9fbd857 f2fs: show precise # of blocks that user/root can use
47dc137291e3 f2fs: clean up unneeded declaration
27f9e55195b1 f2fs: continue to do direct IO if we only preallocate partial blocks
f2f137831464 f2fs: enable quota at remount from r to w
d507f30065b3 f2fs: skip stop_checkpoint for user data writes
4b242ffcdb1f f2fs: fix missing error number for xattr operation
c6c76a0e6154 f2fs: recover directory operations by fsync
5943e3992eed f2fs: return error during fill_super
93579c97259b f2fs: fix an error case of missing update inode page
3d753c15af04 f2fs: fix potential hangtask in f2fs_trace_pid
625f066c5d18 f2fs: no need return value in restore summary process
f76c831abdd7 f2fs: use unlikely for release case
0408ad5efb28 f2fs: don't return value in truncate_data_blocks_range
62e507cd2b91 f2fs: clean up f2fs_map_blocks
233b197757c0 f2fs: clean up hash codes
58d550e5da7c f2fs: fix error handling in fill_super
35d78e6fc851 f2fs: spread f2fs_k{m,z}alloc
fecf31ce115a f2fs: inject fault to kvmalloc
41af39db9fd0 f2fs: inject fault to kzalloc
9fecb4159dc1 f2fs: remove a redundant conditional expression
8e56c02ee9fa f2fs: apply write hints to select the type of segment for direct write
a4015f91473e f2fs: switch to fscrypt_prepare_setattr()
56351ec774b8 f2fs: switch to fscrypt_prepare_lookup()
51f2caabf9a8 f2fs: switch to fscrypt_prepare_rename()
f9a35b22b914 f2fs: switch to fscrypt_prepare_link()
787bd2632d66 f2fs: switch to fscrypt_file_open()
eb9d8ee0fdb8 posix_acl: convert posix_acl.a_refcount from atomic_t to refcount_t
bd0bb8ab0c90 f2fs: remove repeated f2fs_bug_on
d1c0441c02cb f2fs: remove an excess variable
3f12c94d1b3c f2fs: fix lock dependency in between dio_rwsem & i_mmap_sem
39685b35e80a f2fs: remove unused parameter
b83577043a48 f2fs: still write data if preallocate only partial blocks
b61cf217182b f2fs: introduce sysfs readdir_ra to readahead inode block in readdir
44ed9b2d125c f2fs: fix concurrent problem for updating free bitmap
08be3792ef4b f2fs: remove unneeded memory footprint accounting
33362399b3fc f2fs: no need to read nat block if nat_block_bitmap is set
01bb5c8b1f32 f2fs: reserve nid resource for quota sysfile

Change-Id: Ie0beb18a04fc300d1591d64c7ae542a478644e26
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
25 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
fs/f2fs/acl.c
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/trace.c
fs/f2fs/xattr.c
fs/posix_acl.c
include/linux/f2fs_fs.h
include/linux/posix_acl.h
include/trace/events/f2fs.h

index a7799c2fca2855eced4740cb6a82e5f26a448287..d870b5514d15a23de66d34f226f870ed0d94dd67 100644 (file)
@@ -186,3 +186,9 @@ Date:               August 2017
 Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
                 Controls sleep time of GC urgent mode
+
+What:          /sys/fs/f2fs/<disk>/readdir_ra
+Date:          November 2017
+Contact:       "Sheng Yong" <shengyong1@huawei.com>
+Description:
+                Controls readahead inode block in readdir.
index 2bb7c9fc5144aba162b01ba72295994f9fd23f00..111824199a886c11f3a543463aeba0385a8a50ac 100644 (file)
@@ -270,7 +270,7 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl,
                                sizeof(struct posix_acl_entry);
                clone = kmemdup(acl, size, flags);
                if (clone)
-                       atomic_set(&clone->a_refcount, 1);
+                       refcount_set(&clone->a_refcount, 1);
        }
        return clone;
 }
index a30024f2a567a9a34c95bd7c3f3506e8aae319fd..701781a372f3ec34f089bdb79c82628d4d6baaa1 100644 (file)
@@ -237,12 +237,15 @@ static int __f2fs_write_meta_page(struct page *page,
 
        trace_f2fs_writepage(page, META);
 
+       if (unlikely(f2fs_cp_error(sbi))) {
+               dec_page_count(sbi, F2FS_DIRTY_META);
+               unlock_page(page);
+               return 0;
+       }
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;
        if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
                goto redirty_out;
-       if (unlikely(f2fs_cp_error(sbi)))
-               goto redirty_out;
 
        write_meta_page(sbi, page, io_type);
        dec_page_count(sbi, F2FS_DIRTY_META);
@@ -796,7 +799,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
        block_t cp_blk_no;
        int i;
 
-       sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
+       sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL);
        if (!sbi->ckpt)
                return -ENOMEM;
        /*
@@ -1157,6 +1160,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        /* set this flag to activate crc|cp_ver for recovery */
        __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
+       __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
 
        spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
index 823c842a7414f33effd6ce07c04e204d2e44dcc8..da7b00e6559b247427426d7218e2faf54dfc5670 100644 (file)
@@ -112,8 +112,13 @@ static void f2fs_write_end_io(struct bio *bio)
 
                if (unlikely(bio->bi_status)) {
                        mapping_set_error(page->mapping, -EIO);
-                       f2fs_stop_checkpoint(sbi, true);
+                       if (type == F2FS_WB_CP_DATA)
+                               f2fs_stop_checkpoint(sbi, true);
                }
+
+               f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
+                                       page->index != nid_of_node(page));
+
                dec_page_count(sbi, type);
                clear_cold_data(page);
                end_page_writeback(page);
@@ -170,6 +175,7 @@ static bool __same_bdev(struct f2fs_sb_info *sbi,
  * Low-level block read/write IO operations.
  */
 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+                               struct writeback_control *wbc,
                                int npages, bool is_read)
 {
        struct bio *bio;
@@ -179,6 +185,8 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
        f2fs_target_device(sbi, blk_addr, bio);
        bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
        bio->bi_private = is_read ? NULL : sbi;
+       if (wbc)
+               wbc_init_bio(wbc, bio);
 
        return bio;
 }
@@ -374,7 +382,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
        f2fs_trace_ios(fio, 0);
 
        /* Allocate a new bio */
-       bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op));
+       bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
+                               1, is_read_io(fio->op));
 
        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                bio_put(bio);
@@ -436,7 +445,7 @@ alloc_new:
                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
                        goto out_fail;
                }
-               io->bio = __bio_alloc(sbi, fio->new_blkaddr,
+               io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
                                                BIO_MAX_PAGES, false);
                io->fio = *fio;
        }
@@ -446,6 +455,9 @@ alloc_new:
                goto alloc_new;
        }
 
+       if (fio->io_wbc)
+               wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
+
        io->last_block_in_bio = fio->new_blkaddr;
        f2fs_trace_ios(fio, 0);
 
@@ -784,7 +796,7 @@ got_it:
        return page;
 }
 
-static int __allocate_data_block(struct dnode_of_data *dn)
+static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        struct f2fs_summary sum;
@@ -809,7 +821,7 @@ alloc:
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
        allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-                                       &sum, CURSEG_WARM_DATA, NULL, false);
+                                       &sum, seg_type, NULL, false);
        set_data_blkaddr(dn);
 
        /* update i_size */
@@ -832,10 +844,12 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct f2fs_map_blocks map;
+       int flag;
        int err = 0;
+       bool direct_io = iocb->ki_flags & IOCB_DIRECT;
 
        /* convert inline data for Direct I/O*/
-       if (iocb->ki_flags & IOCB_DIRECT) {
+       if (direct_io) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        return err;
@@ -852,19 +866,33 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
                map.m_len = 0;
 
        map.m_next_pgofs = NULL;
+       map.m_next_extent = NULL;
+       map.m_seg_type = NO_CHECK_TYPE;
 
-       if (iocb->ki_flags & IOCB_DIRECT)
-               return f2fs_map_blocks(inode, &map, 1,
-                       __force_buffered_io(inode, WRITE) ?
-                               F2FS_GET_BLOCK_PRE_AIO :
-                               F2FS_GET_BLOCK_PRE_DIO);
+       if (direct_io) {
+               map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
+               flag = __force_buffered_io(inode, WRITE) ?
+                                       F2FS_GET_BLOCK_PRE_AIO :
+                                       F2FS_GET_BLOCK_PRE_DIO;
+               goto map_blocks;
+       }
        if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        return err;
        }
-       if (!f2fs_has_inline_data(inode))
-               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+       if (f2fs_has_inline_data(inode))
+               return err;
+
+       flag = F2FS_GET_BLOCK_PRE_AIO;
+
+map_blocks:
+       err = f2fs_map_blocks(inode, &map, 1, flag);
+       if (map.m_len > 0 && err == -ENOSPC) {
+               if (!direct_io)
+                       set_inode_flag(inode, FI_NO_PREALLOC);
+               err = 0;
+       }
        return err;
 }
 
@@ -905,6 +933,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        blkcnt_t prealloc;
        struct extent_info ei = {0,0,0};
        block_t blkaddr;
+       unsigned int start_pgofs;
 
        if (!maxblocks)
                return 0;
@@ -920,6 +949,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                map->m_pblk = ei.blk + pgofs - ei.fofs;
                map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
                map->m_flags = F2FS_MAP_MAPPED;
+               if (map->m_next_extent)
+                       *map->m_next_extent = pgofs + map->m_len;
                goto out;
        }
 
@@ -938,10 +969,14 @@ next_dnode:
                        if (map->m_next_pgofs)
                                *map->m_next_pgofs =
                                        get_next_page_offset(&dn, pgofs);
+                       if (map->m_next_extent)
+                               *map->m_next_extent =
+                                       get_next_page_offset(&dn, pgofs);
                }
                goto unlock_out;
        }
 
+       start_pgofs = pgofs;
        prealloc = 0;
        last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
@@ -961,7 +996,8 @@ next_block:
                                        last_ofs_in_node = dn.ofs_in_node;
                                }
                        } else {
-                               err = __allocate_data_block(&dn);
+                               err = __allocate_data_block(&dn,
+                                                       map->m_seg_type);
                                if (!err)
                                        set_inode_flag(inode, FI_APPEND_WRITE);
                        }
@@ -974,14 +1010,20 @@ next_block:
                                map->m_pblk = 0;
                                goto sync_out;
                        }
+                       if (flag == F2FS_GET_BLOCK_PRECACHE)
+                               goto sync_out;
                        if (flag == F2FS_GET_BLOCK_FIEMAP &&
                                                blkaddr == NULL_ADDR) {
                                if (map->m_next_pgofs)
                                        *map->m_next_pgofs = pgofs + 1;
+                               goto sync_out;
                        }
-                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               blkaddr != NEW_ADDR)
+                       if (flag != F2FS_GET_BLOCK_FIEMAP) {
+                               /* for defragment case */
+                               if (map->m_next_pgofs)
+                                       *map->m_next_pgofs = pgofs + 1;
                                goto sync_out;
+                       }
                }
        }
 
@@ -1032,6 +1074,16 @@ skip:
        else if (dn.ofs_in_node < end_offset)
                goto next_block;
 
+       if (flag == F2FS_GET_BLOCK_PRECACHE) {
+               if (map->m_flags & F2FS_MAP_MAPPED) {
+                       unsigned int ofs = start_pgofs - map->m_lblk;
+
+                       f2fs_update_extent_cache_range(&dn,
+                               start_pgofs, map->m_pblk + ofs,
+                               map->m_len - ofs);
+               }
+       }
+
        f2fs_put_dnode(&dn);
 
        if (create) {
@@ -1041,6 +1093,17 @@ skip:
        goto next_dnode;
 
 sync_out:
+       if (flag == F2FS_GET_BLOCK_PRECACHE) {
+               if (map->m_flags & F2FS_MAP_MAPPED) {
+                       unsigned int ofs = start_pgofs - map->m_lblk;
+
+                       f2fs_update_extent_cache_range(&dn,
+                               start_pgofs, map->m_pblk + ofs,
+                               map->m_len - ofs);
+               }
+               if (map->m_next_extent)
+                       *map->m_next_extent = pgofs + 1;
+       }
        f2fs_put_dnode(&dn);
 unlock_out:
        if (create) {
@@ -1054,7 +1117,7 @@ out:
 
 static int __get_data_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh, int create, int flag,
-                       pgoff_t *next_pgofs)
+                       pgoff_t *next_pgofs, int seg_type)
 {
        struct f2fs_map_blocks map;
        int err;
@@ -1062,6 +1125,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
        map.m_next_pgofs = next_pgofs;
+       map.m_next_extent = NULL;
+       map.m_seg_type = seg_type;
 
        err = f2fs_map_blocks(inode, &map, create, flag);
        if (!err) {
@@ -1077,14 +1142,17 @@ static int get_data_block(struct inode *inode, sector_t iblock,
                        pgoff_t *next_pgofs)
 {
        return __get_data_block(inode, iblock, bh_result, create,
-                                                       flag, next_pgofs);
+                                                       flag, next_pgofs,
+                                                       NO_CHECK_TYPE);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_DEFAULT, NULL);
+                                               F2FS_GET_BLOCK_DEFAULT, NULL,
+                                               rw_hint_to_seg_type(
+                                                       inode->i_write_hint));
 }
 
 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -1095,7 +1163,8 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                return -EFBIG;
 
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_BMAP, NULL);
+                                               F2FS_GET_BLOCK_BMAP, NULL,
+                                               NO_CHECK_TYPE);
 }
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1108,6 +1177,68 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
        return (blk << inode->i_blkbits);
 }
 
+static int f2fs_xattr_fiemap(struct inode *inode,
+                               struct fiemap_extent_info *fieinfo)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct page *page;
+       struct node_info ni;
+       __u64 phys = 0, len;
+       __u32 flags;
+       nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+       int err = 0;
+
+       if (f2fs_has_inline_xattr(inode)) {
+               int offset;
+
+               page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
+                                               inode->i_ino, false);
+               if (!page)
+                       return -ENOMEM;
+
+               get_node_info(sbi, inode->i_ino, &ni);
+
+               phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+               offset = offsetof(struct f2fs_inode, i_addr) +
+                                       sizeof(__le32) * (DEF_ADDRS_PER_INODE -
+                                       get_inline_xattr_addrs(inode));
+
+               phys += offset;
+               len = inline_xattr_size(inode);
+
+               f2fs_put_page(page, 1);
+
+               flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
+
+               if (!xnid)
+                       flags |= FIEMAP_EXTENT_LAST;
+
+               err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+               if (err || err == 1)
+                       return err;
+       }
+
+       if (xnid) {
+               page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
+               if (!page)
+                       return -ENOMEM;
+
+               get_node_info(sbi, xnid, &ni);
+
+               phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+               len = inode->i_sb->s_blocksize;
+
+               f2fs_put_page(page, 1);
+
+               flags = FIEMAP_EXTENT_LAST;
+       }
+
+       if (phys)
+               err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+
+       return (err < 0 ? err : 0);
+}
+
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                u64 start, u64 len)
 {
@@ -1118,18 +1249,29 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        u32 flags = 0;
        int ret = 0;
 
-       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
+               ret = f2fs_precache_extents(inode);
+               if (ret)
+                       return ret;
+       }
+
+       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
        if (ret)
                return ret;
 
+       inode_lock(inode);
+
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               ret = f2fs_xattr_fiemap(inode, fieinfo);
+               goto out;
+       }
+
        if (f2fs_has_inline_data(inode)) {
                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                if (ret != -EAGAIN)
-                       return ret;
+                       goto out;
        }
 
-       inode_lock(inode);
-
        if (logical_to_blk(inode, len) == 0)
                len = blk_to_logical(inode, 1);
 
@@ -1199,7 +1341,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
                        unsigned nr_pages)
 {
        struct bio *bio = NULL;
-       unsigned page_idx;
        sector_t last_block_in_bio = 0;
        struct inode *inode = mapping->host;
        const unsigned blkbits = inode->i_blkbits;
@@ -1215,9 +1356,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
        map.m_len = 0;
        map.m_flags = 0;
        map.m_next_pgofs = NULL;
+       map.m_next_extent = NULL;
+       map.m_seg_type = NO_CHECK_TYPE;
 
-       for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
-
+       for (; nr_pages; nr_pages--) {
                if (pages) {
                        page = list_last_entry(pages, struct page, lru);
 
@@ -1377,18 +1519,79 @@ retry_encrypt:
        return PTR_ERR(fio->encrypted_page);
 }
 
+static inline bool check_inplace_update_policy(struct inode *inode,
+                               struct f2fs_io_info *fio)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       unsigned int policy = SM_I(sbi)->ipu_policy;
+
+       if (policy & (0x1 << F2FS_IPU_FORCE))
+               return true;
+       if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
+               return true;
+       if (policy & (0x1 << F2FS_IPU_UTIL) &&
+                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+               return true;
+       if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
+                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+               return true;
+
+       /*
+        * IPU for rewrite async pages
+        */
+       if (policy & (0x1 << F2FS_IPU_ASYNC) &&
+                       fio && fio->op == REQ_OP_WRITE &&
+                       !(fio->op_flags & REQ_SYNC) &&
+                       !f2fs_encrypted_inode(inode))
+               return true;
+
+       /* this is only set during fdatasync */
+       if (policy & (0x1 << F2FS_IPU_FSYNC) &&
+                       is_inode_flag_set(inode, FI_NEED_IPU))
+               return true;
+
+       return false;
+}
+
+bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
+{
+       if (f2fs_is_pinned_file(inode))
+               return true;
+
+       /* if this is cold file, we should overwrite to avoid fragmentation */
+       if (file_is_cold(inode))
+               return true;
+
+       return check_inplace_update_policy(inode, fio);
+}
+
+bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+{
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+       if (test_opt(sbi, LFS))
+               return true;
+       if (S_ISDIR(inode->i_mode))
+               return true;
+       if (f2fs_is_atomic_file(inode))
+               return true;
+       if (fio) {
+               if (is_cold_data(fio->page))
+                       return true;
+               if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+                       return true;
+       }
+       return false;
+}
+
 static inline bool need_inplace_update(struct f2fs_io_info *fio)
 {
        struct inode *inode = fio->page->mapping->host;
 
-       if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
-               return false;
-       if (is_cold_data(fio->page))
-               return false;
-       if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+       if (should_update_outplace(inode, fio))
                return false;
 
-       return need_inplace_update_policy(inode, fio);
+       return should_update_inplace(inode, fio);
 }
 
 static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
@@ -1509,10 +1712,17 @@ static int __write_data_page(struct page *page, bool *submitted,
                .submitted = false,
                .need_lock = LOCK_RETRY,
                .io_type = io_type,
+               .io_wbc = wbc,
        };
 
        trace_f2fs_writepage(page, DATA);
 
+       /* we should bypass data pages to proceed the kworkder jobs */
+       if (unlikely(f2fs_cp_error(sbi))) {
+               mapping_set_error(page->mapping, -EIO);
+               goto out;
+       }
+
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;
 
@@ -1537,12 +1747,6 @@ write:
                        available_free_memory(sbi, BASE_CHECK))))
                goto redirty_out;
 
-       /* we should bypass data pages to proceed the kworkder jobs */
-       if (unlikely(f2fs_cp_error(sbi))) {
-               mapping_set_error(page->mapping, -EIO);
-               goto out;
-       }
-
        /* Dentry blocks are controlled by checkpoint */
        if (S_ISDIR(inode->i_mode)) {
                fio.need_lock = LOCK_DONE;
@@ -1572,10 +1776,14 @@ write:
                }
        }
 
-       down_write(&F2FS_I(inode)->i_sem);
-       if (F2FS_I(inode)->last_disk_size < psize)
-               F2FS_I(inode)->last_disk_size = psize;
-       up_write(&F2FS_I(inode)->i_sem);
+       if (err) {
+               file_set_keep_isize(inode);
+       } else {
+               down_write(&F2FS_I(inode)->i_sem);
+               if (F2FS_I(inode)->last_disk_size < psize)
+                       F2FS_I(inode)->last_disk_size = psize;
+               up_write(&F2FS_I(inode)->i_sem);
+       }
 
 done:
        if (err && err != -ENOENT)
@@ -1939,7 +2147,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *page = NULL;
        pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
-       bool need_balance = false;
+       bool need_balance = false, drop_atomic = false;
        block_t blkaddr = NULL_ADDR;
        int err = 0;
 
@@ -1958,6 +2166,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
        if (f2fs_is_atomic_file(inode) &&
                        !available_free_memory(sbi, INMEM_PAGES)) {
                err = -ENOMEM;
+               drop_atomic = true;
                goto fail;
        }
 
@@ -2038,7 +2247,7 @@ repeat:
 fail:
        f2fs_put_page(page, 1);
        f2fs_write_failed(mapping, pos + len);
-       if (f2fs_is_atomic_file(inode))
+       if (drop_atomic)
                drop_inmem_pages_all(sbi);
        return err;
 }
index ecada84252680f70770a7edeb71f89337ccdc018..a66107b5cfff98dc5796aac5c7d7b57e5eb26bcb 100644 (file)
@@ -49,14 +49,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
        si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
        si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
-
-       si->nquota_files = 0;
-       if (f2fs_sb_has_quota_ino(sbi->sb)) {
-               for (i = 0; i < MAXQUOTAS; i++) {
-                       if (f2fs_qf_ino(sbi->sb, i))
-                               si->nquota_files++;
-               }
-       }
+       si->nquota_files = sbi->nquota_files;
        si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
        si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
        si->aw_cnt = atomic_read(&sbi->aw_cnt);
@@ -186,7 +179,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
        si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
        si->base_mem += 2 * sizeof(struct f2fs_inode_info);
        si->base_mem += sizeof(*sbi->ckpt);
-       si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
 
        /* build sm */
        si->base_mem += sizeof(struct f2fs_sm_info);
@@ -447,7 +439,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        struct f2fs_stat_info *si;
 
-       si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
+       si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
        if (!si)
                return -ENOMEM;
 
index 2d98d877c09dada99dfae36b45f85b3da75af095..f00b5ed8c01157e415450d6160b41880d23d5252 100644 (file)
@@ -713,6 +713,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
        f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
 
+       add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO);
+
        if (f2fs_has_inline_dentry(dir))
                return f2fs_delete_inline_entry(dentry, page, dir, inode);
 
@@ -798,6 +800,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
        unsigned int bit_pos;
        struct f2fs_dir_entry *de = NULL;
        struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
 
        bit_pos = ((unsigned long)ctx->pos % d->max);
 
@@ -836,6 +839,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
                                        le32_to_cpu(de->ino), d_type))
                        return 1;
 
+               if (sbi->readdir_ra == 1)
+                       ra_node_page(sbi, le32_to_cpu(de->ino));
+
                bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
                ctx->pos = start_pos + bit_pos;
        }
index f4e094e816c63df79bd40f62b097147826037dae..eb0a0864da0a6875766ee302cb08600a06d959ff 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/magic.h>
 #include <linux/kobject.h>
 #include <linux/sched.h>
+#include <linux/cred.h>
 #include <linux/vmalloc.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
@@ -43,6 +44,7 @@
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 enum {
        FAULT_KMALLOC,
+       FAULT_KVMALLOC,
        FAULT_PAGE_ALLOC,
        FAULT_PAGE_GET,
        FAULT_ALLOC_BIO,
@@ -94,6 +96,7 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_PRJQUOTA            0x00200000
 #define F2FS_MOUNT_QUOTA               0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x00800000
+#define F2FS_MOUNT_RESERVE_ROOT                0x01000000
 
 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)   ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -121,6 +124,7 @@ struct f2fs_mount_info {
 #define F2FS_FEATURE_INODE_CHKSUM      0x0020
 #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR     0x0040
 #define F2FS_FEATURE_QUOTA_INO         0x0080
+#define F2FS_FEATURE_INODE_CRTIME      0x0100
 
 #define F2FS_HAS_FEATURE(sb, mask)                                     \
        ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -129,6 +133,12 @@ struct f2fs_mount_info {
 #define F2FS_CLEAR_FEATURE(sb, mask)                                   \
        (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask))
 
+/*
+ * Default values for user and/or group using reserved blocks
+ */
+#define        F2FS_DEF_RESUID         0
+#define        F2FS_DEF_RESGID         0
+
 /*
  * For checkpoint manager
  */
@@ -179,6 +189,7 @@ enum {
        ORPHAN_INO,             /* for orphan ino list */
        APPEND_INO,             /* for append ino list */
        UPDATE_INO,             /* for update ino list */
+       TRANS_DIR_INO,          /* for transactions dir ino list */
        FLUSH_INO,              /* for multiple device flushing */
        MAX_INO_ENTRY,          /* max. list */
 };
@@ -264,7 +275,6 @@ struct discard_cmd_control {
        struct task_struct *f2fs_issue_discard; /* discard thread */
        struct list_head entry_list;            /* 4KB discard entry list */
        struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
-       unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
        struct list_head wait_list;             /* store on-flushing entries */
        struct list_head fstrim_list;           /* in-flight discard from fstrim */
        wait_queue_head_t discard_wait_queue;   /* waiting queue for wake-up */
@@ -347,6 +357,9 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,      \
                                                struct f2fs_gc_range)
 #define F2FS_IOC_GET_FEATURES          _IOR(F2FS_IOCTL_MAGIC, 12, __u32)
+#define F2FS_IOC_SET_PIN_FILE          _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
+#define F2FS_IOC_GET_PIN_FILE          _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
+#define F2FS_IOC_PRECACHE_EXTENTS      _IO(F2FS_IOCTL_MAGIC, 15)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -402,10 +415,9 @@ struct f2fs_flush_device {
 #define DEF_MIN_INLINE_SIZE            1
 static inline int get_extra_isize(struct inode *inode);
 static inline int get_inline_xattr_addrs(struct inode *inode);
-#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode)
 #define MAX_INLINE_DATA(inode) (sizeof(__le32) *                       \
                                (CUR_ADDRS_PER_INODE(inode) -           \
-                               F2FS_INLINE_XATTR_ADDRS(inode) -        \
+                               get_inline_xattr_addrs(inode) - \
                                DEF_INLINE_RESERVED_SIZE))
 
 /* for inline dir */
@@ -542,6 +554,8 @@ struct f2fs_map_blocks {
        unsigned int m_len;
        unsigned int m_flags;
        pgoff_t *m_next_pgofs;          /* point next possible non-hole pgofs */
+       pgoff_t *m_next_extent;         /* point to next possible extent */
+       int m_seg_type;
 };
 
 /* for flag in get_data_block */
@@ -551,6 +565,7 @@ enum {
        F2FS_GET_BLOCK_BMAP,
        F2FS_GET_BLOCK_PRE_DIO,
        F2FS_GET_BLOCK_PRE_AIO,
+       F2FS_GET_BLOCK_PRECACHE,
 };
 
 /*
@@ -583,7 +598,10 @@ struct f2fs_inode_info {
        unsigned long i_flags;          /* keep an inode flags for ioctl */
        unsigned char i_advise;         /* use to give file attribute hints */
        unsigned char i_dir_level;      /* use for dentry level for large dir */
-       unsigned int i_current_depth;   /* use only in directory structure */
+       union {
+               unsigned int i_current_depth;   /* only for directory depth */
+               unsigned short i_gc_failures;   /* only for regular file */
+       };
        unsigned int i_pino;            /* parent inode number */
        umode_t i_acl_mode;             /* keep file acl mode temporarily */
 
@@ -618,6 +636,7 @@ struct f2fs_inode_info {
        int i_extra_isize;              /* size of extra space located in i_addr */
        kprojid_t i_projid;             /* id for project quota */
        int i_inline_xattr_size;        /* inline xattr size */
+       struct timespec i_crtime;       /* inode creation time */
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -922,6 +941,7 @@ enum cp_reason_type {
        CP_NODE_NEED_CP,
        CP_FASTBOOT_MODE,
        CP_SPEC_LOG_NUM,
+       CP_RECOVER_DIR,
 };
 
 enum iostat_type {
@@ -957,6 +977,7 @@ struct f2fs_io_info {
        int need_lock;          /* indicate we need to lock cp_rwsem */
        bool in_list;           /* indicate fio is in io_list */
        enum iostat_type io_type;       /* io type */
+       struct writeback_control *io_wbc; /* writeback control */
 };
 
 #define is_read_io(rw) ((rw) == READ)
@@ -1093,6 +1114,7 @@ struct f2fs_sb_info {
        int dir_level;                          /* directory level */
        int inline_xattr_size;                  /* inline xattr size */
        unsigned int trigger_ssr_threshold;     /* threshold to trigger ssr */
+       int readdir_ra;                         /* readahead inode in readdir */
 
        block_t user_block_count;               /* # of user blocks */
        block_t total_valid_block_count;        /* # of valid blocks */
@@ -1100,6 +1122,11 @@ struct f2fs_sb_info {
        block_t last_valid_block_count;         /* for recovery */
        block_t reserved_blocks;                /* configurable reserved blocks */
        block_t current_reserved_blocks;        /* current reserved blocks */
+       block_t root_reserved_blocks;           /* root reserved blocks */
+       kuid_t s_resuid;                        /* reserved blocks for uid */
+       kgid_t s_resgid;                        /* reserved blocks for gid */
+
+       unsigned int nquota_files;              /* # of quota sysfile */
 
        u32 s_next_generation;                  /* for NFS support */
 
@@ -1124,6 +1151,9 @@ struct f2fs_sb_info {
        /* threshold for converting bg victims for fg */
        u64 fggc_threshold;
 
+       /* threshold for gc trials on pinned files */
+       u64 gc_pin_file_threshold;
+
        /* maximum # of trials to find a victim segment for SSR and GC */
        unsigned int max_victim_search;
 
@@ -1250,33 +1280,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
 /*
  * Inline functions
  */
-static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
-                          unsigned int length)
-{
-       SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
-       u32 *ctx = (u32 *)shash_desc_ctx(shash);
-       u32 retval;
-       int err;
-
-       shash->tfm = sbi->s_chksum_driver;
-       shash->flags = 0;
-       *ctx = F2FS_SUPER_MAGIC;
-
-       err = crypto_shash_update(shash, address, length);
-       BUG_ON(err);
-
-       retval = *ctx;
-       barrier_data(ctx);
-       return retval;
-}
-
-static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
-                                 void *buf, size_t buf_size)
-{
-       return f2fs_crc32(sbi, buf, buf_size) == blk_crc;
-}
-
-static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc,
+static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc,
                              const void *address, unsigned int length)
 {
        struct {
@@ -1297,6 +1301,24 @@ static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc,
        return *(u32 *)desc.ctx;
 }
 
+static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
+                          unsigned int length)
+{
+       return __f2fs_crc32(sbi, F2FS_SUPER_MAGIC, address, length);
+}
+
+static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
+                                 void *buf, size_t buf_size)
+{
+       return f2fs_crc32(sbi, buf, buf_size) == blk_crc;
+}
+
+static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc,
+                             const void *address, unsigned int length)
+{
+       return __f2fs_crc32(sbi, crc, address, length);
+}
+
 static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
 {
        return container_of(inode, struct f2fs_inode_info, vfs_inode);
@@ -1555,6 +1577,25 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
        return ofs == XATTR_NODE_OFFSET;
 }
 
+static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
+                                       struct inode *inode)
+{
+       if (!inode)
+               return true;
+       if (!test_opt(sbi, RESERVE_ROOT))
+               return false;
+       if (IS_NOQUOTA(inode))
+               return true;
+       if (capable(CAP_SYS_RESOURCE))
+               return true;
+       if (uid_eq(sbi->s_resuid, current_fsuid()))
+               return true;
+       if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
+                                       in_group_p(sbi->s_resgid))
+               return true;
+       return false;
+}
+
 static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
                                 struct inode *inode, blkcnt_t *count)
@@ -1584,11 +1625,17 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
        sbi->total_valid_block_count += (block_t)(*count);
        avail_user_block_count = sbi->user_block_count -
                                        sbi->current_reserved_blocks;
+
+       if (!__allow_reserved_blocks(sbi, inode))
+               avail_user_block_count -= sbi->root_reserved_blocks;
+
        if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
                diff = sbi->total_valid_block_count - avail_user_block_count;
+               if (diff > *count)
+                       diff = *count;
                *count -= diff;
                release = diff;
-               sbi->total_valid_block_count = avail_user_block_count;
+               sbi->total_valid_block_count -= diff;
                if (!*count) {
                        spin_unlock(&sbi->stat_lock);
                        percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
@@ -1597,7 +1644,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
        }
        spin_unlock(&sbi->stat_lock);
 
-       if (release)
+       if (unlikely(release))
                dquot_release_reservation_block(inode, release);
        f2fs_i_blocks_write(inode, *count, true, true);
        return 0;
@@ -1777,9 +1824,13 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
        spin_lock(&sbi->stat_lock);
 
-       valid_block_count = sbi->total_valid_block_count + 1;
-       if (unlikely(valid_block_count + sbi->current_reserved_blocks >
-                                               sbi->user_block_count)) {
+       valid_block_count = sbi->total_valid_block_count +
+                                       sbi->current_reserved_blocks + 1;
+
+       if (!__allow_reserved_blocks(sbi, inode))
+               valid_block_count += sbi->root_reserved_blocks;
+
+       if (unlikely(valid_block_count > sbi->user_block_count)) {
                spin_unlock(&sbi->stat_lock);
                goto enospc;
        }
@@ -1992,11 +2043,11 @@ static inline block_t datablock_addr(struct inode *inode,
        raw_node = F2FS_NODE(node_page);
 
        /* from GC path only */
-       if (!inode) {
-               if (is_inode)
+       if (is_inode) {
+               if (!inode)
                        base = offset_in_addr(&raw_node->i);
-       } else if (f2fs_has_extra_attr(inode) && is_inode) {
-               base = get_extra_isize(inode);
+               else if (f2fs_has_extra_attr(inode))
+                       base = get_extra_isize(inode);
        }
 
        addr_array = blkaddr_in_node(raw_node);
@@ -2107,6 +2158,7 @@ enum {
        FI_HOT_DATA,            /* indicate file is hot */
        FI_EXTRA_ATTR,          /* indicate file has extra attribute */
        FI_PROJ_INHERIT,        /* indicate file inherits projectid */
+       FI_PIN_FILE,            /* indicate file should not be gced */
 };
 
 static inline void __mark_inode_dirty_flag(struct inode *inode,
@@ -2116,10 +2168,12 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
        case FI_INLINE_XATTR:
        case FI_INLINE_DATA:
        case FI_INLINE_DENTRY:
+       case FI_NEW_INODE:
                if (set)
                        return;
        case FI_DATA_EXIST:
        case FI_INLINE_DOTS:
+       case FI_PIN_FILE:
                f2fs_mark_inode_dirty_sync(inode, true);
        }
 }
@@ -2200,6 +2254,13 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
        f2fs_mark_inode_dirty_sync(inode, true);
 }
 
+static inline void f2fs_i_gc_failures_write(struct inode *inode,
+                                       unsigned int count)
+{
+       F2FS_I(inode)->i_gc_failures = count;
+       f2fs_mark_inode_dirty_sync(inode, true);
+}
+
 static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid)
 {
        F2FS_I(inode)->i_xattr_nid = xnid;
@@ -2228,6 +2289,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
                set_bit(FI_INLINE_DOTS, &fi->flags);
        if (ri->i_inline & F2FS_EXTRA_ATTR)
                set_bit(FI_EXTRA_ATTR, &fi->flags);
+       if (ri->i_inline & F2FS_PIN_FILE)
+               set_bit(FI_PIN_FILE, &fi->flags);
 }
 
 static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri)
@@ -2246,6 +2309,8 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri)
                ri->i_inline |= F2FS_INLINE_DOTS;
        if (is_inode_flag_set(inode, FI_EXTRA_ATTR))
                ri->i_inline |= F2FS_EXTRA_ATTR;
+       if (is_inode_flag_set(inode, FI_PIN_FILE))
+               ri->i_inline |= F2FS_PIN_FILE;
 }
 
 static inline int f2fs_has_extra_attr(struct inode *inode)
@@ -2260,7 +2325,7 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
 
 static inline unsigned int addrs_per_inode(struct inode *inode)
 {
-       return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode);
+       return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode);
 }
 
 static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2268,7 +2333,7 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
        struct f2fs_inode *ri = F2FS_INODE(page);
 
        return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
-                                       F2FS_INLINE_XATTR_ADDRS(inode)]);
+                                       get_inline_xattr_addrs(inode)]);
 }
 
 static inline int inline_xattr_size(struct inode *inode)
@@ -2291,6 +2356,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode)
        return is_inode_flag_set(inode, FI_INLINE_DOTS);
 }
 
+static inline bool f2fs_is_pinned_file(struct inode *inode)
+{
+       return is_inode_flag_set(inode, FI_PIN_FILE);
+}
+
 static inline bool f2fs_is_atomic_file(struct inode *inode)
 {
        return is_inode_flag_set(inode, FI_ATOMIC_FILE);
@@ -2418,12 +2488,35 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
        return kmalloc(size, flags);
 }
 
+static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
+                                       size_t size, gfp_t flags)
+{
+       return f2fs_kmalloc(sbi, size, flags | __GFP_ZERO);
+}
+
+static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
+                                       size_t size, gfp_t flags)
+{
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+       if (time_to_inject(sbi, FAULT_KVMALLOC)) {
+               f2fs_show_injection_info(FAULT_KVMALLOC);
+               return NULL;
+       }
+#endif
+       return kvmalloc(size, flags);
+}
+
+static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi,
+                                       size_t size, gfp_t flags)
+{
+       return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO);
+}
+
 static inline int get_extra_isize(struct inode *inode)
 {
        return F2FS_I(inode)->i_extra_isize / sizeof(__le32);
 }
 
-static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb);
 static inline int get_inline_xattr_addrs(struct inode *inode)
 {
        return F2FS_I(inode)->i_inline_xattr_size;
@@ -2479,9 +2572,11 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags);
 int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
 int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
-int truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+void truncate_data_blocks_range(struct dnode_of_data *dn, int count);
+int f2fs_precache_extents(struct inode *inode);
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int f2fs_pin_file_control(struct inode *inode, bool inc);
 
 /*
  * inode.c
@@ -2492,8 +2587,8 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page);
 struct inode *f2fs_iget(struct super_block *sb, unsigned long ino);
 struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino);
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink);
-int update_inode(struct inode *inode, struct page *node_page);
-int update_inode_page(struct inode *inode);
+void update_inode(struct inode *inode, struct page *node_page);
+void update_inode_page(struct inode *inode);
 int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void f2fs_evict_inode(struct inode *inode);
 void handle_failed_inode(struct inode *inode);
@@ -2604,10 +2699,9 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
 void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
 int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
 void recover_inline_xattr(struct inode *inode, struct page *page);
-int recover_xattr_data(struct inode *inode, struct page *page,
-                       block_t blkaddr);
+int recover_xattr_data(struct inode *inode, struct page *page);
 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-int restore_node_summary(struct f2fs_sb_info *sbi,
+void restore_node_summary(struct f2fs_sb_info *sbi,
                        unsigned int segno, struct f2fs_summary_block *sum);
 void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 int build_node_manager(struct f2fs_sb_info *sbi);
@@ -2634,6 +2728,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
                                                unsigned int granularity);
+void drop_discard_cmd(struct f2fs_sb_info *sbi);
 void stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
@@ -2672,6 +2767,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi);
 void destroy_segment_manager(struct f2fs_sb_info *sbi);
 int __init create_segment_manager_caches(void);
 void destroy_segment_manager_caches(void);
+int rw_hint_to_seg_type(enum rw_hint hint);
 
 /*
  * checkpoint.c
@@ -2741,6 +2837,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                        int create, int flag);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        u64 start, u64 len);
+bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
+bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
 void f2fs_set_page_dirty_nobuffers(struct page *page);
 int __f2fs_write_data_pages(struct address_space *mapping,
                                                struct writeback_control *wbc,
@@ -3109,6 +3207,11 @@ static inline int f2fs_sb_has_quota_ino(struct super_block *sb)
        return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO);
 }
 
+static inline int f2fs_sb_has_inode_crtime(struct super_block *sb)
+{
+       return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME);
+}
+
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline int get_blkz_type(struct f2fs_sb_info *sbi,
                        struct block_device *bdev, block_t blkaddr)
index 25038cfc921732ebcf410e6994865075d126da99..1d6a862bd25f1d61709c50558539adf21866a00f 100644 (file)
@@ -165,6 +165,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
                cp_reason = CP_FASTBOOT_MODE;
        else if (sbi->active_logs == 2)
                cp_reason = CP_SPEC_LOG_NUM;
+       else if (need_dentry_mark(sbi, inode->i_ino) &&
+               exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO))
+               cp_reason = CP_RECOVER_DIR;
 
        return cp_reason;
 }
@@ -471,26 +474,14 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int f2fs_file_open(struct inode *inode, struct file *filp)
 {
-       struct dentry *dir;
+       int err = fscrypt_file_open(inode, filp);
 
-       if (f2fs_encrypted_inode(inode)) {
-               int ret = fscrypt_get_encryption_info(inode);
-               if (ret)
-                       return -EACCES;
-               if (!fscrypt_has_encryption_key(inode))
-                       return -ENOKEY;
-       }
-       dir = dget_parent(file_dentry(filp));
-       if (f2fs_encrypted_inode(d_inode(dir)) &&
-                       !fscrypt_has_permitted_context(d_inode(dir), inode)) {
-               dput(dir);
-               return -EPERM;
-       }
-       dput(dir);
+       if (err)
+               return err;
        return dquot_file_open(inode, filp);
 }
 
-int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+void truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        struct f2fs_node *raw_node;
@@ -533,7 +524,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
        f2fs_update_time(sbi, REQ_TIME);
        trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
                                         dn->ofs_in_node, nr_free);
-       return nr_free;
 }
 
 void truncate_data_blocks(struct dnode_of_data *dn)
@@ -681,8 +671,17 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
 {
        struct inode *inode = d_inode(path->dentry);
        struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_inode *ri;
        unsigned int flags;
 
+       if (f2fs_has_extra_attr(inode) &&
+                       f2fs_sb_has_inode_crtime(inode->i_sb) &&
+                       F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
+               stat->result_mask |= STATX_BTIME;
+               stat->btime.tv_sec = fi->i_crtime.tv_sec;
+               stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
+       }
+
        flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL);
        if (flags & FS_APPEND_FL)
                stat->attributes |= STATX_ATTR_APPEND;
@@ -754,6 +753,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
        if (err)
                return err;
 
+       err = fscrypt_prepare_setattr(dentry, attr);
+       if (err)
+               return err;
+
        if (is_quota_modification(inode, attr)) {
                err = dquot_initialize(inode);
                if (err)
@@ -769,14 +772,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               if (f2fs_encrypted_inode(inode)) {
-                       err = fscrypt_get_encryption_info(inode);
-                       if (err)
-                               return err;
-                       if (!fscrypt_has_encryption_key(inode))
-                               return -ENOKEY;
-               }
-
                if (attr->ia_size <= i_size_read(inode)) {
                        down_write(&F2FS_I(inode)->i_mmap_sem);
                        truncate_setsize(inode, attr->ia_size);
@@ -1113,11 +1108,13 @@ static int __exchange_data_block(struct inode *src_inode,
        while (len) {
                olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
 
-               src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
+               src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
+                                       sizeof(block_t) * olen, GFP_KERNEL);
                if (!src_blkaddr)
                        return -ENOMEM;
 
-               do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL);
+               do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
+                                       sizeof(int) * olen, GFP_KERNEL);
                if (!do_replace) {
                        kvfree(src_blkaddr);
                        return -ENOMEM;
@@ -1185,14 +1182,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        pg_start = offset >> PAGE_SHIFT;
        pg_end = (offset + len) >> PAGE_SHIFT;
 
+       /* avoid gc operation during block exchange */
+       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
        down_write(&F2FS_I(inode)->i_mmap_sem);
        /* write out all dirty pages from offset */
        ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
        if (ret)
-               goto out;
-
-       /* avoid gc operation during block exchange */
-       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+               goto out_unlock;
 
        truncate_pagecache(inode, offset);
 
@@ -1211,9 +1208,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (!ret)
                f2fs_i_size_write(inode, new_size);
 out_unlock:
-       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-out:
        up_write(&F2FS_I(inode)->i_mmap_sem);
+       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
        return ret;
 }
 
@@ -1384,6 +1380,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
        f2fs_balance_fs(sbi, true);
 
+       /* avoid gc operation during block exchange */
+       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
        down_write(&F2FS_I(inode)->i_mmap_sem);
        ret = truncate_blocks(inode, i_size_read(inode), true);
        if (ret)
@@ -1394,9 +1393,6 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                goto out;
 
-       /* avoid gc operation during block exchange */
-       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
-
        truncate_pagecache(inode, offset);
 
        pg_start = offset >> PAGE_SHIFT;
@@ -1424,10 +1420,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
        if (!ret)
                f2fs_i_size_write(inode, new_size);
-
-       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 out:
        up_write(&F2FS_I(inode)->i_mmap_sem);
+       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
        return ret;
 }
 
@@ -1435,7 +1430,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
                                        loff_t len, int mode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
+       struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
+                       .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
        pgoff_t pg_end;
        loff_t new_size = i_size_read(inode);
        loff_t off_end;
@@ -1851,14 +1847,20 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
        switch (in) {
        case F2FS_GOING_DOWN_FULLSYNC:
                sb = freeze_bdev(sb->s_bdev);
-               if (sb && !IS_ERR(sb)) {
+               if (IS_ERR(sb)) {
+                       ret = PTR_ERR(sb);
+                       goto out;
+               }
+               if (sb) {
                        f2fs_stop_checkpoint(sbi, false);
                        thaw_bdev(sb->s_bdev, sb);
                }
                break;
        case F2FS_GOING_DOWN_METASYNC:
                /* do checkpoint only */
-               f2fs_sync_fs(sb, 1);
+               ret = f2fs_sync_fs(sb, 1);
+               if (ret)
+                       goto out;
                f2fs_stop_checkpoint(sbi, false);
                break;
        case F2FS_GOING_DOWN_NOSYNC:
@@ -1872,6 +1874,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
                ret = -EINVAL;
                goto out;
        }
+
+       stop_gc_thread(sbi);
+       stop_discard_thread(sbi);
+
+       drop_discard_cmd(sbi);
+       clear_opt(sbi, DISCARD);
+
        f2fs_update_time(sbi, REQ_TIME);
 out:
        mnt_drop_write_file(filp);
@@ -2083,9 +2092,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                                        struct f2fs_defragment *range)
 {
        struct inode *inode = file_inode(filp);
-       struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
+       struct f2fs_map_blocks map = { .m_next_extent = NULL,
+                                       .m_seg_type = NO_CHECK_TYPE };
        struct extent_info ei = {0,0,0};
-       pgoff_t pg_start, pg_end;
+       pgoff_t pg_start, pg_end, next_pgofs;
        unsigned int blk_per_seg = sbi->blocks_per_seg;
        unsigned int total = 0, sec_num;
        block_t blk_end = 0;
@@ -2093,7 +2103,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
        int err;
 
        /* if in-place-update policy is enabled, don't waste time here */
-       if (need_inplace_update_policy(inode, NULL))
+       if (should_update_inplace(inode, NULL))
                return -EINVAL;
 
        pg_start = range->start >> PAGE_SHIFT;
@@ -2119,6 +2129,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
        }
 
        map.m_lblk = pg_start;
+       map.m_next_pgofs = &next_pgofs;
 
        /*
         * lookup mapping info in dnode page cache, skip defragmenting if all
@@ -2132,14 +2143,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                        goto out;
 
                if (!(map.m_flags & F2FS_MAP_FLAGS)) {
-                       map.m_lblk++;
+                       map.m_lblk = next_pgofs;
                        continue;
                }
 
-               if (blk_end && blk_end != map.m_pblk) {
+               if (blk_end && blk_end != map.m_pblk)
                        fragmented = true;
-                       break;
-               }
+
+               /* record total count of block that we're going to move */
+               total += map.m_len;
+
                blk_end = map.m_pblk + map.m_len;
 
                map.m_lblk += map.m_len;
@@ -2148,10 +2161,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
        if (!fragmented)
                goto out;
 
-       map.m_lblk = pg_start;
-       map.m_len = pg_end - pg_start;
-
-       sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
+       sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
 
        /*
         * make sure there are enough free section for LFS allocation, this can
@@ -2163,6 +2173,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                goto out;
        }
 
+       map.m_lblk = pg_start;
+       map.m_len = pg_end - pg_start;
+       total = 0;
+
        while (map.m_lblk < pg_end) {
                pgoff_t idx;
                int cnt = 0;
@@ -2174,7 +2188,7 @@ do_map:
                        goto clear_out;
 
                if (!(map.m_flags & F2FS_MAP_FLAGS)) {
-                       map.m_lblk++;
+                       map.m_lblk = next_pgofs;
                        continue;
                }
 
@@ -2680,6 +2694,125 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
        return 0;
 }
 
+/*
+ * Account a GC attempt against a pinned file and decide whether the pin
+ * may stay.
+ *
+ * @inode: file whose pin state is being checked
+ * @inc:   when true, bump the inode's i_gc_failures counter first
+ *
+ * Returns 0 while i_gc_failures does not exceed sbi->gc_pin_file_threshold.
+ * Once it does, a warning is logged, FI_PIN_FILE is cleared from the inode
+ * and -EAGAIN is returned.
+ */
+int f2fs_pin_file_control(struct inode *inode, bool inc)
+{
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+       /* Use i_gc_failures for normal file as a risk signal. */
+       if (inc)
+               f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
+
+       if (fi->i_gc_failures > sbi->gc_pin_file_threshold) {
+               /*
+                * No trailing newline in the format.
+                * NOTE(review): f2fs_msg() is believed to append one itself;
+                * confirm against its definition.
+                */
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                       "%s: Enable GC = ino %lx after %x GC trials",
+                       __func__, inode->i_ino, fi->i_gc_failures);
+               clear_inode_flag(inode, FI_PIN_FILE);
+               return -EAGAIN;
+       }
+       return 0;
+}
+
+/*
+ * F2FS_IOC_SET_PIN_FILE handler: pin or unpin a regular file.
+ *
+ * @filp: open file the ioctl was issued on
+ * @arg:  user pointer to a __u32; zero requests unpin, non-zero requests pin
+ *
+ * Early failures, in order: -EACCES (caller is neither owner nor capable),
+ * -EFAULT (cannot read @arg), -EINVAL (not a regular file), -EROFS
+ * (read-only filesystem), or whatever mnt_want_write_file() reports.
+ *
+ * With the inode locked: -EINVAL if should_update_outplace() is true for
+ * the inode, -EAGAIN if f2fs_pin_file_control() refuses the pin, or the
+ * error from f2fs_convert_inline_inode().  A successful pin returns the
+ * inode's current i_gc_failures value; a successful unpin returns 0 after
+ * clearing FI_PIN_FILE and setting i_gc_failures to 1.
+ */
+static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = file_inode(filp);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       __u32 pin;
+       int ret;
+
+       if (!inode_owner_or_capable(inode))
+               return -EACCES;
+
+       if (get_user(pin, (__u32 __user *)arg))
+               return -EFAULT;
+
+       if (!S_ISREG(inode->i_mode))
+               return -EINVAL;
+
+       if (f2fs_readonly(sbi->sb))
+               return -EROFS;
+
+       ret = mnt_want_write_file(filp);
+       if (ret)
+               return ret;
+
+       inode_lock(inode);
+
+       if (should_update_outplace(inode, NULL)) {
+               ret = -EINVAL;
+               goto unlock;
+       }
+
+       if (pin) {
+               if (f2fs_pin_file_control(inode, false)) {
+                       ret = -EAGAIN;
+                       goto unlock;
+               }
+
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       goto unlock;
+
+               set_inode_flag(inode, FI_PIN_FILE);
+               /* success value handed back to the caller is the counter */
+               ret = fi->i_gc_failures;
+       } else {
+               /* ret is still 0 here from mnt_want_write_file() */
+               clear_inode_flag(inode, FI_PIN_FILE);
+               fi->i_gc_failures = 1;
+       }
+
+       /* reached only on the successful pin/unpin paths */
+       f2fs_update_time(sbi, REQ_TIME);
+unlock:
+       inode_unlock(inode);
+       mnt_drop_write_file(filp);
+       return ret;
+}
+
+/*
+ * F2FS_IOC_GET_PIN_FILE handler.
+ *
+ * Stores a 32-bit value through the user pointer in @arg: the inode's
+ * i_gc_failures counter when FI_PIN_FILE is set, otherwise 0.  Returns
+ * the result of put_user().
+ */
+static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = file_inode(filp);
+       u32 __user *uptr = (u32 __user *)arg;
+       __u32 failures;
+
+       failures = is_inode_flag_set(inode, FI_PIN_FILE) ?
+                               F2FS_I(inode)->i_gc_failures : 0;
+
+       return put_user(failures, uptr);
+}
+
+/*
+ * Run f2fs_map_blocks() in F2FS_GET_BLOCK_PRECACHE mode over the logical
+ * block range [0, sbi->max_file_blocks) of @inode, one call per step.
+ *
+ * Each call is made with fi->dio_rwsem[WRITE] held for writing; the next
+ * starting block is taken from the m_next_extent slot handed to
+ * f2fs_map_blocks().
+ *
+ * Returns -EOPNOTSUPP if FI_NO_EXTENT is set on the inode, the first error
+ * returned by f2fs_map_blocks(), or 0 once the whole range has been walked.
+ */
+int f2fs_precache_extents(struct inode *inode)
+{
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       pgoff_t m_next_extent;
+       /* designated initializer zeroes every field not named here */
+       struct f2fs_map_blocks map = {
+               .m_lblk = 0,
+               .m_next_pgofs = NULL,
+               .m_next_extent = &m_next_extent,
+               .m_seg_type = NO_CHECK_TYPE,
+       };
+       loff_t end;
+       int err;
+
+       if (is_inode_flag_set(inode, FI_NO_EXTENT))
+               return -EOPNOTSUPP;
+
+       end = F2FS_I_SB(inode)->max_file_blocks;
+
+       while (map.m_lblk < end) {
+               map.m_len = end - map.m_lblk;
+
+               down_write(&fi->dio_rwsem[WRITE]);
+               err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
+               up_write(&fi->dio_rwsem[WRITE]);
+               if (err)
+                       return err;
+
+               /*
+                * NOTE(review): relies on f2fs_map_blocks() having written
+                * m_next_extent on success - confirm for every return path.
+                */
+               map.m_lblk = m_next_extent;
+       }
+
+       /*
+        * Every failure returned above; do not read err here, it is never
+        * assigned when the loop body does not run.
+        */
+       return 0;
+}
+
+/*
+ * F2FS_IOC_PRECACHE_EXTENTS handler: @arg is unused, the request is
+ * forwarded to f2fs_precache_extents() for the file's inode.
+ */
+static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
+{
+       struct inode *inode = file_inode(filp);
+
+       return f2fs_precache_extents(inode);
+}
+
 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -2730,6 +2863,12 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return f2fs_ioc_fsgetxattr(filp, arg);
        case F2FS_IOC_FSSETXATTR:
                return f2fs_ioc_fssetxattr(filp, arg);
+       case F2FS_IOC_GET_PIN_FILE:
+               return f2fs_ioc_get_pin_file(filp, arg);
+       case F2FS_IOC_SET_PIN_FILE:
+               return f2fs_ioc_set_pin_file(filp, arg);
+       case F2FS_IOC_PRECACHE_EXTENTS:
+               return f2fs_ioc_precache_extents(filp, arg);
        default:
                return -ENOTTY;
        }
@@ -2805,6 +2944,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case F2FS_IOC_GET_FEATURES:
        case F2FS_IOC_FSGETXATTR:
        case F2FS_IOC_FSSETXATTR:
+       case F2FS_IOC_GET_PIN_FILE:
+       case F2FS_IOC_SET_PIN_FILE:
+       case F2FS_IOC_PRECACHE_EXTENTS:
                break;
        default:
                return -ENOIOCTLCMD;
index 5d5bba462f26390512a50c4359ebc99b3b3481dc..3b26aa19430b86436f424a9039a930078d7addc1 100644 (file)
@@ -624,6 +624,11 @@ static void move_data_block(struct inode *inode, block_t bidx,
        if (f2fs_is_atomic_file(inode))
                goto out;
 
+       if (f2fs_is_pinned_file(inode)) {
+               f2fs_pin_file_control(inode, true);
+               goto out;
+       }
+
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
        if (err)
@@ -686,7 +691,12 @@ static void move_data_block(struct inode *inode, block_t bidx,
        fio.op = REQ_OP_WRITE;
        fio.op_flags = REQ_SYNC;
        fio.new_blkaddr = newaddr;
-       f2fs_submit_page_write(&fio);
+       err = f2fs_submit_page_write(&fio);
+       if (err) {
+               if (PageWriteback(fio.encrypted_page))
+                       end_page_writeback(fio.encrypted_page);
+               goto put_page_out;
+       }
 
        f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);
 
@@ -720,6 +730,11 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
 
        if (f2fs_is_atomic_file(inode))
                goto out;
+       if (f2fs_is_pinned_file(inode)) {
+               if (gc_type == FG_GC)
+                       f2fs_pin_file_control(inode, true);
+               goto out;
+       }
 
        if (gc_type == BG_GC) {
                if (PageWriteback(page))
@@ -1091,6 +1106,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
 
        sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
                                BLKS_PER_SEC(sbi), (main_count - resv_count));
+       sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
 
        /* give warm/cold data area from slower device */
        if (sbi->s_ndevs && sbi->segs_per_sec == 1)
index 9325191fab2d6f4d571541c79c55ede3d5e47775..b0045d4c8d1e6f74f0da0e0ee1ef9376e0289f33 100644 (file)
@@ -20,6 +20,8 @@
 #define LIMIT_INVALID_BLOCK    40 /* percentage over total user space */
 #define LIMIT_FREE_BLOCK       40 /* percentage over invalid + free space */
 
+#define DEF_GC_FAILED_PINNED_FILES     2048
+
 /* Search max. number of dirty segments to select a victim segment */
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
index b4c4f2b2530404d5dc5b60bafab86a910c1a7879..89c838bfb06789a8c7ba339b1968b5e8adb0bcca 100644 (file)
@@ -22,6 +22,9 @@
 
 void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
 {
+       if (is_inode_flag_set(inode, FI_NEW_INODE))
+               return;
+
        if (f2fs_inode_dirtied(inode, sync))
                return;
 
@@ -275,6 +278,12 @@ static int do_read_inode(struct inode *inode)
                i_projid = F2FS_DEF_PROJID;
        fi->i_projid = make_kprojid(&init_user_ns, i_projid);
 
+       if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) &&
+                       F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
+               fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime);
+               fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
+       }
+
        f2fs_put_page(node_page, 1);
 
        stat_inc_inline_xattr(inode);
@@ -360,14 +369,15 @@ retry:
        return inode;
 }
 
-int update_inode(struct inode *inode, struct page *node_page)
+void update_inode(struct inode *inode, struct page *node_page)
 {
        struct f2fs_inode *ri;
        struct extent_tree *et = F2FS_I(inode)->extent_tree;
 
-       f2fs_inode_synced(inode);
-
        f2fs_wait_on_page_writeback(node_page, NODE, true);
+       set_page_dirty(node_page);
+
+       f2fs_inode_synced(inode);
 
        ri = F2FS_INODE(node_page);
 
@@ -417,6 +427,15 @@ int update_inode(struct inode *inode, struct page *node_page)
                                                F2FS_I(inode)->i_projid);
                        ri->i_projid = cpu_to_le32(i_projid);
                }
+
+               if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) &&
+                       F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
+                                                               i_crtime)) {
+                       ri->i_crtime =
+                               cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec);
+                       ri->i_crtime_nsec =
+                               cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec);
+               }
        }
 
        __set_inode_rdev(inode, ri);
@@ -426,14 +445,12 @@ int update_inode(struct inode *inode, struct page *node_page)
        if (inode->i_nlink == 0)
                clear_inline_node(node_page);
 
-       return set_page_dirty(node_page);
 }
 
-int update_inode_page(struct inode *inode)
+void update_inode_page(struct inode *inode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *node_page;
-       int ret = 0;
 retry:
        node_page = get_node_page(sbi, inode->i_ino);
        if (IS_ERR(node_page)) {
@@ -444,11 +461,10 @@ retry:
                } else if (err != -ENOENT) {
                        f2fs_stop_checkpoint(sbi, false);
                }
-               return 0;
+               return;
        }
-       ret = update_inode(inode, node_page);
+       update_inode(inode, node_page);
        f2fs_put_page(node_page, 1);
-       return ret;
 }
 
 int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
index 28bdf8828e73cdaa785ff988c94d61aead3af547..c4c94c7e9f4fb332a6684d4d46b1efb232d27742 100644 (file)
@@ -50,7 +50,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 
        inode->i_ino = ino;
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode->i_ctime =
+                       F2FS_I(inode)->i_crtime = current_time(inode);
        inode->i_generation = sbi->s_next_generation++;
 
        err = insert_inode_locked(inode);
@@ -74,12 +75,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
        if (err)
                goto fail_drop;
 
+       set_inode_flag(inode, FI_NEW_INODE);
+
        /* If the directory encrypted, then we should encrypt the inode. */
        if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
                f2fs_set_encrypted_inode(inode);
 
-       set_inode_flag(inode, FI_NEW_INODE);
-
        if (f2fs_sb_has_extra_attr(sbi->sb)) {
                set_inode_flag(inode, FI_EXTRA_ATTR);
                F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
@@ -240,9 +241,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
 
-       if (f2fs_encrypted_inode(dir) &&
-                       !fscrypt_has_permitted_context(dir, inode))
-               return -EPERM;
+       err = fscrypt_prepare_link(old_dentry, dir, dentry);
+       if (err)
+               return err;
 
        if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(dir)->i_projid,
@@ -357,20 +358,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 
        trace_f2fs_lookup_start(dir, dentry, flags);
 
-       if (f2fs_encrypted_inode(dir)) {
-               err = fscrypt_get_encryption_info(dir);
-
-               /*
-                * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
-                * created while the directory was encrypted and we
-                * don't have access to the key.
-                */
-               if (fscrypt_has_encryption_key(dir))
-                       fscrypt_set_encrypted_dentry(dentry);
-               fscrypt_set_d_op(dentry);
-               if (err && err != -ENOKEY)
-                       goto out;
-       }
+       err = fscrypt_prepare_lookup(dir, dentry, flags);
+       if (err)
+               goto out;
 
        if (dentry->d_name.len > F2FS_NAME_LEN) {
                err = -ENAMETOOLONG;
@@ -544,7 +534,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
                struct qstr istr = QSTR_INIT(symname, len);
                struct fscrypt_str ostr;
 
-               sd = kzalloc(disk_link.len, GFP_NOFS);
+               sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS);
                if (!sd) {
                        err = -ENOMEM;
                        goto err_out;
@@ -800,18 +790,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
 
-       if ((f2fs_encrypted_inode(old_dir) &&
-                       !fscrypt_has_encryption_key(old_dir)) ||
-                       (f2fs_encrypted_inode(new_dir) &&
-                       !fscrypt_has_encryption_key(new_dir)))
-               return -ENOKEY;
-
-       if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
-                       !fscrypt_has_permitted_context(new_dir, old_inode)) {
-               err = -EPERM;
-               goto out;
-       }
-
        if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(new_dir)->i_projid,
                        F2FS_I(old_dentry->d_inode)->i_projid)))
@@ -958,6 +936,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                }
                f2fs_i_links_write(old_dir, false);
        }
+       add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
 
        f2fs_unlock_op(sbi);
 
@@ -1002,18 +981,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
 
-       if ((f2fs_encrypted_inode(old_dir) &&
-                       !fscrypt_has_encryption_key(old_dir)) ||
-                       (f2fs_encrypted_inode(new_dir) &&
-                       !fscrypt_has_encryption_key(new_dir)))
-               return -ENOKEY;
-
-       if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
-                       (old_dir != new_dir) &&
-                       (!fscrypt_has_permitted_context(new_dir, old_inode) ||
-                        !fscrypt_has_permitted_context(old_dir, new_inode)))
-               return -EPERM;
-
        if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        !projid_eq(F2FS_I(new_dir)->i_projid,
                        F2FS_I(old_dentry->d_inode)->i_projid)) ||
@@ -1124,6 +1091,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        }
        f2fs_mark_inode_dirty_sync(new_dir, false);
 
+       add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO);
+       add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
+
        f2fs_unlock_op(sbi);
 
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
@@ -1153,9 +1123,16 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
+       int err;
+
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
 
+       err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
+                                    flags);
+       if (err)
+               return err;
+
        if (flags & RENAME_EXCHANGE) {
                return f2fs_cross_rename(old_dir, old_dentry,
                                         new_dir, new_dentry);
index fe1fc662af2a8cf78516ae65b8f95f76997efa28..7cded843cf1833c82de053b8e9ef240d8a39c3ab 100644 (file)
@@ -143,11 +143,9 @@ static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
        struct nat_entry *new;
 
        if (no_fail)
-               new = f2fs_kmem_cache_alloc(nat_entry_slab,
-                                               GFP_NOFS | __GFP_ZERO);
+               new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
        else
-               new = kmem_cache_alloc(nat_entry_slab,
-                                               GFP_NOFS | __GFP_ZERO);
+               new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
        if (new) {
                nat_set_nid(new, nid);
                nat_reset_flag(new);
@@ -702,7 +700,6 @@ static void truncate_node(struct dnode_of_data *dn)
        struct node_info ni;
 
        get_node_info(sbi, dn->nid, &ni);
-       f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 
        /* Deallocate node address */
        invalidate_blocks(sbi, ni.blk_addr);
@@ -1340,14 +1337,19 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
                .encrypted_page = NULL,
                .submitted = false,
                .io_type = io_type,
+               .io_wbc = wbc,
        };
 
        trace_f2fs_writepage(page, NODE);
 
+       if (unlikely(f2fs_cp_error(sbi))) {
+               dec_page_count(sbi, F2FS_DIRTY_NODES);
+               unlock_page(page);
+               return 0;
+       }
+
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;
-       if (unlikely(f2fs_cp_error(sbi)))
-               goto redirty_out;
 
        /* get old block addr of this node page */
        nid = nid_of_node(page);
@@ -1592,12 +1594,6 @@ next_step:
                        struct page *page = pvec.pages[i];
                        bool submitted = false;
 
-                       if (unlikely(f2fs_cp_error(sbi))) {
-                               pagevec_release(&pvec);
-                               ret = -EIO;
-                               goto out;
-                       }
-
                        /*
                         * flushing sequence with step:
                         * 0. indirect nodes
@@ -1667,9 +1663,12 @@ continue_unlock:
                step++;
                goto next_step;
        }
-out:
+
        if (nwritten)
                f2fs_submit_merged_write(sbi, NODE);
+
+       if (unlikely(f2fs_cp_error(sbi)))
+               return -EIO;
        return ret;
 }
 
@@ -1831,8 +1830,33 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
        }
 }
 
+/*
+ * Set or clear the free-nid bit for @nid and keep the per-NAT-block free
+ * count in step.
+ *
+ * @set:   true to mark @nid free, false to mark it in use
+ * @build: when clearing, the per-block count is only decremented if this
+ *         is false
+ *
+ * Nothing happens if the NAT block covering @nid is not flagged in
+ * nat_block_bitmap, or if the bit already has the requested value.
+ * NOTE(review): callers appear to hold nm_i->nid_list_lock - confirm.
+ */
+static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
+                                                       bool set, bool build)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
+       unsigned int nid_ofs = nid - START_NID(nid);
+       bool marked_free;
+
+       if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
+               return;
+
+       marked_free = test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+       if (marked_free == set)
+               return;
+
+       if (set) {
+               __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+               nm_i->free_nid_count[nat_ofs]++;
+               return;
+       }
+
+       __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+       if (!build)
+               nm_i->free_nid_count[nat_ofs]--;
+}
+
 /* return if the nid is recognized as free */
-static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+static bool add_free_nid(struct f2fs_sb_info *sbi,
+                               nid_t nid, bool build, bool update)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct free_nid *i, *e;
@@ -1848,8 +1872,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
        i->nid = nid;
        i->state = FREE_NID;
 
-       if (radix_tree_preload(GFP_NOFS))
-               goto err;
+       radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
 
        spin_lock(&nm_i->nid_list_lock);
 
@@ -1890,9 +1913,14 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
        ret = true;
        err = __insert_free_nid(sbi, i, FREE_NID);
 err_out:
+       if (update) {
+               update_free_nid_bitmap(sbi, nid, ret, build);
+               if (!build)
+                       nm_i->available_nids++;
+       }
        spin_unlock(&nm_i->nid_list_lock);
        radix_tree_preload_end();
-err:
+
        if (err)
                kmem_cache_free(free_nid_slab, i);
        return ret;
@@ -1916,30 +1944,6 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
                kmem_cache_free(free_nid_slab, i);
 }
 
-static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
-                                                       bool set, bool build)
-{
-       struct f2fs_nm_info *nm_i = NM_I(sbi);
-       unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
-       unsigned int nid_ofs = nid - START_NID(nid);
-
-       if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
-               return;
-
-       if (set) {
-               if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
-                       return;
-               __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-               nm_i->free_nid_count[nat_ofs]++;
-       } else {
-               if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
-                       return;
-               __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-               if (!build)
-                       nm_i->free_nid_count[nat_ofs]--;
-       }
-}
-
 static void scan_nat_page(struct f2fs_sb_info *sbi,
                        struct page *nat_page, nid_t start_nid)
 {
@@ -1949,26 +1953,23 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
        unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
        int i;
 
-       if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
-               return;
-
        __set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
 
        i = start_nid % NAT_ENTRY_PER_BLOCK;
 
        for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
-               bool freed = false;
-
                if (unlikely(start_nid >= nm_i->max_nid))
                        break;
 
                blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
                f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
-               if (blk_addr == NULL_ADDR)
-                       freed = add_free_nid(sbi, start_nid, true);
-               spin_lock(&NM_I(sbi)->nid_list_lock);
-               update_free_nid_bitmap(sbi, start_nid, freed, true);
-               spin_unlock(&NM_I(sbi)->nid_list_lock);
+               if (blk_addr == NULL_ADDR) {
+                       add_free_nid(sbi, start_nid, true, true);
+               } else {
+                       spin_lock(&NM_I(sbi)->nid_list_lock);
+                       update_free_nid_bitmap(sbi, start_nid, false, true);
+                       spin_unlock(&NM_I(sbi)->nid_list_lock);
+               }
        }
 }
 
@@ -1986,7 +1987,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
                addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
                nid = le32_to_cpu(nid_in_journal(journal, i));
                if (addr == NULL_ADDR)
-                       add_free_nid(sbi, nid, true);
+                       add_free_nid(sbi, nid, true, false);
                else
                        remove_free_nid(sbi, nid);
        }
@@ -2013,7 +2014,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
                                break;
 
                        nid = i * NAT_ENTRY_PER_BLOCK + idx;
-                       add_free_nid(sbi, nid, true);
+                       add_free_nid(sbi, nid, true, false);
 
                        if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
                                goto out;
@@ -2056,10 +2057,13 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
        down_read(&nm_i->nat_tree_lock);
 
        while (1) {
-               struct page *page = get_current_nat_page(sbi, nid);
+               if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
+                                               nm_i->nat_block_bitmap)) {
+                       struct page *page = get_current_nat_page(sbi, nid);
 
-               scan_nat_page(sbi, page, nid);
-               f2fs_put_page(page, 1);
+                       scan_nat_page(sbi, page, nid);
+                       f2fs_put_page(page, 1);
+               }
 
                nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
                if (unlikely(nid >= nm_i->max_nid))
@@ -2222,7 +2226,9 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
        f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
 
        ri = F2FS_INODE(page);
-       if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
+       if (ri->i_inline & F2FS_INLINE_XATTR) {
+               set_inode_flag(inode, FI_INLINE_XATTR);
+       } else {
                clear_inode_flag(inode, FI_INLINE_XATTR);
                goto update_inode;
        }
@@ -2238,7 +2244,7 @@ update_inode:
        f2fs_put_page(ipage, 1);
 }
 
-int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+int recover_xattr_data(struct inode *inode, struct page *page)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
@@ -2252,7 +2258,6 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
 
        /* 1: invalidate the previous xattr nid */
        get_node_info(sbi, prev_xnid, &ni);
-       f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
        invalidate_blocks(sbi, ni.blk_addr);
        dec_valid_node_count(sbi, inode, false);
        set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -2341,7 +2346,7 @@ retry:
        return 0;
 }
 
-int restore_node_summary(struct f2fs_sb_info *sbi,
+void restore_node_summary(struct f2fs_sb_info *sbi,
                        unsigned int segno, struct f2fs_summary_block *sum)
 {
        struct f2fs_node *rn;
@@ -2374,7 +2379,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
                invalidate_mapping_pages(META_MAPPING(sbi), addr,
                                                        addr + nrpages);
        }
-       return 0;
 }
 
 static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2516,11 +2520,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                nat_reset_flag(ne);
                __clear_nat_cache_dirty(NM_I(sbi), set, ne);
                if (nat_get_blkaddr(ne) == NULL_ADDR) {
-                       add_free_nid(sbi, nid, false);
-                       spin_lock(&NM_I(sbi)->nid_list_lock);
-                       NM_I(sbi)->available_nids++;
-                       update_free_nid_bitmap(sbi, nid, true, false);
-                       spin_unlock(&NM_I(sbi)->nid_list_lock);
+                       add_free_nid(sbi, nid, false, true);
                } else {
                        spin_lock(&NM_I(sbi)->nid_list_lock);
                        update_free_nid_bitmap(sbi, nid, false, false);
@@ -2601,8 +2601,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 
        nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
                                                F2FS_BLKSIZE - 1);
-       nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
-                                               GFP_KERNEL);
+       nm_i->nat_bits = f2fs_kzalloc(sbi,
+                       nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
        if (!nm_i->nat_bits)
                return -ENOMEM;
 
@@ -2680,7 +2680,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
        /* not used nids: 0, node, meta, (and root counted as valid node) */
        nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
-                                                       F2FS_RESERVED_NODE_NUM;
+                               sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
        nm_i->nid_cnt[FREE_NID] = 0;
        nm_i->nid_cnt[PREALLOC_NID] = 0;
        nm_i->nat_cnt = 0;
@@ -2727,17 +2727,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
 
-       nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks *
+       nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
                                        NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
        if (!nm_i->free_nid_bitmap)
                return -ENOMEM;
 
-       nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8,
+       nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
                                                                GFP_KERNEL);
        if (!nm_i->nat_block_bitmap)
                return -ENOMEM;
 
-       nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks *
+       nm_i->free_nid_count = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
                                        sizeof(unsigned short), GFP_KERNEL);
        if (!nm_i->free_nid_count)
                return -ENOMEM;
@@ -2748,7 +2748,8 @@ int build_node_manager(struct f2fs_sb_info *sbi)
 {
        int err;
 
-       sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
+       sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
+                                                       GFP_KERNEL);
        if (!sbi->nm_info)
                return -ENOMEM;
 
index 0ee3e5ff49a30b68d4b3040efa79642d989b19e4..081ef0d672bf5cf457a3bb1f4c20328bf24b1a12 100644 (file)
@@ -305,6 +305,10 @@ static inline bool is_recoverable_dnode(struct page *page)
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
        __u64 cp_ver = cur_cp_version(ckpt);
 
+       /* Don't care crc part, if fsck.f2fs sets it. */
+       if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG))
+               return (cp_ver << 32) == (cpver_of_node(page) << 32);
+
        if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
                cp_ver |= (cur_cp_crc(ckpt) << 32);
 
index 92c57ace1939b0a5d086cee4366f3f2168926c36..b6d1ec620a8cb319816b3ba77546117f9e5d662a 100644 (file)
@@ -195,6 +195,20 @@ out:
        return err;
 }
 
+static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
+{      /* bring selected in-memory inode flags in line with ri->i_inline */
+       if (ri->i_inline & F2FS_PIN_FILE)       /* FI_PIN_FILE follows ri */
+               set_inode_flag(inode, FI_PIN_FILE);
+       else
+               clear_inode_flag(inode, FI_PIN_FILE);
+       if (ri->i_inline & F2FS_DATA_EXIST)     /* FI_DATA_EXIST follows ri */
+               set_inode_flag(inode, FI_DATA_EXIST);
+       else
+               clear_inode_flag(inode, FI_DATA_EXIST);
+       if (!(ri->i_inline & F2FS_INLINE_DOTS)) /* only cleared, never set */
+               clear_inode_flag(inode, FI_INLINE_DOTS);
+}
+
 static void recover_inode(struct inode *inode, struct page *page)
 {
        struct f2fs_inode *raw = F2FS_INODE(page);
@@ -211,13 +225,16 @@ static void recover_inode(struct inode *inode, struct page *page)
 
        F2FS_I(inode)->i_advise = raw->i_advise;
 
+       recover_inline_flags(inode, raw);
+
        if (file_enc_name(inode))
                name = "<encrypted>";
        else
                name = F2FS_INODE(page)->i_name;
 
-       f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
-                       ino_of_node(page), name);
+       f2fs_msg(inode->i_sb, KERN_NOTICE,
+               "recover_inode: ino = %x, name = %s, inline = %x",
+                       ino_of_node(page), name, raw->i_inline);
 }
 
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -404,7 +421,7 @@ truncate_out:
 }
 
 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
-                                       struct page *page, block_t blkaddr)
+                                       struct page *page)
 {
        struct dnode_of_data dn;
        struct node_info ni;
@@ -415,7 +432,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
        if (IS_INODE(page)) {
                recover_inline_xattr(inode, page);
        } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
-               err = recover_xattr_data(inode, page, blkaddr);
+               err = recover_xattr_data(inode, page);
                if (!err)
                        recovered++;
                goto out;
@@ -568,7 +585,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
                                break;
                        }
                }
-               err = do_recover_data(sbi, entry->inode, page, blkaddr);
+               err = do_recover_data(sbi, entry->inode, page);
                if (err) {
                        f2fs_put_page(page, 1);
                        break;
index c117e0913f2a3b4a7c2573fc4fd2ece85f31f075..b16a8e6625aaee740b8232ac63dabe5c5fcc3e33 100644 (file)
@@ -248,7 +248,11 @@ retry:
                                goto next;
                        }
                        get_node_info(sbi, dn.nid, &ni);
-                       f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+                       if (cur->old_addr == NEW_ADDR) {
+                               invalidate_blocks(sbi, dn.data_blkaddr);
+                               f2fs_update_data_blkaddr(&dn, NEW_ADDR);
+                       } else
+                               f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
                                        cur->old_addr, ni.version, true, true);
                        f2fs_put_dnode(&dn);
                }
@@ -657,7 +661,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
                goto init_thread;
        }
 
-       fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+       fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
        if (!fcc)
                return -ENOMEM;
        atomic_set(&fcc->issued_flush, 0);
@@ -884,7 +888,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
        bio_put(bio);
 }
 
-void __check_sit_bitmap(struct f2fs_sb_info *sbi,
+static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
                                block_t start, block_t end)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -1204,6 +1208,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                pend_list = &dcc->pend_list[i];
 
                mutex_lock(&dcc->cmd_lock);
+               if (list_empty(pend_list))
+                       goto next;
                f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
                blk_start_plug(&plug);
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
@@ -1222,6 +1228,7 @@ skip:
                                break;
                }
                blk_finish_plug(&plug);
+next:
                mutex_unlock(&dcc->cmd_lock);
 
                if (iter >= dpolicy->max_requests)
@@ -1256,6 +1263,11 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
        return dropped;
 }
 
+void drop_discard_cmd(struct f2fs_sb_info *sbi)
+{      /* non-static wrapper around the file-local __drop_discard_cmd() */
+       __drop_discard_cmd(sbi);        /* its bool return value is discarded */
+}
+
 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
 {
@@ -1324,7 +1336,7 @@ static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
-void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;
@@ -1394,6 +1406,8 @@ static int issue_discard_thread(void *data)
                                msecs_to_jiffies(wait_ms));
                if (try_to_freeze())
                        continue;
+               if (f2fs_readonly(sbi->sb))
+                       continue;
                if (kthread_should_stop())
                        return 0;
 
@@ -1703,25 +1717,20 @@ void init_discard_policy(struct discard_policy *dpolicy,
        dpolicy->sync = true;
        dpolicy->granularity = granularity;
 
+       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+       dpolicy->io_aware_gran = MAX_PLIST_NUM;
+
        if (discard_type == DPOLICY_BG) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-               dpolicy->io_aware_gran = MAX_PLIST_NUM;
                dpolicy->io_aware = true;
        } else if (discard_type == DPOLICY_FORCE) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-               dpolicy->io_aware_gran = MAX_PLIST_NUM;
                dpolicy->io_aware = true;
        } else if (discard_type == DPOLICY_FSTRIM) {
-               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-               dpolicy->io_aware_gran = MAX_PLIST_NUM;
                dpolicy->io_aware = false;
        } else if (discard_type == DPOLICY_UMOUNT) {
-               dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-               dpolicy->io_aware_gran = MAX_PLIST_NUM;
                dpolicy->io_aware = false;
        }
 }
@@ -1737,7 +1746,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
                goto init_thread;
        }
 
-       dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
+       dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
        if (!dcc)
                return -ENOMEM;
 
@@ -2739,6 +2748,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                }
        }
 
+       f2fs_bug_on(sbi, !IS_DATASEG(type));
        curseg = CURSEG_I(sbi, type);
 
        mutex_lock(&curseg->curseg_mutex);
@@ -2823,7 +2833,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
        }
 }
 
-static int read_compacted_summaries(struct f2fs_sb_info *sbi)
+static void read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct curseg_info *seg_i;
@@ -2880,7 +2890,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
                }
        }
        f2fs_put_page(page, 1);
-       return 0;
 }
 
 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -2926,13 +2935,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
                                ns->ofs_in_node = 0;
                        }
                } else {
-                       int err;
-
-                       err = restore_node_summary(sbi, segno, sum);
-                       if (err) {
-                               f2fs_put_page(new, 1);
-                               return err;
-                       }
+                       restore_node_summary(sbi, segno, sum);
                }
        }
 
@@ -2971,8 +2974,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
                                                        META_CP, true);
 
                /* restore for compacted data summary */
-               if (read_compacted_summaries(sbi))
-                       return -EINVAL;
+               read_compacted_summaries(sbi);
                type = CURSEG_HOT_NODE;
        }
 
@@ -3108,28 +3110,19 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
                                        unsigned int start)
 {
        struct sit_info *sit_i = SIT_I(sbi);
-       struct page *src_page, *dst_page;
+       struct page *page;
        pgoff_t src_off, dst_off;
-       void *src_addr, *dst_addr;
 
        src_off = current_sit_addr(sbi, start);
        dst_off = next_sit_addr(sbi, src_off);
 
-       /* get current sit block page without lock */
-       src_page = get_meta_page(sbi, src_off);
-       dst_page = grab_meta_page(sbi, dst_off);
-       f2fs_bug_on(sbi, PageDirty(src_page));
-
-       src_addr = page_address(src_page);
-       dst_addr = page_address(dst_page);
-       memcpy(dst_addr, src_addr, PAGE_SIZE);
-
-       set_page_dirty(dst_page);
-       f2fs_put_page(src_page, 1);
+       page = grab_meta_page(sbi, dst_off);
+       seg_info_to_sit_page(sbi, page, start);
 
+       set_page_dirty(page);
        set_to_next_sit(sit_i, start);
 
-       return dst_page;
+       return page;
 }
 
 static struct sit_entry_set *grab_sit_entry_set(void)
@@ -3338,52 +3331,54 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size;
 
        /* allocate memory for SIT information */
-       sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
+       sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
        if (!sit_i)
                return -ENOMEM;
 
        SM_I(sbi)->sit_info = sit_i;
 
-       sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) *
+       sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) *
                                        sizeof(struct seg_entry), GFP_KERNEL);
        if (!sit_i->sentries)
                return -ENOMEM;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
+       sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
+                                                               GFP_KERNEL);
        if (!sit_i->dirty_sentries_bitmap)
                return -ENOMEM;
 
        for (start = 0; start < MAIN_SEGS(sbi); start++) {
                sit_i->sentries[start].cur_valid_map
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                sit_i->sentries[start].ckpt_valid_map
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                if (!sit_i->sentries[start].cur_valid_map ||
                                !sit_i->sentries[start].ckpt_valid_map)
                        return -ENOMEM;
 
 #ifdef CONFIG_F2FS_CHECK_FS
                sit_i->sentries[start].cur_valid_map_mir
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                if (!sit_i->sentries[start].cur_valid_map_mir)
                        return -ENOMEM;
 #endif
 
                if (f2fs_discard_en(sbi)) {
                        sit_i->sentries[start].discard_map
-                               = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                               = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
+                                                               GFP_KERNEL);
                        if (!sit_i->sentries[start].discard_map)
                                return -ENOMEM;
                }
        }
 
-       sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+       sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
        if (!sit_i->tmp_map)
                return -ENOMEM;
 
        if (sbi->segs_per_sec > 1) {
-               sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) *
+               sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) *
                                        sizeof(struct sec_entry), GFP_KERNEL);
                if (!sit_i->sec_entries)
                        return -ENOMEM;
@@ -3427,19 +3422,19 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size, sec_bitmap_size;
 
        /* allocate memory for free segmap information */
-       free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
+       free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
        if (!free_i)
                return -ENOMEM;
 
        SM_I(sbi)->free_info = free_i;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL);
+       free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!free_i->free_segmap)
                return -ENOMEM;
 
        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
-       free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL);
+       free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
        if (!free_i->free_secmap)
                return -ENOMEM;
 
@@ -3460,7 +3455,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
        struct curseg_info *array;
        int i;
 
-       array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
+       array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
        if (!array)
                return -ENOMEM;
 
@@ -3468,12 +3463,12 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 
        for (i = 0; i < NR_CURSEG_TYPE; i++) {
                mutex_init(&array[i].curseg_mutex);
-               array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+               array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
                if (!array[i].sum_blk)
                        return -ENOMEM;
                init_rwsem(&array[i].journal_rwsem);
-               array[i].journal = kzalloc(sizeof(struct f2fs_journal),
-                                                       GFP_KERNEL);
+               array[i].journal = f2fs_kzalloc(sbi,
+                               sizeof(struct f2fs_journal), GFP_KERNEL);
                if (!array[i].journal)
                        return -ENOMEM;
                array[i].segno = NULL_SEGNO;
@@ -3482,7 +3477,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
        return restore_curseg_summaries(sbi);
 }
 
-static void build_sit_entries(struct f2fs_sb_info *sbi)
+static int build_sit_entries(struct f2fs_sb_info *sbi)
 {
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
@@ -3492,6 +3487,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
        int sit_blk_cnt = SIT_BLK_CNT(sbi);
        unsigned int i, start, end;
        unsigned int readed, start_blk = 0;
+       int err = 0;
 
        do {
                readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
@@ -3510,7 +3506,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
                        sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
                        f2fs_put_page(page, 1);
 
-                       check_block_count(sbi, start, &sit);
+                       err = check_block_count(sbi, start, &sit);
+                       if (err)
+                               return err;
                        seg_info_from_raw_sit(se, &sit);
 
                        /* build discard map only one time */
@@ -3545,7 +3543,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
 
                old_valid_blocks = se->valid_blocks;
 
-               check_block_count(sbi, start, &sit);
+               err = check_block_count(sbi, start, &sit);
+               if (err)
+                       break;
                seg_info_from_raw_sit(se, &sit);
 
                if (f2fs_discard_en(sbi)) {
@@ -3565,6 +3565,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
                                se->valid_blocks - old_valid_blocks;
        }
        up_read(&curseg->journal_rwsem);
+       return err;
 }
 
 static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -3619,7 +3620,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
 
-       dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL);
+       dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!dirty_i->victim_secmap)
                return -ENOMEM;
        return 0;
@@ -3631,7 +3632,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size, i;
 
        /* allocate memory for dirty segments list information */
-       dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
+       dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
+                                                               GFP_KERNEL);
        if (!dirty_i)
                return -ENOMEM;
 
@@ -3641,7 +3643,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
 
        for (i = 0; i < NR_DIRTY_TYPE; i++) {
-               dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL);
+               dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
+                                                               GFP_KERNEL);
                if (!dirty_i->dirty_segmap[i])
                        return -ENOMEM;
        }
@@ -3685,7 +3688,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        struct f2fs_sm_info *sm_info;
        int err;
 
-       sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
+       sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
        if (!sm_info)
                return -ENOMEM;
 
@@ -3737,7 +3740,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
                return err;
 
        /* reinit free segmap based on SIT */
-       build_sit_entries(sbi);
+       err = build_sit_entries(sbi);
+       if (err)
+               return err;
 
        init_free_segmap(sbi);
        err = build_dirty_segmap(sbi);
index d1d394cdf61dd9ffbde608a941463c98b4d4f0b2..f11c4bc82c7863f7f64dfa67750c846b690078e9 100644 (file)
@@ -348,16 +348,41 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
        se->mtime = le64_to_cpu(rs->mtime);
 }
 
-static inline void seg_info_to_raw_sit(struct seg_entry *se,
+static inline void __seg_info_to_raw_sit(struct seg_entry *se,
                                        struct f2fs_sit_entry *rs)
 {
        unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) |
                                        se->valid_blocks;
        rs->vblocks = cpu_to_le16(raw_vblocks);
        memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
+       rs->mtime = cpu_to_le64(se->mtime);
+}
+
+static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi,
+                               struct page *page, unsigned int start)
+{      /* write raw SIT entries into page from in-memory seg entries */
+       struct f2fs_sit_block *raw_sit;
+       struct seg_entry *se;
+       struct f2fs_sit_entry *rs;
+       unsigned int end = min(start + SIT_ENTRY_PER_BLOCK,
+                                       (unsigned long)MAIN_SEGS(sbi));
+       int i;
+
+       raw_sit = (struct f2fs_sit_block *)page_address(page);
+       for (i = 0; i < end - start; i++) {     /* entry i <- segment start + i */
+               rs = &raw_sit->entries[i];
+               se = get_seg_entry(sbi, start + i);
+               __seg_info_to_raw_sit(se, rs);
+       }       /* NOTE(review): entries past end - start are left as-is; confirm */
+}
+
+static inline void seg_info_to_raw_sit(struct seg_entry *se,
+                                       struct f2fs_sit_entry *rs)
+{
+       __seg_info_to_raw_sit(se, rs);
+
        memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
        se->ckpt_valid_blocks = se->valid_blocks;
-       rs->mtime = cpu_to_le64(se->mtime);
 }
 
 static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
@@ -580,47 +605,6 @@ enum {
        F2FS_IPU_ASYNC,
 };
 
-static inline bool need_inplace_update_policy(struct inode *inode,
-                               struct f2fs_io_info *fio)
-{
-       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-       unsigned int policy = SM_I(sbi)->ipu_policy;
-
-       if (test_opt(sbi, LFS))
-               return false;
-
-       /* if this is cold file, we should overwrite to avoid fragmentation */
-       if (file_is_cold(inode))
-               return true;
-
-       if (policy & (0x1 << F2FS_IPU_FORCE))
-               return true;
-       if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
-               return true;
-       if (policy & (0x1 << F2FS_IPU_UTIL) &&
-                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
-               return true;
-       if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
-                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
-               return true;
-
-       /*
-        * IPU for rewrite async pages
-        */
-       if (policy & (0x1 << F2FS_IPU_ASYNC) &&
-                       fio && fio->op == REQ_OP_WRITE &&
-                       !(fio->op_flags & REQ_SYNC) &&
-                       !f2fs_encrypted_inode(inode))
-               return true;
-
-       /* this is only set during fdatasync */
-       if (policy & (0x1 << F2FS_IPU_FSYNC) &&
-                       is_inode_flag_set(inode, FI_NEED_IPU))
-               return true;
-
-       return false;
-}
-
 static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
                int type)
 {
@@ -655,7 +639,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
 /*
  * Summary block is always treated as an invalid block
  */
-static inline void check_block_count(struct f2fs_sb_info *sbi,
+static inline int check_block_count(struct f2fs_sb_info *sbi,
                int segno, struct f2fs_sit_entry *raw_sit)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -677,11 +661,25 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
                cur_pos = next_pos;
                is_valid = !is_valid;
        } while (cur_pos < sbi->blocks_per_seg);
-       BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
+
+       if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
+               f2fs_msg(sbi->sb, KERN_ERR,
+                               "Mismatch valid blocks %d vs. %d",
+                                       GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
+               return -EINVAL;
+       }
 #endif
        /* check segment usage, and check boundary of a given segment number */
-       f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
-                                       || segno > TOTAL_SEGS(sbi) - 1);
+       if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+                                       || segno > TOTAL_SEGS(sbi) - 1)) {
+               f2fs_msg(sbi->sb, KERN_ERR,
+                               "Wrong valid blocks %d or segno %u",
+                                       GET_SIT_VBLOCKS(raw_sit), segno);
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
+               return -EINVAL;
+       }
+       return 0;
 }
 
 static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
index a6c5dd450002daa7d1c43f414d0ffbaa17ff1b04..0f8945bc4b4b9a93690da7e3e5f261d0d01168ac 100644 (file)
@@ -43,6 +43,7 @@ static struct kmem_cache *f2fs_inode_cachep;
 
 char *fault_name[FAULT_MAX] = {
        [FAULT_KMALLOC]         = "kmalloc",
+       [FAULT_KVMALLOC]        = "kvmalloc",
        [FAULT_PAGE_ALLOC]      = "page alloc",
        [FAULT_PAGE_GET]        = "page get",
        [FAULT_ALLOC_BIO]       = "alloc bio",
@@ -106,6 +107,9 @@ enum {
        Opt_noextent_cache,
        Opt_noinline_data,
        Opt_data_flush,
+       Opt_reserve_root,
+       Opt_resgid,
+       Opt_resuid,
        Opt_mode,
        Opt_io_size_bits,
        Opt_fault_injection,
@@ -156,6 +160,9 @@ static match_table_t f2fs_tokens = {
        {Opt_noextent_cache, "noextent_cache"},
        {Opt_noinline_data, "noinline_data"},
        {Opt_data_flush, "data_flush"},
+       {Opt_reserve_root, "reserve_root=%u"},
+       {Opt_resgid, "resgid=%u"},
+       {Opt_resuid, "resuid=%u"},
        {Opt_mode, "mode=%s"},
        {Opt_io_size_bits, "io_bits=%u"},
        {Opt_fault_injection, "fault_injection=%u"},
@@ -190,6 +197,28 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
        va_end(args);
 }
 
+static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
+{      /* cap root-reserved blocks; report resuid/resgid given w/o reserve_root */
+       block_t limit = sbi->user_block_count / 500;
+
+       /* limit is 0.2%; "/ 500" equals "(x << 1) / 1000" but cannot wrap */
+       if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) {
+               sbi->root_reserved_blocks = limit;
+               f2fs_msg(sbi->sb, KERN_INFO,
+                       "Reduce reserved blocks for root = %u",
+                               sbi->root_reserved_blocks);
+       }
+       if (!test_opt(sbi, RESERVE_ROOT) &&     /* ids set, but nothing reserved */
+               (!uid_eq(sbi->s_resuid,
+                               make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
+               !gid_eq(sbi->s_resgid,
+                               make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
+               f2fs_msg(sbi->sb, KERN_INFO,
+                       "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+                               from_kuid_munged(&init_user_ns, sbi->s_resuid),
+                               from_kgid_munged(&init_user_ns, sbi->s_resgid));
+}
+
 static void init_once(void *foo)
 {
        struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -320,6 +349,8 @@ static int parse_options(struct super_block *sb, char *options)
        substring_t args[MAX_OPT_ARGS];
        char *p, *name;
        int arg = 0;
+       kuid_t uid;
+       kgid_t gid;
 #ifdef CONFIG_QUOTA
        int ret;
 #endif
@@ -487,6 +518,40 @@ static int parse_options(struct super_block *sb, char *options)
                case Opt_data_flush:
                        set_opt(sbi, DATA_FLUSH);
                        break;
+               case Opt_reserve_root:
+                       if (args->from && match_int(args, &arg))
+                               return -EINVAL;
+                       if (test_opt(sbi, RESERVE_ROOT)) {
+                               f2fs_msg(sb, KERN_INFO,
+                                       "Preserve previous reserve_root=%u",
+                                       sbi->root_reserved_blocks);
+                       } else {
+                               sbi->root_reserved_blocks = arg;
+                               set_opt(sbi, RESERVE_ROOT);
+                       }
+                       break;
+               case Opt_resuid:
+                       if (args->from && match_int(args, &arg))
+                               return -EINVAL;
+                       uid = make_kuid(current_user_ns(), arg);
+                       if (!uid_valid(uid)) {
+                               f2fs_msg(sb, KERN_ERR,
+                                       "Invalid uid value %d", arg);
+                               return -EINVAL;
+                       }
+                       sbi->s_resuid = uid;
+                       break;
+               case Opt_resgid:
+                       if (args->from && match_int(args, &arg))
+                               return -EINVAL;
+                       gid = make_kgid(current_user_ns(), arg);
+                       if (!gid_valid(gid)) {
+                               f2fs_msg(sb, KERN_ERR,
+                                       "Invalid gid value %d", arg);
+                               return -EINVAL;
+                       }
+                       sbi->s_resgid = gid;
+                       break;
                case Opt_mode:
                        name = match_strdup(&args[0]);
 
@@ -993,22 +1058,25 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct super_block *sb = dentry->d_sb;
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
-       block_t total_count, user_block_count, start_count, ovp_count;
+       block_t total_count, user_block_count, start_count;
        u64 avail_node_count;
 
        total_count = le64_to_cpu(sbi->raw_super->block_count);
        user_block_count = sbi->user_block_count;
        start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
-       ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
        buf->f_type = F2FS_SUPER_MAGIC;
        buf->f_bsize = sbi->blocksize;
 
        buf->f_blocks = total_count - start_count;
-       buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
-       buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
+       buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
                                                sbi->current_reserved_blocks;
+       if (buf->f_bfree > sbi->root_reserved_blocks)
+               buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks;
+       else
+               buf->f_bavail = 0;
 
-       avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+       avail_node_count = sbi->total_node_count - sbi->nquota_files -
+                                               F2FS_RESERVED_NODE_NUM;
 
        if (avail_node_count > user_block_count) {
                buf->f_files = user_block_count;
@@ -1134,6 +1202,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        else if (test_opt(sbi, LFS))
                seq_puts(seq, "lfs");
        seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+       if (test_opt(sbi, RESERVE_ROOT))
+               seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
+                               sbi->root_reserved_blocks,
+                               from_kuid_munged(&init_user_ns, sbi->s_resuid),
+                               from_kgid_munged(&init_user_ns, sbi->s_resgid));
        if (F2FS_IO_SIZE_BITS(sbi))
                seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -1263,7 +1336,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                err = dquot_suspend(sb, -1);
                if (err < 0)
                        goto restore_opts;
-       } else {
+       } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
                /* dquot_resume needs RW */
                sb->s_flags &= ~MS_RDONLY;
                if (sb_any_quota_suspended(sb)) {
@@ -1332,6 +1405,7 @@ skip:
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
 
+       limit_reserve_root(sbi);
        return 0;
 restore_gc:
        if (need_restart_gc) {
@@ -1656,7 +1730,7 @@ void f2fs_quota_off_umount(struct super_block *sb)
                f2fs_quota_off(sb, type);
 }
 
-int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
+static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 {
        *projid = F2FS_I(inode)->i_projid;
        return 0;
@@ -2148,14 +2222,15 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
        if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
                FDEV(devi).nr_blkz++;
 
-       FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+       FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
+                                                               GFP_KERNEL);
        if (!FDEV(devi).blkz_type)
                return -ENOMEM;
 
 #define F2FS_REPORT_NR_ZONES   4096
 
-       zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
-                       GFP_KERNEL);
+       zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) *
+                               F2FS_REPORT_NR_ZONES, GFP_KERNEL);
        if (!zones)
                return -ENOMEM;
 
@@ -2295,8 +2370,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
         * Initialize multiple devices information, or single
         * zoned block device information.
         */
-       sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
-                               GFP_KERNEL);
+       sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) *
+                                               max_devices, GFP_KERNEL);
        if (!sbi->devs)
                return -ENOMEM;
 
@@ -2419,6 +2494,9 @@ try_onemore:
        sb->s_fs_info = sbi;
        sbi->raw_super = raw_super;
 
+       sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+       sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
+
        /* precompute checksum seed for metadata */
        if (f2fs_sb_has_inode_chksum(sb))
                sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2462,6 +2540,13 @@ try_onemore:
        else
                sb->s_qcop = &f2fs_quotactl_ops;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
+
+       if (f2fs_sb_has_quota_ino(sbi->sb)) {
+               for (i = 0; i < MAXQUOTAS; i++) {
+                       if (f2fs_qf_ino(sbi->sb, i))
+                               sbi->nquota_files++;
+               }
+       }
 #endif
 
        sb->s_op = &f2fs_sops;
@@ -2475,6 +2560,7 @@ try_onemore:
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
        memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+       sb->s_iflags |= SB_I_CGROUPWB;
 
        /* init f2fs-specific super block info */
        sbi->valid_super_block = valid_super_block;
@@ -2495,8 +2581,9 @@ try_onemore:
                int n = (i == META) ? 1: NR_TEMP_TYPE;
                int j;
 
-               sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
-                                                               GFP_KERNEL);
+               sbi->write_io[i] = f2fs_kmalloc(sbi,
+                                       n * sizeof(struct f2fs_bio_info),
+                                       GFP_KERNEL);
                if (!sbi->write_io[i]) {
                        err = -ENOMEM;
                        goto free_options;
@@ -2517,14 +2604,14 @@ try_onemore:
 
        err = init_percpu_info(sbi);
        if (err)
-               goto free_options;
+               goto free_bio_info;
 
        if (F2FS_IO_SIZE(sbi) > 1) {
                sbi->write_io_dummy =
                        mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
                if (!sbi->write_io_dummy) {
                        err = -ENOMEM;
-                       goto free_options;
+                       goto free_percpu;
                }
        }
 
@@ -2559,6 +2646,7 @@ try_onemore:
        sbi->last_valid_block_count = sbi->total_valid_block_count;
        sbi->reserved_blocks = 0;
        sbi->current_reserved_blocks = 0;
+       limit_reserve_root(sbi);
 
        for (i = 0; i < NR_INODE_TYPE; i++) {
                INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2604,18 +2692,16 @@ try_onemore:
                goto free_nm;
        }
 
-       f2fs_join_shrinker(sbi);
-
        err = f2fs_build_stats(sbi);
        if (err)
-               goto free_nm;
+               goto free_node_inode;
 
        /* read root inode and dentry */
        root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
        if (IS_ERR(root)) {
                f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
                err = PTR_ERR(root);
-               goto free_node_inode;
+               goto free_stats;
        }
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
                iput(root);
@@ -2711,6 +2797,8 @@ skip_recovery:
                        sbi->valid_super_block ? 1 : 2, err);
        }
 
+       f2fs_join_shrinker(sbi);
+
        f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
                                cur_cp_version(F2FS_CKPT(sbi)));
        f2fs_update_time(sbi, CP_TIME);
@@ -2737,14 +2825,12 @@ free_sysfs:
 free_root_inode:
        dput(sb->s_root);
        sb->s_root = NULL;
+free_stats:
+       f2fs_destroy_stats(sbi);
 free_node_inode:
-       truncate_inode_pages_final(NODE_MAPPING(sbi));
-       mutex_lock(&sbi->umount_mutex);
        release_ino_entry(sbi, true);
-       f2fs_leave_shrinker(sbi);
+       truncate_inode_pages_final(NODE_MAPPING(sbi));
        iput(sbi->node_inode);
-       mutex_unlock(&sbi->umount_mutex);
-       f2fs_destroy_stats(sbi);
 free_nm:
        destroy_node_manager(sbi);
 free_sm:
@@ -2757,10 +2843,12 @@ free_meta_inode:
        iput(sbi->meta_inode);
 free_io_dummy:
        mempool_destroy(sbi->write_io_dummy);
-free_options:
+free_percpu:
+       destroy_percpu_info(sbi);
+free_bio_info:
        for (i = 0; i < NR_PAGE_TYPE; i++)
                kfree(sbi->write_io[i]);
-       destroy_percpu_info(sbi);
+free_options:
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
index 9835348b6e5d200b71112c2eb65ad101d5bd3be0..d978c7b6ea04a6c6062ec4c55c1b74adb53274a1 100644 (file)
@@ -113,6 +113,9 @@ static ssize_t features_show(struct f2fs_attr *a,
        if (f2fs_sb_has_quota_ino(sb))
                len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
                                len ? ", " : "", "quota_ino");
+       if (f2fs_sb_has_inode_crtime(sb))
+               len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+                               len ? ", " : "", "inode_crtime");
        len += snprintf(buf + len, PAGE_SIZE - len, "\n");
        return len;
 }
@@ -162,7 +165,8 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
 #endif
        if (a->struct_type == RESERVED_BLOCKS) {
                spin_lock(&sbi->stat_lock);
-               if (t > (unsigned long)sbi->user_block_count) {
+               if (t > (unsigned long)(sbi->user_block_count -
+                                       sbi->root_reserved_blocks)) {
                        spin_unlock(&sbi->stat_lock);
                        return -EINVAL;
                }
@@ -231,6 +235,7 @@ enum feat_id {
        FEAT_INODE_CHECKSUM,
        FEAT_FLEXIBLE_INLINE_XATTR,
        FEAT_QUOTA_INO,
+       FEAT_INODE_CRTIME,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -245,6 +250,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
        case FEAT_INODE_CHECKSUM:
        case FEAT_FLEXIBLE_INLINE_XATTR:
        case FEAT_QUOTA_INO:
+       case FEAT_INODE_CRTIME:
                return snprintf(buf, PAGE_SIZE, "supported\n");
        }
        return 0;
@@ -299,6 +305,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
@@ -320,6 +328,7 @@ F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
 F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
 F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
 F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
+F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -346,6 +355,8 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(cp_interval),
        ATTR_LIST(idle_interval),
        ATTR_LIST(iostat_enable),
+       ATTR_LIST(readdir_ra),
+       ATTR_LIST(gc_pin_file_thresh),
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        ATTR_LIST(inject_rate),
        ATTR_LIST(inject_type),
@@ -371,6 +382,7 @@ static struct attribute *f2fs_feat_attrs[] = {
        ATTR_LIST(inode_checksum),
        ATTR_LIST(flexible_inline_xattr),
        ATTR_LIST(quota_ino),
+       ATTR_LIST(inode_crtime),
        NULL,
 };
 
index bccbbf2616d2b2cd3e5c3380b809f545dc1802a9..a1fcd00bbb2bd6e1b73750ed76d9b57c25e25705 100644 (file)
@@ -17,7 +17,7 @@
 #include "trace.h"
 
 static RADIX_TREE(pids, GFP_ATOMIC);
-static spinlock_t pids_lock;
+static struct mutex pids_lock;
 static struct last_io_info last_io;
 
 static inline void __print_last_io(void)
@@ -64,7 +64,7 @@ void f2fs_trace_pid(struct page *page)
        if (radix_tree_preload(GFP_NOFS))
                return;
 
-       spin_lock(&pids_lock);
+       mutex_lock(&pids_lock);
        p = radix_tree_lookup(&pids, pid);
        if (p == current)
                goto out;
@@ -77,7 +77,7 @@ void f2fs_trace_pid(struct page *page)
                        MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
                        pid, current->comm);
 out:
-       spin_unlock(&pids_lock);
+       mutex_unlock(&pids_lock);
        radix_tree_preload_end();
 }
 
@@ -122,7 +122,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
 
 void f2fs_build_trace_ios(void)
 {
-       spin_lock_init(&pids_lock);
+       mutex_init(&pids_lock);
 }
 
 #define PIDVEC_SIZE    128
@@ -150,7 +150,7 @@ void f2fs_destroy_trace_ios(void)
        pid_t next_pid = 0;
        unsigned int found;
 
-       spin_lock(&pids_lock);
+       mutex_lock(&pids_lock);
        while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
                unsigned idx;
 
@@ -158,5 +158,5 @@ void f2fs_destroy_trace_ios(void)
                for (idx = 0; idx < found; idx++)
                        radix_tree_delete(&pids, pid[idx]);
        }
-       spin_unlock(&pids_lock);
+       mutex_unlock(&pids_lock);
 }
index ec8961ef8cacfbcf2dcc5807ea11d7eb860294ec..ae2dfa709f5dcf2f8e5b1084f5bce89377942ccc 100644 (file)
@@ -298,8 +298,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
        if (!size && !inline_size)
                return -ENODATA;
 
-       txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
-                                                       GFP_F2FS_ZERO);
+       txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
+                       inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
        if (!txattr_addr)
                return -ENOMEM;
 
@@ -351,8 +351,8 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
        void *txattr_addr;
        int err;
 
-       txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
-                                                       GFP_F2FS_ZERO);
+       txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode),
+                       inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS);
        if (!txattr_addr)
                return -ENOMEM;
 
@@ -433,6 +433,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
        if (F2FS_I(inode)->i_xattr_nid) {
                xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
                if (IS_ERR(xpage)) {
+                       err = PTR_ERR(xpage);
                        alloc_nid_failed(sbi, new_nid);
                        goto in_page_out;
                }
@@ -443,6 +444,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
                set_new_dnode(&dn, inode, NULL, NULL, new_nid);
                xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
                if (IS_ERR(xpage)) {
+                       err = PTR_ERR(xpage);
                        alloc_nid_failed(sbi, new_nid);
                        goto in_page_out;
                }
@@ -598,7 +600,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
                        goto exit;
                }
 
-               if (f2fs_xattr_value_same(here, value, size))
+               if (value && f2fs_xattr_value_same(here, value, size))
                        goto exit;
        } else if ((flags & XATTR_REPLACE)) {
                error = -ENODATA;
index eebf5f6cf6d5645a128b9b3560ac9ce853a4bdc7..2fd0fde16fe17f56bec417090dcd6e10ce162259 100644 (file)
@@ -43,7 +43,7 @@ struct posix_acl *get_cached_acl(struct inode *inode, int type)
                rcu_read_lock();
                acl = rcu_dereference(*p);
                if (!acl || is_uncached_acl(acl) ||
-                   atomic_inc_not_zero(&acl->a_refcount))
+                   refcount_inc_not_zero(&acl->a_refcount))
                        break;
                rcu_read_unlock();
                cpu_relax();
@@ -164,7 +164,7 @@ EXPORT_SYMBOL(get_acl);
 void
 posix_acl_init(struct posix_acl *acl, int count)
 {
-       atomic_set(&acl->a_refcount, 1);
+       refcount_set(&acl->a_refcount, 1);
        acl->a_count = count;
 }
 EXPORT_SYMBOL(posix_acl_init);
@@ -197,7 +197,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
                           sizeof(struct posix_acl_entry);
                clone = kmemdup(acl, size, flags);
                if (clone)
-                       atomic_set(&clone->a_refcount, 1);
+                       refcount_set(&clone->a_refcount, 1);
        }
        return clone;
 }
index 43e98d30d2df210bba056e89ae64e0d2eeddd6a4..58aecb60ea51b90eaf3a447a1f25c21d137a145b 100644 (file)
@@ -117,6 +117,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_NOCRC_RECOVERY_FLAG 0x00000200
 #define CP_TRIMMED_FLAG                0x00000100
 #define CP_NAT_BITS_FLAG       0x00000080
 #define CP_CRC_RECOVERY_FLAG   0x00000040
@@ -212,6 +213,7 @@ struct f2fs_extent {
 #define F2FS_DATA_EXIST                0x08    /* file inline data exist flag */
 #define F2FS_INLINE_DOTS       0x10    /* file having implicit dot dentries */
 #define F2FS_EXTRA_ATTR                0x20    /* file having extra attribute */
+#define F2FS_PIN_FILE          0x40    /* file should not be gced */
 
 struct f2fs_inode {
        __le16 i_mode;                  /* file mode */
@@ -229,7 +231,13 @@ struct f2fs_inode {
        __le32 i_ctime_nsec;            /* change time in nano scale */
        __le32 i_mtime_nsec;            /* modification time in nano scale */
        __le32 i_generation;            /* file version (for NFS) */
-       __le32 i_current_depth;         /* only for directory depth */
+       union {
+               __le32 i_current_depth; /* only for directory depth */
+               __le16 i_gc_failures;   /*
+                                        * # of gc failures on pinned file.
+                                        * only for regular files.
+                                        */
+       };
        __le32 i_xattr_nid;             /* nid to save xattr */
        __le32 i_flags;                 /* file attributes */
        __le32 i_pino;                  /* parent inode number */
@@ -245,8 +253,10 @@ struct f2fs_inode {
                        __le16 i_inline_xattr_size;     /* inline xattr size, unit: 4 bytes */
                        __le32 i_projid;        /* project id */
                        __le32 i_inode_checksum;/* inode meta checksum */
+                       __le64 i_crtime;        /* creation time */
+                       __le32 i_crtime_nsec;   /* creation time in nano scale */
                        __le32 i_extra_end[0];  /* for attribute size calculation */
-               };
+               } __packed;
                __le32 i_addr[DEF_ADDRS_PER_INODE];     /* Pointers to data blocks */
        };
        __le32 i_nid[DEF_NIDS_PER_INODE];       /* direct(2), indirect(2),
index b2b7255ec7f56b650a74da54fab184f2060a69d8..540595a321a762d305bea8bb1ffa6586a6df0585 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/bug.h>
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
+#include <linux/refcount.h>
 #include <uapi/linux/posix_acl.h>
 
 struct posix_acl_entry {
@@ -24,7 +25,7 @@ struct posix_acl_entry {
 };
 
 struct posix_acl {
-       atomic_t                a_refcount;
+       refcount_t              a_refcount;
        struct rcu_head         a_rcu;
        unsigned int            a_count;
        struct posix_acl_entry  a_entries[0];
@@ -41,7 +42,7 @@ static inline struct posix_acl *
 posix_acl_dup(struct posix_acl *acl)
 {
        if (acl)
-               atomic_inc(&acl->a_refcount);
+               refcount_inc(&acl->a_refcount);
        return acl;
 }
 
@@ -51,7 +52,7 @@ posix_acl_dup(struct posix_acl *acl)
 static inline void
 posix_acl_release(struct posix_acl *acl)
 {
-       if (acl && atomic_dec_and_test(&acl->a_refcount))
+       if (acl && refcount_dec_and_test(&acl->a_refcount))
                kfree_rcu(acl, a_rcu);
 }
 
index 8f8dd42fa57bd39c0a07cb15ece2249e8bb8928e..06c87f9f720c2ee9775eb707784c375bdcea4fd5 100644 (file)
@@ -147,7 +147,8 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
                { CP_NO_SPC_ROLL,       "no space roll forward" },      \
                { CP_NODE_NEED_CP,      "node needs cp" },              \
                { CP_FASTBOOT_MODE,     "fastboot mode" },              \
-               { CP_SPEC_LOG_NUM,      "log type is 2" })
+               { CP_SPEC_LOG_NUM,      "log type is 2" },              \
+               { CP_RECOVER_DIR,       "dir needs recovery" })
 
 struct victim_sel_policy;
 struct f2fs_map_blocks;