block: Abstract out bvec iterator
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 51731b76900de55e8350d5795feacc61a9050d02..bcb6f1b780d64512868303c04a7939060612e3e3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
 #include <linux/cleancache.h>
 #include "extent_io.h"
 #include "extent_map.h"
-#include "compat.h"
 #include "ctree.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
 #include "rcu-string.h"
+#include "backref.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1597,11 +1597,10 @@ done:
  *
  * 1 is returned if we find something, 0 if nothing was in the tree
  */
-static noinline u64 find_lock_delalloc_range(struct inode *inode,
-                                            struct extent_io_tree *tree,
-                                            struct page *locked_page,
-                                            u64 *start, u64 *end,
-                                            u64 max_bytes)
+STATIC u64 find_lock_delalloc_range(struct inode *inode,
+                                   struct extent_io_tree *tree,
+                                   struct page *locked_page, u64 *start,
+                                   u64 *end, u64 max_bytes)
 {
        u64 delalloc_start;
        u64 delalloc_end;
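
Note: find_lock_delalloc_range() is demoted from static to STATIC here so the btrfs sanity tests can call it, while normal builds keep it file-local. A minimal sketch of what STATIC is assumed to expand to (the authoritative definition lives in fs/btrfs/ctree.h of this tree):

    #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    #define STATIC noinline                 /* visible so the tests can call it */
    #else
    #define STATIC static noinline          /* regular builds: file-local as before */
    #endif
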
@@ -1740,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
        u64 last = 0;
        int found = 0;
 
-       if (search_end <= cur_start) {
-               WARN_ON(1);
+       if (WARN_ON(search_end <= cur_start))
                return 0;
-       }
 
        spin_lock(&tree->lock);
        if (cur_start == 0 && bits == EXTENT_DIRTY) {
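
Note: the rewrite above leans on WARN_ON() evaluating to the truth value of its condition, so the warning and the early return collapse into a single test, in the usual kernel idiom:

    /* WARN_ON(cond) returns cond, so it can gate an error path directly. */
    if (WARN_ON(search_end <= cur_start))
            return 0;
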
@@ -1955,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
        return err;
 }
 
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
-       complete(bio->bi_private);
-}
-
 /*
  * this bypasses the standard btrfs submit functions deliberately, as
  * the standard behavior is to write all copies in a raid setup. here we only
@@ -1976,13 +1968,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 {
        struct bio *bio;
        struct btrfs_device *dev;
-       DECLARE_COMPLETION_ONSTACK(compl);
        u64 map_length = 0;
        u64 sector;
        struct btrfs_bio *bbio = NULL;
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        int ret;
 
+       ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
        BUG_ON(!mirror_num);
 
        /* we can't repair anything in raid56 yet */
@@ -1992,9 +1984,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio)
                return -EIO;
-       bio->bi_private = &compl;
-       bio->bi_end_io = repair_io_failure_callback;
-       bio->bi_size = 0;
+       bio->bi_iter.bi_size = 0;
        map_length = length;
 
        ret = btrfs_map_block(fs_info, WRITE, logical,
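
Note: bi_size (and, below, bi_sector) now live in the bio's embedded iterator. The layout introduced by the bvec-iterator rework is roughly the following sketch; see the upstream blk_types.h for the authoritative definition:

    struct bvec_iter {
            sector_t        bi_sector;      /* device address, in 512-byte sectors */
            unsigned int    bi_size;        /* residual I/O count, in bytes */
            unsigned int    bi_idx;         /* current index into bi_io_vec */
            unsigned int    bi_bvec_done;   /* bytes completed in the current bvec */
    };
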
@@ -2005,7 +1995,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        }
        BUG_ON(mirror_num != bbio->mirror_num);
        sector = bbio->stripes[mirror_num-1].physical >> 9;
-       bio->bi_sector = sector;
+       bio->bi_iter.bi_sector = sector;
        dev = bbio->stripes[mirror_num-1].dev;
        kfree(bbio);
        if (!dev || !dev->bdev || !dev->writeable) {
@@ -2014,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        }
        bio->bi_bdev = dev->bdev;
        bio_add_page(bio, page, length, start - page_offset(page));
-       btrfsic_submit_bio(WRITE_SYNC, bio);
-       wait_for_completion(&compl);
 
-       if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+       if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
                /* try to remap that extent elsewhere? */
                bio_put(bio);
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
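
Note: btrfsic_submit_bio_wait() replaces the open-coded completion that was removed above; it forwards to the block layer's submit_bio_wait(), which is assumed to do roughly the following (sketch only, the helper names are illustrative):

    struct submit_bio_ret {
            struct completion event;
            int error;
    };

    static void submit_bio_wait_endio(struct bio *bio, int error)
    {
            struct submit_bio_ret *ret = bio->bi_private;

            ret->error = error;
            complete(&ret->event);
    }

    int submit_bio_wait(int rw, struct bio *bio)
    {
            struct submit_bio_ret ret;

            rw |= REQ_SYNC;
            init_completion(&ret.event);
            bio->bi_private = &ret;
            bio->bi_end_io = submit_bio_wait_endio;
            submit_bio(rw, bio);
            wait_for_completion(&ret.event);

            return ret.error;
    }

A nonzero return therefore means the write completed with an error, which is what the new "if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))" check relies on.
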
@@ -2039,6 +2027,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
        unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
        int ret = 0;
 
+       if (root->fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+
        for (i = 0; i < num_pages; i++) {
                struct page *p = extent_buffer_page(eb, i);
                ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2060,12 +2051,12 @@ static int clean_io_failure(u64 start, struct page *page)
        u64 private;
        u64 private_failure;
        struct io_failure_record *failrec;
-       struct btrfs_fs_info *fs_info;
+       struct inode *inode = page->mapping->host;
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
        struct extent_state *state;
        int num_copies;
        int did_repair = 0;
        int ret;
-       struct inode *inode = page->mapping->host;
 
        private = 0;
        ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2088,6 +2079,8 @@ static int clean_io_failure(u64 start, struct page *page)
                did_repair = 1;
                goto out;
        }
+       if (fs_info->sb->s_flags & MS_RDONLY)
+               goto out;
 
        spin_lock(&BTRFS_I(inode)->io_tree.lock);
        state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2097,7 +2090,6 @@ static int clean_io_failure(u64 start, struct page *page)
 
        if (state && state->start <= failrec->start &&
            state->end >= failrec->start + failrec->len - 1) {
-               fs_info = BTRFS_I(inode)->root->fs_info;
                num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                              failrec->len);
                if (num_copies > 1)  {
@@ -2276,9 +2268,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                return -EIO;
        }
        bio->bi_end_io = failed_bio->bi_end_io;
-       bio->bi_sector = failrec->logical >> 9;
+       bio->bi_iter.bi_sector = failrec->logical >> 9;
        bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-       bio->bi_size = 0;
+       bio->bi_iter.bi_size = 0;
 
        btrfs_failed_bio = btrfs_io_bio(failed_bio);
        if (btrfs_failed_bio->csum) {
@@ -2340,12 +2332,13 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
  */
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct bio_vec *bvec;
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
+       int i;
 
-       do {
+       bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2363,14 +2356,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
                if (end_extent_writepage(page, err, start, end))
                        continue;
 
                end_page_writeback(page);
-       } while (bvec >= bio->bi_io_vec);
+       }
 
        bio_put(bio);
 }
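
Note: the write-completion handler now walks segments forward with bio_for_each_segment_all() instead of decrementing a biovec pointer by hand (the prefetchw() hints go away with it). For a bio that owns its biovec, as these end_io handlers do, the macro is assumed to be roughly equivalent to:

    /* Rough expansion; the real macro lives in include/linux/bio.h. */
    #define bio_for_each_segment_all(bvl, bio, i)                           \
            for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)

It iterates the raw bi_io_vec array rather than going through bi_iter, so it is only valid on bios the caller built itself, not on split or cloned bios.
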
@@ -2400,9 +2390,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
  */
 static void end_bio_extent_readpage(struct bio *bio, int err)
 {
+       struct bio_vec *bvec;
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct bio_vec *bvec = bio->bi_io_vec;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct extent_io_tree *tree;
        u64 offset = 0;
@@ -2413,16 +2402,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        u64 extent_len = 0;
        int mirror;
        int ret;
+       int i;
 
        if (err)
                uptodate = 0;
 
-       do {
+       bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
                struct inode *inode = page->mapping->host;
 
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                        "mirror=%lu\n", (u64)bio->bi_sector, err,
+                        "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
                         io_bio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
 
@@ -2441,9 +2431,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                end = start + bvec->bv_offset + bvec->bv_len - 1;
                len = bvec->bv_len;
 
-               if (++bvec <= bvec_end)
-                       prefetchw(&bvec->bv_page->flags);
-
                mirror = io_bio->mirror_num;
                if (likely(uptodate && tree->ops &&
                           tree->ops->readpage_end_io_hook)) {
@@ -2524,7 +2511,7 @@ readpage_ok:
                        extent_start = start;
                        extent_len = end + 1 - start;
                }
-       } while (bvec <= bvec_end);
+       }
 
        if (extent_len)
                endio_readpage_release_extent(tree, extent_start, extent_len,
@@ -2555,9 +2542,8 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
        }
 
        if (bio) {
-               bio->bi_size = 0;
                bio->bi_bdev = bdev;
-               bio->bi_sector = first_sector;
+               bio->bi_iter.bi_sector = first_sector;
                btrfs_bio = btrfs_io_bio(bio);
                btrfs_bio->csum = NULL;
                btrfs_bio->csum_allocated = NULL;
@@ -2651,7 +2637,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        if (bio_ret && *bio_ret) {
                bio = *bio_ret;
                if (old_compressed)
-                       contig = bio->bi_sector == sector;
+                       contig = bio->bi_iter.bi_sector == sector;
                else
                        contig = bio_end_sector(bio) == sector;
 
@@ -3418,20 +3404,18 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
 {
-       int uptodate = err == 0;
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct bio_vec *bvec;
        struct extent_buffer *eb;
-       int done;
+       int i, done;
 
-       do {
+       bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
 
-               bvec--;
                eb = (struct extent_buffer *)page->private;
                BUG_ON(!eb);
                done = atomic_dec_and_test(&eb->io_pages);
 
-               if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+               if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
                        set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
                        ClearPageUptodate(page);
                        SetPageError(page);
@@ -3443,10 +3427,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
                        continue;
 
                end_extent_buffer_writeback(eb);
-       } while (bvec >= bio->bi_io_vec);
+       }
 
        bio_put(bio);
-
 }
 
 static int write_one_eb(struct extent_buffer *eb,
@@ -3569,9 +3552,8 @@ retry:
                         * but no sense in crashing the users box for something
                         * we can survive anyway.
                         */
-                       if (!eb) {
+                       if (WARN_ON(!eb)) {
                                spin_unlock(&mapping->private_lock);
-                               WARN_ON(1);
                                continue;
                        }
 
@@ -4038,7 +4020,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
        if (offset >= last)
                return NULL;
 
-       while(1) {
+       while (1) {
                len = last - offset;
                if (len == 0)
                        break;
@@ -4062,6 +4044,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
        return NULL;
 }
 
+static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
+{
+       unsigned long cnt = *((unsigned long *)ctx);
+
+       cnt++;
+       *((unsigned long *)ctx) = cnt;
+
+       /* Now we're sure that the extent is shared. */
+       if (cnt > 1)
+               return 1;
+       return 0;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -4128,7 +4123,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                last = found_key.offset;
                last_for_get_extent = last + 1;
        }
-       btrfs_free_path(path);
+       btrfs_release_path(path);
 
        /*
         * we might have some extents allocated but more delalloc past those
@@ -4198,7 +4193,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else {
+                       unsigned long ref_cnt = 0;
+
                        disko = em->block_start + offset_in_extent;
+
+                       /*
+                        * As btrfs supports shared space, this information
+                        * can be exported to userspace tools via
+                        * flag FIEMAP_EXTENT_SHARED.
+                        */
+                       ret = iterate_inodes_from_logical(
+                                       em->block_start,
+                                       BTRFS_I(inode)->root->fs_info,
+                                       path, count_ext_ref, &ref_cnt);
+                       if (ret < 0 && ret != -ENOENT)
+                               goto out_free;
+
+                       if (ref_cnt > 1)
+                               flags |= FIEMAP_EXTENT_SHARED;
                }
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
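
Note: with the hunk above, extent sharing (reflinks, snapshots) is surfaced through the standard fiemap interface; the path is only released, not freed, in the earlier hunk so iterate_inodes_from_logical() can reuse it, and it is freed at the out: label in the next hunk instead. A hedged userspace sketch that checks the flag (the file name and extent count are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    int main(void)
    {
            struct fiemap *fm;
            unsigned int i;
            int fd = open("/mnt/btrfs/file", O_RDONLY);     /* illustrative path */

            if (fd < 0)
                    return 1;

            /* room for up to 32 extent records after the fixed header */
            fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
            if (!fm)
                    return 1;
            fm->fm_length = FIEMAP_MAX_OFFSET;
            fm->fm_extent_count = 32;

            if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
                    for (i = 0; i < fm->fm_mapped_extents; i++)
                            if (fm->fm_extents[i].fe_flags & FIEMAP_EXTENT_SHARED)
                                    printf("extent %u is shared\n", i);
            }

            free(fm);
            close(fd);
            return 0;
    }
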
@@ -4230,6 +4242,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
        free_extent_map(em);
 out:
+       btrfs_free_path(path);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
                             &cached_state, GFP_NOFS);
        return ret;
@@ -4455,6 +4468,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
        }
 }
 
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+                                                       u64 start)
+{
+       struct extent_buffer *eb;
+
+       rcu_read_lock();
+       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+       if (eb && atomic_inc_not_zero(&eb->refs)) {
+               rcu_read_unlock();
+               mark_extent_buffer_accessed(eb);
+               return eb;
+       }
+       rcu_read_unlock();
+
+       return NULL;
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                          u64 start, unsigned long len)
 {
@@ -4468,14 +4498,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        int uptodate = 1;
        int ret;
 
-       rcu_read_lock();
-       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-       if (eb && atomic_inc_not_zero(&eb->refs)) {
-               rcu_read_unlock();
-               mark_extent_buffer_accessed(eb);
+
+       eb = find_extent_buffer(tree, start);
+       if (eb)
                return eb;
-       }
-       rcu_read_unlock();
 
        eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
        if (!eb)
@@ -4534,24 +4560,17 @@ again:
 
        spin_lock(&tree->buffer_lock);
        ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+       spin_unlock(&tree->buffer_lock);
+       radix_tree_preload_end();
        if (ret == -EEXIST) {
-               exists = radix_tree_lookup(&tree->buffer,
-                                               start >> PAGE_CACHE_SHIFT);
-               if (!atomic_inc_not_zero(&exists->refs)) {
-                       spin_unlock(&tree->buffer_lock);
-                       radix_tree_preload_end();
-                       exists = NULL;
+               exists = find_extent_buffer(tree, start);
+               if (exists)
+                       goto free_eb;
+               else
                        goto again;
-               }
-               spin_unlock(&tree->buffer_lock);
-               radix_tree_preload_end();
-               mark_extent_buffer_accessed(exists);
-               goto free_eb;
        }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
-       spin_unlock(&tree->buffer_lock);
-       radix_tree_preload_end();
 
        /*
         * there is a race where release page may have
@@ -4582,23 +4601,6 @@ free_eb:
        return exists;
 }
 
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-                                        u64 start, unsigned long len)
-{
-       struct extent_buffer *eb;
-
-       rcu_read_lock();
-       eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-       if (eb && atomic_inc_not_zero(&eb->refs)) {
-               rcu_read_unlock();
-               mark_extent_buffer_accessed(eb);
-               return eb;
-       }
-       rcu_read_unlock();
-
-       return NULL;
-}
-
 static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
 {
        struct extent_buffer *eb =
@@ -5062,23 +5064,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
        }
 }
 
-static void move_pages(struct page *dst_page, struct page *src_page,
-                      unsigned long dst_off, unsigned long src_off,
-                      unsigned long len)
-{
-       char *dst_kaddr = page_address(dst_page);
-       if (dst_page == src_page) {
-               memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
-       } else {
-               char *src_kaddr = page_address(src_page);
-               char *p = dst_kaddr + dst_off + len;
-               char *s = src_kaddr + src_off + len;
-
-               while (len--)
-                       *--p = *--s;
-       }
-}
-
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
 {
        unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5189,7 +5174,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
                cur = min_t(unsigned long, len, src_off_in_page + 1);
                cur = min(cur, dst_off_in_page + 1);
-               move_pages(extent_buffer_page(dst, dst_i),
+               copy_pages(extent_buffer_page(dst, dst_i),
                           extent_buffer_page(dst, src_i),
                           dst_off_in_page - cur + 1,
                           src_off_in_page - cur + 1, cur);