Merge branch 'qgroup' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / extent-tree.c
index 2ce16f97730a7c3986642f19b2e8f7d035d50227..44f06201f376a83809b66fadcd70fd8400baa4c6 100644 (file)
@@ -2670,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
 
        node = rb_prev(node);
        if (node) {
+               int seq = ref->seq;
+
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               if (ref->bytenr == bytenr)
+               if (ref->bytenr == bytenr && ref->seq == seq)
                        goto out_unlock;
        }
 
@@ -2992,8 +2994,13 @@ again:
        }
 
        spin_lock(&block_group->lock);
-       if (block_group->cached != BTRFS_CACHE_FINISHED) {
-               /* We're not cached, don't bother trying to write stuff out */
+       if (block_group->cached != BTRFS_CACHE_FINISHED ||
+           !btrfs_test_opt(root, SPACE_CACHE)) {
+               /*
+                * don't bother trying to write stuff out _if_
+                * a) we're not cached,
+                * b) we're mounted with the nospace_cache option.
+                */
                dcs = BTRFS_DC_WRITTEN;
                spin_unlock(&block_group->lock);
                goto out_put;
@@ -3223,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        init_waitqueue_head(&found->wait);
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
+       if (flags & BTRFS_BLOCK_GROUP_DATA)
+               info->data_sinfo = found;
        return 0;
 }
 
@@ -3352,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
        return get_alloc_profile(root, flags);
 }
 
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
-       BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
-                                                      BTRFS_BLOCK_GROUP_DATA);
-}
-
 /*
  * This will check the space that the inode allocates from to make sure we have
  * enough space for bytes.
@@ -3366,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 {
        struct btrfs_space_info *data_sinfo;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 used;
        int ret = 0, committed = 0, alloc_chunk = 1;
 
@@ -3378,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
                committed = 1;
        }
 
-       data_sinfo = BTRFS_I(inode)->space_info;
+       data_sinfo = fs_info->data_sinfo;
        if (!data_sinfo)
                goto alloc;
 
@@ -3419,10 +3423,9 @@ alloc:
                                        goto commit_trans;
                        }
 
-                       if (!data_sinfo) {
-                               btrfs_set_inode_space_info(root, inode);
-                               data_sinfo = BTRFS_I(inode)->space_info;
-                       }
+                       if (!data_sinfo)
+                               data_sinfo = fs_info->data_sinfo;
+
                        goto again;
                }
 
@@ -3469,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
        /* make sure bytes are sectorsize aligned */
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-       data_sinfo = BTRFS_I(inode)->space_info;
+       data_sinfo = root->fs_info->data_sinfo;
        spin_lock(&data_sinfo->lock);
        data_sinfo->bytes_may_use -= bytes;
        trace_btrfs_space_reservation(root->fs_info, "space_info",
@@ -3675,89 +3678,58 @@ out:
 /*
  * shrink metadata reservation for delalloc
  */
-static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
-                          bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
+                           bool wait_ordered)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_space_info *space_info;
        struct btrfs_trans_handle *trans;
-       u64 reserved;
+       u64 delalloc_bytes;
        u64 max_reclaim;
-       u64 reclaimed = 0;
        long time_left;
        unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
        int loops = 0;
-       unsigned long progress;
 
        trans = (struct btrfs_trans_handle *)current->journal_info;
        block_rsv = &root->fs_info->delalloc_block_rsv;
        space_info = block_rsv->space_info;
 
        smp_mb();
-       reserved = space_info->bytes_may_use;
-       progress = space_info->reservation_progress;
-
-       if (reserved == 0)
-               return 0;
-
-       smp_mb();
-       if (root->fs_info->delalloc_bytes == 0) {
+       delalloc_bytes = root->fs_info->delalloc_bytes;
+       if (delalloc_bytes == 0) {
                if (trans)
-                       return 0;
+                       return;
                btrfs_wait_ordered_extents(root, 0, 0);
-               return 0;
+               return;
        }
 
-       max_reclaim = min(reserved, to_reclaim);
-       nr_pages = max_t(unsigned long, nr_pages,
-                        max_reclaim >> PAGE_CACHE_SHIFT);
-       while (loops < 1024) {
-               /* have the flusher threads jump in and do some IO */
-               smp_mb();
-               nr_pages = min_t(unsigned long, nr_pages,
-                      root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+       while (delalloc_bytes && loops < 3) {
+               max_reclaim = min(delalloc_bytes, to_reclaim);
+               nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
-                                               WB_REASON_FS_FREE_SPACE);
+                                              WB_REASON_FS_FREE_SPACE);
 
                spin_lock(&space_info->lock);
-               if (reserved > space_info->bytes_may_use)
-                       reclaimed += reserved - space_info->bytes_may_use;
-               reserved = space_info->bytes_may_use;
+               if (space_info->bytes_used + space_info->bytes_reserved +
+                   space_info->bytes_pinned + space_info->bytes_readonly +
+                   space_info->bytes_may_use + orig <=
+                   space_info->total_bytes) {
+                       spin_unlock(&space_info->lock);
+                       break;
+               }
                spin_unlock(&space_info->lock);
 
                loops++;
-
-               if (reserved == 0 || reclaimed >= max_reclaim)
-                       break;
-
-               if (trans && trans->transaction->blocked)
-                       return -EAGAIN;
-
                if (wait_ordered && !trans) {
                        btrfs_wait_ordered_extents(root, 0, 0);
                } else {
-                       time_left = schedule_timeout_interruptible(1);
-
-                       /* We were interrupted, exit */
+                       time_left = schedule_timeout_killable(1);
                        if (time_left)
                                break;
                }
-
-               /* we've kicked the IO a few times, if anything has been freed,
-                * exit.  There is no sense in looping here for a long time
-                * when we really need to commit the transaction, or there are
-                * just too many writers without enough free space
-                */
-
-               if (loops > 3) {
-                       smp_mb();
-                       if (progress != space_info->reservation_progress)
-                               break;
-               }
-
+               smp_mb();
+               delalloc_bytes = root->fs_info->delalloc_bytes;
        }
-
-       return reclaimed >= to_reclaim;
 }
 
 /**
@@ -3817,6 +3789,58 @@ commit:
        return btrfs_commit_transaction(trans, root);
 }
 
+enum flush_state {
+       FLUSH_DELALLOC          =       1,
+       FLUSH_DELALLOC_WAIT     =       2,
+       FLUSH_DELAYED_ITEMS_NR  =       3,
+       FLUSH_DELAYED_ITEMS     =       4,
+       COMMIT_TRANS            =       5,
+};
+
+static int flush_space(struct btrfs_root *root,
+                      struct btrfs_space_info *space_info, u64 num_bytes,
+                      u64 orig_bytes, int state)
+{
+       struct btrfs_trans_handle *trans;
+       int nr;
+       int ret = 0;
+
+       switch (state) {
+       case FLUSH_DELALLOC:
+       case FLUSH_DELALLOC_WAIT:
+               shrink_delalloc(root, num_bytes, orig_bytes,
+                               state == FLUSH_DELALLOC_WAIT);
+               break;
+       case FLUSH_DELAYED_ITEMS_NR:
+       case FLUSH_DELAYED_ITEMS:
+               if (state == FLUSH_DELAYED_ITEMS_NR) {
+                       u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+                       nr = (int)div64_u64(num_bytes, bytes);
+                       if (!nr)
+                               nr = 1;
+                       nr *= 2;
+               } else {
+                       nr = -1;
+               }
+               trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       break;
+               }
+               ret = btrfs_run_delayed_items_nr(trans, root, nr);
+               btrfs_end_transaction(trans, root);
+               break;
+       case COMMIT_TRANS:
+               ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+               break;
+       default:
+               ret = -ENOSPC;
+               break;
+       }
+
+       return ret;
+}
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
@@ -3838,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
        struct btrfs_space_info *space_info = block_rsv->space_info;
        u64 used;
        u64 num_bytes = orig_bytes;
-       int retries = 0;
+       int flush_state = FLUSH_DELALLOC;
        int ret = 0;
-       bool committed = false;
        bool flushing = false;
-       bool wait_ordered = false;
+       bool committed = false;
 
 again:
        ret = 0;
@@ -3901,9 +3924,8 @@ again:
                 * amount plus the amount of bytes that we need for this
                 * reservation.
                 */
-               wait_ordered = true;
                num_bytes = used - space_info->total_bytes +
-                       (orig_bytes * (retries + 1));
+                       (orig_bytes * 2);
        }
 
        if (ret) {
@@ -3956,8 +3978,6 @@ again:
                        trace_btrfs_space_reservation(root->fs_info,
                                "space_info", space_info->flags, orig_bytes, 1);
                        ret = 0;
-               } else {
-                       wait_ordered = true;
                }
        }
 
@@ -3976,36 +3996,13 @@ again:
        if (!ret || !flush)
                goto out;
 
-       /*
-        * We do synchronous shrinking since we don't actually unreserve
-        * metadata until after the IO is completed.
-        */
-       ret = shrink_delalloc(root, num_bytes, wait_ordered);
-       if (ret < 0)
-               goto out;
-
-       ret = 0;
-
-       /*
-        * So if we were overcommitted it's possible that somebody else flushed
-        * out enough space and we simply didn't have enough space to reclaim,
-        * so go back around and try again.
-        */
-       if (retries < 2) {
-               wait_ordered = true;
-               retries++;
+       ret = flush_space(root, space_info, num_bytes, orig_bytes,
+                         flush_state);
+       flush_state++;
+       if (!ret)
                goto again;
-       }
-
-       ret = -ENOSPC;
-       if (committed)
-               goto out;
-
-       ret = may_commit_transaction(root, space_info, orig_bytes, 0);
-       if (!ret) {
-               committed = true;
+       else if (flush_state <= COMMIT_TRANS)
                goto again;
-       }
 
 out:
        if (flushing) {
@@ -4023,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv(
 {
        struct btrfs_block_rsv *block_rsv = NULL;
 
-       if (root->ref_cows || root == root->fs_info->csum_root)
+       if (root->ref_cows)
+               block_rsv = trans->block_rsv;
+
+       if (root == root->fs_info->csum_root && trans->adding_csums)
                block_rsv = trans->block_rsv;
 
        if (!block_rsv)
@@ -4375,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
+       if (!trans->block_rsv)
+               return;
+
        if (!trans->bytes_reserved)
                return;
 
@@ -4533,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        int ret;
 
        /* Need to be holding the i_mutex here if we aren't free space cache */
-       if (btrfs_is_free_space_inode(root, inode))
+       if (btrfs_is_free_space_inode(inode))
                flush = 0;
 
        if (flush && btrfs_transaction_in_commit(root->fs_info))
@@ -5849,7 +5852,11 @@ loop:
                                ret = do_chunk_alloc(trans, root, num_bytes +
                                                     2 * 1024 * 1024, data,
                                                     CHUNK_ALLOC_LIMITED);
-                               if (ret < 0) {
+                               /*
+                                * Do not bail out on ENOSPC since we
+                                * can do more things.
+                                */
+                               if (ret < 0 && ret != -ENOSPC) {
                                        btrfs_abort_transaction(trans,
                                                                root, ret);
                                        goto out;
@@ -5917,13 +5924,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 again:
        list_for_each_entry(cache, &info->block_groups[index], list) {
                spin_lock(&cache->lock);
-               printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
-                      "%llu pinned %llu reserved\n",
+               printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
                       (unsigned long long)cache->key.objectid,
                       (unsigned long long)cache->key.offset,
                       (unsigned long long)btrfs_block_group_used(&cache->item),
                       (unsigned long long)cache->pinned,
-                      (unsigned long long)cache->reserved);
+                      (unsigned long long)cache->reserved,
+                      cache->ro ? "[readonly]" : "");
                btrfs_dump_free_space(cache, bytes);
                spin_unlock(&cache->lock);
        }
@@ -7711,8 +7718,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
 
-               if (need_clear)
+               if (need_clear) {
+                       /*
+                        * When we mount with an old space cache, we need to
+                        * set BTRFS_DC_CLEAR and set the dirty flag.
+                        *
+                        * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
+                        *    truncate the old free space cache inode and
+                        *    setup a new one.
+                        * b) Setting 'dirty flag' makes sure that we flush
+                        *    the new space cache info onto disk.
+                        */
                        cache->disk_cache_state = BTRFS_DC_CLEAR;
+                       if (btrfs_test_opt(root, SPACE_CACHE))
+                               cache->dirty = 1;
+               }
 
                read_extent_buffer(leaf, &cache->item,
                                   btrfs_item_ptr_offset(leaf, path->slots[0]),