Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 18 Jul 2010 17:09:25 +0000 (10:09 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 18 Jul 2010 17:09:25 +0000 (10:09 -0700)
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  ocfs2: Silence gcc warning in ocfs2_write_zero_page().
  jbd2/ocfs2: Fix block checksumming when a buffer is used in several transactions
  ocfs2/dlm: Remove BUG_ON from migration in the rare case of a down node
  ocfs2: Don't duplicate pages past i_size during CoW.
  ocfs2: tighten up strlen() checking
  ocfs2: Make xattr reflink work with new local alloc reservation.
  ocfs2: make xattr extension work with new local alloc reservation.
  ocfs2: Remove the redundant cpu_to_le64.
  ocfs2/dlm: don't access beyond bitmap size
  ocfs2: No need to zero pages past i_size.
  ocfs2: Zero the tail cluster when extending past i_size.
  ocfs2: When zero extending, do it by page.
  ocfs2: Limit default local alloc size within bitmap range.
  ocfs2: Move orphan scan work to ocfs2_wq.
  fs/ocfs2/dlm: Add missing spin_unlock

16 files changed:
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/ocfs2/aops.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/file.c
fs/ocfs2/file.h
fs/ocfs2/journal.c
fs/ocfs2/localalloc.c
fs/ocfs2/quota_global.c
fs/ocfs2/quota_local.c
fs/ocfs2/refcounttree.c
fs/ocfs2/suballoc.c
fs/ocfs2/xattr.c
include/linux/jbd2.h

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769199f271db9055f7881b2e9c4bf54..036880895bfc8c2e99c42f6fd900819315bd508a 100644
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
        struct page *new_page;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
-       struct jbd2_buffer_trigger_type *triggers;
        journal_t *journal = transaction->t_journal;
 
        /*
@@ -328,21 +327,21 @@ repeat:
                done_copy_out = 1;
                new_page = virt_to_page(jh_in->b_frozen_data);
                new_offset = offset_in_page(jh_in->b_frozen_data);
-               triggers = jh_in->b_frozen_triggers;
        } else {
                new_page = jh2bh(jh_in)->b_page;
                new_offset = offset_in_page(jh2bh(jh_in)->b_data);
-               triggers = jh_in->b_triggers;
        }
 
        mapped_data = kmap_atomic(new_page, KM_USER0);
        /*
-        * Fire any commit trigger.  Do this before checking for escaping,
-        * as the trigger may modify the magic offset.  If a copy-out
-        * happens afterwards, it will have the correct data in the buffer.
+        * Fire the data-frozen trigger if the data wasn't already frozen.
+        * Do this before checking for escaping, as the trigger may modify
+        * the magic offset.  If a copy-out happens afterwards, it will have
+        * the correct data in the buffer.
         */
-       jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
-                                  triggers);
+       if (!done_copy_out)
+               jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+                                          jh_in->b_triggers);
 
        /*
         * Check for escaping
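
The rename above is the heart of the jbd2/ocfs2 checksum fix: t_commit fired at commit time, by which point a buffer reused in a later transaction could already hold newer data than its frozen journal copy, so checksums could be computed against the wrong version. t_frozen instead fires the moment the journal image becomes stable: either just before an unfrozen buffer is written out (above), or when b_frozen_data is created (the transaction.c hunk below). A minimal consumer sketch; update_block_checksum() is a hypothetical stand-in for a real checksum routine (ocfs2's triggers update a struct ocfs2_block_check):

    static void my_frozen_trigger(struct jbd2_buffer_trigger_type *type,
                                  struct buffer_head *bh, void *mapped_data,
                                  size_t size)
    {
            /* mapped_data is the exact image that will hit the journal */
            update_block_checksum(mapped_data, size);   /* hypothetical */
    }

    static struct jbd2_buffer_trigger_type my_triggers = {
            .t_frozen = my_frozen_trigger,
            /* .t_abort omitted for brevity */
    };
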
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac167fb5ddeb71775ead1b6063a571..b8e0806681bb0f4acf63964fa3f72ae00e1f8900 100644
@@ -725,6 +725,9 @@ done:
                page = jh2bh(jh)->b_page;
                offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
                source = kmap_atomic(page, KM_USER0);
+               /* Fire data frozen trigger just before we copy the data */
+               jbd2_buffer_frozen_trigger(jh, source + offset,
+                                          jh->b_triggers);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
                kunmap_atomic(source, KM_USER0);
 
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
        jh->b_triggers = type;
 }
 
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
                                struct jbd2_buffer_trigger_type *triggers)
 {
        struct buffer_head *bh = jh2bh(jh);
 
-       if (!triggers || !triggers->t_commit)
+       if (!triggers || !triggers->t_frozen)
                return;
 
-       triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+       triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
 }
 
 void jbd2_buffer_abort_trigger(struct journal_head *jh,
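
For completeness, the producer side: a trigger set is attached per buffer head right after taking journal write access, which is the pattern ocfs2's journal access wrappers follow. A sketch:

    ret = jbd2_journal_get_write_access(handle, bh);
    if (!ret)
            jbd2_journal_set_triggers(bh, &my_triggers);
    /* jbd2 now calls my_triggers.t_frozen once per transaction, at
     * the point this buffer's journal image is frozen */
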
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc186046cfbf840f03ce9705146b17ba..356e976772bf1adb112aaf3c8b28ddb71b639afb 100644
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
                        dump_stack();
                        goto bail;
                }
-
-               past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-               mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
-                    (unsigned long long)past_eof);
-
-               if (create && (iblock >= past_eof))
-                       set_buffer_new(bh_result);
        }
 
+       past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
+       mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+            (unsigned long long)past_eof);
+       if (create && (iblock >= past_eof))
+               set_buffer_new(bh_result);
+
 bail:
        if (err < 0)
                err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers(     handle_t *handle,
        return ret;
 }
 
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
-                                                        struct page *page,
-                                                        unsigned from,
-                                                        unsigned to)
-{
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-       handle_t *handle;
-       int ret = 0;
-
-       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
-       if (IS_ERR(handle)) {
-               ret = -ENOMEM;
-               mlog_errno(ret);
-               goto out;
-       }
-
-       if (ocfs2_should_order_data(inode)) {
-               ret = ocfs2_jbd2_file_inode(handle, inode);
-               if (ret < 0)
-                       mlog_errno(ret);
-       }
-out:
-       if (ret) {
-               if (!IS_ERR(handle))
-                       ocfs2_commit_trans(osb, handle);
-               handle = ERR_PTR(ret);
-       }
-       return handle;
-}
-
 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 {
        sector_t status;
@@ -1131,23 +1100,37 @@ out:
  */
 static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                                      struct ocfs2_write_ctxt *wc,
-                                     u32 cpos, loff_t user_pos, int new,
+                                     u32 cpos, loff_t user_pos,
+                                     unsigned user_len, int new,
                                      struct page *mmap_page)
 {
        int ret = 0, i;
-       unsigned long start, target_index, index;
+       unsigned long start, target_index, end_index, index;
        struct inode *inode = mapping->host;
+       loff_t last_byte;
 
        target_index = user_pos >> PAGE_CACHE_SHIFT;
 
        /*
         * Figure out how many pages we'll be manipulating here. For
         * non allocating write, we just change the one
-        * page. Otherwise, we'll need a whole clusters worth.
+        * page. Otherwise, we'll need a whole cluster's worth.  If we're
+        * writing past i_size, we only need enough pages to cover the
+        * last page of the write.
         */
        if (new) {
                wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
                start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+               /*
+                * We need the index *past* the last page we could possibly
+                * touch.  This is the page past the end of the write or
+                * i_size, whichever is greater.
+                */
+               last_byte = max(user_pos + user_len, i_size_read(inode));
+               BUG_ON(last_byte < 1);
+               end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+               if ((start + wc->w_num_pages) > end_index)
+                       wc->w_num_pages = end_index - start;
        } else {
                wc->w_num_pages = 1;
                start = target_index;
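
A worked example of the new clamp in ocfs2_grab_pages_for_write(), assuming 4K pages and 32K clusters (8 pages per cluster):

    user_pos = 40960, user_len = 100, i_size = 20480
    last_byte   = max(40960 + 100, 20480)                = 41060
    end_index   = ((41060 - 1) >> PAGE_CACHE_SHIFT) + 1  = 11
    start       = 8   (first page of the cluster being written)
    w_num_pages = min(8, 11 - 8)                         = 3

Only pages 8 through 10 are grabbed; the five pages that lie wholly past both the write and i_size are left alone, per the "No need to zero pages past i_size" change in this merge.
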
@@ -1620,21 +1603,20 @@ out:
  * write path can treat it as an non-allocating write, which has no
  * special case code for sparse/nonsparse files.
  */
-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
-                                       unsigned len,
+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
+                                       struct buffer_head *di_bh,
+                                       loff_t pos, unsigned len,
                                        struct ocfs2_write_ctxt *wc)
 {
        int ret;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        loff_t newsize = pos + len;
 
-       if (ocfs2_sparse_alloc(osb))
-               return 0;
+       BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
 
        if (newsize <= i_size_read(inode))
                return 0;
 
-       ret = ocfs2_extend_no_holes(inode, newsize, pos);
+       ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
        if (ret)
                mlog_errno(ret);
 
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
        return ret;
 }
 
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+                          loff_t pos)
+{
+       int ret = 0;
+
+       BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+       if (pos > i_size_read(inode))
+               ret = ocfs2_zero_extend(inode, di_bh, pos);
+
+       return ret;
+}
+
 int ocfs2_write_begin_nolock(struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned flags,
                             struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                }
        }
 
-       ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
+       if (ocfs2_sparse_alloc(osb))
+               ret = ocfs2_zero_tail(inode, di_bh, pos);
+       else
+               ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
+                                                  wc);
        if (ret) {
                mlog_errno(ret);
                goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * that we can zero and flush if we error after adding the
         * extent.
         */
-       ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
+       ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
                                         cluster_of_pages, mmap_page);
        if (ret) {
                mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749f8063097a1ebef4e3ef9ad53fdb5..153abb5abef024d2ca63f6d4a23ee1c126b89f13 100644
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
        struct dlm_ctxt *dlm = NULL;
        struct dlm_ctxt *new_ctxt = NULL;
 
-       if (strlen(domain) > O2NM_MAX_NAME_LEN) {
+       if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
                ret = -ENAMETOOLONG;
                mlog(ML_ERROR, "domain name length too long\n");
                goto leave;
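
The comparison flips from > to >= because O2NM_MAX_NAME_LEN (64) is the size of the fixed name buffers the domain string is later copied into; a sketch of why a 64-character name must also be rejected:

    /* with a destination of the form
     *     char name[O2NM_MAX_NAME_LEN];    (64 bytes)
     * a domain of exactly 64 characters leaves no room for the
     * terminating NUL, so strlen(domain) == O2NM_MAX_NAME_LEN is
     * already one byte too long */
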
@@ -1709,6 +1709,7 @@ retry:
                }
 
                if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+                       spin_unlock(&dlm_domain_lock);
                        mlog(ML_ERROR,
                             "Requested locking protocol version is not "
                             "compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314c34014fd58ab739e836f9babe4e3..94b97fc6a88e62522b1958ed6a89de5e6803525b 100644
@@ -2808,14 +2808,8 @@ again:
                mlog(0, "trying again...\n");
                goto again;
        }
-       /* now that we are sure the MIGRATING state is there, drop
-        * the unneded state which blocked threads trying to DIRTY */
-       spin_lock(&res->spinlock);
-       BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
-       BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
-       res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
-       spin_unlock(&res->spinlock);
 
+       ret = 0;
        /* did the target go down or die? */
        spin_lock(&dlm->spinlock);
        if (!test_bit(target, dlm->domain_map)) {
@@ -2825,10 +2819,22 @@ again:
        }
        spin_unlock(&dlm->spinlock);
 
+       /*
+        * If the target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY
+        * for another try; otherwise, we are sure the MIGRATING state is
+        * there, so drop the unneeded state that blocked threads trying to DIRTY
+        */
+       spin_lock(&res->spinlock);
+       BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+       res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+       if (!ret)
+               BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+       spin_unlock(&res->spinlock);
+
        /*
         * at this point:
         *
-        *   o the DLM_LOCK_RES_MIGRATING flag is set
+        *   o the DLM_LOCK_RES_MIGRATING flag is set if the target is not down
         *   o there are no pending asts on this lockres
         *   o all processes trying to reserve an ast on this
         *     lockres must wait for the MIGRATING flag to clear
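
The reordering matters because the old code asserted DLM_LOCK_RES_MIGRATING before checking whether the migration target had died, so a node going down at the wrong moment turned a recoverable condition into a BUG. The new logic, summarized:

    /* after the "trying again" wait loop above:
     *   ret == 0 (target still up):  MIGRATING must be set, the
     *       BUG_ON stands, and BLOCK_DIRTY is dropped as before
     *   ret != 0 (target went down): MIGRATING may legitimately be
     *       absent; only BLOCK_DIRTY is cleared, so the lockres can
     *       be dirtied again while the migration is retried
     */
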
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be7019ab20d9c5f40d1567a4e520b8fd..9dfaac73b36da4350cc5a2f2dc7a34918a21e11e 100644
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
        if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
                int bit;
 
-               bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
+               bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
                if (bit >= O2NM_MAX_NODES || bit < 0)
                        dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
                else
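
The recovery bitmap holds exactly O2NM_MAX_NODES bits; a sketch of why the old bound could read out of range, assuming the declaration in dlmcommon.h:

    /* unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
     *
     * find_next_bit(map, O2NM_MAX_NODES + 1, 0) may inspect bit
     * number O2NM_MAX_NODES, which lives one word past the array
     * whenever O2NM_MAX_NODES is a multiple of BITS_PER_LONG */
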
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c44773fc239ad55a5237422ddee18e94..2b10b36d15772efcae056a62b4df1a9fe8aa2c53 100644
@@ -724,28 +724,55 @@ leave:
        return status;
 }
 
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       handle_t *handle = NULL;
+       int ret = 0;
+
+       if (!ocfs2_should_order_data(inode))
+               goto out;
+
+       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+       if (IS_ERR(handle)) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_jbd2_file_inode(handle, inode);
+       if (ret < 0)
+               mlog_errno(ret);
+
+out:
+       if (ret) {
+               if (!IS_ERR(handle))
+                       ocfs2_commit_trans(osb, handle);
+               handle = ERR_PTR(ret);
+       }
+       return handle;
+}
+
 /* Some parts of this taken from generic_cont_expand, which turned out
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
-                                u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+                                u64 abs_to)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-       unsigned long index;
-       unsigned int offset;
+       unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
        handle_t *handle = NULL;
-       int ret;
+       int ret = 0;
+       unsigned zero_from, zero_to, block_start, block_end;
 
-       offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-       /* ugh.  in prepare/commit_write, if from==to==start of block, we
-       ** skip the prepare.  make sure we never send an offset for the start
-       ** of a block
-       */
-       if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-               offset++;
-       }
-       index = size >> PAGE_CACHE_SHIFT;
+       BUG_ON(abs_from >= abs_to);
+       BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+       BUG_ON(abs_from & (inode->i_blkbits - 1));
 
        page = grab_cache_page(mapping, index);
        if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out_unlock;
-       }
+       /* Get the offsets within the page that we want to zero */
+       zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+       zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+       if (!zero_to)
+               zero_to = PAGE_CACHE_SIZE;
 
-       if (ocfs2_should_order_data(inode)) {
-               handle = ocfs2_start_walk_page_trans(inode, page, offset,
-                                                    offset);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       handle = NULL;
+       mlog(0,
+            "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+            (unsigned long long)abs_from, (unsigned long long)abs_to,
+            index, zero_from, zero_to);
+
+       /* We know that zero_from is block aligned */
+       for (block_start = zero_from; block_start < zero_to;
+            block_start = block_end) {
+               block_end = block_start + (1 << inode->i_blkbits);
+
+               /*
+                * block_start is block-aligned.  Bump it by one to
+                * force ocfs2_{prepare,commit}_write() to zero the
+                * whole block.
+                */
+               ret = ocfs2_prepare_write_nolock(inode, page,
+                                                block_start + 1,
+                                                block_start + 1);
+               if (ret < 0) {
+                       mlog_errno(ret);
                        goto out_unlock;
                }
-       }
 
-       /* must not update i_size! */
-       ret = block_commit_write(page, offset, offset);
-       if (ret < 0)
-               mlog_errno(ret);
-       else
-               ret = 0;
+               if (!handle) {
+                       handle = ocfs2_zero_start_ordered_transaction(inode);
+                       if (IS_ERR(handle)) {
+                               ret = PTR_ERR(handle);
+                               handle = NULL;
+                               break;
+                       }
+               }
+
+               /* must not update i_size! */
+               ret = block_commit_write(page, block_start + 1,
+                                        block_start + 1);
+               if (ret < 0)
+                       mlog_errno(ret);
+               else
+                       ret = 0;
+       }
 
        if (handle)
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
 out_unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -786,22 +838,114 @@ out:
        return ret;
 }
 
-static int ocfs2_zero_extend(struct inode *inode,
-                            u64 zero_to_size)
+/*
+ * Find the next range to zero.  We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache.  We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
+ * needs to be zeroed.  range_start and range_end return the next zeroing
+ * range.  A subsequent call should pass the previous range_end as its
+ * zero_start.  If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over.  Refcounted extents are CoW'd.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+                                      struct buffer_head *di_bh,
+                                      u64 zero_start, u64 zero_end,
+                                      u64 *range_start, u64 *range_end)
 {
-       int ret = 0;
-       u64 start_off;
-       struct super_block *sb = inode->i_sb;
+       int rc = 0, needs_cow = 0;
+       u32 p_cpos, zero_clusters = 0;
+       u32 zero_cpos =
+               zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+       u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+       unsigned int num_clusters = 0;
+       unsigned int ext_flags = 0;
 
-       start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
-       while (start_off < zero_to_size) {
-               ret = ocfs2_write_zero_page(inode, start_off);
-               if (ret < 0) {
-                       mlog_errno(ret);
+       while (zero_cpos < last_cpos) {
+               rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
+                                       &num_clusters, &ext_flags);
+               if (rc) {
+                       mlog_errno(rc);
+                       goto out;
+               }
+
+               if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+                       zero_clusters = num_clusters;
+                       if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                               needs_cow = 1;
+                       break;
+               }
+
+               zero_cpos += num_clusters;
+       }
+       if (!zero_clusters) {
+               *range_end = 0;
+               goto out;
+       }
+
+       while ((zero_cpos + zero_clusters) < last_cpos) {
+               rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+                                       &p_cpos, &num_clusters,
+                                       &ext_flags);
+               if (rc) {
+                       mlog_errno(rc);
                        goto out;
                }
 
-               start_off += sb->s_blocksize;
+               if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+                       break;
+               if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                       needs_cow = 1;
+               zero_clusters += num_clusters;
+       }
+       if ((zero_cpos + zero_clusters) > last_cpos)
+               zero_clusters = last_cpos - zero_cpos;
+
+       if (needs_cow) {
+               rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+                                       UINT_MAX);
+               if (rc) {
+                       mlog_errno(rc);
+                       goto out;
+               }
+       }
+
+       *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+       *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+                                            zero_cpos + zero_clusters);
+
+out:
+       return rc;
+}
+
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
+ * has made sure that the entire range needs zeroing.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+                                  u64 range_end)
+{
+       int rc = 0;
+       u64 next_pos;
+       u64 zero_pos = range_start;
+
+       mlog(0, "range_start = %llu, range_end = %llu\n",
+            (unsigned long long)range_start,
+            (unsigned long long)range_end);
+       BUG_ON(range_start >= range_end);
+
+       while (zero_pos < range_end) {
+               next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+               if (next_pos > range_end)
+                       next_pos = range_end;
+               rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+               if (rc < 0) {
+                       mlog_errno(rc);
+                       break;
+               }
+               zero_pos = next_pos;
 
                /*
                 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
                cond_resched();
        }
 
-out:
+       return rc;
+}
+
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+                     loff_t zero_to_size)
+{
+       int ret = 0;
+       u64 zero_start, range_start = 0, range_end = 0;
+       struct super_block *sb = inode->i_sb;
+
+       zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+       mlog(0, "zero_start %llu for i_size %llu\n",
+            (unsigned long long)zero_start,
+            (unsigned long long)i_size_read(inode));
+       while (zero_start < zero_to_size) {
+               ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+                                                 zero_to_size,
+                                                 &range_start,
+                                                 &range_end);
+               if (ret) {
+                       mlog_errno(ret);
+                       break;
+               }
+               if (!range_end)
+                       break;
+               /* Trim the ends */
+               if (range_start < zero_start)
+                       range_start = zero_start;
+               if (range_end > zero_to_size)
+                       range_end = zero_to_size;
+
+               ret = ocfs2_zero_extend_range(inode, range_start,
+                                             range_end);
+               if (ret) {
+                       mlog_errno(ret);
+                       break;
+               }
+               zero_start = range_end;
+       }
+
        return ret;
 }
 
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+                         u64 new_i_size, u64 zero_to)
 {
        int ret;
        u32 clusters_to_add;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
+       /*
+        * Only quota files call this without a bh, and they can't be
+        * refcounted.
+        */
+       BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+       BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
+
        clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
        if (clusters_to_add < oi->ip_clusters)
                clusters_to_add = 0;
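
A worked pass through the new ocfs2_zero_extend() on a sparse file, assuming 4K blocks, clusters and pages:

    /* i_size = 10000, zero_to_size = 65536
     * zero_start = 12288 (i_size rounded up to a block boundary)
     * say clusters 0-3 are allocated and written (the tail cluster
     * reaches past i_size), 4-5 are unwritten, 6-15 are holes
     *
     * pass 1: get_range() scans from cluster 3 and returns the
     *         written range [12288, 16384), which is zeroed page by
     *         page via ocfs2_write_zero_page()
     * pass 2: starts at 16384; everything up to 65536 is unwritten
     *         or a hole, so range_end comes back 0 and the loop
     *         stops, since those clusters read as zeros anyway
     */
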
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
         * still need to zero the area between the old i_size and the
         * new i_size.
         */
-       ret = ocfs2_zero_extend(inode, zero_to);
+       ret = ocfs2_zero_extend(inode, di_bh, zero_to);
        if (ret < 0)
                mlog_errno(ret);
 
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
                goto out;
 
        if (i_size_read(inode) == new_i_size)
-               goto out;
+               goto out;
        BUG_ON(new_i_size < i_size_read(inode));
 
-       /*
-        * Fall through for converting inline data, even if the fs
-        * supports sparse files.
-        *
-        * The check for inline data here is legal - nobody can add
-        * the feature since we have i_mutex. We must check it again
-        * after acquiring ip_alloc_sem though, as paths like mmap
-        * might have raced us to converting the inode to extents.
-        */
-       if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
-           && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-               goto out_update_size;
-
        /*
         * The alloc sem blocks people in read/write from reading our
         * allocation until we're done changing it. We depend on
         * i_mutex to block other extend/truncate calls while we're
-        * here.
+        * here.  We even have to hold it for sparse files because there
+        * might be some tail zeroing.
         */
        down_write(&oi->ip_alloc_sem);
 
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
                ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
                if (ret) {
                        up_write(&oi->ip_alloc_sem);
-
                        mlog_errno(ret);
                        goto out;
                }
        }
 
-       if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-               ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+       if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+               ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+       else
+               ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+                                           new_i_size);
 
        up_write(&oi->ip_alloc_sem);
 
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e34bbec1d63eccd25096eeac7e976..97bf761c9e7c7b8f1c1744bbada1d778095eb3d7 100644
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 int ocfs2_simple_size_update(struct inode *inode,
                             struct buffer_head *di_bh,
                             u64 new_i_size);
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
-                         u64 zero_to);
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+                         u64 new_i_size, u64 zero_to);
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+                     loff_t zero_to);
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf164184cf910bcbbebc10da133aa2e0c64..625de9d7088cdf2c82008b2e875094ec2b43f1d0 100644
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
        return container_of(triggers, struct ocfs2_triggers, ot_triggers);
 }
 
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Quota blocks have their own trigger because the struct ocfs2_block_check
  * offset depends on the blocksize.
  */
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Directory blocks also have their own trigger because the
  * struct ocfs2_block_check offset depends on the blocksize.
  */
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 
 static struct ocfs2_triggers di_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
 
 static struct ocfs2_triggers eb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
 
 static struct ocfs2_triggers rb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
 
 static struct ocfs2_triggers gd_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
 
 static struct ocfs2_triggers db_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_db_commit_trigger,
+               .t_frozen = ocfs2_db_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
 };
 
 static struct ocfs2_triggers xb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
 
 static struct ocfs2_triggers dq_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_dq_commit_trigger,
+               .t_frozen = ocfs2_dq_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
 };
 
 static struct ocfs2_triggers dr_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
 
 static struct ocfs2_triggers dl_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
        mutex_lock(&os->os_lock);
        ocfs2_queue_orphan_scan(osb);
        if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
-               schedule_delayed_work(&os->os_orphan_scan_work,
+               queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
                                      ocfs2_orphan_scan_timeout());
        mutex_unlock(&os->os_lock);
 }
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
                atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
        else {
                atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
-               schedule_delayed_work(&os->os_orphan_scan_work,
-                                     ocfs2_orphan_scan_timeout());
+               queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+                                  ocfs2_orphan_scan_timeout());
        }
 }
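
A note on the scheduling change, assuming ocfs2_wq is the private workqueue ocfs2 creates at module init: schedule_delayed_work() targets the shared kernel events threads, while queue_delayed_work(ocfs2_wq, ...) keeps all ocfs2 background work on one queue, so shutdown paths can drain everything in one place:

    flush_workqueue(ocfs2_wq);
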
 
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc069da151ae265367f214e15ae31af..ec6adbf8f5515afbaa7e8ffcb8a32b6a3c4e1ea8 100644
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
 {
        unsigned int la_mb;
        unsigned int gd_mb;
+       unsigned int la_max_mb;
        unsigned int megs_per_slot;
        struct super_block *sb = osb->sb;
 
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
        if (megs_per_slot < la_mb)
                la_mb = megs_per_slot;
 
+       /* We can't store more bits than fit in a block. */
+       la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
+                                               ocfs2_local_alloc_size(sb) * 8);
+       if (la_mb > la_max_mb)
+               la_mb = la_max_mb;
+
        return la_mb;
 }
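
Rough numbers for the new cap, assuming 4K blocks and 4K clusters: the local alloc bitmap must fit inside one block, so ocfs2_local_alloc_size() is a bit under 4096 bytes, roughly 32,000 bits; at one 4K cluster per bit:

    la_max_mb = ocfs2_clusters_to_megabytes(sb, ocfs2_local_alloc_size(sb) * 8)
              ~ 32768 bits * 4 KB per cluster  ~ 128 MB

Any larger default would promise more bits than the bitmap can store, which is exactly what the clamp prevents.
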
 
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511e98f41f23fa9a93d752d3a82b4fe..4607923eb24c192ff3642042161684a158300908 100644
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
                 * locking allocators ranks above a transaction start
                 */
                WARN_ON(journal_current_handle());
-               status = ocfs2_extend_no_holes(gqinode,
+               status = ocfs2_extend_no_holes(gqinode, NULL,
                        gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
                        gqinode->i_size);
                if (status < 0)
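
Note the NULL passed for the new di_bh argument: quota files are system files and can never carry the refcount feature, so the zeroing path will never need the dinode buffer for CoW. The BUG_ONs added to ocfs2_extend_no_holes() in the file.c hunk above encode exactly that assumption; the same applies to the two quota_local.c callers below.
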
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d5827fdb861b7a565fc26d364f0a1..dc78764ccc4c6211bacee0a409f23606e24b3997 100644
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        u64 p_blkno;
 
        /* We are protected by dqio_sem so no locking needed */
-       status = ocfs2_extend_no_holes(lqinode,
+       status = ocfs2_extend_no_holes(lqinode, NULL,
                                       lqinode->i_size + 2 * sb->s_blocksize,
                                       lqinode->i_size);
        if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                return ocfs2_local_quota_add_chunk(sb, type, offset);
 
        /* We are protected by dqio_sem so no locking needed */
-       status = ocfs2_extend_no_holes(lqinode,
+       status = ocfs2_extend_no_holes(lqinode, NULL,
                                       lqinode->i_size + sb->s_blocksize,
                                       lqinode->i_size);
        if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518b25f312274d2fcdc84492de26340..3ac5aa733e9c8018090bc2493d819937593760a0 100644
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 
        offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
        end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+       /*
+        * We only duplicate pages until we reach the page that contains i_size - 1.
+        * So trim 'end' to i_size.
+        */
+       if (end > i_size_read(context->inode))
+               end = i_size_read(context->inode);
 
        while (offset < end) {
                page_index = offset >> PAGE_CACHE_SHIFT;
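
A quick example of the trim, assuming 32K clusters: with i_size = 70000 and a CoW of two clusters starting at byte 65536, end would be 131072, but bytes beyond i_size have never been written and have no up-to-date pages to copy; trimming end to 70000 stops the loop after the page containing byte 69999. Anything between i_size and the end of its cluster is covered by the tail-zeroing changes elsewhere in this merge.
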
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
        struct inode *inode = old_dentry->d_inode;
        struct buffer_head *new_bh = NULL;
 
+       if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+               ret = -EINVAL;
+               mlog_errno(ret);
+               goto out;
+       }
+
        ret = filemap_fdatawrite(inode->i_mapping);
        if (ret) {
                mlog_errno(ret);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d75a6354fb52c37ba93edb81bebd5..a8e6a95a353f03dcb8a34cf928ded84ff7e6d127 100644
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
                     le16_to_cpu(bg->bg_free_bits_count));
        le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
                     le16_to_cpu(bg->bg_bits));
-       cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg->bg_blkno);
+       cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
        if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
                le16_add_cpu(&cl->cl_next_free_rec, 1);
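
This one is more than a cleanup on big-endian hosts: bg_blkno is declared __le64 and was already stored in little-endian form when the group descriptor was initialized, so converting it again byte-swaps it back:

    /* __le64 bg_blkno;                      (on-disk, little-endian)
     * bg->bg_blkno = cpu_to_le64(blkno);    (done at init time)
     * cpu_to_le64(bg->bg_blkno)             (wrong: double swap on
     *                                        big-endian; the kind of
     *                                        thing sparse's endian
     *                                        checking flags) */
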
 
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfea0d188f85476a56c4a0eb0af5ce0..d03469f618012ea4aff64b62f1f77e3e8b8ec47d 100644
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
                                         struct ocfs2_xattr_value_buf *vb,
                                         struct ocfs2_xattr_set_ctxt *ctxt)
 {
-       int status = 0;
+       int status = 0, credits;
        handle_t *handle = ctxt->handle;
        enum ocfs2_alloc_restarted why;
        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
-       status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
-                             OCFS2_JOURNAL_ACCESS_WRITE);
-       if (status < 0) {
-               mlog_errno(status);
-               goto leave;
-       }
+       while (clusters_to_add) {
+               status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
+                                      OCFS2_JOURNAL_ACCESS_WRITE);
+               if (status < 0) {
+                       mlog_errno(status);
+                       break;
+               }
 
-       prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
-       status = ocfs2_add_clusters_in_btree(handle,
-                                            &et,
-                                            &logical_start,
-                                            clusters_to_add,
-                                            0,
-                                            ctxt->data_ac,
-                                            ctxt->meta_ac,
-                                            &why);
-       if (status < 0) {
-               mlog_errno(status);
-               goto leave;
-       }
+               prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
+               status = ocfs2_add_clusters_in_btree(handle,
+                                                    &et,
+                                                    &logical_start,
+                                                    clusters_to_add,
+                                                    0,
+                                                    ctxt->data_ac,
+                                                    ctxt->meta_ac,
+                                                    &why);
+               if ((status < 0) && (status != -EAGAIN)) {
+                       if (status != -ENOSPC)
+                               mlog_errno(status);
+                       break;
+               }
 
-       ocfs2_journal_dirty(handle, vb->vb_bh);
+               ocfs2_journal_dirty(handle, vb->vb_bh);
 
-       clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
+               clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
+                                        prev_clusters;
 
-       /*
-        * We should have already allocated enough space before the transaction,
-        * so no need to restart.
-        */
-       BUG_ON(why != RESTART_NONE || clusters_to_add);
-
-leave:
+               if (why != RESTART_NONE && clusters_to_add) {
+                       /*
+                        * We can only fail in case the alloc file doesn't give
+                        * up enough clusters.
+                        */
+                       BUG_ON(why == RESTART_META);
+
+                       mlog(0, "restarting xattr value extension for %u"
+                            " clusters.\n", clusters_to_add);
+                       credits = ocfs2_calc_extend_credits(inode->i_sb,
+                                                           &vb->vb_xv->xr_list,
+                                                           clusters_to_add);
+                       status = ocfs2_extend_trans(handle, credits);
+                       if (status < 0) {
+                               status = -ENOMEM;
+                               mlog_errno(status);
+                               break;
+                       }
+               }
+       }
 
        return status;
 }
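
The shape of the new extend loop, condensed (not a drop-in; names as in the hunk above): with the local alloc reservation code, one ocfs2_add_clusters_in_btree() call may now return -EAGAIN with why set before all requested clusters are in place, so the path loops and grows the transaction as it goes:

    while (clusters_to_add) {
            /* journal access on vb->vb_bh, then: */
            ocfs2_add_clusters_in_btree(..., &why);   /* may be partial */
            ocfs2_journal_dirty(handle, vb->vb_bh);
            if (why != RESTART_NONE && clusters_to_add) {
                    credits = ocfs2_calc_extend_credits(inode->i_sb,
                                    &vb->vb_xv->xr_list, clusters_to_add);
                    ocfs2_extend_trans(handle, credits);
            }
    }
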
@@ -6788,16 +6804,15 @@ out:
        return ret;
 }
 
-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
                                u64 blkno, u64 new_blkno, u32 clusters,
+                               u32 *cpos, int num_buckets,
                                struct ocfs2_alloc_context *meta_ac,
                                struct ocfs2_alloc_context *data_ac,
                                struct ocfs2_reflink_xattr_tree_args *args)
 {
        int i, j, ret = 0;
        struct super_block *sb = args->reflink->old_inode->i_sb;
-       u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
-       u32 num_buckets = clusters * bpc;
        int bpb = args->old_bucket->bu_blocks;
        struct ocfs2_xattr_value_buf vb = {
                .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                        break;
                }
 
-               /*
-                * The real bucket num in this series of blocks is stored
-                * in the 1st bucket.
-                */
-               if (i == 0)
-                       num_buckets = le16_to_cpu(
-                               bucket_xh(args->old_bucket)->xh_num_buckets);
-
                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                               bucket_block(args->old_bucket, j),
                               sb->s_blocksize);
 
+               /*
+                * Record the start cpos so that we can use it to initialize
+                * our xattr tree; we also set xh_num_buckets for the new
+                * bucket.
+                */
+               if (i == 0) {
+                       *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
+                                           xh_entries[0].xe_name_hash);
+                       bucket_xh(args->new_bucket)->xh_num_buckets =
+                               cpu_to_le16(num_buckets);
+               }
+
                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
 
                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                }
 
                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
                ocfs2_xattr_bucket_relse(args->old_bucket);
                ocfs2_xattr_bucket_relse(args->new_bucket);
        }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
        ocfs2_xattr_bucket_relse(args->new_bucket);
        return ret;
 }
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+                               struct inode *inode,
+                               struct ocfs2_reflink_xattr_tree_args *args,
+                               struct ocfs2_extent_tree *et,
+                               struct ocfs2_alloc_context *meta_ac,
+                               struct ocfs2_alloc_context *data_ac,
+                               u64 blkno, u32 cpos, u32 len)
+{
+       int ret, first_inserted = 0;
+       u32 p_cluster, num_clusters, reflink_cpos = 0;
+       u64 new_blkno;
+       unsigned int num_buckets, reflink_buckets;
+       unsigned int bpc =
+               ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
+
+       ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+       num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
+       ocfs2_xattr_bucket_relse(args->old_bucket);
+
+       while (len && num_buckets) {
+               ret = ocfs2_claim_clusters(handle, data_ac,
+                                          1, &p_cluster, &num_clusters);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
+               reflink_buckets = min(num_buckets, bpc * num_clusters);
+
+               ret = ocfs2_reflink_xattr_bucket(handle, blkno,
+                                                new_blkno, num_clusters,
+                                                &reflink_cpos, reflink_buckets,
+                                                meta_ac, data_ac, args);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               /*
+                * For the 1st allocated cluster, we make it use the same cpos
+                * so that the xattr tree looks the same as the original one
+                * in the common case.
+                */
+               if (!first_inserted) {
+                       reflink_cpos = cpos;
+                       first_inserted = 1;
+               }
+               ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
+                                         num_clusters, 0, meta_ac);
+               if (ret)
+                       mlog_errno(ret);
+
+               mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+                    (unsigned long long)new_blkno, num_clusters, reflink_cpos);
+
+               len -= num_clusters;
+               blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
+               num_buckets -= reflink_buckets;
+       }
+out:
+       return ret;
+}
+
 /*
  * Create the same xattr extent record in the new inode's xattr tree.
  */
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
                                   void *para)
 {
        int ret, credits = 0;
-       u32 p_cluster, num_clusters;
-       u64 new_blkno;
        handle_t *handle;
        struct ocfs2_reflink_xattr_tree_args *args =
                        (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
        struct ocfs2_alloc_context *data_ac = NULL;
        struct ocfs2_extent_tree et;
 
+       mlog(0, "reflink xattr buckets %llu len %u\n",
+            (unsigned long long)blkno, len);
+
        ocfs2_init_xattr_tree_extent_tree(&et,
                                          INODE_CACHE(args->reflink->new_inode),
                                          args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_claim_clusters(handle, data_ac,
-                                  len, &p_cluster, &num_clusters);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_commit;
-       }
-
-       new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
-
-       mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
-            (unsigned long long)blkno, (unsigned long long)new_blkno, len);
-       ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
-                                         meta_ac, data_ac, args);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_commit;
-       }
-
-       mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
-            (unsigned long long)new_blkno, len, cpos);
-       ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
-                                 len, 0, meta_ac);
+       ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
+                                         meta_ac, data_ac,
+                                         blkno, cpos, len);
        if (ret)
                mlog_errno(ret);
 
-out_commit:
        ocfs2_commit_trans(osb, handle);
 
 out:
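
The reflink side restructures for the same reason: instead of claiming all len clusters in one ocfs2_claim_clusters() call, ocfs2_reflink_xattr_buckets() now claims clusters piecemeal, copies as many buckets as fit (reflink_buckets = min(num_buckets, bpc * num_clusters)), and inserts one extent record per allocation, reusing the original cpos for the first record so the new tree usually mirrors the old. A worked shape, assuming 4 buckets per cluster, 10 buckets in a 3-cluster extent, and each claim returning a single cluster:

    /* pass 1: copy buckets 0-3, insert record at the original cpos
     * pass 2: copy buckets 4-7, insert record at the name hash of
     *         bucket 4 (xe_name_hash of its first entry)
     * pass 3: copy buckets 8-9, insert record likewise; num_buckets
     *         reaches 0 and the loop ends
     */
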
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a4d2e9f7088ada70d8b1357f418c32cd09a5bf1a..adf832dec3f37dd639e8aa24fe3cc29c7504a6a7 100644
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 struct jbd2_buffer_trigger_type {
        /*
-        * Fired just before a buffer is written to the journal.
-        * mapped_data is a mapped buffer that is the frozen data for
-        * commit.
+        * Fired at the moment the data to write to the journal is known to be
+        * stable - so either at the moment b_frozen_data is created or just
+        * before a buffer is written to the journal.  mapped_data is a mapped
+        * buffer that is the frozen data for commit.
         */
-       void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+       void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
                         struct buffer_head *bh, void *mapped_data,
                         size_t size);
 
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
                        struct buffer_head *bh);
 };
 
-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
                                       void *mapped_data,
                                       struct jbd2_buffer_trigger_type *triggers);
 extern void jbd2_buffer_abort_trigger(struct journal_head *jh,