libxfs: move source files
author Dave Chinner <dchinner@redhat.com>
Wed, 25 Jun 2014 04:57:53 +0000 (14:57 +1000)
committer Dave Chinner <david@fromorbit.com>
Wed, 25 Jun 2014 04:57:53 +0000 (14:57 +1000)
Move all the source files that are shared with userspace into
libxfs/. This is done as one big chunk simply to get it done
quickly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
53 files changed:
fs/xfs/Makefile
fs/xfs/libxfs/xfs_alloc.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_alloc_btree.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_attr.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_attr_leaf.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_attr_remote.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_bmap.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_bmap_btree.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_btree.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_da_btree.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_da_format.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_block.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_data.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_leaf.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_node.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_priv.h [new file with mode: 0644]
fs/xfs/libxfs/xfs_dir2_sf.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_dquot_buf.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_ialloc.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_ialloc_btree.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_inode_buf.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_inode_fork.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_log_rlimit.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_rtbitmap.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_symlink_remote.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_trans_resv.c [new file with mode: 0644]
fs/xfs/xfs_alloc.c [deleted file]
fs/xfs/xfs_alloc_btree.c [deleted file]
fs/xfs/xfs_attr.c [deleted file]
fs/xfs/xfs_attr_leaf.c [deleted file]
fs/xfs/xfs_attr_remote.c [deleted file]
fs/xfs/xfs_bmap.c [deleted file]
fs/xfs/xfs_bmap_btree.c [deleted file]
fs/xfs/xfs_btree.c [deleted file]
fs/xfs/xfs_da_btree.c [deleted file]
fs/xfs/xfs_da_format.c [deleted file]
fs/xfs/xfs_dir2.c [deleted file]
fs/xfs/xfs_dir2_block.c [deleted file]
fs/xfs/xfs_dir2_data.c [deleted file]
fs/xfs/xfs_dir2_leaf.c [deleted file]
fs/xfs/xfs_dir2_node.c [deleted file]
fs/xfs/xfs_dir2_priv.h [deleted file]
fs/xfs/xfs_dir2_sf.c [deleted file]
fs/xfs/xfs_dquot_buf.c [deleted file]
fs/xfs/xfs_ialloc.c [deleted file]
fs/xfs/xfs_ialloc_btree.c [deleted file]
fs/xfs/xfs_inode_buf.c [deleted file]
fs/xfs/xfs_inode_fork.c [deleted file]
fs/xfs/xfs_log_rlimit.c [deleted file]
fs/xfs/xfs_rtbitmap.c [deleted file]
fs/xfs/xfs_symlink_remote.c [deleted file]
fs/xfs/xfs_trans_resv.c [deleted file]

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 4c5edf0df9a35332d6cfa3b84d2b0fecc099d4f2..0dfa26d626f5836aa844917fca7f75470eb80b7a 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -28,7 +28,35 @@ xfs-y                                += xfs_trace.o
 
 # build the libxfs code first
 xfs-y                          += $(addprefix libxfs/, \
+                                  xfs_alloc.o \
+                                  xfs_alloc_btree.o \
+                                  xfs_attr.o \
+                                  xfs_attr_leaf.o \
+                                  xfs_attr_remote.o \
+                                  xfs_bmap.o \
+                                  xfs_bmap_btree.o \
+                                  xfs_btree.o \
+                                  xfs_da_btree.o \
+                                  xfs_da_format.o \
+                                  xfs_dir2.o \
+                                  xfs_dir2_block.o \
+                                  xfs_dir2_data.o \
+                                  xfs_dir2_leaf.o \
+                                  xfs_dir2_node.o \
+                                  xfs_dir2_sf.o \
+                                  xfs_dquot_buf.o \
+                                  xfs_ialloc.o \
+                                  xfs_ialloc_btree.o \
+                                  xfs_inode_fork.o \
+                                  xfs_inode_buf.o \
+                                  xfs_log_rlimit.o \
                                   xfs_sb.o \
+                                  xfs_symlink_remote.o \
+                                  xfs_trans_resv.o \
+                                  )
+# xfs_rtbitmap is shared with libxfs
+xfs-$(CONFIG_XFS_RT)           += $(addprefix libxfs/, \
+                                  xfs_rtbitmap.o \
                                   )
 
 # highlevel code
@@ -51,6 +79,7 @@ xfs-y                         += xfs_aops.o \
                                   xfs_ioctl.o \
                                   xfs_iomap.o \
                                   xfs_iops.o \
+                                  xfs_inode.o \
                                   xfs_itable.o \
                                   xfs_message.o \
                                   xfs_mount.o \
@@ -62,41 +91,14 @@ xfs-y                               += xfs_aops.o \
                                   kmem.o \
                                   uuid.o
 
-# code shared with libxfs
-xfs-y                          += xfs_alloc.o \
-                                  xfs_alloc_btree.o \
-                                  xfs_attr.o \
-                                  xfs_attr_leaf.o \
-                                  xfs_attr_remote.o \
-                                  xfs_bmap.o \
-                                  xfs_bmap_btree.o \
-                                  xfs_btree.o \
-                                  xfs_da_btree.o \
-                                  xfs_da_format.o \
-                                  xfs_dir2.o \
-                                  xfs_dir2_block.o \
-                                  xfs_dir2_data.o \
-                                  xfs_dir2_leaf.o \
-                                  xfs_dir2_node.o \
-                                  xfs_dir2_sf.o \
-                                  xfs_dquot_buf.o \
-                                  xfs_ialloc.o \
-                                  xfs_ialloc_btree.o \
-                                  xfs_icreate_item.o \
-                                  xfs_inode.o \
-                                  xfs_inode_fork.o \
-                                  xfs_inode_buf.o \
-                                  xfs_log_recover.o \
-                                  xfs_log_rlimit.o \
-                                  xfs_symlink_remote.o \
-                                  xfs_trans_resv.o
-
 # low-level transaction/log code
 xfs-y                          += xfs_log.o \
                                   xfs_log_cil.o \
                                   xfs_buf_item.o \
                                   xfs_extfree_item.o \
+                                  xfs_icreate_item.o \
                                   xfs_inode_item.o \
+                                  xfs_log_recover.o \
                                   xfs_trans_ail.o \
                                   xfs_trans_buf.o \
                                   xfs_trans_extfree.o \
@@ -112,8 +114,7 @@ xfs-$(CONFIG_XFS_QUOTA)             += xfs_dquot.o \
                                   xfs_quotaops.o
 
 # xfs_rtbitmap is shared with libxfs
-xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o \
-                                  xfs_rtbitmap.o
+xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
 
 xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
 xfs-$(CONFIG_PROC_FS)          += xfs_stats.o
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
new file mode 100644
index 0000000..d438132
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -0,0 +1,2630 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_error.h"
+#include "xfs_cksum.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_log.h"
+
+struct workqueue_struct *xfs_alloc_wq;
+
+#define XFS_ABSDIFF(a,b)       (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
+
+#define        XFSA_FIXUP_BNO_OK       1
+#define        XFSA_FIXUP_CNT_OK       2
+
+STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
+STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
+STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
+STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
+               xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int                             /* error */
+xfs_alloc_lookup_eq(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int                            /* error */
+xfs_alloc_lookup_ge(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int                                    /* error */
+xfs_alloc_lookup_le(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len,    /* length of extent */
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.a.ar_startblock = bno;
+       cur->bc_rec.a.ar_blockcount = len;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int                             /* error */
+xfs_alloc_update(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* starting block of extent */
+       xfs_extlen_t            len)    /* length of extent */
+{
+       union xfs_btree_rec     rec;
+
+       rec.alloc.ar_startblock = cpu_to_be32(bno);
+       rec.alloc.ar_blockcount = cpu_to_be32(len);
+       return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_alloc_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           *bno,   /* output: starting block of extent */
+       xfs_extlen_t            *len,   /* output: length of extent */
+       int                     *stat)  /* output: success/failure */
+{
+       union xfs_btree_rec     *rec;
+       int                     error;
+
+       error = xfs_btree_get_rec(cur, &rec, stat);
+       if (!error && *stat == 1) {
+               *bno = be32_to_cpu(rec->alloc.ar_startblock);
+               *len = be32_to_cpu(rec->alloc.ar_blockcount);
+       }
+       return error;
+}
+
+/*
+ * Compute aligned version of the found extent.
+ * Takes alignment and min length into account.
+ */
+STATIC void
+xfs_alloc_compute_aligned(
+       xfs_alloc_arg_t *args,          /* allocation argument structure */
+       xfs_agblock_t   foundbno,       /* starting block in found extent */
+       xfs_extlen_t    foundlen,       /* length in found extent */
+       xfs_agblock_t   *resbno,        /* result block number */
+       xfs_extlen_t    *reslen)        /* result length */
+{
+       xfs_agblock_t   bno;
+       xfs_extlen_t    len;
+
+       /* Trim busy sections out of found extent */
+       xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+
+       if (args->alignment > 1 && len >= args->minlen) {
+               xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
+               xfs_extlen_t    diff = aligned_bno - bno;
+
+               *resbno = aligned_bno;
+               *reslen = diff >= len ? 0 : len - diff;
+       } else {
+               *resbno = bno;
+               *reslen = len;
+       }
+}
+
+/*
+ * Compute best start block and diff for "near" allocations.
+ * freelen >= wantlen already checked by caller.
+ */
+STATIC xfs_extlen_t                    /* difference value (absolute) */
+xfs_alloc_compute_diff(
+       xfs_agblock_t   wantbno,        /* target starting block */
+       xfs_extlen_t    wantlen,        /* target length */
+       xfs_extlen_t    alignment,      /* target alignment */
+       char            userdata,       /* are we allocating data? */
+       xfs_agblock_t   freebno,        /* freespace's starting block */
+       xfs_extlen_t    freelen,        /* freespace's length */
+       xfs_agblock_t   *newbnop)       /* result: best start block from free */
+{
+       xfs_agblock_t   freeend;        /* end of freespace extent */
+       xfs_agblock_t   newbno1;        /* return block number */
+       xfs_agblock_t   newbno2;        /* other new block number */
+       xfs_extlen_t    newlen1=0;      /* length with newbno1 */
+       xfs_extlen_t    newlen2=0;      /* length with newbno2 */
+       xfs_agblock_t   wantend;        /* end of target extent */
+
+       ASSERT(freelen >= wantlen);
+       freeend = freebno + freelen;
+       wantend = wantbno + wantlen;
+       /*
+        * We want to allocate from the start of a free extent if it is past
+        * the desired block or if we are allocating user data and the free
+        * extent is before desired block. The second case is there to allow
+        * for contiguous allocation from the remaining free space if the file
+        * grows in the short term.
+        */
+       if (freebno >= wantbno || (userdata && freeend < wantend)) {
+               if ((newbno1 = roundup(freebno, alignment)) >= freeend)
+                       newbno1 = NULLAGBLOCK;
+       } else if (freeend >= wantend && alignment > 1) {
+               newbno1 = roundup(wantbno, alignment);
+               newbno2 = newbno1 - alignment;
+               if (newbno1 >= freeend)
+                       newbno1 = NULLAGBLOCK;
+               else
+                       newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
+               if (newbno2 < freebno)
+                       newbno2 = NULLAGBLOCK;
+               else
+                       newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
+               if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
+                       if (newlen1 < newlen2 ||
+                           (newlen1 == newlen2 &&
+                            XFS_ABSDIFF(newbno1, wantbno) >
+                            XFS_ABSDIFF(newbno2, wantbno)))
+                               newbno1 = newbno2;
+               } else if (newbno2 != NULLAGBLOCK)
+                       newbno1 = newbno2;
+       } else if (freeend >= wantend) {
+               newbno1 = wantbno;
+       } else if (alignment > 1) {
+               newbno1 = roundup(freeend - wantlen, alignment);
+               if (newbno1 > freeend - wantlen &&
+                   newbno1 - alignment >= freebno)
+                       newbno1 -= alignment;
+               else if (newbno1 >= freeend)
+                       newbno1 = NULLAGBLOCK;
+       } else
+               newbno1 = freeend - wantlen;
+       *newbnop = newbno1;
+       return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+}
+
+/*
+ * Fix up the length, based on mod and prod.
+ * len should be k * prod + mod for some k.
+ * If len is too small it is returned unchanged.
+ * If len hits maxlen it is left alone.
+ */
+STATIC void
+xfs_alloc_fix_len(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_extlen_t    k;
+       xfs_extlen_t    rlen;
+
+       ASSERT(args->mod < args->prod);
+       rlen = args->len;
+       ASSERT(rlen >= args->minlen);
+       ASSERT(rlen <= args->maxlen);
+       if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
+           (args->mod == 0 && rlen < args->prod))
+               return;
+       k = rlen % args->prod;
+       if (k == args->mod)
+               return;
+       if (k > args->mod)
+               rlen = rlen - (k - args->mod);
+       else
+               rlen = rlen - args->prod + (args->mod - k);
+       if ((int)rlen < (int)args->minlen)
+               return;
+       ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
+       ASSERT(rlen % args->prod == args->mod);
+       args->len = rlen;
+}
+
+/*
+ * Fix up length if there is too little space left in the a.g.
+ * Return 1 if ok, 0 if too little, should give up.
+ */
+STATIC int
+xfs_alloc_fix_minleft(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_agf_t       *agf;           /* a.g. freelist header */
+       int             diff;           /* free space difference */
+
+       if (args->minleft == 0)
+               return 1;
+       agf = XFS_BUF_TO_AGF(args->agbp);
+       diff = be32_to_cpu(agf->agf_freeblks)
+               - args->len - args->minleft;
+       if (diff >= 0)
+               return 1;
+       args->len += diff;              /* shrink the allocated space */
+       if (args->len >= args->minlen)
+               return 1;
+       args->agbno = NULLAGBLOCK;
+       return 0;
+}
+
+/*
+ * Update the two btrees, logically removing from freespace the extent
+ * starting at rbno, rlen blocks.  The extent is contained within the
+ * actual (current) free extent fbno for flen blocks.
+ * Flags are passed in indicating whether the cursors are set to the
+ * relevant records.
+ */
+STATIC int                             /* error code */
+xfs_alloc_fixup_trees(
+       xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
+       xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
+       xfs_agblock_t   fbno,           /* starting block of free extent */
+       xfs_extlen_t    flen,           /* length of free extent */
+       xfs_agblock_t   rbno,           /* starting block of returned extent */
+       xfs_extlen_t    rlen,           /* length of returned extent */
+       int             flags)          /* flags, XFSA_FIXUP_... */
+{
+       int             error;          /* error code */
+       int             i;              /* operation results */
+       xfs_agblock_t   nfbno1;         /* first new free startblock */
+       xfs_agblock_t   nfbno2;         /* second new free startblock */
+       xfs_extlen_t    nflen1=0;       /* first new free length */
+       xfs_extlen_t    nflen2=0;       /* second new free length */
+
+       /*
+        * Look up the record in the by-size tree if necessary.
+        */
+       if (flags & XFSA_FIXUP_CNT_OK) {
+#ifdef DEBUG
+               if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(
+                       i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+       } else {
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       /*
+        * Look up the record in the by-block tree if necessary.
+        */
+       if (flags & XFSA_FIXUP_BNO_OK) {
+#ifdef DEBUG
+               if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(
+                       i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+       } else {
+               if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+
+#ifdef DEBUG
+       if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+               struct xfs_btree_block  *bnoblock;
+               struct xfs_btree_block  *cntblock;
+
+               bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+               cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+               XFS_WANT_CORRUPTED_RETURN(
+                       bnoblock->bb_numrecs == cntblock->bb_numrecs);
+       }
+#endif
+
+       /*
+        * Deal with all four cases: the allocated record is contained
+        * within the freespace record, so we can have new freespace
+        * at either (or both) end, or no freespace remaining.
+        */
+       if (rbno == fbno && rlen == flen)
+               nfbno1 = nfbno2 = NULLAGBLOCK;
+       else if (rbno == fbno) {
+               nfbno1 = rbno + rlen;
+               nflen1 = flen - rlen;
+               nfbno2 = NULLAGBLOCK;
+       } else if (rbno + rlen == fbno + flen) {
+               nfbno1 = fbno;
+               nflen1 = flen - rlen;
+               nfbno2 = NULLAGBLOCK;
+       } else {
+               nfbno1 = fbno;
+               nflen1 = rbno - fbno;
+               nfbno2 = rbno + rlen;
+               nflen2 = (fbno + flen) - nfbno2;
+       }
+       /*
+        * Delete the entry from the by-size btree.
+        */
+       if ((error = xfs_btree_delete(cnt_cur, &i)))
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       /*
+        * Add new by-size btree entry(s).
+        */
+       if (nfbno1 != NULLAGBLOCK) {
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if ((error = xfs_btree_insert(cnt_cur, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       if (nfbno2 != NULLAGBLOCK) {
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if ((error = xfs_btree_insert(cnt_cur, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       /*
+        * Fix up the by-block btree entry(s).
+        */
+       if (nfbno1 == NULLAGBLOCK) {
+               /*
+                * No remaining freespace, just delete the by-block tree entry.
+                */
+               if ((error = xfs_btree_delete(bno_cur, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       } else {
+               /*
+                * Update the by-block entry to start later|be shorter.
+                */
+               if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
+                       return error;
+       }
+       if (nfbno2 != NULLAGBLOCK) {
+               /*
+                * 2 resulting free entries, need to add one.
+                */
+               if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               if ((error = xfs_btree_insert(bno_cur, &i)))
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+       return 0;
+}
+
+static bool
+xfs_agfl_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
+       int             i;
+
+       if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
+               return false;
+       /*
+        * during growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
+        * use it by using uncached buffers that don't have the perag attached
+        * so we can detect and avoid this problem.
+        */
+       if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
+               return false;
+
+       for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+               if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
+                   be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
+                       return false;
+       }
+       return true;
+}
+
+static void
+xfs_agfl_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+
+       /*
+        * There is no verification of non-crc AGFLs because mkfs does not
+        * initialise the AGFL to zero or NULL. Hence the only valid part of the
+        * AGFL is what the AGF says is active. We can't get to the AGF, so we
+        * can't verify just those entries are valid.
+        */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_agfl_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_agfl_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       /* no verification of non-crc AGFLs */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (!xfs_agfl_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (bip)
+               XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_agfl_buf_ops = {
+       .verify_read = xfs_agfl_read_verify,
+       .verify_write = xfs_agfl_write_verify,
+};
+
+/*
+ * Read in the allocation group free block array.
+ */
+STATIC int                             /* error */
+xfs_alloc_read_agfl(
+       xfs_mount_t     *mp,            /* mount point structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_buf_t       **bpp)          /* buffer for the ag free block array */
+{
+       xfs_buf_t       *bp;            /* return value */
+       int             error;
+
+       ASSERT(agno != NULLAGNUMBER);
+       error = xfs_trans_read_buf(
+                       mp, tp, mp->m_ddev_targp,
+                       XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
+                       XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
+       if (error)
+               return error;
+       xfs_buf_set_ref(bp, XFS_AGFL_REF);
+       *bpp = bp;
+       return 0;
+}
+
+STATIC int
+xfs_alloc_update_counters(
+       struct xfs_trans        *tp,
+       struct xfs_perag        *pag,
+       struct xfs_buf          *agbp,
+       long                    len)
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+
+       pag->pagf_freeblks += len;
+       be32_add_cpu(&agf->agf_freeblks, len);
+
+       xfs_trans_agblocks_delta(tp, len);
+       if (unlikely(be32_to_cpu(agf->agf_freeblks) >
+                    be32_to_cpu(agf->agf_length)))
+               return EFSCORRUPTED;
+
+       xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
+       return 0;
+}
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate a variable extent in the allocation group agno.
+ * Type and bno are used to determine where in the allocation group the
+ * extent will start.
+ * Extent's length (returned in *len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent(
+       xfs_alloc_arg_t *args)  /* argument structure for allocation */
+{
+       int             error=0;
+
+       ASSERT(args->minlen > 0);
+       ASSERT(args->maxlen > 0);
+       ASSERT(args->minlen <= args->maxlen);
+       ASSERT(args->mod < args->prod);
+       ASSERT(args->alignment > 0);
+       /*
+        * Branch to correct routine based on the type.
+        */
+       args->wasfromfl = 0;
+       switch (args->type) {
+       case XFS_ALLOCTYPE_THIS_AG:
+               error = xfs_alloc_ag_vextent_size(args);
+               break;
+       case XFS_ALLOCTYPE_NEAR_BNO:
+               error = xfs_alloc_ag_vextent_near(args);
+               break;
+       case XFS_ALLOCTYPE_THIS_BNO:
+               error = xfs_alloc_ag_vextent_exact(args);
+               break;
+       default:
+               ASSERT(0);
+               /* NOTREACHED */
+       }
+
+       if (error || args->agbno == NULLAGBLOCK)
+               return error;
+
+       ASSERT(args->len >= args->minlen);
+       ASSERT(args->len <= args->maxlen);
+       ASSERT(!args->wasfromfl || !args->isfl);
+       ASSERT(args->agbno % args->alignment == 0);
+
+       if (!args->wasfromfl) {
+               error = xfs_alloc_update_counters(args->tp, args->pag,
+                                                 args->agbp,
+                                                 -((long)(args->len)));
+               if (error)
+                       return error;
+
+               ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
+                                             args->agbno, args->len));
+       }
+
+       if (!args->isfl) {
+               xfs_trans_mod_sb(args->tp, args->wasdel ?
+                                XFS_TRANS_SB_RES_FDBLOCKS :
+                                XFS_TRANS_SB_FDBLOCKS,
+                                -((long)(args->len)));
+       }
+
+       XFS_STATS_INC(xs_allocx);
+       XFS_STATS_ADD(xs_allocb, args->len);
+       return error;
+}
+
+/*
+ * Allocate a variable extent at exactly agno/bno.
+ * Extent's length (returned in *len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_exact(
+       xfs_alloc_arg_t *args)  /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
+       xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
+       int             error;
+       xfs_agblock_t   fbno;   /* start block of found extent */
+       xfs_extlen_t    flen;   /* length of found extent */
+       xfs_agblock_t   tbno;   /* start block of trimmed extent */
+       xfs_extlen_t    tlen;   /* length of trimmed extent */
+       xfs_agblock_t   tend;   /* end block of trimmed extent */
+       int             i;      /* success/failure of operation */
+
+       ASSERT(args->alignment == 1);
+
+       /*
+        * Allocate/initialize a cursor for the by-number freespace btree.
+        */
+       bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+                                         args->agno, XFS_BTNUM_BNO);
+
+       /*
+        * Lookup bno and minlen in the btree (minlen is irrelevant, really).
+        * Look for the closest free block <= bno, it must contain bno
+        * if any free block does.
+        */
+       error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
+       if (error)
+               goto error0;
+       if (!i)
+               goto not_found;
+
+       /*
+        * Grab the freespace record.
+        */
+       error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
+       if (error)
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       ASSERT(fbno <= args->agbno);
+
+       /*
+        * Check for overlapping busy extents.
+        */
+       xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+
+       /*
+        * Give up if the start of the extent is busy, or the freespace isn't
+        * long enough for the minimum request.
+        */
+       if (tbno > args->agbno)
+               goto not_found;
+       if (tlen < args->minlen)
+               goto not_found;
+       tend = tbno + tlen;
+       if (tend < args->agbno + args->minlen)
+               goto not_found;
+
+       /*
+        * End of extent will be smaller of the freespace end and the
+        * maximal requested end.
+        *
+        * Fix the length according to mod and prod if given.
+        */
+       args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+                                               - args->agbno;
+       xfs_alloc_fix_len(args);
+       if (!xfs_alloc_fix_minleft(args))
+               goto not_found;
+
+       ASSERT(args->agbno + args->len <= tend);
+
+       /*
+        * We are allocating agbno for args->len
+        * Allocate/initialize a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT);
+       ASSERT(args->agbno + args->len <=
+               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+       error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
+                                     args->len, XFSA_FIXUP_BNO_OK);
+       if (error) {
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+               goto error0;
+       }
+
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
+       args->wasfromfl = 0;
+       trace_xfs_alloc_exact_done(args);
+       return 0;
+
+not_found:
+       /* Didn't find it, return null. */
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       args->agbno = NULLAGBLOCK;
+       trace_xfs_alloc_exact_notfound(args);
+       return 0;
+
+error0:
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       trace_xfs_alloc_exact_error(args);
+       return error;
+}
+
+/*
+ * Search the btree in a given direction via the search cursor and compare
+ * the records found against the good extent we've already found.
+ */
+STATIC int
+xfs_alloc_find_best_extent(
+       struct xfs_alloc_arg    *args,  /* allocation argument structure */
+       struct xfs_btree_cur    **gcur, /* good cursor */
+       struct xfs_btree_cur    **scur, /* searching cursor */
+       xfs_agblock_t           gdiff,  /* difference for search comparison */
+       xfs_agblock_t           *sbno,  /* extent found by search */
+       xfs_extlen_t            *slen,  /* extent length */
+       xfs_agblock_t           *sbnoa, /* aligned extent found by search */
+       xfs_extlen_t            *slena, /* aligned extent length */
+       int                     dir)    /* 0 = search right, 1 = search left */
+{
+       xfs_agblock_t           new;
+       xfs_agblock_t           sdiff;
+       int                     error;
+       int                     i;
+
+       /* The good extent is perfect, no need to  search. */
+       if (!gdiff)
+               goto out_use_good;
+
+       /*
+        * Look until we find a better one, run out of space or run off the end.
+        */
+       do {
+               error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+
+               /*
+                * The good extent is closer than this one.
+                */
+               if (!dir) {
+                       if (*sbnoa >= args->agbno + gdiff)
+                               goto out_use_good;
+               } else {
+                       if (*sbnoa <= args->agbno - gdiff)
+                               goto out_use_good;
+               }
+
+               /*
+                * Same distance, compare length and pick the best.
+                */
+               if (*slena >= args->minlen) {
+                       args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+
+                       sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+                                                      args->alignment,
+                                                      args->userdata, *sbnoa,
+                                                      *slena, &new);
+
+                       /*
+                        * Choose closer size and invalidate other cursor.
+                        */
+                       if (sdiff < gdiff)
+                               goto out_use_search;
+                       goto out_use_good;
+               }
+
+               if (!dir)
+                       error = xfs_btree_increment(*scur, 0, &i);
+               else
+                       error = xfs_btree_decrement(*scur, 0, &i);
+               if (error)
+                       goto error0;
+       } while (i);
+
+out_use_good:
+       xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
+       *scur = NULL;
+       return 0;
+
+out_use_search:
+       xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
+       *gcur = NULL;
+       return 0;
+
+error0:
+       /* caller invalidates cursors */
+       return error;
+}
+
+/*
+ * Allocate a variable extent near bno in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                             /* error */
+xfs_alloc_ag_vextent_near(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
+       xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
+       xfs_agblock_t   gtbno;          /* start bno of right side entry */
+       xfs_agblock_t   gtbnoa;         /* aligned ... */
+       xfs_extlen_t    gtdiff;         /* difference to right side entry */
+       xfs_extlen_t    gtlen;          /* length of right side entry */
+       xfs_extlen_t    gtlena;         /* aligned ... */
+       xfs_agblock_t   gtnew;          /* useful start bno of right side */
+       int             error;          /* error code */
+       int             i;              /* result code, temporary */
+       int             j;              /* result code, temporary */
+       xfs_agblock_t   ltbno;          /* start bno of left side entry */
+       xfs_agblock_t   ltbnoa;         /* aligned ... */
+       xfs_extlen_t    ltdiff;         /* difference to left side entry */
+       xfs_extlen_t    ltlen;          /* length of left side entry */
+       xfs_extlen_t    ltlena;         /* aligned ... */
+       xfs_agblock_t   ltnew;          /* useful start bno of left side */
+       xfs_extlen_t    rlen;           /* length of returned extent */
+       int             forced = 0;
+#ifdef DEBUG
+       /*
+        * Randomly don't execute the first algorithm.
+        */
+       int             dofirst;        /* set to do first algorithm */
+
+       dofirst = prandom_u32() & 1;
+#endif
+
+restart:
+       bno_cur_lt = NULL;
+       bno_cur_gt = NULL;
+       ltlen = 0;
+       gtlena = 0;
+       ltlena = 0;
+
+       /*
+        * Get a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT);
+
+       /*
+        * See if there are any free extents as big as maxlen.
+        */
+       if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
+               goto error0;
+       /*
+        * If none, then pick up the last entry in the tree unless the
+        * tree is empty.
+        */
+       if (!i) {
+               if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
+                               &ltlen, &i)))
+                       goto error0;
+               if (i == 0 || ltlen == 0) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_near_noentry(args);
+                       return 0;
+               }
+               ASSERT(i == 1);
+       }
+       args->wasfromfl = 0;
+
+       /*
+        * First algorithm.
+        * If the requested extent is large wrt the freespaces available
+        * in this a.g., then the cursor will be pointing to a btree entry
+        * near the right edge of the tree.  If it's in the last btree leaf
+        * block, then we just examine all the entries in that block
+        * that are big enough, and pick the best one.
+        * This is written as a while loop so we can break out of it,
+        * but we never loop back to the top.
+        */
+       while (xfs_btree_islastblock(cnt_cur, 0)) {
+               xfs_extlen_t    bdiff;
+               int             besti=0;
+               xfs_extlen_t    blen=0;
+               xfs_agblock_t   bnew=0;
+
+#ifdef DEBUG
+               if (dofirst)
+                       break;
+#endif
+               /*
+                * Start from the entry that lookup found, sequence through
+                * all larger free blocks.  If we're actually pointing at a
+                * record smaller than maxlen, go to the start of this block,
+                * and skip all those smaller than minlen.
+                */
+               if (ltlen || args->alignment > 1) {
+                       cnt_cur->bc_ptrs[0] = 1;
+                       do {
+                               if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
+                                               &ltlen, &i)))
+                                       goto error0;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                               if (ltlen >= args->minlen)
+                                       break;
+                               if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
+                                       goto error0;
+                       } while (i);
+                       ASSERT(ltlen >= args->minlen);
+                       if (!i)
+                               break;
+               }
+               i = cnt_cur->bc_ptrs[0];
+               for (j = 1, blen = 0, bdiff = 0;
+                    !error && j && (blen < args->maxlen || bdiff > 0);
+                    error = xfs_btree_increment(cnt_cur, 0, &j)) {
+                       /*
+                        * For each entry, decide if it's better than
+                        * the previous best entry.
+                        */
+                       if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                                 &ltbnoa, &ltlena);
+                       if (ltlena < args->minlen)
+                               continue;
+                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       ASSERT(args->len >= args->minlen);
+                       if (args->len < blen)
+                               continue;
+                       ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+                               args->alignment, args->userdata, ltbnoa,
+                               ltlena, &ltnew);
+                       if (ltnew != NULLAGBLOCK &&
+                           (args->len > blen || ltdiff < bdiff)) {
+                               bdiff = ltdiff;
+                               bnew = ltnew;
+                               blen = args->len;
+                               besti = cnt_cur->bc_ptrs[0];
+                       }
+               }
+               /*
+                * It didn't work.  We COULD be in a case where
+                * there's a good record somewhere, so try again.
+                */
+               if (blen == 0)
+                       break;
+               /*
+                * Point at the best entry, and retrieve it again.
+                */
+               cnt_cur->bc_ptrs[0] = besti;
+               if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+               args->len = blen;
+               if (!xfs_alloc_fix_minleft(args)) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_near_nominleft(args);
+                       return 0;
+               }
+               blen = args->len;
+               /*
+                * We are allocating starting at bnew for blen blocks.
+                */
+               args->agbno = bnew;
+               ASSERT(bnew >= ltbno);
+               ASSERT(bnew + blen <= ltbno + ltlen);
+               /*
+                * Set up a cursor for the by-bno tree.
+                */
+               bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
+                       args->agbp, args->agno, XFS_BTNUM_BNO);
+               /*
+                * Fix up the btree entries.
+                */
+               if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
+                               ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
+                       goto error0;
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+
+               trace_xfs_alloc_near_first(args);
+               return 0;
+       }
+       /*
+        * Second algorithm.
+        * Search in the by-bno tree to the left and to the right
+        * simultaneously, until in each case we find a space big enough,
+        * or run into the edge of the tree.  When we run into the edge,
+        * we deallocate that cursor.
+        * If both searches succeed, we compare the two spaces and pick
+        * the better one.
+        * With alignment, it's possible for both to fail; the upper
+        * level algorithm that picks allocation groups for allocations
+        * is not supposed to do this.
+        */
+       /*
+        * Allocate and initialize the cursor for the leftward search.
+        */
+       bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_BNO);
+       /*
+        * Lookup <= bno to find the leftward search's starting point.
+        */
+       if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
+               goto error0;
+       if (!i) {
+               /*
+                * Didn't find anything; use this cursor for the rightward
+                * search.
+                */
+               bno_cur_gt = bno_cur_lt;
+               bno_cur_lt = NULL;
+       }
+       /*
+        * Found something.  Duplicate the cursor for the rightward search.
+        */
+       else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
+               goto error0;
+       /*
+        * Increment the cursor, so we will point at the entry just right
+        * of the leftward entry if any, or to the leftmost entry.
+        */
+       if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
+               goto error0;
+       if (!i) {
+               /*
+                * It failed, there are no rightward entries.
+                */
+               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
+               bno_cur_gt = NULL;
+       }
+       /*
+        * Loop going left with the leftward cursor, right with the
+        * rightward cursor, until either both directions give up or
+        * we find an entry at least as big as minlen.
+        */
+       do {
+               if (bno_cur_lt) {
+                       if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
+                                                 &ltbnoa, &ltlena);
+                       if (ltlena >= args->minlen)
+                               break;
+                       if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
+                               goto error0;
+                       if (!i) {
+                               xfs_btree_del_cursor(bno_cur_lt,
+                                                    XFS_BTREE_NOERROR);
+                               bno_cur_lt = NULL;
+                       }
+               }
+               if (bno_cur_gt) {
+                       if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       xfs_alloc_compute_aligned(args, gtbno, gtlen,
+                                                 &gtbnoa, &gtlena);
+                       if (gtlena >= args->minlen)
+                               break;
+                       if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
+                               goto error0;
+                       if (!i) {
+                               xfs_btree_del_cursor(bno_cur_gt,
+                                                    XFS_BTREE_NOERROR);
+                               bno_cur_gt = NULL;
+                       }
+               }
+       } while (bno_cur_lt || bno_cur_gt);
+
+       /*
+        * Got both cursors still active, need to find better entry.
+        */
+       if (bno_cur_lt && bno_cur_gt) {
+               if (ltlena >= args->minlen) {
+                       /*
+                        * Left side is good, look for a right side entry.
+                        */
+                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+                               args->alignment, args->userdata, ltbnoa,
+                               ltlena, &ltnew);
+
+                       error = xfs_alloc_find_best_extent(args,
+                                               &bno_cur_lt, &bno_cur_gt,
+                                               ltdiff, &gtbno, &gtlen,
+                                               &gtbnoa, &gtlena,
+                                               0 /* search right */);
+               } else {
+                       ASSERT(gtlena >= args->minlen);
+
+                       /*
+                        * Right side is good, look for a left side entry.
+                        */
+                       args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
+                       xfs_alloc_fix_len(args);
+                       gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+                               args->alignment, args->userdata, gtbnoa,
+                               gtlena, &gtnew);
+
+                       error = xfs_alloc_find_best_extent(args,
+                                               &bno_cur_gt, &bno_cur_lt,
+                                               gtdiff, &ltbno, &ltlen,
+                                               &ltbnoa, &ltlena,
+                                               1 /* search left */);
+               }
+
+               if (error)
+                       goto error0;
+       }
+
+       /*
+        * If we couldn't get anything, give up.
+        */
+       if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
+               if (!forced++) {
+                       trace_xfs_alloc_near_busy(args);
+                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       goto restart;
+               }
+               trace_xfs_alloc_size_neither(args);
+               args->agbno = NULLAGBLOCK;
+               return 0;
+       }
+
+       /*
+        * At this point we have selected a freespace entry, either to the
+        * left or to the right.  If it's on the right, copy all the
+        * useful variables to the "left" set so we only have one
+        * copy of this code.
+        */
+       if (bno_cur_gt) {
+               bno_cur_lt = bno_cur_gt;
+               bno_cur_gt = NULL;
+               ltbno = gtbno;
+               ltbnoa = gtbnoa;
+               ltlen = gtlen;
+               ltlena = gtlena;
+               j = 1;
+       } else
+               j = 0;
+
+       /*
+        * Fix up the length and compute the useful address.
+        */
+       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+       xfs_alloc_fix_len(args);
+       if (!xfs_alloc_fix_minleft(args)) {
+               trace_xfs_alloc_near_nominleft(args);
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+               return 0;
+       }
+       rlen = args->len;
+       (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+                                    args->userdata, ltbnoa, ltlena, &ltnew);
+       ASSERT(ltnew >= ltbno);
+       ASSERT(ltnew + rlen <= ltbnoa + ltlena);
+       ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+       args->agbno = ltnew;
+
+       if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
+                       ltnew, rlen, XFSA_FIXUP_BNO_OK)))
+               goto error0;
+
+       if (j)
+               trace_xfs_alloc_near_greater(args);
+       else
+               trace_xfs_alloc_near_lesser(args);
+
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+       return 0;
+
+ error0:
+       trace_xfs_alloc_near_error(args);
+       if (cnt_cur != NULL)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       if (bno_cur_lt != NULL)
+               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
+       if (bno_cur_gt != NULL)
+               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Allocate a variable extent anywhere in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ */
+STATIC int                             /* error */
+xfs_alloc_ag_vextent_size(
+       xfs_alloc_arg_t *args)          /* allocation argument structure */
+{
+       xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
+       int             error;          /* error result */
+       xfs_agblock_t   fbno;           /* start of found freespace */
+       xfs_extlen_t    flen;           /* length of found freespace */
+       int             i;              /* temp status variable */
+       xfs_agblock_t   rbno;           /* returned block number */
+       xfs_extlen_t    rlen;           /* length of returned extent */
+       int             forced = 0;     /* busy extent log-force count */
+
+restart:
+       /*
+        * Allocate and initialize a cursor for the by-size btree.
+        */
+       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_CNT);
+       bno_cur = NULL;
+
+       /*
+        * Look for an entry >= maxlen+alignment-1 blocks.
+        */
+       if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
+                       args->maxlen + args->alignment - 1, &i)))
+               goto error0;
+
+       /*
+        * If none or we have busy extents that we cannot allocate from, then
+        * we have to settle for a smaller extent. In the case that there are
+        * no large extents, this will return the last entry in the tree unless
+        * the tree is empty. In the case that there are only busy large
+        * extents, this will return the largest small extent unless there
+        * are no smaller extents available.
+        */
+       if (!i || forced > 1) {
+               error = xfs_alloc_ag_vextent_small(args, cnt_cur,
+                                                  &fbno, &flen, &i);
+               if (error)
+                       goto error0;
+               if (i == 0 || flen == 0) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_size_noentry(args);
+                       return 0;
+               }
+               ASSERT(i == 1);
+               xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+       } else {
+               /*
+                * Search for a non-busy extent that is large enough.
+                * If we are at low space, don't check, or if we fall off
+                * the end of the btree, turn off the busy check and
+                * restart.
+                */
+               for (;;) {
+                       error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+                       if (error)
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+                       xfs_alloc_compute_aligned(args, fbno, flen,
+                                                 &rbno, &rlen);
+
+                       if (rlen >= args->maxlen)
+                               break;
+
+                       error = xfs_btree_increment(cnt_cur, 0, &i);
+                       if (error)
+                               goto error0;
+                       if (i == 0) {
+                               /*
+                                * Our only valid extents must have been busy.
+                                * Make it unbusy by forcing the log out and
+                                * retrying. If we've been here before, forcing
+                                * the log isn't making the extents available,
+                                * which means they have probably been freed in
+                                * this transaction.  In that case, we have to
+                                * give up on them and we'll attempt a minlen
+                                * allocation the next time around.
+                                */
+                               xfs_btree_del_cursor(cnt_cur,
+                                                    XFS_BTREE_NOERROR);
+                               trace_xfs_alloc_size_busy(args);
+                               if (!forced++)
+                                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                               goto restart;
+                       }
+               }
+       }
+
+       /*
+        * In the first case above, we got the last entry in the
+        * by-size btree.  Now we check to see if the space hits maxlen
+        * once aligned; if not, we search left for something better.
+        * This can't happen in the second case above.
+        */
+       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+                       (rlen <= flen && rbno + rlen <= fbno + flen), error0);
+       if (rlen < args->maxlen) {
+               xfs_agblock_t   bestfbno;
+               xfs_extlen_t    bestflen;
+               xfs_agblock_t   bestrbno;
+               xfs_extlen_t    bestrlen;
+
+               bestrlen = rlen;
+               bestrbno = rbno;
+               bestflen = flen;
+               bestfbno = fbno;
+               for (;;) {
+                       if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
+                               goto error0;
+                       if (i == 0)
+                               break;
+                       if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
+                                       &i)))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       if (flen < bestrlen)
+                               break;
+                       xfs_alloc_compute_aligned(args, fbno, flen,
+                                                 &rbno, &rlen);
+                       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
+                       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+                               (rlen <= flen && rbno + rlen <= fbno + flen),
+                               error0);
+                       if (rlen > bestrlen) {
+                               bestrlen = rlen;
+                               bestrbno = rbno;
+                               bestflen = flen;
+                               bestfbno = fbno;
+                               if (rlen == args->maxlen)
+                                       break;
+                       }
+               }
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
+                               &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               rlen = bestrlen;
+               rbno = bestrbno;
+               flen = bestflen;
+               fbno = bestfbno;
+       }
+       args->wasfromfl = 0;
+       /*
+        * Fix up the length.
+        */
+       args->len = rlen;
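+       /*
+        * If the aligned extent is still too small, any remaining candidates
+        * must be busy; force the log once and retry before giving up.
+        */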
+       if (rlen < args->minlen) {
+               if (!forced++) {
+                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+                       trace_xfs_alloc_size_busy(args);
+                       xfs_log_force(args->mp, XFS_LOG_SYNC);
+                       goto restart;
+               }
+               goto out_nominleft;
+       }
+       xfs_alloc_fix_len(args);
+
+       if (!xfs_alloc_fix_minleft(args))
+               goto out_nominleft;
+       rlen = args->len;
+       XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+       /*
+        * Allocate and initialize a cursor for the by-block tree.
+        */
+       bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+               args->agno, XFS_BTNUM_BNO);
+       if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
+                       rbno, rlen, XFSA_FIXUP_CNT_OK)))
+               goto error0;
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       cnt_cur = bno_cur = NULL;
+       args->len = rlen;
+       args->agbno = rbno;
+       XFS_WANT_CORRUPTED_GOTO(
+               args->agbno + args->len <=
+                       be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+               error0);
+       trace_xfs_alloc_size_done(args);
+       return 0;
+
+error0:
+       trace_xfs_alloc_size_error(args);
+       if (cnt_cur)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       if (bno_cur)
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       return error;
+
+out_nominleft:
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       trace_xfs_alloc_size_nominleft(args);
+       args->agbno = NULLAGBLOCK;
+       return 0;
+}
+
+/*
+ * Deal with the case where only small freespaces remain.
+ * Either return the contents of the last freespace record,
+ * or allocate space from the freelist if there is nothing in the tree.
+ */
+STATIC int                     /* error */
+xfs_alloc_ag_vextent_small(
+       xfs_alloc_arg_t *args,  /* allocation argument structure */
+       xfs_btree_cur_t *ccur,  /* by-size cursor */
+       xfs_agblock_t   *fbnop, /* result block number */
+       xfs_extlen_t    *flenp, /* result length */
+       int             *stat)  /* status: 0-freelist, 1-normal/none */
+{
+       int             error;
+       xfs_agblock_t   fbno;
+       xfs_extlen_t    flen;
+       int             i;
+
+       if ((error = xfs_btree_decrement(ccur, 0, &i)))
+               goto error0;
+       if (i) {
+               if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+       /*
+        * Nothing in the btree, try the freelist.  Make sure
+        * to respect minleft even when pulling from the
+        * freelist.
+        */
+       else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
+                (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
+                 > args->minleft)) {
+               error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+               if (error)
+                       goto error0;
+               if (fbno != NULLAGBLOCK) {
+                       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+                                            args->userdata);
+
+                       if (args->userdata) {
+                               xfs_buf_t       *bp;
+
+                               bp = xfs_btree_get_bufs(args->mp, args->tp,
+                                       args->agno, fbno, 0);
+                               xfs_trans_binval(args->tp, bp);
+                       }
+                       args->len = 1;
+                       args->agbno = fbno;
+                       XFS_WANT_CORRUPTED_GOTO(
+                               args->agbno + args->len <=
+                               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+                               error0);
+                       args->wasfromfl = 1;
+                       trace_xfs_alloc_small_freelist(args);
+                       *stat = 0;
+                       return 0;
+               }
+               /*
+                * Nothing in the freelist.
+                */
+               else
+                       flen = 0;
+       }
+       /*
+        * Can't allocate from the freelist for some reason.
+        */
+       else {
+               fbno = NULLAGBLOCK;
+               flen = 0;
+       }
+       /*
+        * Can't do the allocation, give up.
+        */
+       if (flen < args->minlen) {
+               args->agbno = NULLAGBLOCK;
+               trace_xfs_alloc_small_notenough(args);
+               flen = 0;
+       }
+       *fbnop = fbno;
+       *flenp = flen;
+       *stat = 1;
+       trace_xfs_alloc_small_done(args);
+       return 0;
+
+error0:
+       trace_xfs_alloc_small_error(args);
+       return error;
+}
+
+/*
+ * Free the extent starting at agno/bno for length.
+ */
+STATIC int                     /* error */
+xfs_free_ag_extent(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
+       xfs_agnumber_t  agno,   /* allocation group number */
+       xfs_agblock_t   bno,    /* starting block number */
+       xfs_extlen_t    len,    /* length of extent */
+       int             isfl)   /* set if is freelist blocks - no sb acctg */
+{
+       xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
+       xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
+       int             error;          /* error return value */
+       xfs_agblock_t   gtbno;          /* start of right neighbor block */
+       xfs_extlen_t    gtlen;          /* length of right neighbor block */
+       int             haveleft;       /* have a left neighbor block */
+       int             haveright;      /* have a right neighbor block */
+       int             i;              /* temp, result code */
+       xfs_agblock_t   ltbno;          /* start of left neighbor block */
+       xfs_extlen_t    ltlen;          /* length of left neighbor block */
+       xfs_mount_t     *mp;            /* mount point struct for filesystem */
+       xfs_agblock_t   nbno;           /* new starting block of freespace */
+       xfs_extlen_t    nlen;           /* new length of freespace */
+       xfs_perag_t     *pag;           /* per allocation group data */
+
+       mp = tp->t_mountp;
+       /*
+        * Allocate and initialize a cursor for the by-block btree.
+        */
+       bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
+       cnt_cur = NULL;
+       /*
+        * Look for a neighboring block on the left (lower block numbers)
+        * that is contiguous with this space.
+        */
+       if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
+               goto error0;
+       if (haveleft) {
+               /*
+                * There is a block to our left.
+                */
+               if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * It's not contiguous, though.
+                */
+               if (ltbno + ltlen < bno)
+                       haveleft = 0;
+               else {
+                       /*
+                        * If this failure happens the request to free this
+                        * space was invalid, it's (partly) already free.
+                        * Very bad.
+                        */
+                       XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+               }
+       }
+       /*
+        * Look for a neighboring block on the right (higher block numbers)
+        * that is contiguous with this space.
+        */
+       if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
+               goto error0;
+       if (haveright) {
+               /*
+                * There is a block to our right.
+                */
+               if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * It's not contiguous, though.
+                */
+               if (bno + len < gtbno)
+                       haveright = 0;
+               else {
+                       /*
+                        * If this failure happens the request to free this
+                        * space was invalid, it's (partly) already free.
+                        * Very bad.
+                        */
+                       XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+               }
+       }
+       /*
+        * Now allocate and initialize a cursor for the by-size tree.
+        */
+       cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
+       /*
+        * Have both left and right contiguous neighbors.
+        * Merge all three into a single free block.
+        */
+       if (haveleft && haveright) {
+               /*
+                * Delete the old by-size entry on the left.
+                */
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if ((error = xfs_btree_delete(cnt_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Delete the old by-size entry on the right.
+                */
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if ((error = xfs_btree_delete(cnt_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Delete the old by-block entry for the right block.
+                */
+               if ((error = xfs_btree_delete(bno_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Move the by-block cursor back to the left neighbor.
+                */
+               if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+#ifdef DEBUG
+               /*
+                * Check that this is the right record: delete didn't
+                * mangle the cursor.
+                */
+               {
+                       xfs_agblock_t   xxbno;
+                       xfs_extlen_t    xxlen;
+
+                       if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
+                                       &i)))
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(
+                               i == 1 && xxbno == ltbno && xxlen == ltlen,
+                               error0);
+               }
+#endif
+               /*
+                * Update remaining by-block entry to the new, joined block.
+                */
+               nbno = ltbno;
+               nlen = len + ltlen + gtlen;
+               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
+                       goto error0;
+       }
+       /*
+        * Have only a left contiguous neighbor.
+        * Merge it together with the new freespace.
+        */
+       else if (haveleft) {
+               /*
+                * Delete the old by-size entry on the left.
+                */
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if ((error = xfs_btree_delete(cnt_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Back up the by-block cursor to the left neighbor, and
+                * update its length.
+                */
+               if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               nbno = ltbno;
+               nlen = len + ltlen;
+               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
+                       goto error0;
+       }
+       /*
+        * Have only a right contiguous neighbor.
+        * Merge it together with the new freespace.
+        */
+       else if (haveright) {
+               /*
+                * Delete the old by-size entry on the right.
+                */
+               if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if ((error = xfs_btree_delete(cnt_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               /*
+                * Update the starting block and length of the right
+                * neighbor in the by-block tree.
+                */
+               nbno = bno;
+               nlen = len + gtlen;
+               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
+                       goto error0;
+       }
+       /*
+        * No contiguous neighbors.
+        * Insert the new freespace into the by-block tree.
+        */
+       else {
+               nbno = bno;
+               nlen = len;
+               if ((error = xfs_btree_insert(bno_cur, &i)))
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+       bno_cur = NULL;
+       /*
+        * In all cases we need to insert the new freespace in the by-size tree.
+        */
+       if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+       if ((error = xfs_btree_insert(cnt_cur, &i)))
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+       cnt_cur = NULL;
+
+       /*
+        * Update the freespace totals in the ag and superblock.
+        */
+       pag = xfs_perag_get(mp, agno);
+       error = xfs_alloc_update_counters(tp, pag, agbp, len);
+       xfs_perag_put(pag);
+       if (error)
+               goto error0;
+
+       if (!isfl)
+               xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
+       XFS_STATS_INC(xs_freex);
+       XFS_STATS_ADD(xs_freeb, len);
+
+       trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
+
+       return 0;
+
+ error0:
+       trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
+       if (bno_cur)
+               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+       if (cnt_cur)
+               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Visible (exported) allocation/free functions.
+ * Some of these are used just by xfs_alloc_btree.c and this file.
+ */
+
+/*
+ * Compute and fill in value of m_ag_maxlevels.
+ */
+void
+xfs_alloc_compute_maxlevels(
+       xfs_mount_t     *mp)    /* file system mount structure */
+{
+       int             level;
+       uint            maxblocks;
+       uint            maxleafents;
+       int             minleafrecs;
+       int             minnoderecs;
+
+       maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
+       minleafrecs = mp->m_alloc_mnr[0];
+       minnoderecs = mp->m_alloc_mnr[1];
+       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+       for (level = 1; maxblocks > 1; level++)
+               maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+       mp->m_ag_maxlevels = level;
+}
+
+/*
+ * Find the length of the longest extent in an AG.
+ */
+xfs_extlen_t
+xfs_alloc_longest_free_extent(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag)
+{
+       xfs_extlen_t            need, delta = 0;
+
+       need = XFS_MIN_FREELIST_PAG(pag, mp);
+       if (need > pag->pagf_flcount)
+               delta = need - pag->pagf_flcount;
+
+       if (pag->pagf_longest > delta)
+               return pag->pagf_longest - delta;
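+       /* otherwise report a single block if there is any free space at all */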
+       return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
+}
+
+/*
+ * Decide whether to use this allocation group for this allocation.
+ * If so, fix up the btree freelist's size.
+ */
+STATIC int                     /* error */
+xfs_alloc_fix_freelist(
+       xfs_alloc_arg_t *args,  /* allocation argument structure */
+       int             flags)  /* XFS_ALLOC_FLAG_... */
+{
+       xfs_buf_t       *agbp;  /* agf buffer pointer */
+       xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
+       xfs_buf_t       *agflbp;/* agfl buffer pointer */
+       xfs_agblock_t   bno;    /* freelist block */
+       xfs_extlen_t    delta;  /* new blocks needed in freelist */
+       int             error;  /* error result code */
+       xfs_extlen_t    longest;/* longest extent in allocation group */
+       xfs_mount_t     *mp;    /* file system mount point structure */
+       xfs_extlen_t    need;   /* total blocks needed in freelist */
+       xfs_perag_t     *pag;   /* per-ag information structure */
+       xfs_alloc_arg_t targs;  /* local allocation arguments */
+       xfs_trans_t     *tp;    /* transaction pointer */
+
+       mp = args->mp;
+
+       pag = args->pag;
+       tp = args->tp;
+       if (!pag->pagf_init) {
+               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+                               &agbp)))
+                       return error;
+               if (!pag->pagf_init) {
+                       ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+                       ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+                       args->agbp = NULL;
+                       return 0;
+               }
+       } else
+               agbp = NULL;
+
+       /*
+        * If this is a metadata preferred pag and we are user data,
+        * then try somewhere else if we are not being asked to
+        * try harder at this point.
+        */
+       if (pag->pagf_metadata && args->userdata &&
+           (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+               ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+               args->agbp = NULL;
+               return 0;
+       }
+
+       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+               /*
+                * If it looks like there isn't a long enough extent, or enough
+                * total blocks, reject it.
+                */
+               need = XFS_MIN_FREELIST_PAG(pag, mp);
+               longest = xfs_alloc_longest_free_extent(mp, pag);
+               if ((args->minlen + args->alignment + args->minalignslop - 1) >
+                               longest ||
+                   ((int)(pag->pagf_freeblks + pag->pagf_flcount -
+                          need - args->total) < (int)args->minleft)) {
+                       if (agbp)
+                               xfs_trans_brelse(tp, agbp);
+                       args->agbp = NULL;
+                       return 0;
+               }
+       }
+
+       /*
+        * Get the a.g. freespace buffer.
+        * Can fail if we're not blocking on locks, and it's held.
+        */
+       if (agbp == NULL) {
+               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
+                               &agbp)))
+                       return error;
+               if (agbp == NULL) {
+                       ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+                       ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+                       args->agbp = NULL;
+                       return 0;
+               }
+       }
+       /*
+        * Figure out how many blocks we should have in the freelist.
+        */
+       agf = XFS_BUF_TO_AGF(agbp);
+       need = XFS_MIN_FREELIST(agf, mp);
+       /*
+        * If there isn't enough total or single-extent, reject it.
+        */
+       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+               delta = need > be32_to_cpu(agf->agf_flcount) ?
+                       (need - be32_to_cpu(agf->agf_flcount)) : 0;
+               longest = be32_to_cpu(agf->agf_longest);
+               longest = (longest > delta) ? (longest - delta) :
+                       (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
+               if ((args->minlen + args->alignment + args->minalignslop - 1) >
+                               longest ||
+                   ((int)(be32_to_cpu(agf->agf_freeblks) +
+                    be32_to_cpu(agf->agf_flcount) - need - args->total) <
+                               (int)args->minleft)) {
+                       xfs_trans_brelse(tp, agbp);
+                       args->agbp = NULL;
+                       return 0;
+               }
+       }
+       /*
+        * Make the freelist shorter if it's too long.
+        */
+       while (be32_to_cpu(agf->agf_flcount) > need) {
+               xfs_buf_t       *bp;
+
+               error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
+               if (error)
+                       return error;
+               if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
+                       return error;
+               bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
+               xfs_trans_binval(tp, bp);
+       }
+       /*
+        * Initialize the args structure.
+        */
+       memset(&targs, 0, sizeof(targs));
+       targs.tp = tp;
+       targs.mp = mp;
+       targs.agbp = agbp;
+       targs.agno = args->agno;
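+       /* refill with single unaligned blocks; isfl avoids sb accounting */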
+       targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
+       targs.type = XFS_ALLOCTYPE_THIS_AG;
+       targs.pag = pag;
+       if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
+               return error;
+       /*
+        * Make the freelist longer if it's too short.
+        */
+       while (be32_to_cpu(agf->agf_flcount) < need) {
+               targs.agbno = 0;
+               targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
+               /*
+                * Allocate as many blocks as possible at once.
+                */
+               if ((error = xfs_alloc_ag_vextent(&targs))) {
+                       xfs_trans_brelse(tp, agflbp);
+                       return error;
+               }
+               /*
+                * Stop if we run out.  Won't happen if callers are obeying
+                * the restrictions correctly.  Can happen for free calls
+                * on a completely full ag.
+                */
+               if (targs.agbno == NULLAGBLOCK) {
+                       if (flags & XFS_ALLOC_FLAG_FREEING)
+                               break;
+                       xfs_trans_brelse(tp, agflbp);
+                       args->agbp = NULL;
+                       return 0;
+               }
+               /*
+                * Put each allocated block on the list.
+                */
+               for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
+                       error = xfs_alloc_put_freelist(tp, agbp,
+                                                       agflbp, bno, 0);
+                       if (error)
+                               return error;
+               }
+       }
+       xfs_trans_brelse(tp, agflbp);
+       args->agbp = agbp;
+       return 0;
+}
+
+/*
+ * Get a block from the freelist.
+ * Returns with the buffer for the block gotten.
+ */
+int                            /* error */
+xfs_alloc_get_freelist(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t       *agbp,  /* buffer containing the agf structure */
+       xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
+       int             btreeblk) /* destination is a AGF btree */
+{
+       xfs_agf_t       *agf;   /* a.g. freespace structure */
+       xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
+       xfs_agblock_t   bno;    /* block number returned */
+       __be32          *agfl_bno;
+       int             error;
+       int             logflags;
+       xfs_mount_t     *mp = tp->t_mountp;
+       xfs_perag_t     *pag;   /* per allocation group data */
+
+       /*
+        * Freelist is empty, give up.
+        */
+       agf = XFS_BUF_TO_AGF(agbp);
+       if (!agf->agf_flcount) {
+               *bnop = NULLAGBLOCK;
+               return 0;
+       }
+       /*
+        * Read the array of free blocks.
+        */
+       error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
+                                   &agflbp);
+       if (error)
+               return error;
+
+       /*
+        * Get the block number and update the data structures.
+        */
+       agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+       bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+       be32_add_cpu(&agf->agf_flfirst, 1);
+       xfs_trans_brelse(tp, agflbp);
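+       /* the AGFL is a circular array; wrap the head index at the end */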
+       if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
+               agf->agf_flfirst = 0;
+
+       pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+       be32_add_cpu(&agf->agf_flcount, -1);
+       xfs_trans_agflist_delta(tp, -1);
+       pag->pagf_flcount--;
+
+       logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
+       if (btreeblk) {
+               be32_add_cpu(&agf->agf_btreeblks, 1);
+               pag->pagf_btreeblks++;
+               logflags |= XFS_AGF_BTREEBLKS;
+       }
+       xfs_perag_put(pag);
+
+       xfs_alloc_log_agf(tp, agbp, logflags);
+       *bnop = bno;
+
+       return 0;
+}
+
+/*
+ * Log the given fields from the agf structure.
+ */
+void
+xfs_alloc_log_agf(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
+       int             fields) /* mask of fields to be logged (XFS_AGF_...) */
+{
+       int     first;          /* first byte offset */
+       int     last;           /* last byte offset */
+       static const short      offsets[] = {
+               offsetof(xfs_agf_t, agf_magicnum),
+               offsetof(xfs_agf_t, agf_versionnum),
+               offsetof(xfs_agf_t, agf_seqno),
+               offsetof(xfs_agf_t, agf_length),
+               offsetof(xfs_agf_t, agf_roots[0]),
+               offsetof(xfs_agf_t, agf_levels[0]),
+               offsetof(xfs_agf_t, agf_flfirst),
+               offsetof(xfs_agf_t, agf_fllast),
+               offsetof(xfs_agf_t, agf_flcount),
+               offsetof(xfs_agf_t, agf_freeblks),
+               offsetof(xfs_agf_t, agf_longest),
+               offsetof(xfs_agf_t, agf_btreeblks),
+               offsetof(xfs_agf_t, agf_uuid),
+               sizeof(xfs_agf_t)
+       };
+
+       trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
+
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
+
+       xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
+       xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
+}
+
+/*
+ * Interface for inode allocation to force the pag data to be initialized.
+ */
+int                                    /* error */
+xfs_alloc_pagf_init(
+       xfs_mount_t             *mp,    /* file system mount structure */
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       int                     flags)  /* XFS_ALLOC_FLAGS_... */
+{
+       xfs_buf_t               *bp;
+       int                     error;
+
+       if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
+               return error;
+       if (bp)
+               xfs_trans_brelse(tp, bp);
+       return 0;
+}
+
+/*
+ * Put the block on the freelist for the allocation group.
+ */
+int                                    /* error */
+xfs_alloc_put_freelist(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
+       xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
+       xfs_agblock_t           bno,    /* block being freed */
+       int                     btreeblk) /* block came from a AGF btree */
+{
+       xfs_agf_t               *agf;   /* a.g. freespace structure */
+       __be32                  *blockp;/* pointer to array entry */
+       int                     error;
+       int                     logflags;
+       xfs_mount_t             *mp;    /* mount structure */
+       xfs_perag_t             *pag;   /* per allocation group data */
+       __be32                  *agfl_bno;
+       int                     startoff;
+
+       agf = XFS_BUF_TO_AGF(agbp);
+       mp = tp->t_mountp;
+
+       if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
+                       be32_to_cpu(agf->agf_seqno), &agflbp)))
+               return error;
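+       /* advance the circular AGFL tail index, wrapping at the end */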
+       be32_add_cpu(&agf->agf_fllast, 1);
+       if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
+               agf->agf_fllast = 0;
+
+       pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+       be32_add_cpu(&agf->agf_flcount, 1);
+       xfs_trans_agflist_delta(tp, 1);
+       pag->pagf_flcount++;
+
+       logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
+       if (btreeblk) {
+               be32_add_cpu(&agf->agf_btreeblks, -1);
+               pag->pagf_btreeblks--;
+               logflags |= XFS_AGF_BTREEBLKS;
+       }
+       xfs_perag_put(pag);
+
+       xfs_alloc_log_agf(tp, agbp, logflags);
+
+       ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+
+       agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+       blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
+       *blockp = cpu_to_be32(bno);
+       startoff = (char *)blockp - (char *)agflbp->b_addr;
+
+       xfs_alloc_log_agf(tp, agbp, logflags);
+
+       xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
+       xfs_trans_log_buf(tp, agflbp, startoff,
+                         startoff + sizeof(xfs_agblock_t) - 1);
+       return 0;
+}
+
+static bool
+xfs_agf_verify(
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp)
+{
+       struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+
+       if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
+             XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
+             be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
+             be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
+             be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
+             be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+               return false;
+
+       /*
+        * during growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs ensures we can't
+        * use it by using uncached buffers that don't have the perag attached
+        * so we can detect and avoid this problem.
+        */
+       if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
+               return false;
+
+       if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
+           be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
+               return false;
+
+       return true;
+}
+
+static void
+xfs_agf_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+                               XFS_ERRTAG_ALLOC_READ_AGF,
+                               XFS_RANDOM_ALLOC_READ_AGF))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_agf_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       if (!xfs_agf_verify(mp, bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
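+       /* stamp the AGF with the LSN of the last modification for recovery */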
+       if (bip)
+               XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_agf_buf_ops = {
+       .verify_read = xfs_agf_read_verify,
+       .verify_write = xfs_agf_write_verify,
+};
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int                                    /* error */
+xfs_read_agf(
+       struct xfs_mount        *mp,    /* mount point structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       int                     flags,  /* XFS_BUF_ */
+       struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
+{
+       int             error;
+
+       trace_xfs_read_agf(mp, agno);
+
+       ASSERT(agno != NULLAGNUMBER);
+       error = xfs_trans_read_buf(
+                       mp, tp, mp->m_ddev_targp,
+                       XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
+                       XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
+       if (error)
+               return error;
+       if (!*bpp)
+               return 0;
+
+       ASSERT(!(*bpp)->b_error);
+       xfs_buf_set_ref(*bpp, XFS_AGF_REF);
+       return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int                                    /* error */
+xfs_alloc_read_agf(
+       struct xfs_mount        *mp,    /* mount point structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       int                     flags,  /* XFS_ALLOC_FLAG_... */
+       struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
+{
+       struct xfs_agf          *agf;           /* ag freelist header */
+       struct xfs_perag        *pag;           /* per allocation group data */
+       int                     error;
+
+       trace_xfs_alloc_read_agf(mp, agno);
+
+       ASSERT(agno != NULLAGNUMBER);
+       error = xfs_read_agf(mp, tp, agno,
+                       (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+                       bpp);
+       if (error)
+               return error;
+       if (!*bpp)
+               return 0;
+       ASSERT(!(*bpp)->b_error);
+
+       agf = XFS_BUF_TO_AGF(*bpp);
+       pag = xfs_perag_get(mp, agno);
+       if (!pag->pagf_init) {
+               pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
+               pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
+               pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
+               pag->pagf_longest = be32_to_cpu(agf->agf_longest);
+               pag->pagf_levels[XFS_BTNUM_BNOi] =
+                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
+               pag->pagf_levels[XFS_BTNUM_CNTi] =
+                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
+               spin_lock_init(&pag->pagb_lock);
+               pag->pagb_count = 0;
+               pag->pagb_tree = RB_ROOT;
+               pag->pagf_init = 1;
+       }
+#ifdef DEBUG
+       else if (!XFS_FORCED_SHUTDOWN(mp)) {
+               ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+               ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
+               ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
+               ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
+               ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
+                      be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
+               ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
+                      be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
+       }
+#endif
+       xfs_perag_put(pag);
+       return 0;
+}
+
+/*
+ * Allocate an extent (variable-size).
+ * Depending on the allocation type, we either look in a single allocation
+ * group or loop over the allocation groups to find the result.
+ */
+int                            /* error */
+xfs_alloc_vextent(
+       xfs_alloc_arg_t *args)  /* allocation argument structure */
+{
+       xfs_agblock_t   agsize; /* allocation group size */
+       int             error;
+       int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
+       xfs_extlen_t    minleft;/* minimum left value, temp copy */
+       xfs_mount_t     *mp;    /* mount structure pointer */
+       xfs_agnumber_t  sagno;  /* starting allocation group number */
+       xfs_alloctype_t type;   /* input allocation type */
+       int             bump_rotor = 0; /* advance the AG rotor on success */
+       int             no_min = 0;     /* last pass ignores minleft */
+       xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
+
+       mp = args->mp;
+       type = args->otype = args->type;
+       args->agbno = NULLAGBLOCK;
+       /*
+        * Just fix this up, for the case where the last a.g. is shorter
+        * (or there's only one a.g.) and the caller couldn't easily figure
+        * that out (xfs_bmap_alloc).
+        */
+       agsize = mp->m_sb.sb_agblocks;
+       if (args->maxlen > agsize)
+               args->maxlen = agsize;
+       if (args->alignment == 0)
+               args->alignment = 1;
+       ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
+       ASSERT(args->minlen <= args->maxlen);
+       ASSERT(args->minlen <= agsize);
+       ASSERT(args->mod < args->prod);
+       if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
+           XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
+           args->minlen > args->maxlen || args->minlen > agsize ||
+           args->mod >= args->prod) {
+               args->fsbno = NULLFSBLOCK;
+               trace_xfs_alloc_vextent_badargs(args);
+               return 0;
+       }
+       minleft = args->minleft;
+
+       switch (type) {
+       case XFS_ALLOCTYPE_THIS_AG:
+       case XFS_ALLOCTYPE_NEAR_BNO:
+       case XFS_ALLOCTYPE_THIS_BNO:
+               /*
+                * These three force us into a single a.g.
+                */
+               args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+               args->pag = xfs_perag_get(mp, args->agno);
+               args->minleft = 0;
+               error = xfs_alloc_fix_freelist(args, 0);
+               args->minleft = minleft;
+               if (error) {
+                       trace_xfs_alloc_vextent_nofix(args);
+                       goto error0;
+               }
+               if (!args->agbp) {
+                       trace_xfs_alloc_vextent_noagbp(args);
+                       break;
+               }
+               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+               if ((error = xfs_alloc_ag_vextent(args)))
+                       goto error0;
+               break;
+       case XFS_ALLOCTYPE_START_BNO:
+               /*
+                * Try near allocation first, then anywhere-in-ag after
+                * the first a.g. fails.
+                */
+               if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
+                   (mp->m_flags & XFS_MOUNT_32BITINODES)) {
+                       args->fsbno = XFS_AGB_TO_FSB(mp,
+                                       ((mp->m_agfrotor / rotorstep) %
+                                       mp->m_sb.sb_agcount), 0);
+                       bump_rotor = 1;
+               }
+               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+               args->type = XFS_ALLOCTYPE_NEAR_BNO;
+               /* FALLTHROUGH */
+       case XFS_ALLOCTYPE_ANY_AG:
+       case XFS_ALLOCTYPE_START_AG:
+       case XFS_ALLOCTYPE_FIRST_AG:
+               /*
+                * Rotate through the allocation groups looking for a winner.
+                */
+               if (type == XFS_ALLOCTYPE_ANY_AG) {
+                       /*
+                        * Start with the last place we left off.
+                        */
+                       args->agno = sagno = (mp->m_agfrotor / rotorstep) %
+                                       mp->m_sb.sb_agcount;
+                       args->type = XFS_ALLOCTYPE_THIS_AG;
+                       flags = XFS_ALLOC_FLAG_TRYLOCK;
+               } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
+                       /*
+                        * Start with allocation group given by bno.
+                        */
+                       args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+                       args->type = XFS_ALLOCTYPE_THIS_AG;
+                       sagno = 0;
+                       flags = 0;
+               } else {
+                       if (type == XFS_ALLOCTYPE_START_AG)
+                               args->type = XFS_ALLOCTYPE_THIS_AG;
+                       /*
+                        * Start with the given allocation group.
+                        */
+                       args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+                       flags = XFS_ALLOC_FLAG_TRYLOCK;
+               }
+               /*
+                * Loop over allocation groups twice; first time with
+                * trylock set, second time without.
+                */
+               for (;;) {
+                       args->pag = xfs_perag_get(mp, args->agno);
+                       if (no_min)
+                               args->minleft = 0;
+                       error = xfs_alloc_fix_freelist(args, flags);
+                       args->minleft = minleft;
+                       if (error) {
+                               trace_xfs_alloc_vextent_nofix(args);
+                               goto error0;
+                       }
+                       /*
+                        * If we get a buffer back then the allocation will fly.
+                        */
+                       if (args->agbp) {
+                               if ((error = xfs_alloc_ag_vextent(args)))
+                                       goto error0;
+                               break;
+                       }
+
+                       trace_xfs_alloc_vextent_loopfailed(args);
+
+                       /*
+                        * Didn't work, figure out the next iteration.
+                        */
+                       if (args->agno == sagno &&
+                           type == XFS_ALLOCTYPE_START_BNO)
+                               args->type = XFS_ALLOCTYPE_THIS_AG;
+                       /*
+                        * For the first allocation, we can try any AG to get
+                        * space.  However, if we already have allocated a
+                        * block, we don't want to try AGs whose number is below
+                        * sagno. Otherwise, we may end up with out-of-order
+                        * locking of AGF, which might cause deadlock.
+                        */
+                       if (++(args->agno) == mp->m_sb.sb_agcount) {
+                               if (args->firstblock != NULLFSBLOCK)
+                                       args->agno = sagno;
+                               else
+                                       args->agno = 0;
+                       }
+                       /*
+                        * Reached the starting a.g., must either be done
+                        * or switch to non-trylock mode.
+                        */
+                       if (args->agno == sagno) {
+                               if (no_min == 1) {
+                                       args->agbno = NULLAGBLOCK;
+                                       trace_xfs_alloc_vextent_allfailed(args);
+                                       break;
+                               }
+                               if (flags == 0) {
+                                       no_min = 1;
+                               } else {
+                                       flags = 0;
+                                       if (type == XFS_ALLOCTYPE_START_BNO) {
+                                               args->agbno = XFS_FSB_TO_AGBNO(mp,
+                                                       args->fsbno);
+                                               args->type = XFS_ALLOCTYPE_NEAR_BNO;
+                                       }
+                               }
+                       }
+                       xfs_perag_put(args->pag);
+               }
+               if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
+                       if (args->agno == sagno)
+                               mp->m_agfrotor = (mp->m_agfrotor + 1) %
+                                       (mp->m_sb.sb_agcount * rotorstep);
+                       else
+                               mp->m_agfrotor = (args->agno * rotorstep + 1) %
+                                       (mp->m_sb.sb_agcount * rotorstep);
+               }
+               break;
+       default:
+               ASSERT(0);
+               /* NOTREACHED */
+       }
+       if (args->agbno == NULLAGBLOCK)
+               args->fsbno = NULLFSBLOCK;
+       else {
+               args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+#ifdef DEBUG
+               ASSERT(args->len >= args->minlen);
+               ASSERT(args->len <= args->maxlen);
+               ASSERT(args->agbno % args->alignment == 0);
+               XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
+                       args->len);
+#endif
+       }
+       xfs_perag_put(args->pag);
+       return 0;
+error0:
+       xfs_perag_put(args->pag);
+       return error;
+}
+
+/*
+ * Free an extent.
+ * Just break up the extent address and hand off to xfs_free_ag_extent
+ * after fixing up the freelist.
+ */
+int                            /* error */
+xfs_free_extent(
+       xfs_trans_t     *tp,    /* transaction pointer */
+       xfs_fsblock_t   bno,    /* starting block number of extent */
+       xfs_extlen_t    len)    /* length of extent */
+{
+       xfs_alloc_arg_t args;
+       int             error;
+
+       ASSERT(len != 0);
+       memset(&args, 0, sizeof(xfs_alloc_arg_t));
+       args.tp = tp;
+       args.mp = tp->t_mountp;
+
+       /*
+        * validate that the block number is legal - this enables us to detect
+        * and handle a silent filesystem corruption rather than crashing.
+        */
+       args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
+       if (args.agno >= args.mp->m_sb.sb_agcount)
+               return EFSCORRUPTED;
+
+       args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+       if (args.agbno >= args.mp->m_sb.sb_agblocks)
+               return EFSCORRUPTED;
+
+       args.pag = xfs_perag_get(args.mp, args.agno);
+       ASSERT(args.pag);
+
+       error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+       if (error)
+               goto error0;
+
+       /* validate the extent size is legal now that we have the agf locked */
+       if (args.agbno + len >
+                       be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+               error = EFSCORRUPTED;
+               goto error0;
+       }
+
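+       /* free the extent and mark it busy until the transaction commits */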
+       error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
+       if (!error)
+               xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
+error0:
+       xfs_perag_put(args.pag);
+       return error;
+}
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
new file mode 100644 (file)
index 0000000..8358f1d
--- /dev/null
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+
+
+STATIC struct xfs_btree_cur *
+xfs_allocbt_dup_cursor(
+       struct xfs_btree_cur    *cur)
+{
+       return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.a.agbp, cur->bc_private.a.agno,
+                       cur->bc_btnum);
+}
+
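+/*
+ * Set the root block of the given btree in the AGF and bump its level.
+ */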
+STATIC void
+xfs_allocbt_set_root(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     inc)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       xfs_agnumber_t          seqno = be32_to_cpu(agf->agf_seqno);
+       int                     btnum = cur->bc_btnum;
+       struct xfs_perag        *pag = xfs_perag_get(cur->bc_mp, seqno);
+
+       ASSERT(ptr->s != 0);
+
+       agf->agf_roots[btnum] = ptr->s;
+       be32_add_cpu(&agf->agf_levels[btnum], inc);
+       pag->pagf_levels[btnum] += inc;
+       xfs_perag_put(pag);
+
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
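+/*
+ * Allocate a new btree block from the AG free list and make sure any busy
+ * extent state covering it is cleared so it can be reused straight away.
+ */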
+STATIC int
+xfs_allocbt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       int                     error;
+       xfs_agblock_t           bno;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       /* Allocate the new block from the freelist. If we can't, give up.  */
+       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                                      &bno, 1);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+
+       if (bno == NULLAGBLOCK) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+
+       xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
+       xfs_trans_agbtree_delta(cur->bc_tp, 1);
+       new->s = cpu_to_be32(bno);
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+}
+
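+/*
+ * Return a freed btree block to the AG free list, mark it busy so it is not
+ * reused before the free commits, and invalidate the buffer.
+ */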
+STATIC int
+xfs_allocbt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       xfs_agblock_t           bno;
+       int                     error;
+
+       bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+       error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+       if (error)
+               return error;
+
+       xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+                             XFS_EXTENT_BUSY_SKIP_DISCARD);
+       xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+       xfs_trans_binval(cur->bc_tp, bp);
+       return 0;
+}
+
+/*
+ * Update the longest extent in the AGF
+ */
+STATIC void
+xfs_allocbt_update_lastrec(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_rec     *rec,
+       int                     ptr,
+       int                     reason)
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+       xfs_agnumber_t          seqno = be32_to_cpu(agf->agf_seqno);
+       struct xfs_perag        *pag;
+       __be32                  len;
+       int                     numrecs;
+
+       ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+
+       switch (reason) {
+       case LASTREC_UPDATE:
+               /*
+                * If this is the last leaf block and it's the last record,
+                * then update the size of the longest extent in the AG.
+                */
+               if (ptr != xfs_btree_get_numrecs(block))
+                       return;
+               len = rec->alloc.ar_blockcount;
+               break;
+       case LASTREC_INSREC:
+               if (be32_to_cpu(rec->alloc.ar_blockcount) <=
+                   be32_to_cpu(agf->agf_longest))
+                       return;
+               len = rec->alloc.ar_blockcount;
+               break;
+       case LASTREC_DELREC:
+               numrecs = xfs_btree_get_numrecs(block);
+               if (ptr <= numrecs)
+                       return;
+               ASSERT(ptr == numrecs + 1);
+
+               if (numrecs) {
+                       xfs_alloc_rec_t *rrp;
+
+                       rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
+                       len = rrp->ar_blockcount;
+               } else {
+                       len = 0;
+               }
+
+               break;
+       default:
+               ASSERT(0);
+               return;
+       }
+
+       agf->agf_longest = len;
+       pag = xfs_perag_get(cur->bc_mp, seqno);
+       pag->pagf_longest = be32_to_cpu(len);
+       xfs_perag_put(pag);
+       xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
+}
+
+STATIC int
+xfs_allocbt_get_minrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       return cur->bc_mp->m_alloc_mnr[level != 0];
+}
+
+STATIC int
+xfs_allocbt_get_maxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       return cur->bc_mp->m_alloc_mxr[level != 0];
+}
+
+STATIC void
+xfs_allocbt_init_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       ASSERT(rec->alloc.ar_startblock != 0);
+
+       key->alloc.ar_startblock = rec->alloc.ar_startblock;
+       key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
+}
+
+STATIC void
+xfs_allocbt_init_rec_from_key(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       ASSERT(key->alloc.ar_startblock != 0);
+
+       rec->alloc.ar_startblock = key->alloc.ar_startblock;
+       rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
+}
+
+STATIC void
+xfs_allocbt_init_rec_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       ASSERT(cur->bc_rec.a.ar_startblock != 0);
+
+       rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+       rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
+}
+
+STATIC void
+xfs_allocbt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+       ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+       ptr->s = agf->agf_roots[cur->bc_btnum];
+}
+
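+/*
+ * Compare the cursor's in-core record against a key.  The by-bno btree is
+ * keyed on start block alone; the by-size btree is keyed on block count
+ * with the start block as a tie-breaker.
+ */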
+STATIC __int64_t
+xfs_allocbt_key_diff(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       xfs_alloc_rec_incore_t  *rec = &cur->bc_rec.a;
+       xfs_alloc_key_t         *kp = &key->alloc;
+       __int64_t               diff;
+
+       if (cur->bc_btnum == XFS_BTNUM_BNO) {
+               return (__int64_t)be32_to_cpu(kp->ar_startblock) -
+                               rec->ar_startblock;
+       }
+
+       diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+       if (diff)
+               return diff;
+
+       return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+static bool
+xfs_allocbt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_perag        *pag = bp->b_pag;
+       unsigned int            level;
+
+       /*
+        * magic number and level verification
+        *
+        * During growfs operations, we can't verify the exact level or owner as
+        * the perag is not fully initialised and hence not attached to the
+        * buffer.  In this case, check against the maximum tree depth.
+        *
+        * Similarly, during log recovery we will have a perag structure
+        * attached, but the agf information will not yet have been initialised
+        * from the on disk AGF. Again, we can only check against maximum limits
+        * in this case.
+        */
+       level = be16_to_cpu(block->bb_level);
+       switch (block->bb_magic) {
+       case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return false;
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+                       return false;
+               if (pag &&
+                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+                       return false;
+               /* fall through */
+       case cpu_to_be32(XFS_ABTB_MAGIC):
+               if (pag && pag->pagf_init) {
+                       if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
+                               return false;
+               } else if (level >= mp->m_ag_maxlevels)
+                       return false;
+               break;
+       case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return false;
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+                       return false;
+               if (pag &&
+                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+                       return false;
+               /* fall through */
+       case cpu_to_be32(XFS_ABTC_MAGIC):
+               if (pag && pag->pagf_init) {
+                       if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
+                               return false;
+               } else if (level >= mp->m_ag_maxlevels)
+                       return false;
+               break;
+       default:
+               return false;
+       }
+
+       /* numrecs verification */
+       if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
+               return false;
+
+       /* sibling pointer verification */
+       if (!block->bb_u.s.bb_leftsib ||
+           (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+            block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+               return false;
+       if (!block->bb_u.s.bb_rightsib ||
+           (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+            block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+               return false;
+
+       return true;
+}
+
+static void
+xfs_allocbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_allocbt_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
+       }
+}
+
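+/*
+ * Verify the block contents before write; the CRC is only calculated once
+ * the block has passed verification.
+ */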
+static void
+xfs_allocbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_allocbt_verify(bp)) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+       xfs_btree_sblock_calc_crc(bp);
+}
+
+const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+       .verify_read = xfs_allocbt_read_verify,
+       .verify_write = xfs_allocbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_allocbt_keys_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       if (cur->bc_btnum == XFS_BTNUM_BNO) {
+               return be32_to_cpu(k1->alloc.ar_startblock) <
+                      be32_to_cpu(k2->alloc.ar_startblock);
+       } else {
+               return be32_to_cpu(k1->alloc.ar_blockcount) <
+                       be32_to_cpu(k2->alloc.ar_blockcount) ||
+                       (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+                        be32_to_cpu(k1->alloc.ar_startblock) <
+                        be32_to_cpu(k2->alloc.ar_startblock));
+       }
+}
+
+STATIC int
+xfs_allocbt_recs_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *r1,
+       union xfs_btree_rec     *r2)
+{
+       if (cur->bc_btnum == XFS_BTNUM_BNO) {
+               return be32_to_cpu(r1->alloc.ar_startblock) +
+                       be32_to_cpu(r1->alloc.ar_blockcount) <=
+                       be32_to_cpu(r2->alloc.ar_startblock);
+       } else {
+               return be32_to_cpu(r1->alloc.ar_blockcount) <
+                       be32_to_cpu(r2->alloc.ar_blockcount) ||
+                       (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+                        be32_to_cpu(r1->alloc.ar_startblock) <
+                        be32_to_cpu(r2->alloc.ar_startblock));
+       }
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_allocbt_ops = {
+       .rec_len                = sizeof(xfs_alloc_rec_t),
+       .key_len                = sizeof(xfs_alloc_key_t),
+
+       .dup_cursor             = xfs_allocbt_dup_cursor,
+       .set_root               = xfs_allocbt_set_root,
+       .alloc_block            = xfs_allocbt_alloc_block,
+       .free_block             = xfs_allocbt_free_block,
+       .update_lastrec         = xfs_allocbt_update_lastrec,
+       .get_minrecs            = xfs_allocbt_get_minrecs,
+       .get_maxrecs            = xfs_allocbt_get_maxrecs,
+       .init_key_from_rec      = xfs_allocbt_init_key_from_rec,
+       .init_rec_from_key      = xfs_allocbt_init_rec_from_key,
+       .init_rec_from_cur      = xfs_allocbt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_allocbt_init_ptr_from_cur,
+       .key_diff               = xfs_allocbt_key_diff,
+       .buf_ops                = &xfs_allocbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_allocbt_keys_inorder,
+       .recs_inorder           = xfs_allocbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new allocation btree cursor.
+ */
+struct xfs_btree_cur *                 /* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+       struct xfs_mount        *mp,            /* file system mount point */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *agbp,          /* buffer for agf structure */
+       xfs_agnumber_t          agno,           /* allocation group number */
+       xfs_btnum_t             btnum)          /* btree identifier */
+{
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       struct xfs_btree_cur    *cur;
+
+       ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+       cur->bc_btnum = btnum;
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+       cur->bc_ops = &xfs_allocbt_ops;
+
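+       /*
+        * The by-size (cnt) btree keeps the largest free extent in its last
+        * record, so it needs the last-record update hooks enabled.
+        */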
+       if (btnum == XFS_BTNUM_CNT) {
+               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
+               cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+       } else {
+               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+       }
+
+       cur->bc_private.a.agbp = agbp;
+       cur->bc_private.a.agno = agno;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+       return cur;
+}
+
+/*
+ * Calculate number of records in an alloc btree block.
+ */
+int
+xfs_allocbt_maxrecs(
+       struct xfs_mount        *mp,
+       int                     blocklen,
+       int                     leaf)
+{
+       blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
+
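+       /*
+        * Leaf blocks hold whole records; node blocks hold key/pointer pairs.
+        */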
+       if (leaf)
+               return blocklen / sizeof(xfs_alloc_rec_t);
+       return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
+}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
new file mode 100644 (file)
index 0000000..7d95b16
--- /dev/null
@@ -0,0 +1,1459 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_dinode.h"
+
+/*
+ * xfs_attr.c
+ *
+ * Provide the external interfaces to manage attribute lists.
+ */
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Internal routines when attribute list fits inside the inode.
+ */
+STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
+
+/*
+ * Internal routines when attribute list is one block.
+ */
+STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
+STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
+STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
+
+/*
+ * Internal routines when attribute list is more than one block.
+ */
+STATIC int xfs_attr_node_get(xfs_da_args_t *args);
+STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
+STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
+STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
+STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
+
+
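+/*
+ * Fill in the common da_args fields for an attribute operation and
+ * precompute the hash of the attribute name.
+ */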
+STATIC int
+xfs_attr_args_init(
+       struct xfs_da_args      *args,
+       struct xfs_inode        *dp,
+       const unsigned char     *name,
+       int                     flags)
+{
+
+       if (!name)
+               return EINVAL;
+
+       memset(args, 0, sizeof(*args));
+       args->geo = dp->i_mount->m_attr_geo;
+       args->whichfork = XFS_ATTR_FORK;
+       args->dp = dp;
+       args->flags = flags;
+       args->name = name;
+       args->namelen = strlen((const char *)name);
+       if (args->namelen >= MAXNAMELEN)
+               return EFAULT;          /* match IRIX behaviour */
+
+       args->hashval = xfs_da_hashname(args->name, args->namelen);
+       return 0;
+}
+
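+/*
+ * Return non-zero if the inode has an attribute fork with at least one
+ * attribute in it.
+ */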
+int
+xfs_inode_hasattr(
+       struct xfs_inode        *ip)
+{
+       if (!XFS_IFORK_Q(ip) ||
+           (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+            ip->i_d.di_anextents == 0))
+               return 0;
+       return 1;
+}
+
+/*========================================================================
+ * Overall external interface routines.
+ *========================================================================*/
+
+int
+xfs_attr_get(
+       struct xfs_inode        *ip,
+       const unsigned char     *name,
+       unsigned char           *value,
+       int                     *valuelenp,
+       int                     flags)
+{
+       struct xfs_da_args      args;
+       uint                    lock_mode;
+       int                     error;
+
+       XFS_STATS_INC(xs_attr_get);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return EIO;
+
+       if (!xfs_inode_hasattr(ip))
+               return ENOATTR;
+
+       error = xfs_attr_args_init(&args, ip, name, flags);
+       if (error)
+               return error;
+
+       args.value = value;
+       args.valuelen = *valuelenp;
+
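+       /*
+        * Dispatch on the attribute fork format: inline shortform, a single
+        * leaf block, or a full node-format tree.
+        */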
+       lock_mode = xfs_ilock_attr_map_shared(ip);
+       if (!xfs_inode_hasattr(ip))
+               error = ENOATTR;
+       else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+               error = xfs_attr_shortform_getvalue(&args);
+       else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+               error = xfs_attr_leaf_get(&args);
+       else
+               error = xfs_attr_node_get(&args);
+       xfs_iunlock(ip, lock_mode);
+
+       *valuelenp = args.valuelen;
+       return error == EEXIST ? 0 : error;
+}
+
+/*
+ * Calculate how many blocks we need for the new attribute.
+ */
+STATIC int
+xfs_attr_calc_size(
+       struct xfs_da_args      *args,
+       int                     *local)
+{
+       struct xfs_mount        *mp = args->dp->i_mount;
+       int                     size;
+       int                     nblks;
+
+       /*
+        * Determine the space the new attribute will use, and whether it
+        * will be "local" or "remote" (note: local != inline).
+        */
+       size = xfs_attr_leaf_newentsize(args, local);
+       nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+       if (*local) {
+               if (size > (args->geo->blksize / 2)) {
+                       /* Double split possible */
+                       nblks *= 2;
+               }
+       } else {
+               /*
+                * Out of line attribute, cannot double split, but
+                * make room for the attribute value itself.
+                */
+               uint    dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
+               nblks += dblocks;
+               nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
+       }
+
+       return nblks;
+}
+
+int
+xfs_attr_set(
+       struct xfs_inode        *dp,
+       const unsigned char     *name,
+       unsigned char           *value,
+       int                     valuelen,
+       int                     flags)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_da_args      args;
+       struct xfs_bmap_free    flist;
+       struct xfs_trans_res    tres;
+       xfs_fsblock_t           firstblock;
+       int                     rsvd = (flags & ATTR_ROOT) != 0;
+       int                     error, err2, committed, local;
+
+       XFS_STATS_INC(xs_attr_set);
+
+       if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+               return EIO;
+
+       error = xfs_attr_args_init(&args, dp, name, flags);
+       if (error)
+               return error;
+
+       args.value = value;
+       args.valuelen = valuelen;
+       args.firstblock = &firstblock;
+       args.flist = &flist;
+       args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+       args.total = xfs_attr_calc_size(&args, &local);
+
+       error = xfs_qm_dqattach(dp, 0);
+       if (error)
+               return error;
+
+       /*
+        * If the inode doesn't have an attribute fork, add one.
+        * (inode must not be locked when we call this routine)
+        */
+       if (XFS_IFORK_Q(dp) == 0) {
+               int sf_size = sizeof(xfs_attr_sf_hdr_t) +
+                       XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
+
+               error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Start our first transaction of the day.
+        *
+        * All future transactions during this code must be "chained" off
+        * this one via the trans_dup() call.  All transactions will contain
+        * the inode, and the inode will always be marked with trans_ihold().
+        * Since the inode will be locked in all transactions, we must log
+        * the inode in every transaction to let it float upward through
+        * the log.
+        */
+       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
+
+       /*
+        * Root fork attributes can use reserved data blocks for this
+        * operation if necessary
+        */
+
+       if (rsvd)
+               args.trans->t_flags |= XFS_TRANS_RESERVE;
+
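+       /*
+        * Scale the log reservation by the worst case number of blocks
+        * (args.total) this operation may allocate.
+        */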
+       tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+                        M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
+       tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+       error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
+       if (error) {
+               xfs_trans_cancel(args.trans, 0);
+               return error;
+       }
+       xfs_ilock(dp, XFS_ILOCK_EXCL);
+
+       error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
+                               rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
+                                      XFS_QMOPT_RES_REGBLKS);
+       if (error) {
+               xfs_iunlock(dp, XFS_ILOCK_EXCL);
+               xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+               return error;
+       }
+
+       xfs_trans_ijoin(args.trans, dp, 0);
+
+       /*
+        * If the attribute list is non-existent or a shortform list,
+        * upgrade it to a single-leaf-block attribute list.
+        */
+       if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
+           (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+            dp->i_d.di_anextents == 0)) {
+
+               /*
+                * Build initial attribute list (if required).
+                */
+               if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
+                       xfs_attr_shortform_create(&args);
+
+               /*
+                * Try to add the attr to the attribute list in
+                * the inode.
+                */
+               error = xfs_attr_shortform_addname(&args);
+               if (error != ENOSPC) {
+                       /*
+                        * Commit the shortform mods, and we're done.
+                        * NOTE: this is also the error path (EEXIST, etc).
+                        */
+                       ASSERT(args.trans != NULL);
+
+                       /*
+                        * If this is a synchronous mount, make sure that
+                        * the transaction goes to disk before returning
+                        * to the user.
+                        */
+                       if (mp->m_flags & XFS_MOUNT_WSYNC)
+                               xfs_trans_set_sync(args.trans);
+
+                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
+                               xfs_trans_ichgtime(args.trans, dp,
+                                                       XFS_ICHGTIME_CHG);
+                       }
+                       err2 = xfs_trans_commit(args.trans,
+                                                XFS_TRANS_RELEASE_LOG_RES);
+                       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+
+                       return error ? error : err2;
+               }
+
+               /*
+                * It won't fit in the shortform, transform to a leaf block.
+                * GROT: another possible req'mt for a double-split btree op.
+                */
+               xfs_bmap_init(args.flist, args.firstblock);
+               error = xfs_attr_shortform_to_leaf(&args);
+               if (!error) {
+                       error = xfs_bmap_finish(&args.trans, args.flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args.trans = NULL;
+                       xfs_bmap_cancel(&flist);
+                       goto out;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args.trans, dp, 0);
+
+               /*
+                * Commit the leaf transformation.  We'll need another (linked)
+                * transaction to add the new attribute to the leaf.
+                */
+
+               error = xfs_trans_roll(&args.trans, dp);
+               if (error)
+                       goto out;
+
+       }
+
+       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+               error = xfs_attr_leaf_addname(&args);
+       else
+               error = xfs_attr_node_addname(&args);
+       if (error)
+               goto out;
+
+       /*
+        * If this is a synchronous mount, make sure that the
+        * transaction goes to disk before returning to the user.
+        */
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(args.trans);
+
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
+       /*
+        * Commit the last in the sequence of transactions.
+        */
+       xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
+       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+
+       return error;
+
+out:
+       if (args.trans) {
+               xfs_trans_cancel(args.trans,
+                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+       }
+       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+       return error;
+}
+
+/*
+ * Generic handler routine to remove a name from an attribute list.
+ * Transitions attribute list from Btree to shortform as necessary.
+ */
+int
+xfs_attr_remove(
+       struct xfs_inode        *dp,
+       const unsigned char     *name,
+       int                     flags)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_da_args      args;
+       struct xfs_bmap_free    flist;
+       xfs_fsblock_t           firstblock;
+       int                     error;
+
+       XFS_STATS_INC(xs_attr_remove);
+
+       if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+               return EIO;
+
+       if (!xfs_inode_hasattr(dp))
+               return ENOATTR;
+
+       error = xfs_attr_args_init(&args, dp, name, flags);
+       if (error)
+               return error;
+
+       args.firstblock = &firstblock;
+       args.flist = &flist;
+
+       /*
+        * we have no control over the attribute names that userspace passes us
+        * to remove, so we have to allow the name lookup prior to attribute
+        * removal to fail.
+        */
+       args.op_flags = XFS_DA_OP_OKNOENT;
+
+       error = xfs_qm_dqattach(dp, 0);
+       if (error)
+               return error;
+
+       /*
+        * Start our first transaction of the day.
+        *
+        * All future transactions during this code must be "chained" off
+        * this one via the trans_dup() call.  All transactions will contain
+        * the inode, and the inode will always be marked with trans_ihold().
+        * Since the inode will be locked in all transactions, we must log
+        * the inode in every transaction to let it float upward through
+        * the log.
+        */
+       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
+
+       /*
+        * Root fork attributes can use reserved data blocks for this
+        * operation if necessary
+        */
+
+       if (flags & ATTR_ROOT)
+               args.trans->t_flags |= XFS_TRANS_RESERVE;
+
+       error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
+                                 XFS_ATTRRM_SPACE_RES(mp), 0);
+       if (error) {
+               xfs_trans_cancel(args.trans, 0);
+               return error;
+       }
+
+       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       /*
+        * No need to make quota reservations here. We expect to release some
+        * blocks, not allocate them, in the common case.
+        */
+       xfs_trans_ijoin(args.trans, dp, 0);
+
+       if (!xfs_inode_hasattr(dp)) {
+               error = ENOATTR;
+       } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+               ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
+               error = xfs_attr_shortform_remove(&args);
+       } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+               error = xfs_attr_leaf_removename(&args);
+       } else {
+               error = xfs_attr_node_removename(&args);
+       }
+
+       if (error)
+               goto out;
+
+       /*
+        * If this is a synchronous mount, make sure that the
+        * transaction goes to disk before returning to the user.
+        */
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(args.trans);
+
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
+       /*
+        * Commit the last in the sequence of transactions.
+        */
+       xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
+       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+
+       return error;
+
+out:
+       if (args.trans) {
+               xfs_trans_cancel(args.trans,
+                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+       }
+       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+       return error;
+}
+
+/*========================================================================
+ * External routines when attribute list is inside the inode
+ *========================================================================*/
+
+/*
+ * Add a name to the shortform attribute list structure.
+ * This is the external routine.
+ */
+STATIC int
+xfs_attr_shortform_addname(xfs_da_args_t *args)
+{
+       int newsize, forkoff, retval;
+
+       trace_xfs_attr_sf_addname(args);
+
+       retval = xfs_attr_shortform_lookup(args);
+       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+               return retval;
+       } else if (retval == EEXIST) {
+               if (args->flags & ATTR_CREATE)
+                       return retval;
+               retval = xfs_attr_shortform_remove(args);
+               ASSERT(retval == 0);
+       }
+
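+       /*
+        * The new entry must fit within the shortform size limits and the
+        * enlarged list must still fit inside the inode fork.
+        */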
+       if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
+           args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
+               return ENOSPC;
+
+       newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
+       newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
+
+       forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
+       if (!forkoff)
+               return ENOSPC;
+
+       xfs_attr_shortform_add(args, forkoff);
+       return 0;
+}
+
+
+/*========================================================================
+ * External routines when attribute list is one block
+ *========================================================================*/
+
+/*
+ * Add a name to the leaf attribute list structure
+ *
+ * This leaf block cannot have a "remote" value; we only call this routine
+ * if bmap_one_block() says there is only one block (i.e. no remote blks).
+ */
+STATIC int
+xfs_attr_leaf_addname(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp;
+       struct xfs_buf *bp;
+       int retval, error, committed, forkoff;
+
+       trace_xfs_attr_leaf_addname(args);
+
+       /*
+        * Read the (only) block in the attribute list in.
+        */
+       dp = args->dp;
+       args->blkno = 0;
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
+
+       /*
+        * Look up the given attribute in the leaf block.  Figure out if
+        * the given flags produce an error or call for an atomic rename.
+        */
+       retval = xfs_attr3_leaf_lookup_int(bp, args);
+       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+               xfs_trans_brelse(args->trans, bp);
+               return retval;
+       } else if (retval == EEXIST) {
+               if (args->flags & ATTR_CREATE) {        /* pure create op */
+                       xfs_trans_brelse(args->trans, bp);
+                       return retval;
+               }
+
+               trace_xfs_attr_leaf_replace(args);
+
+               /* save the attribute state for later removal */
+               args->op_flags |= XFS_DA_OP_RENAME;     /* an atomic rename */
+               args->blkno2 = args->blkno;             /* set 2nd entry info */
+               args->index2 = args->index;
+               args->rmtblkno2 = args->rmtblkno;
+               args->rmtblkcnt2 = args->rmtblkcnt;
+               args->rmtvaluelen2 = args->rmtvaluelen;
+
+               /*
+                * clear the remote attr state now that it is saved so that the
+                * values reflect the state of the attribute we are about to
+                * add, not the attribute we just found and will remove later.
+                */
+               args->rmtblkno = 0;
+               args->rmtblkcnt = 0;
+               args->rmtvaluelen = 0;
+       }
+
+       /*
+        * Add the attribute to the leaf block, transitioning to a Btree
+        * if required.
+        */
+       retval = xfs_attr3_leaf_add(bp, args);
+       if (retval == ENOSPC) {
+               /*
+                * Promote the attribute list to the Btree format, then
+                * Commit that transaction so that the node_addname() call
+                * can manage its own transactions.
+                */
+               xfs_bmap_init(args->flist, args->firstblock);
+               error = xfs_attr3_leaf_to_node(args);
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       return error;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+
+               /*
+                * Commit the current trans (including the inode) and start
+                * a new one.
+                */
+               error = xfs_trans_roll(&args->trans, dp);
+               if (error)
+                       return error;
+
+               /*
+                * Fob the whole rest of the problem off on the Btree code.
+                */
+               error = xfs_attr_node_addname(args);
+               return error;
+       }
+
+       /*
+        * Commit the transaction that added the attr name so that
+        * later routines can manage their own transactions.
+        */
+       error = xfs_trans_roll(&args->trans, dp);
+       if (error)
+               return error;
+
+       /*
+        * If there was an out-of-line value, allocate the blocks we
+        * identified for its storage and copy the value.  This is done
+        * after we create the attribute so that we don't overflow the
+        * maximum size of a transaction and/or hit a deadlock.
+        */
+       if (args->rmtblkno > 0) {
+               error = xfs_attr_rmtval_set(args);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If this is an atomic rename operation, we must "flip" the
+        * incomplete flags on the "new" and "old" attribute/value pairs
+        * so that one disappears and one appears atomically.  Then we
+        * must remove the "old" attribute/value pair.
+        */
+       if (args->op_flags & XFS_DA_OP_RENAME) {
+               /*
+                * In a separate transaction, set the incomplete flag on the
+                * "old" attr and clear the incomplete flag on the "new" attr.
+                */
+               error = xfs_attr3_leaf_flipflags(args);
+               if (error)
+                       return error;
+
+               /*
+                * Dismantle the "old" attribute/value pair by removing
+                * a "remote" value (if it exists).
+                */
+               args->index = args->index2;
+               args->blkno = args->blkno2;
+               args->rmtblkno = args->rmtblkno2;
+               args->rmtblkcnt = args->rmtblkcnt2;
+               args->rmtvaluelen = args->rmtvaluelen2;
+               if (args->rmtblkno) {
+                       error = xfs_attr_rmtval_remove(args);
+                       if (error)
+                               return error;
+               }
+
+               /*
+                * Read in the block containing the "old" attr, then
+                * remove the "old" attr from that block (neat, huh!)
+                */
+               error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
+                                          -1, &bp);
+               if (error)
+                       return error;
+
+               xfs_attr3_leaf_remove(bp, args);
+
+               /*
+                * If the result is small enough, shrink it all into the inode.
+                */
+               if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
+                       xfs_bmap_init(args->flist, args->firstblock);
+                       error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
+                       /* bp is gone due to xfs_da_shrink_inode */
+                       if (!error) {
+                               error = xfs_bmap_finish(&args->trans,
+                                                       args->flist,
+                                                       &committed);
+                       }
+                       if (error) {
+                               ASSERT(committed);
+                               args->trans = NULL;
+                               xfs_bmap_cancel(args->flist);
+                               return error;
+                       }
+
+                       /*
+                        * bmap_finish() may have committed the last trans
+                        * and started a new one.  We need the inode to be
+                        * in all transactions.
+                        */
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
+               }
+
+               /*
+                * Commit the remove and start the next trans in series.
+                */
+               error = xfs_trans_roll(&args->trans, dp);
+
+       } else if (args->rmtblkno > 0) {
+               /*
+                * Added a "remote" value, just clear the incomplete flag.
+                */
+               error = xfs_attr3_leaf_clearflag(args);
+       }
+       return error;
+}
+
+/*
+ * Remove a name from the leaf attribute list structure
+ *
+ * This leaf block cannot have a "remote" value; we only call this routine
+ * if bmap_one_block() says there is only one block (i.e. no remote blks).
+ */
+STATIC int
+xfs_attr_leaf_removename(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp;
+       struct xfs_buf *bp;
+       int error, committed, forkoff;
+
+       trace_xfs_attr_leaf_removename(args);
+
+       /*
+        * Remove the attribute.
+        */
+       dp = args->dp;
+       args->blkno = 0;
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
+
+       error = xfs_attr3_leaf_lookup_int(bp, args);
+       if (error == ENOATTR) {
+               xfs_trans_brelse(args->trans, bp);
+               return error;
+       }
+
+       xfs_attr3_leaf_remove(bp, args);
+
+       /*
+        * If the result is small enough, shrink it all into the inode.
+        */
+       if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
+               xfs_bmap_init(args->flist, args->firstblock);
+               error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
+               /* bp is gone due to xfs_da_shrink_inode */
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       return error;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+       }
+       return 0;
+}
+
+/*
+ * Look up a name in a leaf attribute list structure.
+ *
+ * This leaf block cannot have a "remote" value; we only call this routine
+ * if bmap_one_block() says there is only one block (i.e. no remote blks).
+ */
+STATIC int
+xfs_attr_leaf_get(xfs_da_args_t *args)
+{
+       struct xfs_buf *bp;
+       int error;
+
+       trace_xfs_attr_leaf_get(args);
+
+       args->blkno = 0;
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
+
+       error = xfs_attr3_leaf_lookup_int(bp, args);
+       if (error != EEXIST)  {
+               xfs_trans_brelse(args->trans, bp);
+               return error;
+       }
+       error = xfs_attr3_leaf_getvalue(bp, args);
+       xfs_trans_brelse(args->trans, bp);
+       if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
+               error = xfs_attr_rmtval_get(args);
+       }
+       return error;
+}
+
+/*========================================================================
+ * External routines when attribute list size > geo->blksize
+ *========================================================================*/
+
+/*
+ * Add a name to a Btree-format attribute list.
+ *
+ * This will involve walking down the Btree, and may involve splitting
+ * leaf nodes and even splitting intermediate nodes up to and including
+ * the root node (a special case of an intermediate node).
+ *
+ * "Remote" attribute values confuse the issue and atomic rename operations
+ * add a whole extra layer of confusion on top of that.
+ */
+STATIC int
+xfs_attr_node_addname(xfs_da_args_t *args)
+{
+       xfs_da_state_t *state;
+       xfs_da_state_blk_t *blk;
+       xfs_inode_t *dp;
+       xfs_mount_t *mp;
+       int committed, retval, error;
+
+       trace_xfs_attr_node_addname(args);
+
+       /*
+        * Fill in bucket of arguments/results/context to carry around.
+        */
+       dp = args->dp;
+       mp = dp->i_mount;
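+       /*
+        * We come back here if the single leaf block below is converted to
+        * node format and the lookup has to be redone.
+        */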
+restart:
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = mp;
+
+       /*
+        * Search to see if name already exists, and get back a pointer
+        * to where it should go.
+        */
+       error = xfs_da3_node_lookup_int(state, &retval);
+       if (error)
+               goto out;
+       blk = &state->path.blk[ state->path.active-1 ];
+       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
+               goto out;
+       } else if (retval == EEXIST) {
+               if (args->flags & ATTR_CREATE)
+                       goto out;
+
+               trace_xfs_attr_node_replace(args);
+
+               /* save the attribute state for later removal */
+               args->op_flags |= XFS_DA_OP_RENAME;     /* atomic rename op */
+               args->blkno2 = args->blkno;             /* set 2nd entry info */
+               args->index2 = args->index;
+               args->rmtblkno2 = args->rmtblkno;
+               args->rmtblkcnt2 = args->rmtblkcnt;
+               args->rmtvaluelen2 = args->rmtvaluelen;
+
+               /*
+                * clear the remote attr state now that it is saved so that the
+                * values reflect the state of the attribute we are about to
+                * add, not the attribute we just found and will remove later.
+                */
+               args->rmtblkno = 0;
+               args->rmtblkcnt = 0;
+               args->rmtvaluelen = 0;
+       }
+
+       retval = xfs_attr3_leaf_add(blk->bp, state->args);
+       if (retval == ENOSPC) {
+               if (state->path.active == 1) {
+                       /*
+                        * It's really a single leaf node, but it had
+                        * out-of-line values so it looked like it *might*
+                        * have been a b-tree.
+                        */
+                       xfs_da_state_free(state);
+                       state = NULL;
+                       xfs_bmap_init(args->flist, args->firstblock);
+                       error = xfs_attr3_leaf_to_node(args);
+                       if (!error) {
+                               error = xfs_bmap_finish(&args->trans,
+                                                       args->flist,
+                                                       &committed);
+                       }
+                       if (error) {
+                               ASSERT(committed);
+                               args->trans = NULL;
+                               xfs_bmap_cancel(args->flist);
+                               goto out;
+                       }
+
+                       /*
+                        * bmap_finish() may have committed the last trans
+                        * and started a new one.  We need the inode to be
+                        * in all transactions.
+                        */
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
+
+                       /*
+                        * Commit the node conversion and start the next
+                        * trans in the chain.
+                        */
+                       error = xfs_trans_roll(&args->trans, dp);
+                       if (error)
+                               goto out;
+
+                       goto restart;
+               }
+
+               /*
+                * Split as many Btree elements as required.
+                * This code tracks the new and old attr's location
+                * in the index/blkno/rmtblkno/rmtblkcnt fields and
+                * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
+                */
+               xfs_bmap_init(args->flist, args->firstblock);
+               error = xfs_da3_split(state);
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       goto out;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+       } else {
+               /*
+                * Addition succeeded, update Btree hashvals.
+                */
+               xfs_da3_fixhashpath(state, &state->path);
+       }
+
+       /*
+        * Kill the state structure, we're done with it and need to
+        * allow the buffers to come back later.
+        */
+       xfs_da_state_free(state);
+       state = NULL;
+
+       /*
+        * Commit the leaf addition or btree split and start the next
+        * trans in the chain.
+        */
+       error = xfs_trans_roll(&args->trans, dp);
+       if (error)
+               goto out;
+
+       /*
+        * If there was an out-of-line value, allocate the blocks we
+        * identified for its storage and copy the value.  This is done
+        * after we create the attribute so that we don't overflow the
+        * maximum size of a transaction and/or hit a deadlock.
+        */
+       if (args->rmtblkno > 0) {
+               error = xfs_attr_rmtval_set(args);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If this is an atomic rename operation, we must "flip" the
+        * incomplete flags on the "new" and "old" attribute/value pairs
+        * so that one disappears and one appears atomically.  Then we
+        * must remove the "old" attribute/value pair.
+        */
+       if (args->op_flags & XFS_DA_OP_RENAME) {
+               /*
+                * In a separate transaction, set the incomplete flag on the
+                * "old" attr and clear the incomplete flag on the "new" attr.
+                */
+               error = xfs_attr3_leaf_flipflags(args);
+               if (error)
+                       goto out;
+
+               /*
+                * Dismantle the "old" attribute/value pair by removing
+                * a "remote" value (if it exists).
+                */
+               args->index = args->index2;
+               args->blkno = args->blkno2;
+               args->rmtblkno = args->rmtblkno2;
+               args->rmtblkcnt = args->rmtblkcnt2;
+               args->rmtvaluelen = args->rmtvaluelen2;
+               if (args->rmtblkno) {
+                       error = xfs_attr_rmtval_remove(args);
+                       if (error)
+                               return error;
+               }
+
+               /*
+                * Re-find the "old" attribute entry after any split ops.
+                * The INCOMPLETE flag means that we will find the "old"
+                * attr, not the "new" one.
+                */
+               args->flags |= XFS_ATTR_INCOMPLETE;
+               state = xfs_da_state_alloc();
+               state->args = args;
+               state->mp = mp;
+               state->inleaf = 0;
+               error = xfs_da3_node_lookup_int(state, &retval);
+               if (error)
+                       goto out;
+
+               /*
+                * Remove the name and update the hashvals in the tree.
+                */
+               blk = &state->path.blk[ state->path.active-1 ];
+               ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+               error = xfs_attr3_leaf_remove(blk->bp, args);
+               xfs_da3_fixhashpath(state, &state->path);
+
+               /*
+                * Check to see if the tree needs to be collapsed.
+                */
+               if (retval && (state->path.active > 1)) {
+                       xfs_bmap_init(args->flist, args->firstblock);
+                       error = xfs_da3_join(state);
+                       if (!error) {
+                               error = xfs_bmap_finish(&args->trans,
+                                                       args->flist,
+                                                       &committed);
+                       }
+                       if (error) {
+                               ASSERT(committed);
+                               args->trans = NULL;
+                               xfs_bmap_cancel(args->flist);
+                               goto out;
+                       }
+
+                       /*
+                        * bmap_finish() may have committed the last trans
+                        * and started a new one.  We need the inode to be
+                        * in all transactions.
+                        */
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
+               }
+
+               /*
+                * Commit and start the next trans in the chain.
+                */
+               error = xfs_trans_roll(&args->trans, dp);
+               if (error)
+                       goto out;
+
+       } else if (args->rmtblkno > 0) {
+               /*
+                * Added a "remote" value, just clear the incomplete flag.
+                */
+               error = xfs_attr3_leaf_clearflag(args);
+               if (error)
+                       goto out;
+       }
+       retval = error = 0;
+
+out:
+       if (state)
+               xfs_da_state_free(state);
+       if (error)
+               return error;
+       return retval;
+}
+
+/*
+ * Remove a name from a B-tree attribute list.
+ *
+ * This will involve walking down the Btree, and may involve joining
+ * leaf nodes and even joining intermediate nodes up to and including
+ * the root node (a special case of an intermediate node).
+ */
+STATIC int
+xfs_attr_node_removename(xfs_da_args_t *args)
+{
+       xfs_da_state_t *state;
+       xfs_da_state_blk_t *blk;
+       xfs_inode_t *dp;
+       struct xfs_buf *bp;
+       int retval, error, committed, forkoff;
+
+       trace_xfs_attr_node_removename(args);
+
+       /*
+        * Tie a string around our finger to remind us where we are.
+        */
+       dp = args->dp;
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = dp->i_mount;
+
+       /*
+        * Search to see if name exists, and get back a pointer to it.
+        */
+       error = xfs_da3_node_lookup_int(state, &retval);
+       if (error || (retval != EEXIST)) {
+               if (error == 0)
+                       error = retval;
+               goto out;
+       }
+
+       /*
+        * If there is an out-of-line value, de-allocate the blocks.
+        * This is done before we remove the attribute so that we don't
+        * overflow the maximum size of a transaction and/or hit a deadlock.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       ASSERT(blk->bp != NULL);
+       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+       if (args->rmtblkno > 0) {
+               /*
+                * Fill in disk block numbers in the state structure
+                * so that we can get the buffers back after we commit
+                * several transactions in the following calls.
+                */
+               error = xfs_attr_fillstate(state);
+               if (error)
+                       goto out;
+
+               /*
+                * Mark the attribute as INCOMPLETE, then bunmapi() the
+                * remote value.
+                */
+               error = xfs_attr3_leaf_setflag(args);
+               if (error)
+                       goto out;
+               error = xfs_attr_rmtval_remove(args);
+               if (error)
+                       goto out;
+
+               /*
+                * Refill the state structure with buffers, the prior calls
+                * released our buffers.
+                */
+               error = xfs_attr_refillstate(state);
+               if (error)
+                       goto out;
+       }
+
+       /*
+        * Remove the name and update the hashvals in the tree.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+       retval = xfs_attr3_leaf_remove(blk->bp, args);
+       xfs_da3_fixhashpath(state, &state->path);
+
+       /*
+        * Check to see if the tree needs to be collapsed.
+        */
+       if (retval && (state->path.active > 1)) {
+               xfs_bmap_init(args->flist, args->firstblock);
+               error = xfs_da3_join(state);
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       goto out;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+
+               /*
+                * Commit the Btree join operation and start a new trans.
+                */
+               error = xfs_trans_roll(&args->trans, dp);
+               if (error)
+                       goto out;
+       }
+
+       /*
+        * If the result is small enough, push it all into the inode.
+        */
+       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+               /*
+                * Have to get rid of the copy of this dabuf in the state.
+                */
+               ASSERT(state->path.active == 1);
+               ASSERT(state->path.blk[0].bp);
+               state->path.blk[0].bp = NULL;
+
+               error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
+               if (error)
+                       goto out;
+
+               if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
+                       xfs_bmap_init(args->flist, args->firstblock);
+                       error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
+                       /* bp is gone due to xfs_da_shrink_inode */
+                       if (!error) {
+                               error = xfs_bmap_finish(&args->trans,
+                                                       args->flist,
+                                                       &committed);
+                       }
+                       if (error) {
+                               ASSERT(committed);
+                               args->trans = NULL;
+                               xfs_bmap_cancel(args->flist);
+                               goto out;
+                       }
+
+                       /*
+                        * bmap_finish() may have committed the last trans
+                        * and started a new one.  We need the inode to be
+                        * in all transactions.
+                        */
+                       if (committed)
+                               xfs_trans_ijoin(args->trans, dp, 0);
+               } else
+                       xfs_trans_brelse(args->trans, bp);
+       }
+       error = 0;
+
+out:
+       xfs_da_state_free(state);
+       return error;
+}
+
+/*
+ * Fill in the disk block numbers in the state structure for the buffers
+ * that are attached to the state structure.
+ * This is done so that we can quickly reattach ourselves to those buffers
+ * after some set of transaction commits have released these buffers.
+ */
+STATIC int
+xfs_attr_fillstate(xfs_da_state_t *state)
+{
+       xfs_da_state_path_t *path;
+       xfs_da_state_blk_t *blk;
+       int level;
+
+       trace_xfs_attr_fillstate(state->args);
+
+       /*
+        * Roll down the "path" in the state structure, storing the on-disk
+        * block number for those buffers in the "path".
+        */
+       path = &state->path;
+       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+               if (blk->bp) {
+                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+                       blk->bp = NULL;
+               } else {
+                       blk->disk_blkno = 0;
+               }
+       }
+
+       /*
+        * Roll down the "altpath" in the state structure, storing the on-disk
+        * block number for those buffers in the "altpath".
+        */
+       path = &state->altpath;
+       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+               if (blk->bp) {
+                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
+                       blk->bp = NULL;
+               } else {
+                       blk->disk_blkno = 0;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Reattach the buffers to the state structure based on the disk block
+ * numbers stored in the state structure.
+ * This is done after some set of transaction commits have released those
+ * buffers from our grip.
+ */
+STATIC int
+xfs_attr_refillstate(xfs_da_state_t *state)
+{
+       xfs_da_state_path_t *path;
+       xfs_da_state_blk_t *blk;
+       int level, error;
+
+       trace_xfs_attr_refillstate(state->args);
+
+       /*
+        * Roll down the "path" in the state structure, reattaching buffers
+        * for the on-disk block numbers stored in the "path".
+        */
+       path = &state->path;
+       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+               if (blk->disk_blkno) {
+                       error = xfs_da3_node_read(state->args->trans,
+                                               state->args->dp,
+                                               blk->blkno, blk->disk_blkno,
+                                               &blk->bp, XFS_ATTR_FORK);
+                       if (error)
+                               return error;
+               } else {
+                       blk->bp = NULL;
+               }
+       }
+
+       /*
+        * Roll down the "altpath" in the state structure, reattaching buffers
+        * for the on-disk block numbers stored in the "altpath".
+        */
+       path = &state->altpath;
+       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+               if (blk->disk_blkno) {
+                       error = xfs_da3_node_read(state->args->trans,
+                                               state->args->dp,
+                                               blk->blkno, blk->disk_blkno,
+                                               &blk->bp, XFS_ATTR_FORK);
+                       if (error)
+                               return error;
+               } else {
+                       blk->bp = NULL;
+               }
+       }
+
+       return 0;
+}
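+
+/*
+ * Usage sketch (see xfs_attr_node_removename above): before a remote value
+ * is unmapped, the caller uses xfs_attr_fillstate() to trade the held path
+ * buffers for their disk block numbers, does the multi-transaction work
+ * (setflag, xfs_attr_rmtval_remove) that releases those buffers, and then
+ * calls xfs_attr_refillstate() to reattach them before touching the path
+ * again.
+ */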
+
+/*
+ * Look up a name in a node attribute list.
+ *
+ * This routine gets called for any attribute fork that has more than one
+ * block, i.e. both true Btree attr lists and single-leaf blocks with
+ * "remote" values taking up more blocks.
+ */
+STATIC int
+xfs_attr_node_get(xfs_da_args_t *args)
+{
+       xfs_da_state_t *state;
+       xfs_da_state_blk_t *blk;
+       int error, retval;
+       int i;
+
+       trace_xfs_attr_node_get(args);
+
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+
+       /*
+        * Search to see if name exists, and get back a pointer to it.
+        */
+       error = xfs_da3_node_lookup_int(state, &retval);
+       if (error) {
+               retval = error;
+       } else if (retval == EEXIST) {
+               blk = &state->path.blk[ state->path.active-1 ];
+               ASSERT(blk->bp != NULL);
+               ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+
+               /*
+                * Get the value, local or "remote"
+                */
+               retval = xfs_attr3_leaf_getvalue(blk->bp, args);
+               if (!retval && (args->rmtblkno > 0)
+                   && !(args->flags & ATTR_KERNOVAL)) {
+                       retval = xfs_attr_rmtval_get(args);
+               }
+       }
+
+       /*
+        * If not in a transaction, we have to release all the buffers.
+        */
+       for (i = 0; i < state->path.active; i++) {
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+
+       xfs_da_state_free(state);
+       return retval;
+}
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
new file mode 100644 (file)
index 0000000..127d96a
--- /dev/null
@@ -0,0 +1,2697 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_attr_sf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_buf_item.h"
+#include "xfs_cksum.h"
+#include "xfs_dinode.h"
+#include "xfs_dir2.h"
+
+
+/*
+ * xfs_attr_leaf.c
+ *
+ * Routines to implement leaf blocks of attributes as Btrees of hashed names.
+ */
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Routines used for growing the Btree.
+ */
+STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
+                                xfs_dablk_t which_block, struct xfs_buf **bpp);
+STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
+                                  struct xfs_attr3_icleaf_hdr *ichdr,
+                                  struct xfs_da_args *args, int freemap_index);
+STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
+                                  struct xfs_attr3_icleaf_hdr *ichdr,
+                                  struct xfs_buf *leaf_buffer);
+STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state,
+                                                  xfs_da_state_blk_t *blk1,
+                                                  xfs_da_state_blk_t *blk2);
+STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
+                       xfs_da_state_blk_t *leaf_blk_1,
+                       struct xfs_attr3_icleaf_hdr *ichdr1,
+                       xfs_da_state_blk_t *leaf_blk_2,
+                       struct xfs_attr3_icleaf_hdr *ichdr2,
+                       int *number_entries_in_blk1,
+                       int *number_usedbytes_in_blk1);
+
+/*
+ * Utility routines.
+ */
+STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
+                       struct xfs_attr_leafblock *src_leaf,
+                       struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
+                       struct xfs_attr_leafblock *dst_leaf,
+                       struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
+                       int move_count);
+STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
+
+void
+xfs_attr3_leaf_hdr_from_disk(
+       struct xfs_attr3_icleaf_hdr     *to,
+       struct xfs_attr_leafblock       *from)
+{
+       int     i;
+
+       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
+              from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
+
+       if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
+               struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from;
+
+               to->forw = be32_to_cpu(hdr3->info.hdr.forw);
+               to->back = be32_to_cpu(hdr3->info.hdr.back);
+               to->magic = be16_to_cpu(hdr3->info.hdr.magic);
+               to->count = be16_to_cpu(hdr3->count);
+               to->usedbytes = be16_to_cpu(hdr3->usedbytes);
+               to->firstused = be16_to_cpu(hdr3->firstused);
+               to->holes = hdr3->holes;
+
+               for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+                       to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base);
+                       to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size);
+               }
+               return;
+       }
+       to->forw = be32_to_cpu(from->hdr.info.forw);
+       to->back = be32_to_cpu(from->hdr.info.back);
+       to->magic = be16_to_cpu(from->hdr.info.magic);
+       to->count = be16_to_cpu(from->hdr.count);
+       to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
+       to->firstused = be16_to_cpu(from->hdr.firstused);
+       to->holes = from->hdr.holes;
+
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+               to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base);
+               to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size);
+       }
+}
+
+void
+xfs_attr3_leaf_hdr_to_disk(
+       struct xfs_attr_leafblock       *to,
+       struct xfs_attr3_icleaf_hdr     *from)
+{
+       int     i;
+
+       ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
+              from->magic == XFS_ATTR3_LEAF_MAGIC);
+
+       if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
+               struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to;
+
+               hdr3->info.hdr.forw = cpu_to_be32(from->forw);
+               hdr3->info.hdr.back = cpu_to_be32(from->back);
+               hdr3->info.hdr.magic = cpu_to_be16(from->magic);
+               hdr3->count = cpu_to_be16(from->count);
+               hdr3->usedbytes = cpu_to_be16(from->usedbytes);
+               hdr3->firstused = cpu_to_be16(from->firstused);
+               hdr3->holes = from->holes;
+               hdr3->pad1 = 0;
+
+               for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+                       hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
+                       hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
+               }
+               return;
+       }
+       to->hdr.info.forw = cpu_to_be32(from->forw);
+       to->hdr.info.back = cpu_to_be32(from->back);
+       to->hdr.info.magic = cpu_to_be16(from->magic);
+       to->hdr.count = cpu_to_be16(from->count);
+       to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
+       to->hdr.firstused = cpu_to_be16(from->firstused);
+       to->hdr.holes = from->holes;
+       to->hdr.pad1 = 0;
+
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+               to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
+               to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
+       }
+}
+
+static bool
+xfs_attr3_leaf_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_attr_leafblock *leaf = bp->b_addr;
+       struct xfs_attr3_icleaf_hdr ichdr;
+
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+
+               if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
+                       return false;
+
+               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
+                       return false;
+       }
+       if (ichdr.count == 0)
+               return false;
+
+       /* XXX: need to range check rest of attr header values */
+       /* XXX: hash order check? */
+
+       return true;
+}
+
+static void
+xfs_attr3_leaf_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_attr3_leaf_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
+}
+
+/*
+ * leaf/node format detection on trees is sketchy, so a node read can be done on
+ * leaf level blocks when detection identifies the tree as a node format tree
+ * incorrectly. In this case, we need to swap the verifier to match the correct
+ * format of the block being read.
+ */
+static void
+xfs_attr3_leaf_read_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_attr3_leaf_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
+       .verify_read = xfs_attr3_leaf_read_verify,
+       .verify_write = xfs_attr3_leaf_write_verify,
+};
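+
+/*
+ * In rough terms: the read verifier checks the CRC (v5 superblocks only) and
+ * then the structural checks in xfs_attr3_leaf_verify(), while the write
+ * verifier repeats the structural checks, stamps the LSN from the buf log
+ * item and recalculates the CRC before the block goes to disk.
+ */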
+
+int
+xfs_attr3_leaf_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
+                               XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
+       if (!err && tp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
+       return err;
+}
+
+/*========================================================================
+ * Namespace helper routines
+ *========================================================================*/
+
+/*
+ * If namespace bits don't match return 0.
+ * If all match then return 1.
+ */
+STATIC int
+xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
+{
+       return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
+}
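+
+/*
+ * For illustration, assuming the usual namespace flags: a lookup made with
+ * ATTR_ROOT set only matches on-disk entries carrying XFS_ATTR_ROOT, and a
+ * lookup with no namespace flags set only matches "user" attributes that
+ * have neither XFS_ATTR_ROOT nor XFS_ATTR_SECURE set.
+ */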
+
+
+/*========================================================================
+ * External routines when attribute fork size < XFS_LITINO(mp).
+ *========================================================================*/
+
+/*
+ * Query whether the requested number of additional bytes of extended
+ * attribute space will be able to fit inline.
+ *
+ * Returns zero if not, else the di_forkoff fork offset to be used in the
+ * literal area for attribute data once the new bytes have been added.
+ *
+ * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value;
+ * special case for dev/uuid inodes, they have fixed size data forks.
+ */
+int
+xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
+{
+       int offset;
+       int minforkoff; /* lower limit on valid forkoff locations */
+       int maxforkoff; /* upper limit on valid forkoff locations */
+       int dsize;
+       xfs_mount_t *mp = dp->i_mount;
+
+       /* rounded down */
+       offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
+
+       switch (dp->i_d.di_format) {
+       case XFS_DINODE_FMT_DEV:
+               minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+               return (offset >= minforkoff) ? minforkoff : 0;
+       case XFS_DINODE_FMT_UUID:
+               minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
+               return (offset >= minforkoff) ? minforkoff : 0;
+       }
+
+       /*
+        * If the requested number of bytes is smaller than or equal to the
+        * current attribute fork size, we can always proceed.
+        *
+        * Note that if_bytes in the data fork might actually be larger than
+        * the current data fork size due to delalloc extents. In that
+        * case either the extent count will go down when they are converted
+        * to real extents, or the delalloc conversion will take care of the
+        * literal area rebalancing.
+        */
+       if (bytes <= XFS_IFORK_ASIZE(dp))
+               return dp->i_d.di_forkoff;
+
+       /*
+        * For attr2 we can try to move the forkoff if there is space in the
+        * literal area, but for the old format we are done if there is no
+        * space in the fixed attribute fork.
+        */
+       if (!(mp->m_flags & XFS_MOUNT_ATTR2))
+               return 0;
+
+       dsize = dp->i_df.if_bytes;
+
+       switch (dp->i_d.di_format) {
+       case XFS_DINODE_FMT_EXTENTS:
+               /*
+                * If there is no attr fork and the data fork is extents, 
+                * determine if creating the default attr fork will result
+                * in the extents form migrating to btree. If so, the
+                * minimum offset only needs to be the space required for
+                * the btree root.
+                */
+               if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
+                   xfs_default_attroffset(dp))
+                       dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               /*
+                * If we have a data btree then keep the forkoff if we already
+                * have one, otherwise we are adding a new attr, so we set
+                * minforkoff to where the btree root can finish so we have
+                * plenty of room for attrs.
+                */
+               if (dp->i_d.di_forkoff) {
+                       if (offset < dp->i_d.di_forkoff)
+                               return 0;
+                       return dp->i_d.di_forkoff;
+               }
+               dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
+               break;
+       }
+
+       /*
+        * A data fork btree root must have space for at least
+        * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
+        */
+       minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+       minforkoff = roundup(minforkoff, 8) >> 3;
+
+       /* attr fork btree root can have at least this many key/ptr pairs */
+       maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) -
+                       XFS_BMDR_SPACE_CALC(MINABTPTRS);
+       maxforkoff = maxforkoff >> 3;   /* rounded down */
+
+       if (offset >= maxforkoff)
+               return maxforkoff;
+       if (offset >= minforkoff)
+               return offset;
+       return 0;
+}
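+
+/*
+ * Worked example of the >>3 encoding above: a returned forkoff of 15 means
+ * the attribute fork starts 15 << 3 = 120 bytes into the inode literal area,
+ * leaving the first 120 bytes of that area to the data fork.
+ */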
+
+/*
+ * Switch on the ATTR2 superblock bit (implies also FEATURES2)
+ */
+STATIC void
+xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
+{
+       if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
+           !(xfs_sb_version_hasattr2(&mp->m_sb))) {
+               spin_lock(&mp->m_sb_lock);
+               if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
+                       xfs_sb_version_addattr2(&mp->m_sb);
+                       spin_unlock(&mp->m_sb_lock);
+                       xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
+               } else
+                       spin_unlock(&mp->m_sb_lock);
+       }
+}
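+
+/*
+ * Note the check-lock-recheck pattern above: the feature bit is tested again
+ * under m_sb_lock so that only one racing attr2 conversion ends up logging
+ * the superblock version change.
+ */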
+
+/*
+ * Create the initial contents of a shortform attribute list.
+ */
+void
+xfs_attr_shortform_create(xfs_da_args_t *args)
+{
+       xfs_attr_sf_hdr_t *hdr;
+       xfs_inode_t *dp;
+       xfs_ifork_t *ifp;
+
+       trace_xfs_attr_sf_create(args);
+
+       dp = args->dp;
+       ASSERT(dp != NULL);
+       ifp = dp->i_afp;
+       ASSERT(ifp != NULL);
+       ASSERT(ifp->if_bytes == 0);
+       if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) {
+               ifp->if_flags &= ~XFS_IFEXTENTS;        /* just in case */
+               dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL;
+               ifp->if_flags |= XFS_IFINLINE;
+       } else {
+               ASSERT(ifp->if_flags & XFS_IFINLINE);
+       }
+       xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
+       hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
+       hdr->count = 0;
+       hdr->totsize = cpu_to_be16(sizeof(*hdr));
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
+}
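+
+/*
+ * Roughly, the shortform layout created above is a small header (totsize,
+ * count) followed by packed xfs_attr_sf_entry records, each carrying
+ * namelen, valuelen, namespace flags and the name/value bytes back to back
+ * (see xfs_attr_sf.h for the exact definitions).
+ */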
+
+/*
+ * Add a name/value pair to the shortform attribute list.
+ * Overflow from the inode has already been checked for.
+ */
+void
+xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
+{
+       xfs_attr_shortform_t *sf;
+       xfs_attr_sf_entry_t *sfe;
+       int i, offset, size;
+       xfs_mount_t *mp;
+       xfs_inode_t *dp;
+       xfs_ifork_t *ifp;
+
+       trace_xfs_attr_sf_add(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       dp->i_d.di_forkoff = forkoff;
+
+       ifp = dp->i_afp;
+       ASSERT(ifp->if_flags & XFS_IFINLINE);
+       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+       sfe = &sf->list[0];
+       for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+#ifdef DEBUG
+               if (sfe->namelen != args->namelen)
+                       continue;
+               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
+                       continue;
+               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
+                       continue;
+               ASSERT(0);
+#endif
+       }
+
+       offset = (char *)sfe - (char *)sf;
+       size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
+       xfs_idata_realloc(dp, size, XFS_ATTR_FORK);
+       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+       sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset);
+
+       sfe->namelen = args->namelen;
+       sfe->valuelen = args->valuelen;
+       sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
+       memcpy(sfe->nameval, args->name, args->namelen);
+       memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
+       sf->hdr.count++;
+       be16_add_cpu(&sf->hdr.totsize, size);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
+
+       xfs_sbversion_add_attr2(mp, args->trans);
+}
+
+/*
+ * After the last attribute is removed revert to original inode format,
+ * making all literal area available to the data fork once more.
+ */
+STATIC void
+xfs_attr_fork_reset(
+       struct xfs_inode        *ip,
+       struct xfs_trans        *tp)
+{
+       xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+       ip->i_d.di_forkoff = 0;
+       ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+       ASSERT(ip->i_d.di_anextents == 0);
+       ASSERT(ip->i_afp == NULL);
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * Remove an attribute from the shortform attribute list structure.
+ */
+int
+xfs_attr_shortform_remove(xfs_da_args_t *args)
+{
+       xfs_attr_shortform_t *sf;
+       xfs_attr_sf_entry_t *sfe;
+       int base, size=0, end, totsize, i;
+       xfs_mount_t *mp;
+       xfs_inode_t *dp;
+
+       trace_xfs_attr_sf_remove(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       base = sizeof(xfs_attr_sf_hdr_t);
+       sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
+       sfe = &sf->list[0];
+       end = sf->hdr.count;
+       for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
+                                       base += size, i++) {
+               size = XFS_ATTR_SF_ENTSIZE(sfe);
+               if (sfe->namelen != args->namelen)
+                       continue;
+               if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
+                       continue;
+               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
+                       continue;
+               break;
+       }
+       if (i == end)
+               return ENOATTR;
+
+       /*
+        * Fix up the attribute fork data, covering the hole
+        */
+       end = base + size;
+       totsize = be16_to_cpu(sf->hdr.totsize);
+       if (end != totsize)
+               memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
+       sf->hdr.count--;
+       be16_add_cpu(&sf->hdr.totsize, -size);
+
+       /*
+        * Fix up the start offset of the attribute fork
+        */
+       totsize -= size;
+       if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
+           (mp->m_flags & XFS_MOUNT_ATTR2) &&
+           (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+           !(args->op_flags & XFS_DA_OP_ADDNAME)) {
+               xfs_attr_fork_reset(dp, args->trans);
+       } else {
+               xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
+               dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
+               ASSERT(dp->i_d.di_forkoff);
+               ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
+                               (args->op_flags & XFS_DA_OP_ADDNAME) ||
+                               !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+                               dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+               xfs_trans_log_inode(args->trans, dp,
+                                       XFS_ILOG_CORE | XFS_ILOG_ADATA);
+       }
+
+       xfs_sbversion_add_attr2(mp, args->trans);
+
+       return 0;
+}
+
+/*
+ * Look up a name in a shortform attribute list structure.
+ */
+/*ARGSUSED*/
+int
+xfs_attr_shortform_lookup(xfs_da_args_t *args)
+{
+       xfs_attr_shortform_t *sf;
+       xfs_attr_sf_entry_t *sfe;
+       int i;
+       xfs_ifork_t *ifp;
+
+       trace_xfs_attr_sf_lookup(args);
+
+       ifp = args->dp->i_afp;
+       ASSERT(ifp->if_flags & XFS_IFINLINE);
+       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+       sfe = &sf->list[0];
+       for (i = 0; i < sf->hdr.count;
+                               sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+               if (sfe->namelen != args->namelen)
+                       continue;
+               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
+                       continue;
+               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
+                       continue;
+               return EEXIST;
+       }
+       return ENOATTR;
+}
+
+/*
+ * Look up a name in a shortform attribute list structure.
+ */
+/*ARGSUSED*/
+int
+xfs_attr_shortform_getvalue(xfs_da_args_t *args)
+{
+       xfs_attr_shortform_t *sf;
+       xfs_attr_sf_entry_t *sfe;
+       int i;
+
+       ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
+       sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
+       sfe = &sf->list[0];
+       for (i = 0; i < sf->hdr.count;
+                               sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
+               if (sfe->namelen != args->namelen)
+                       continue;
+               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
+                       continue;
+               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
+                       continue;
+               if (args->flags & ATTR_KERNOVAL) {
+                       args->valuelen = sfe->valuelen;
+                       return EEXIST;
+               }
+               if (args->valuelen < sfe->valuelen) {
+                       args->valuelen = sfe->valuelen;
+                       return ERANGE;
+               }
+               args->valuelen = sfe->valuelen;
+               memcpy(args->value, &sfe->nameval[args->namelen],
+                                                   args->valuelen);
+               return EEXIST;
+       }
+       return ENOATTR;
+}
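+
+/*
+ * Note the return convention used by the lookup helpers here: a hit is
+ * reported as the positive value EEXIST (or ERANGE when the caller's value
+ * buffer is too small), a miss as ENOATTR, and callers key their control
+ * flow off those values rather than treating any non-zero result as failure.
+ */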
+
+/*
+ * Convert from using the shortform to the leaf.
+ */
+int
+xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+{
+       xfs_inode_t *dp;
+       xfs_attr_shortform_t *sf;
+       xfs_attr_sf_entry_t *sfe;
+       xfs_da_args_t nargs;
+       char *tmpbuffer;
+       int error, i, size;
+       xfs_dablk_t blkno;
+       struct xfs_buf *bp;
+       xfs_ifork_t *ifp;
+
+       trace_xfs_attr_sf_to_leaf(args);
+
+       dp = args->dp;
+       ifp = dp->i_afp;
+       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
+       size = be16_to_cpu(sf->hdr.totsize);
+       tmpbuffer = kmem_alloc(size, KM_SLEEP);
+       ASSERT(tmpbuffer != NULL);
+       memcpy(tmpbuffer, ifp->if_u1.if_data, size);
+       sf = (xfs_attr_shortform_t *)tmpbuffer;
+
+       xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
+       xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
+
+       bp = NULL;
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error) {
+               /*
+                * If we hit an IO error in the middle of the transaction
+                * inside grow_inode(), we may have inconsistent data. Bail out.
+                */
+               if (error == EIO)
+                       goto out;
+               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
+               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
+               goto out;
+       }
+
+       ASSERT(blkno == 0);
+       error = xfs_attr3_leaf_create(args, blkno, &bp);
+       if (error) {
+               error = xfs_da_shrink_inode(args, 0, bp);
+               bp = NULL;
+               if (error)
+                       goto out;
+               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
+               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
+               goto out;
+       }
+
+       memset((char *)&nargs, 0, sizeof(nargs));
+       nargs.dp = dp;
+       nargs.geo = args->geo;
+       nargs.firstblock = args->firstblock;
+       nargs.flist = args->flist;
+       nargs.total = args->total;
+       nargs.whichfork = XFS_ATTR_FORK;
+       nargs.trans = args->trans;
+       nargs.op_flags = XFS_DA_OP_OKNOENT;
+
+       sfe = &sf->list[0];
+       for (i = 0; i < sf->hdr.count; i++) {
+               nargs.name = sfe->nameval;
+               nargs.namelen = sfe->namelen;
+               nargs.value = &sfe->nameval[nargs.namelen];
+               nargs.valuelen = sfe->valuelen;
+               nargs.hashval = xfs_da_hashname(sfe->nameval,
+                                               sfe->namelen);
+               nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
+               error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
+               ASSERT(error == ENOATTR);
+               error = xfs_attr3_leaf_add(bp, &nargs);
+               ASSERT(error != ENOSPC);
+               if (error)
+                       goto out;
+               sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+       }
+       error = 0;
+
+out:
+       kmem_free(tmpbuffer);
+       return error;
+}
+
+/*
+ * Check a leaf attribute block to see if all the entries would fit into
+ * a shortform attribute list.
+ */
+int
+xfs_attr_shortform_allfit(
+       struct xfs_buf          *bp,
+       struct xfs_inode        *dp)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr_leaf_entry *entry;
+       xfs_attr_leaf_name_local_t *name_loc;
+       struct xfs_attr3_icleaf_hdr leafhdr;
+       int                     bytes;
+       int                     i;
+
+       leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+       entry = xfs_attr3_leaf_entryp(leaf);
+
+       bytes = sizeof(struct xfs_attr_sf_hdr);
+       for (i = 0; i < leafhdr.count; entry++, i++) {
+               if (entry->flags & XFS_ATTR_INCOMPLETE)
+                       continue;               /* don't copy partial entries */
+               if (!(entry->flags & XFS_ATTR_LOCAL))
+                       return 0;
+               name_loc = xfs_attr3_leaf_name_local(leaf, i);
+               if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
+                       return 0;
+               if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
+                       return 0;
+               bytes += sizeof(struct xfs_attr_sf_entry) - 1
+                               + name_loc->namelen
+                               + be16_to_cpu(name_loc->valuelen);
+       }
+       if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
+           (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+           (bytes == sizeof(struct xfs_attr_sf_hdr)))
+               return -1;
+       return xfs_attr_shortform_bytesfit(dp, bytes);
+}
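+
+/*
+ * In other words, the return value above is three-state: 0 means the leaf
+ * entries will not fit inline, -1 means the attribute fork would be empty
+ * and can be removed entirely (attr2 only), and anything else is the
+ * di_forkoff to use for the new shortform fork.
+ */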
+
+/*
+ * Convert a leaf attribute list to shortform attribute list
+ */
+int
+xfs_attr3_leaf_to_shortform(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args,
+       int                     forkoff)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_name_local *name_loc;
+       struct xfs_da_args      nargs;
+       struct xfs_inode        *dp = args->dp;
+       char                    *tmpbuffer;
+       int                     error;
+       int                     i;
+
+       trace_xfs_attr_leaf_to_sf(args);
+
+       tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+       if (!tmpbuffer)
+               return ENOMEM;
+
+       memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
+
+       leaf = (xfs_attr_leafblock_t *)tmpbuffer;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       entry = xfs_attr3_leaf_entryp(leaf);
+
+       /* XXX (dgc): buffer is about to be marked stale - why zero it? */
+       memset(bp->b_addr, 0, args->geo->blksize);
+
+       /*
+        * Clean out the prior contents of the attribute list.
+        */
+       error = xfs_da_shrink_inode(args, 0, bp);
+       if (error)
+               goto out;
+
+       if (forkoff == -1) {
+               ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
+               ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
+               xfs_attr_fork_reset(dp, args->trans);
+               goto out;
+       }
+
+       xfs_attr_shortform_create(args);
+
+       /*
+        * Copy the attributes
+        */
+       memset((char *)&nargs, 0, sizeof(nargs));
+       nargs.geo = args->geo;
+       nargs.dp = dp;
+       nargs.firstblock = args->firstblock;
+       nargs.flist = args->flist;
+       nargs.total = args->total;
+       nargs.whichfork = XFS_ATTR_FORK;
+       nargs.trans = args->trans;
+       nargs.op_flags = XFS_DA_OP_OKNOENT;
+
+       for (i = 0; i < ichdr.count; entry++, i++) {
+               if (entry->flags & XFS_ATTR_INCOMPLETE)
+                       continue;       /* don't copy partial entries */
+               if (!entry->nameidx)
+                       continue;
+               ASSERT(entry->flags & XFS_ATTR_LOCAL);
+               name_loc = xfs_attr3_leaf_name_local(leaf, i);
+               nargs.name = name_loc->nameval;
+               nargs.namelen = name_loc->namelen;
+               nargs.value = &name_loc->nameval[nargs.namelen];
+               nargs.valuelen = be16_to_cpu(name_loc->valuelen);
+               nargs.hashval = be32_to_cpu(entry->hashval);
+               nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
+               xfs_attr_shortform_add(&nargs, forkoff);
+       }
+       error = 0;
+
+out:
+       kmem_free(tmpbuffer);
+       return error;
+}
+
+/*
+ * Convert from using a single leaf to a root node and a leaf.
+ */
+int
+xfs_attr3_leaf_to_node(
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr icleafhdr;
+       struct xfs_attr_leaf_entry *entries;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr icnodehdr;
+       struct xfs_da_intnode   *node;
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *bp1 = NULL;
+       struct xfs_buf          *bp2 = NULL;
+       xfs_dablk_t             blkno;
+       int                     error;
+
+       trace_xfs_attr_leaf_to_node(args);
+
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error)
+               goto out;
+       error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1);
+       if (error)
+               goto out;
+
+       error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK);
+       if (error)
+               goto out;
+
+       /* copy leaf to new buffer, update identifiers */
+       xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
+       bp2->b_ops = bp1->b_ops;
+       memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
+               hdr3->blkno = cpu_to_be64(bp2->b_bn);
+       }
+       xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
+
+       /*
+        * Set up the new root node.
+        */
+       error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
+       if (error)
+               goto out;
+       node = bp1->b_addr;
+       dp->d_ops->node_hdr_from_disk(&icnodehdr, node);
+       btree = dp->d_ops->node_tree_p(node);
+
+       leaf = bp2->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
+       entries = xfs_attr3_leaf_entryp(leaf);
+
+       /* both on-disk, don't endian-flip twice */
+       btree[0].hashval = entries[icleafhdr.count - 1].hashval;
+       btree[0].before = cpu_to_be32(blkno);
+       icnodehdr.count = 1;
+       dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
+       xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);
+       error = 0;
+out:
+       return error;
+}
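+
+/*
+ * The copy above exists because logical block 0 of the attribute fork stays
+ * the root of the dabtree: the old single leaf is duplicated into the newly
+ * allocated block and block 0 is then reinitialised as a one-entry node
+ * pointing at that copy.
+ */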
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Create the initial contents of a leaf attribute list
+ * or a leaf in a node attribute list.
+ */
+STATIC int
+xfs_attr3_leaf_create(
+       struct xfs_da_args      *args,
+       xfs_dablk_t             blkno,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *bp;
+       int                     error;
+
+       trace_xfs_attr_leaf_create(args);
+
+       error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
+                                           XFS_ATTR_FORK);
+       if (error)
+               return error;
+       bp->b_ops = &xfs_attr3_leaf_buf_ops;
+       xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
+       leaf = bp->b_addr;
+       memset(leaf, 0, args->geo->blksize);
+
+       memset(&ichdr, 0, sizeof(ichdr));
+       ichdr.firstused = args->geo->blksize;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
+
+               ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
+
+               hdr3->blkno = cpu_to_be64(bp->b_bn);
+               hdr3->owner = cpu_to_be64(dp->i_ino);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+
+               ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
+       } else {
+               ichdr.magic = XFS_ATTR_LEAF_MAGIC;
+               ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr);
+       }
+       ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
+
+       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
+
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Split the leaf node, rebalance, then add the new entry.
+ */
+int
+xfs_attr3_leaf_split(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *oldblk,
+       struct xfs_da_state_blk *newblk)
+{
+       xfs_dablk_t blkno;
+       int error;
+
+       trace_xfs_attr_leaf_split(state->args);
+
+       /*
+        * Allocate space for a new leaf node.
+        */
+       ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
+       error = xfs_da_grow_inode(state->args, &blkno);
+       if (error)
+               return error;
+       error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
+       if (error)
+               return error;
+       newblk->blkno = blkno;
+       newblk->magic = XFS_ATTR_LEAF_MAGIC;
+
+       /*
+        * Rebalance the entries across the two leaves.
+        * NOTE: rebalance() currently depends on the 2nd block being empty.
+        */
+       xfs_attr3_leaf_rebalance(state, oldblk, newblk);
+       error = xfs_da3_blk_link(state, oldblk, newblk);
+       if (error)
+               return error;
+
+       /*
+        * Save info on the "old" attribute for "atomic rename" ops; leaf_add()
+        * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the
+        * "new" attrs info.  Will need the "old" info to remove it later.
+        *
+        * Insert the "new" entry in the correct block.
+        */
+       if (state->inleaf) {
+               trace_xfs_attr_leaf_add_old(state->args);
+               error = xfs_attr3_leaf_add(oldblk->bp, state->args);
+       } else {
+               trace_xfs_attr_leaf_add_new(state->args);
+               error = xfs_attr3_leaf_add(newblk->bp, state->args);
+       }
+
+       /*
+        * Update last hashval in each block since we added the name.
+        */
+       oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+       return error;
+}
+
+/*
+ * Add a name to the leaf attribute list structure.
+ */
+int
+xfs_attr3_leaf_add(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       int                     tablesize;
+       int                     entsize;
+       int                     sum;
+       int                     tmp;
+       int                     i;
+
+       trace_xfs_attr_leaf_add(args);
+
+       leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       ASSERT(args->index >= 0 && args->index <= ichdr.count);
+       entsize = xfs_attr_leaf_newentsize(args, NULL);
+
+       /*
+        * Search through freemap for first-fit on new name length.
+        * (may need to figure in size of entry struct too)
+        */
+       tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
+                                       + xfs_attr3_leaf_hdr_size(leaf);
+       for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
+               if (tablesize > ichdr.firstused) {
+                       sum += ichdr.freemap[i].size;
+                       continue;
+               }
+               if (!ichdr.freemap[i].size)
+                       continue;       /* no space in this map */
+               tmp = entsize;
+               if (ichdr.freemap[i].base < ichdr.firstused)
+                       tmp += sizeof(xfs_attr_leaf_entry_t);
+               if (ichdr.freemap[i].size >= tmp) {
+                       tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
+                       goto out_log_hdr;
+               }
+               sum += ichdr.freemap[i].size;
+       }
+
+       /*
+        * If there are no holes in the address space of the block,
+        * and we don't have enough freespace, then compaction will do us
+        * no good and we should just give up.
+        */
+       if (!ichdr.holes && sum < entsize)
+               return ENOSPC;
+
+       /*
+        * Compact the entries to coalesce free space.
+        * This may change the hdr->count via dropping INCOMPLETE entries.
+        */
+       xfs_attr3_leaf_compact(args, &ichdr, bp);
+
+       /*
+        * After compaction, the block is guaranteed to have only one
+        * free region, in freemap[0].  If it is not big enough, give up.
+        */
+       if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
+               tmp = ENOSPC;
+               goto out_log_hdr;
+       }
+
+       tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
+
+out_log_hdr:
+       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_trans_log_buf(args->trans, bp,
+               XFS_DA_LOGRANGE(leaf, &leaf->hdr,
+                               xfs_attr3_leaf_hdr_size(leaf)));
+       return tmp;
+}
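+
+/*
+ * To restate the flow above: the header tracks XFS_ATTR_LEAF_MAPSIZE
+ * (currently three) free regions; the add path first tries a first-fit
+ * against those regions, and only if none is large enough (but holes exist)
+ * does it compact the block, after which freemap[0] is the single remaining
+ * free region.
+ */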
+
+/*
+ * Add a name to a leaf attribute list structure.
+ */
+STATIC int
+xfs_attr3_leaf_add_work(
+       struct xfs_buf          *bp,
+       struct xfs_attr3_icleaf_hdr *ichdr,
+       struct xfs_da_args      *args,
+       int                     mapindex)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_name_local *name_loc;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       struct xfs_mount        *mp;
+       int                     tmp;
+       int                     i;
+
+       trace_xfs_attr_leaf_add_work(args);
+
+       leaf = bp->b_addr;
+       ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE);
+       ASSERT(args->index >= 0 && args->index <= ichdr->count);
+
+       /*
+        * Force open some space in the entry array and fill it in.
+        */
+       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
+       if (args->index < ichdr->count) {
+               tmp  = ichdr->count - args->index;
+               tmp *= sizeof(xfs_attr_leaf_entry_t);
+               memmove(entry + 1, entry, tmp);
+               xfs_trans_log_buf(args->trans, bp,
+                   XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
+       }
+       ichdr->count++;
+
+       /*
+        * Allocate space for the new string (at the end of the run).
+        */
+       mp = args->trans->t_mountp;
+       ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);
+       ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
+       ASSERT(ichdr->freemap[mapindex].size >=
+               xfs_attr_leaf_newentsize(args, NULL));
+       ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);
+       ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
+
+       ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp);
+
+       entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
+                                    ichdr->freemap[mapindex].size);
+       entry->hashval = cpu_to_be32(args->hashval);
+       entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
+       entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
+       if (args->op_flags & XFS_DA_OP_RENAME) {
+               entry->flags |= XFS_ATTR_INCOMPLETE;
+               if ((args->blkno2 == args->blkno) &&
+                   (args->index2 <= args->index)) {
+                       args->index2++;
+               }
+       }
+       xfs_trans_log_buf(args->trans, bp,
+                         XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+       ASSERT((args->index == 0) ||
+              (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
+       ASSERT((args->index == ichdr->count - 1) ||
+              (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
+
+       /*
+        * For "remote" attribute values, simply note that we need to
+        * allocate space for the "remote" value.  We can't actually
+        * allocate the extents in this transaction, and we can't decide
+        * which blocks they should be as we might allocate more blocks
+        * as part of this transaction (a split operation for example).
+        */
+       if (entry->flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
+               name_loc->namelen = args->namelen;
+               name_loc->valuelen = cpu_to_be16(args->valuelen);
+               memcpy((char *)name_loc->nameval, args->name, args->namelen);
+               memcpy((char *)&name_loc->nameval[args->namelen], args->value,
+                                  be16_to_cpu(name_loc->valuelen));
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+               name_rmt->namelen = args->namelen;
+               memcpy((char *)name_rmt->name, args->name, args->namelen);
+               entry->flags |= XFS_ATTR_INCOMPLETE;
+               /* just in case */
+               name_rmt->valuelen = 0;
+               name_rmt->valueblk = 0;
+               args->rmtblkno = 1;
+               args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+               args->rmtvaluelen = args->valuelen;
+       }
+       xfs_trans_log_buf(args->trans, bp,
+            XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
+                                  xfs_attr_leaf_entsize(leaf, args->index)));
+
+       /*
+        * Update the control info for this leaf node
+        */
+       if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
+               ichdr->firstused = be16_to_cpu(entry->nameidx);
+
+       ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
+                                       + xfs_attr3_leaf_hdr_size(leaf));
+       tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
+                                       + xfs_attr3_leaf_hdr_size(leaf);
+
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+               if (ichdr->freemap[i].base == tmp) {
+                       ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
+                       ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+               }
+       }
+       ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
+       return 0;
+}
+
+/*
+ * Garbage collect a leaf attribute list block by copying it to a new buffer.
+ */
+STATIC void
+xfs_attr3_leaf_compact(
+       struct xfs_da_args      *args,
+       struct xfs_attr3_icleaf_hdr *ichdr_dst,
+       struct xfs_buf          *bp)
+{
+       struct xfs_attr_leafblock *leaf_src;
+       struct xfs_attr_leafblock *leaf_dst;
+       struct xfs_attr3_icleaf_hdr ichdr_src;
+       struct xfs_trans        *trans = args->trans;
+       char                    *tmpbuffer;
+
+       trace_xfs_attr_leaf_compact(args);
+
+       tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+       memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
+       memset(bp->b_addr, 0, args->geo->blksize);
+       leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
+       leaf_dst = bp->b_addr;
+
+       /*
+        * Copy the on-disk header back into the destination buffer to ensure
+        * all the information in the header that is not part of the incore
+        * header structure is preserved.
+        */
+       memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
+
+       /* Initialise the incore headers */
+       ichdr_src = *ichdr_dst; /* struct copy */
+       ichdr_dst->firstused = args->geo->blksize;
+       ichdr_dst->usedbytes = 0;
+       ichdr_dst->count = 0;
+       ichdr_dst->holes = 0;
+       ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
+       ichdr_dst->freemap[0].size = ichdr_dst->firstused -
+                                               ichdr_dst->freemap[0].base;
+
+       /* write the header back to initialise the underlying buffer */
+       xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
+
+       /*
+        * Copy all entries in the same (sorted) order, but allocate
+        * the name/value pairs packed and in sequence.
+        */
+       xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0,
+                               leaf_dst, ichdr_dst, 0, ichdr_src.count);
+       /*
+        * this logs the entire buffer, but the caller must write the header
+        * back to the buffer when it is finished modifying it.
+        */
+       xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
+
+       kmem_free(tmpbuffer);
+}
+
+/*
+ * Compare the "order" of two leaf blocks.
+ * Return 0 unless leaf2 should go before leaf1.
+ */
+static int
+xfs_attr3_leaf_order(
+       struct xfs_buf  *leaf1_bp,
+       struct xfs_attr3_icleaf_hdr *leaf1hdr,
+       struct xfs_buf  *leaf2_bp,
+       struct xfs_attr3_icleaf_hdr *leaf2hdr)
+{
+       struct xfs_attr_leaf_entry *entries1;
+       struct xfs_attr_leaf_entry *entries2;
+
+       entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr);
+       entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr);
+       if (leaf1hdr->count > 0 && leaf2hdr->count > 0 &&
+           ((be32_to_cpu(entries2[0].hashval) <
+             be32_to_cpu(entries1[0].hashval)) ||
+            (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) <
+             be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) {
+               return 1;
+       }
+       return 0;
+}
+
+int
+xfs_attr_leaf_order(
+       struct xfs_buf  *leaf1_bp,
+       struct xfs_buf  *leaf2_bp)
+{
+       struct xfs_attr3_icleaf_hdr ichdr1;
+       struct xfs_attr3_icleaf_hdr ichdr2;
+
+       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
+       return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
+}
+
+/*
+ * Redistribute the attribute list entries between two leaf nodes,
+ * taking into account the size of the new entry.
+ *
+ * NOTE: if the new block is empty, it will get the upper half of the
+ * old block.  At present, all callers (there is only one) pass in an
+ * empty second block.
+ *
+ * This code adjusts the args->index/blkno and args->index2/blkno2 fields
+ * to match what it is doing in splitting the attribute leaf block.  Those
+ * values are used in "atomic rename" operations on attributes.  Note that
+ * the "new" and "old" values can end up in different blocks.
+ */
+STATIC void
+xfs_attr3_leaf_rebalance(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *blk1,
+       struct xfs_da_state_blk *blk2)
+{
+       struct xfs_da_args      *args;
+       struct xfs_attr_leafblock *leaf1;
+       struct xfs_attr_leafblock *leaf2;
+       struct xfs_attr3_icleaf_hdr ichdr1;
+       struct xfs_attr3_icleaf_hdr ichdr2;
+       struct xfs_attr_leaf_entry *entries1;
+       struct xfs_attr_leaf_entry *entries2;
+       int                     count;
+       int                     totallen;
+       int                     max;
+       int                     space;
+       int                     swap;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
+       leaf1 = blk1->bp->b_addr;
+       leaf2 = blk2->bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       ASSERT(ichdr2.count == 0);
+       args = state->args;
+
+       trace_xfs_attr_leaf_rebalance(args);
+
+       /*
+        * Check ordering of blocks, reverse if it makes things simpler.
+        *
+        * NOTE: Given that all (current) callers pass in an empty
+        * second block, this code should never set "swap".
+        */
+       swap = 0;
+       if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
+               struct xfs_da_state_blk *tmp_blk;
+               struct xfs_attr3_icleaf_hdr tmp_ichdr;
+
+               tmp_blk = blk1;
+               blk1 = blk2;
+               blk2 = tmp_blk;
+
+               /* struct copies to swap them rather than reconverting */
+               tmp_ichdr = ichdr1;
+               ichdr1 = ichdr2;
+               ichdr2 = tmp_ichdr;
+
+               leaf1 = blk1->bp->b_addr;
+               leaf2 = blk2->bp->b_addr;
+               swap = 1;
+       }
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.  Then get
+        * the direction to copy and the number of elements to move.
+        *
+        * "inleaf" is true if the new entry should be inserted into blk1.
+        * If "swap" is also true, then reverse the sense of "inleaf".
+        */
+       state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1,
+                                                     blk2, &ichdr2,
+                                                     &count, &totallen);
+       if (swap)
+               state->inleaf = !state->inleaf;
+
+       /*
+        * Move any entries required from leaf to leaf:
+        */
+       if (count < ichdr1.count) {
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               /* number of entries being moved */
+               count = ichdr1.count - count;
+               space  = ichdr1.usedbytes - totallen;
+               space += count * sizeof(xfs_attr_leaf_entry_t);
+
+               /*
+                * leaf2 is the destination, compact it if it looks tight.
+                */
+               max  = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
+               max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
+               if (space > max)
+                       xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
+
+               /*
+                * Move high entries from leaf1 to low end of leaf2.
+                */
+               xfs_attr3_leaf_moveents(args, leaf1, &ichdr1,
+                               ichdr1.count - count, leaf2, &ichdr2, 0, count);
+
+       } else if (count > ichdr1.count) {
+               /*
+                * I assert that since all callers pass in an empty
+                * second buffer, this code should never execute.
+                */
+               ASSERT(0);
+
+               /*
+                * Figure the total bytes to be added to the destination leaf.
+                */
+               /* number of entries being moved */
+               count -= ichdr1.count;
+               space  = totallen - ichdr1.usedbytes;
+               space += count * sizeof(xfs_attr_leaf_entry_t);
+
+               /*
+                * leaf1 is the destination, compact it if it looks tight.
+                */
+               max  = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
+               max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
+               if (space > max)
+                       xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
+
+               /*
+                * Move low entries from leaf2 to high end of leaf1.
+                */
+               xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1,
+                                       ichdr1.count, count);
+       }
+
+       xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
+       xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
+       xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
+       xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        */
+       entries1 = xfs_attr3_leaf_entryp(leaf1);
+       entries2 = xfs_attr3_leaf_entryp(leaf2);
+       blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval);
+       blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval);
+
+       /*
+        * Adjust the expected index for insertion.
+        * NOTE: this code depends on the (current) situation that the
+        * second block was originally empty.
+        *
+        * If the insertion point moved to the 2nd block, we must adjust
+        * the index.  We must also track the entry just following the
+        * new entry for use in an "atomic rename" operation; that entry
+        * is always the "old" entry, and the "new" entry is what we are
+        * inserting.  The index/blkno fields refer to the "old" entry,
+        * while the index2/blkno2 fields refer to the "new" entry.
+        */
+       if (blk1->index > ichdr1.count) {
+               ASSERT(state->inleaf == 0);
+               blk2->index = blk1->index - ichdr1.count;
+               args->index = args->index2 = blk2->index;
+               args->blkno = args->blkno2 = blk2->blkno;
+       } else if (blk1->index == ichdr1.count) {
+               if (state->inleaf) {
+                       args->index = blk1->index;
+                       args->blkno = blk1->blkno;
+                       args->index2 = 0;
+                       args->blkno2 = blk2->blkno;
+               } else {
+                       /*
+                        * On a double leaf split, the original attr location
+                        * is already stored in blkno2/index2, so don't
+                        * overwrite it; otherwise we corrupt the tree.
+                        */
+                       blk2->index = blk1->index - ichdr1.count;
+                       args->index = blk2->index;
+                       args->blkno = blk2->blkno;
+                       if (!state->extravalid) {
+                               /*
+                                * set the new attr location to match the old
+                                * one and let the higher level split code
+                                * decide where in the leaf to place it.
+                                */
+                               args->index2 = blk2->index;
+                               args->blkno2 = blk2->blkno;
+                       }
+               }
+       } else {
+               ASSERT(state->inleaf == 1);
+               args->index = args->index2 = blk1->index;
+               args->blkno = args->blkno2 = blk1->blkno;
+       }
+}
+
+/*
+ * Examine entries until we reduce the absolute difference in
+ * byte usage between the two blocks to a minimum.
+ * GROT: Is this really necessary?  With other than a 512 byte blocksize,
+ * GROT: there will always be enough room in either block for a new entry.
+ * GROT: Do a double-split for this case?
+ */
+STATIC int
+xfs_attr3_leaf_figure_balance(
+       struct xfs_da_state             *state,
+       struct xfs_da_state_blk         *blk1,
+       struct xfs_attr3_icleaf_hdr     *ichdr1,
+       struct xfs_da_state_blk         *blk2,
+       struct xfs_attr3_icleaf_hdr     *ichdr2,
+       int                             *countarg,
+       int                             *usedbytesarg)
+{
+       struct xfs_attr_leafblock       *leaf1 = blk1->bp->b_addr;
+       struct xfs_attr_leafblock       *leaf2 = blk2->bp->b_addr;
+       struct xfs_attr_leaf_entry      *entry;
+       int                             count;
+       int                             max;
+       int                             index;
+       int                             totallen = 0;
+       int                             half;
+       int                             lastdelta;
+       int                             foundit = 0;
+       int                             tmp;
+
+       /*
+        * Examine entries until we reduce the absolute difference in
+        * byte usage between the two blocks to a minimum.
+        */
+       max = ichdr1->count + ichdr2->count;
+       half = (max + 1) * sizeof(*entry);
+       half += ichdr1->usedbytes + ichdr2->usedbytes +
+                       xfs_attr_leaf_newentsize(state->args, NULL);
+       half /= 2;
+       lastdelta = state->args->geo->blksize;
+       entry = xfs_attr3_leaf_entryp(leaf1);
+       for (count = index = 0; count < max; entry++, index++, count++) {
+
+#define XFS_ATTR_ABS(A)        (((A) < 0) ? -(A) : (A))
+               /*
+                * The new entry is in the first block, account for it.
+                */
+               if (count == blk1->index) {
+                       tmp = totallen + sizeof(*entry) +
+                               xfs_attr_leaf_newentsize(state->args, NULL);
+                       if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+                               break;
+                       lastdelta = XFS_ATTR_ABS(half - tmp);
+                       totallen = tmp;
+                       foundit = 1;
+               }
+
+               /*
+                * Wrap around into the second block if necessary.
+                */
+               if (count == ichdr1->count) {
+                       leaf1 = leaf2;
+                       entry = xfs_attr3_leaf_entryp(leaf1);
+                       index = 0;
+               }
+
+               /*
+                * Figure out if next leaf entry would be too much.
+                */
+               tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1,
+                                                                       index);
+               if (XFS_ATTR_ABS(half - tmp) > lastdelta)
+                       break;
+               lastdelta = XFS_ATTR_ABS(half - tmp);
+               totallen = tmp;
+#undef XFS_ATTR_ABS
+       }
+
+       /*
+        * Calculate the number of usedbytes that will end up in the lower
+        * block.  If the new entry is not in the lower block, fix up the count.
+        */
+       totallen -= count * sizeof(*entry);
+       if (foundit) {
+               totallen -= sizeof(*entry) +
+                               xfs_attr_leaf_newentsize(state->args, NULL);
+       }
+
+       *countarg = count;
+       *usedbytesarg = totallen;
+       return foundit;
+}
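
An illustrative aside, not part of the patch: the figure_balance walk above keeps a running byte total and stops as soon as that total's distance from the halfway point starts growing again. A minimal standalone sketch of the same stopping rule, using hypothetical entry sizes instead of the real leaf structures:

#include <stdio.h>
#include <stdlib.h>

/* Return how many leading entries stay in the first block so that the
 * byte usage of the two halves is as close to equal as possible. */
static int find_split(const int *sizes, int n)
{
        int total = 0, half, running = 0, lastdelta, i;

        for (i = 0; i < n; i++)
                total += sizes[i];
        half = total / 2;

        lastdelta = total;              /* worst possible distance */
        for (i = 0; i < n; i++) {
                int tmp = running + sizes[i];

                if (abs(half - tmp) > lastdelta)
                        break;          /* moving away from the midpoint */
                lastdelta = abs(half - tmp);
                running = tmp;
        }
        return i;
}

int main(void)
{
        int sizes[] = { 40, 24, 64, 16, 32, 48 };

        printf("first %d entries stay in block 1\n", find_split(sizes, 6));
        return 0;
}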
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ * If the current block is over 50% full, don't try to join it, return 0.
+ * If the block is empty, fill in the state structure and return 2.
+ * If it can be collapsed, fill in the state structure and return 1.
+ * If nothing can be done, return 0.
+ *
+ * GROT: allow for INCOMPLETE entries in calculation.
+ */
+int
+xfs_attr3_leaf_toosmall(
+       struct xfs_da_state     *state,
+       int                     *action)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_da_state_blk *blk;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_buf          *bp;
+       xfs_dablk_t             blkno;
+       int                     bytes;
+       int                     forward;
+       int                     error;
+       int                     retval;
+       int                     i;
+
+       trace_xfs_attr_leaf_toosmall(state->args);
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       leaf = blk->bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       bytes = xfs_attr3_leaf_hdr_size(leaf) +
+               ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
+               ichdr.usedbytes;
+       if (bytes > (state->args->geo->blksize >> 1)) {
+               *action = 0;    /* blk over 50%, don't try to join */
+               return 0;
+       }
+
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it; there is no need
+        * to coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (ichdr.count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = (ichdr.forw != 0);
+               memcpy(&state->altpath, &state->path, sizeof(state->path));
+               error = xfs_da3_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error)
+                       return error;
+               if (retval) {
+                       *action = 0;
+               } else {
+                       *action = 2;
+               }
+               return 0;
+       }
+
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink an attribute list over time.
+        */
+       /* start with smaller blk num */
+       forward = ichdr.forw < ichdr.back;
+       for (i = 0; i < 2; forward = !forward, i++) {
+               struct xfs_attr3_icleaf_hdr ichdr2;
+               if (forward)
+                       blkno = ichdr.forw;
+               else
+                       blkno = ichdr.back;
+               if (blkno == 0)
+                       continue;
+               error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
+                                       blkno, -1, &bp);
+               if (error)
+                       return error;
+
+               xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
+
+               bytes = state->args->geo->blksize -
+                       (state->args->geo->blksize >> 2) -
+                       ichdr.usedbytes - ichdr2.usedbytes -
+                       ((ichdr.count + ichdr2.count) *
+                                       sizeof(xfs_attr_leaf_entry_t)) -
+                       xfs_attr3_leaf_hdr_size(leaf);
+
+               xfs_trans_brelse(state->args->trans, bp);
+               if (bytes >= 0)
+                       break;  /* fits with at least 25% to spare */
+       }
+       if (i >= 2) {
+               *action = 0;
+               return 0;
+       }
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        */
+       memcpy(&state->altpath, &state->path, sizeof(state->path));
+       if (blkno < blk->blkno) {
+               error = xfs_da3_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+       } else {
+               error = xfs_da3_path_shift(state, &state->path, forward,
+                                                0, &retval);
+       }
+       if (error)
+               return error;
+       if (retval) {
+               *action = 0;
+       } else {
+               *action = 1;
+       }
+       return 0;
+}
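
An aside, not part of the patch: the sibling scan above coalesces only when both blocks' entries and name/value bytes would fit in one block while still leaving a quarter of the block free. A hedged standalone sketch of that arithmetic; blksize, hdrsize and the per-entry size below are made-up stand-ins for the real geometry:

#include <stdbool.h>
#include <stdio.h>

struct leaf_usage {
        int count;              /* number of entries */
        int usedbytes;          /* bytes of name/value data */
};

/* True if both leaves fit in one block with at least 25% left over. */
static bool can_coalesce(int blksize, int hdrsize, int entsize,
                         const struct leaf_usage *a,
                         const struct leaf_usage *b)
{
        int bytes = blksize - (blksize >> 2) -  /* keep 25% spare */
                    a->usedbytes - b->usedbytes -
                    (a->count + b->count) * entsize -
                    hdrsize;

        return bytes >= 0;
}

int main(void)
{
        struct leaf_usage a = { .count = 10, .usedbytes = 600 };
        struct leaf_usage b = { .count = 8,  .usedbytes = 400 };

        printf("coalesce: %d\n", can_coalesce(4096, 80, 8, &a, &b));
        return 0;
}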
+
+/*
+ * Remove a name from the leaf attribute list structure.
+ *
+ * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
+ * If two leaves are 37% full, when combined they will leave 25% free.
+ */
+int
+xfs_attr3_leaf_remove(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr_leaf_entry *entry;
+       int                     before;
+       int                     after;
+       int                     smallest;
+       int                     entsize;
+       int                     tablesize;
+       int                     tmp;
+       int                     i;
+
+       trace_xfs_attr_leaf_remove(args);
+
+       leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+
+       ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
+       ASSERT(args->index >= 0 && args->index < ichdr.count);
+       ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
+                                       xfs_attr3_leaf_hdr_size(leaf));
+
+       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
+
+       ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
+       ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
+
+       /*
+        * Scan through the free region table:
+        *    check for adjacency of the freed entry with an existing region,
+        *    find the smallest free region in case we need to replace it,
+        *    adjust any map that borders the entry table.
+        */
+       tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
+                                       + xfs_attr3_leaf_hdr_size(leaf);
+       tmp = ichdr.freemap[0].size;
+       before = after = -1;
+       smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
+       entsize = xfs_attr_leaf_entsize(leaf, args->index);
+       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
+               ASSERT(ichdr.freemap[i].base < args->geo->blksize);
+               ASSERT(ichdr.freemap[i].size < args->geo->blksize);
+               if (ichdr.freemap[i].base == tablesize) {
+                       ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
+                       ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
+               }
+
+               if (ichdr.freemap[i].base + ichdr.freemap[i].size ==
+                               be16_to_cpu(entry->nameidx)) {
+                       before = i;
+               } else if (ichdr.freemap[i].base ==
+                               (be16_to_cpu(entry->nameidx) + entsize)) {
+                       after = i;
+               } else if (ichdr.freemap[i].size < tmp) {
+                       tmp = ichdr.freemap[i].size;
+                       smallest = i;
+               }
+       }
+
+       /*
+        * Coalesce adjacent freemap regions,
+        * or replace the smallest region.
+        */
+       if ((before >= 0) || (after >= 0)) {
+               if ((before >= 0) && (after >= 0)) {
+                       ichdr.freemap[before].size += entsize;
+                       ichdr.freemap[before].size += ichdr.freemap[after].size;
+                       ichdr.freemap[after].base = 0;
+                       ichdr.freemap[after].size = 0;
+               } else if (before >= 0) {
+                       ichdr.freemap[before].size += entsize;
+               } else {
+                       ichdr.freemap[after].base = be16_to_cpu(entry->nameidx);
+                       ichdr.freemap[after].size += entsize;
+               }
+       } else {
+               /*
+                * Replace the smallest region if it is smaller than the
+                * freed entry.
+                */
+               if (ichdr.freemap[smallest].size < entsize) {
+                       ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx);
+                       ichdr.freemap[smallest].size = entsize;
+               }
+       }
+
+       /*
+        * Did we remove the first entry?
+        */
+       if (be16_to_cpu(entry->nameidx) == ichdr.firstused)
+               smallest = 1;
+       else
+               smallest = 0;
+
+       /*
+        * Compress the remaining entries and zero out the removed stuff.
+        */
+       memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize);
+       ichdr.usedbytes -= entsize;
+       xfs_trans_log_buf(args->trans, bp,
+            XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
+                                  entsize));
+
+       tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t);
+       memmove(entry, entry + 1, tmp);
+       ichdr.count--;
+       xfs_trans_log_buf(args->trans, bp,
+           XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t)));
+
+       entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count];
+       memset(entry, 0, sizeof(xfs_attr_leaf_entry_t));
+
+       /*
+        * If we removed the first entry, re-find the first used byte
+        * in the name area.  Note that if the entry was the "firstused",
+        * then we don't have a "hole" in our block resulting from
+        * removing the name.
+        */
+       if (smallest) {
+               tmp = args->geo->blksize;
+               entry = xfs_attr3_leaf_entryp(leaf);
+               for (i = ichdr.count - 1; i >= 0; entry++, i--) {
+                       ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
+                       ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
+
+                       if (be16_to_cpu(entry->nameidx) < tmp)
+                               tmp = be16_to_cpu(entry->nameidx);
+               }
+               ichdr.firstused = tmp;
+               if (!ichdr.firstused)
+                       ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
+       } else {
+               ichdr.holes = 1;        /* mark as needing compaction */
+       }
+       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_trans_log_buf(args->trans, bp,
+                         XFS_DA_LOGRANGE(leaf, &leaf->hdr,
+                                         xfs_attr3_leaf_hdr_size(leaf)));
+
+       /*
+        * Check if the leaf is less than 37% full; if so, the caller may
+        * want to "join" the leaf with a sibling.
+        */
+       tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
+             ichdr.count * sizeof(xfs_attr_leaf_entry_t);
+
+       return tmp < args->geo->magicpct; /* leaf is < 37% full */
+}
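
Aside (not part of the patch): the freemap handling above tries to merge the freed name/value region into a free region that ends where it starts or starts where it ends; failing that, it only replaces the smallest tracked region if the freed region is larger. A simplified standalone sketch of that bookkeeping with a hypothetical three-slot map (the real code also grows any region that borders the entry table):

#include <stdio.h>

#define MAPSIZE 3       /* hypothetical number of tracked regions */

struct freemap { int base; int size; };

/*
 * Return a freed [base, base + size) region to the map: merge with an
 * adjacent region if possible, otherwise replace the smallest region
 * when the freed one is bigger.
 */
static void free_region(struct freemap *map, int base, int size)
{
        int before = -1, after = -1, smallest = 0, i;

        for (i = 0; i < MAPSIZE; i++) {
                if (map[i].base + map[i].size == base)
                        before = i;
                else if (map[i].base == base + size)
                        after = i;
                else if (map[i].size < map[smallest].size)
                        smallest = i;
        }

        if (before >= 0 && after >= 0) {
                /* freed space joins two regions into one */
                map[before].size += size + map[after].size;
                map[after].base = map[after].size = 0;
        } else if (before >= 0) {
                map[before].size += size;
        } else if (after >= 0) {
                map[after].base = base;
                map[after].size += size;
        } else if (map[smallest].size < size) {
                map[smallest].base = base;
                map[smallest].size = size;
        }
}

int main(void)
{
        struct freemap map[MAPSIZE] = { {100, 20}, {200, 40}, {300, 10} };

        free_region(map, 120, 30);      /* adjacent to the first region */
        printf("region 0: base %d size %d\n", map[0].base, map[0].size);
        return 0;
}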
+
+/*
+ * Move all the attribute list entries from drop_leaf into save_leaf.
+ */
+void
+xfs_attr3_leaf_unbalance(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *drop_blk,
+       struct xfs_da_state_blk *save_blk)
+{
+       struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr;
+       struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr;
+       struct xfs_attr3_icleaf_hdr drophdr;
+       struct xfs_attr3_icleaf_hdr savehdr;
+       struct xfs_attr_leaf_entry *entry;
+
+       trace_xfs_attr_leaf_unbalance(state->args);
+
+       drop_leaf = drop_blk->bp->b_addr;
+       save_leaf = save_blk->bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
+       xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
+       entry = xfs_attr3_leaf_entryp(drop_leaf);
+
+       /*
+        * Save last hashval from dying block for later Btree fixup.
+        */
+       drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval);
+
+       /*
+        * Check if we need a temp buffer, or whether we can do it in place.
+        * Note that we don't check "leaf" for holes because we will always
+        * be dropping it; toosmall() decided that for us already.
+        */
+       if (savehdr.holes == 0) {
+               /*
+                * dest leaf has no holes, so we add there.  May need
+                * to make some room in the entry array.
+                */
+               if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
+                                        drop_blk->bp, &drophdr)) {
+                       xfs_attr3_leaf_moveents(state->args,
+                                               drop_leaf, &drophdr, 0,
+                                               save_leaf, &savehdr, 0,
+                                               drophdr.count);
+               } else {
+                       xfs_attr3_leaf_moveents(state->args,
+                                               drop_leaf, &drophdr, 0,
+                                               save_leaf, &savehdr,
+                                               savehdr.count, drophdr.count);
+               }
+       } else {
+               /*
+                * Destination has holes, so we make a temporary copy
+                * of the leaf and add them both to that.
+                */
+               struct xfs_attr_leafblock *tmp_leaf;
+               struct xfs_attr3_icleaf_hdr tmphdr;
+
+               tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);
+
+               /*
+                * Copy the header into the temp leaf so that everything not
+                * in the incore header is preserved and gets copied back in
+                * once we've moved all the entries.
+                */
+               memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
+
+               memset(&tmphdr, 0, sizeof(tmphdr));
+               tmphdr.magic = savehdr.magic;
+               tmphdr.forw = savehdr.forw;
+               tmphdr.back = savehdr.back;
+               tmphdr.firstused = state->args->geo->blksize;
+
+               /* write the header to the temp buffer to initialise it */
+               xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+
+               if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
+                                        drop_blk->bp, &drophdr)) {
+                       xfs_attr3_leaf_moveents(state->args,
+                                               drop_leaf, &drophdr, 0,
+                                               tmp_leaf, &tmphdr, 0,
+                                               drophdr.count);
+                       xfs_attr3_leaf_moveents(state->args,
+                                               save_leaf, &savehdr, 0,
+                                               tmp_leaf, &tmphdr, tmphdr.count,
+                                               savehdr.count);
+               } else {
+                       xfs_attr3_leaf_moveents(state->args,
+                                               save_leaf, &savehdr, 0,
+                                               tmp_leaf, &tmphdr, 0,
+                                               savehdr.count);
+                       xfs_attr3_leaf_moveents(state->args,
+                                               drop_leaf, &drophdr, 0,
+                                               tmp_leaf, &tmphdr, tmphdr.count,
+                                               drophdr.count);
+               }
+               memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
+               savehdr = tmphdr; /* struct copy */
+               kmem_free(tmp_leaf);
+       }
+
+       xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
+       xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
+                                          state->args->geo->blksize - 1);
+
+       /*
+        * Copy out last hashval in each block for B-tree code.
+        */
+       entry = xfs_attr3_leaf_entryp(save_leaf);
+       save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval);
+}
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Look up a name in a leaf attribute list structure.
+ * This is the internal routine; it uses the caller's buffer.
+ *
+ * Note that duplicate keys are allowed, but we only check for them within
+ * the current leaf node.  The Btree code must check in adjacent leaf nodes.
+ *
+ * Return in args->index the index into the entry[] array of either
+ * the found entry, or where the entry should have been (insert before
+ * that entry).
+ *
+ * Don't change the args->value unless we find the attribute.
+ */
+int
+xfs_attr3_leaf_lookup_int(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_entry *entries;
+       struct xfs_attr_leaf_name_local *name_loc;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       xfs_dahash_t            hashval;
+       int                     probe;
+       int                     span;
+
+       trace_xfs_attr_leaf_lookup(args);
+
+       leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       entries = xfs_attr3_leaf_entryp(leaf);
+       ASSERT(ichdr.count < args->geo->blksize / 8);
+
+       /*
+        * Binary search.  (note: small blocks will skip this loop)
+        */
+       hashval = args->hashval;
+       probe = span = ichdr.count / 2;
+       for (entry = &entries[probe]; span > 4; entry = &entries[probe]) {
+               span /= 2;
+               if (be32_to_cpu(entry->hashval) < hashval)
+                       probe += span;
+               else if (be32_to_cpu(entry->hashval) > hashval)
+                       probe -= span;
+               else
+                       break;
+       }
+       ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
+       ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
+
+       /*
+        * Since we may have duplicate hashvals, find the first matching
+        * hashval in the leaf.
+        */
+       while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) {
+               entry--;
+               probe--;
+       }
+       while (probe < ichdr.count &&
+              be32_to_cpu(entry->hashval) < hashval) {
+               entry++;
+               probe++;
+       }
+       if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
+               args->index = probe;
+               return ENOATTR;
+       }
+
+       /*
+        * Duplicate keys may be present, so search all of them for a match.
+        */
+       for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval);
+                       entry++, probe++) {
+/*
+ * GROT: Add code to remove incomplete entries.
+ */
+               /*
+                * If we are looking for INCOMPLETE entries, show only those.
+                * If we are looking for complete entries, show only those.
+                */
+               if ((args->flags & XFS_ATTR_INCOMPLETE) !=
+                   (entry->flags & XFS_ATTR_INCOMPLETE)) {
+                       continue;
+               }
+               if (entry->flags & XFS_ATTR_LOCAL) {
+                       name_loc = xfs_attr3_leaf_name_local(leaf, probe);
+                       if (name_loc->namelen != args->namelen)
+                               continue;
+                       if (memcmp(args->name, name_loc->nameval,
+                                                       args->namelen) != 0)
+                               continue;
+                       if (!xfs_attr_namesp_match(args->flags, entry->flags))
+                               continue;
+                       args->index = probe;
+                       return EEXIST;
+               } else {
+                       name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
+                       if (name_rmt->namelen != args->namelen)
+                               continue;
+                       if (memcmp(args->name, name_rmt->name,
+                                                       args->namelen) != 0)
+                               continue;
+                       if (!xfs_attr_namesp_match(args->flags, entry->flags))
+                               continue;
+                       args->index = probe;
+                       args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
+                       args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
+                       args->rmtblkcnt = xfs_attr3_rmt_blocks(
+                                                       args->dp->i_mount,
+                                                       args->rmtvaluelen);
+                       return EEXIST;
+               }
+       }
+       args->index = probe;
+       return ENOATTR;
+}
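
Aside (not part of the patch): the lookup narrows with a halving span until the span is small, then rewinds past any duplicate hash values so the linear scan starts at the first candidate. A standalone sketch of that probe/rewind pattern over a plain sorted array:

#include <stdio.h>

/* Find the index of the first element equal to key, or the insertion
 * point if it is absent.  Mirrors the probe/span narrowing and the
 * "walk back to the first duplicate" steps of the lookup above. */
static int first_match(const unsigned int *hash, int count, unsigned int key)
{
        int probe = count / 2;
        int span = count / 2;

        while (span > 4) {
                span /= 2;
                if (hash[probe] < key)
                        probe += span;
                else if (hash[probe] > key)
                        probe -= span;
                else
                        break;
        }

        /* back up over duplicates, then forward to the first entry >= key */
        while (probe > 0 && hash[probe] >= key)
                probe--;
        while (probe < count && hash[probe] < key)
                probe++;
        return probe;
}

int main(void)
{
        unsigned int h[] = { 3, 7, 7, 7, 9, 12, 15, 15, 20 };

        printf("first 7 at index %d\n", first_match(h, 9, 7));
        return 0;
}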
+
+/*
+ * Get the value associated with an attribute name from a leaf attribute
+ * list structure.
+ */
+int
+xfs_attr3_leaf_getvalue(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_name_local *name_loc;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       int                     valuelen;
+
+       leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       ASSERT(ichdr.count < args->geo->blksize / 8);
+       ASSERT(args->index < ichdr.count);
+
+       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
+       if (entry->flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
+               ASSERT(name_loc->namelen == args->namelen);
+               ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
+               valuelen = be16_to_cpu(name_loc->valuelen);
+               if (args->flags & ATTR_KERNOVAL) {
+                       args->valuelen = valuelen;
+                       return 0;
+               }
+               if (args->valuelen < valuelen) {
+                       args->valuelen = valuelen;
+                       return ERANGE;
+               }
+               args->valuelen = valuelen;
+               memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+               ASSERT(name_rmt->namelen == args->namelen);
+               ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
+               args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
+               args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
+               args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+                                                      args->rmtvaluelen);
+               if (args->flags & ATTR_KERNOVAL) {
+                       args->valuelen = args->rmtvaluelen;
+                       return 0;
+               }
+               if (args->valuelen < args->rmtvaluelen) {
+                       args->valuelen = args->rmtvaluelen;
+                       return ERANGE;
+               }
+               args->valuelen = args->rmtvaluelen;
+       }
+       return 0;
+}
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Move the indicated entries from one leaf to another.
+ * NOTE: this routine modifies both source and destination leaves.
+ */
+/*ARGSUSED*/
+STATIC void
+xfs_attr3_leaf_moveents(
+       struct xfs_da_args              *args,
+       struct xfs_attr_leafblock       *leaf_s,
+       struct xfs_attr3_icleaf_hdr     *ichdr_s,
+       int                             start_s,
+       struct xfs_attr_leafblock       *leaf_d,
+       struct xfs_attr3_icleaf_hdr     *ichdr_d,
+       int                             start_d,
+       int                             count)
+{
+       struct xfs_attr_leaf_entry      *entry_s;
+       struct xfs_attr_leaf_entry      *entry_d;
+       int                             desti;
+       int                             tmp;
+       int                             i;
+
+       /*
+        * Check for nothing to do.
+        */
+       if (count == 0)
+               return;
+
+       /*
+        * Set up environment.
+        */
+       ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
+              ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
+       ASSERT(ichdr_s->magic == ichdr_d->magic);
+       ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
+       ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
+                                       + xfs_attr3_leaf_hdr_size(leaf_s));
+       ASSERT(ichdr_d->count < args->geo->blksize / 8);
+       ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
+                                       + xfs_attr3_leaf_hdr_size(leaf_d));
+
+       ASSERT(start_s < ichdr_s->count);
+       ASSERT(start_d <= ichdr_d->count);
+       ASSERT(count <= ichdr_s->count);
+
+
+       /*
+        * Move the entries in the destination leaf up to make a hole,
+        * if necessary.
+        */
+       if (start_d < ichdr_d->count) {
+               tmp  = ichdr_d->count - start_d;
+               tmp *= sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
+               entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count];
+               memmove(entry_d, entry_s, tmp);
+       }
+
+       /*
+        * Copy all entries in the same (sorted) order, but allocate
+        * attribute info packed and in sequence.
+        */
+       entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
+       entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
+       desti = start_d;
+       for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
+               ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused);
+               tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
+#ifdef GROT
+               /*
+                * Code to drop INCOMPLETE entries.  Difficult to use as we
+                * may also need to change the insertion index.  Code turned
+                * off for 6.2; it should be revisited later.
+                */
+               if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
+                       memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
+                       ichdr_s->usedbytes -= tmp;
+                       ichdr_s->count -= 1;
+                       entry_d--;      /* to compensate for ++ in loop hdr */
+                       desti--;
+                       if ((start_s + i) < offset)
+                               result++;       /* insertion index adjustment */
+               } else {
+#endif /* GROT */
+                       ichdr_d->firstused -= tmp;
+                       /* both on-disk, don't endian flip twice */
+                       entry_d->hashval = entry_s->hashval;
+                       entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
+                       entry_d->flags = entry_s->flags;
+                       ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
+                                                       <= args->geo->blksize);
+                       memmove(xfs_attr3_leaf_name(leaf_d, desti),
+                               xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
+                       ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
+                                                       <= args->geo->blksize);
+                       memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
+                       ichdr_s->usedbytes -= tmp;
+                       ichdr_d->usedbytes += tmp;
+                       ichdr_s->count -= 1;
+                       ichdr_d->count += 1;
+                       tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
+                                       + xfs_attr3_leaf_hdr_size(leaf_d);
+                       ASSERT(ichdr_d->firstused >= tmp);
+#ifdef GROT
+               }
+#endif /* GROT */
+       }
+
+       /*
+        * Zero out the entries we just copied.
+        */
+       if (start_s == ichdr_s->count) {
+               tmp = count * sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
+               ASSERT(((char *)entry_s + tmp) <=
+                      ((char *)leaf_s + args->geo->blksize));
+               memset(entry_s, 0, tmp);
+       } else {
+               /*
+                * Move the remaining entries down to fill the hole,
+                * then zero the entries at the top.
+                */
+               tmp  = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count];
+               entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
+               memmove(entry_d, entry_s, tmp);
+
+               tmp = count * sizeof(xfs_attr_leaf_entry_t);
+               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
+               ASSERT(((char *)entry_s + tmp) <=
+                      ((char *)leaf_s + args->geo->blksize));
+               memset(entry_s, 0, tmp);
+       }
+
+       /*
+        * Fill in the freemap information
+        */
+       ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
+       ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
+       ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
+       ichdr_d->freemap[1].base = 0;
+       ichdr_d->freemap[2].base = 0;
+       ichdr_d->freemap[1].size = 0;
+       ichdr_d->freemap[2].size = 0;
+       ichdr_s->holes = 1;     /* leaf may not be compact */
+}
+
+/*
+ * Pick up the last hashval from a leaf block.
+ */
+xfs_dahash_t
+xfs_attr_leaf_lasthash(
+       struct xfs_buf  *bp,
+       int             *count)
+{
+       struct xfs_attr3_icleaf_hdr ichdr;
+       struct xfs_attr_leaf_entry *entries;
+
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
+       entries = xfs_attr3_leaf_entryp(bp->b_addr);
+       if (count)
+               *count = ichdr.count;
+       if (!ichdr.count)
+               return 0;
+       return be32_to_cpu(entries[ichdr.count - 1].hashval);
+}
+
+/*
+ * Calculate the number of bytes used to store the indicated attribute
+ * (whether local or remote, only calculate bytes in this block).
+ */
+STATIC int
+xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
+{
+       struct xfs_attr_leaf_entry *entries;
+       xfs_attr_leaf_name_local_t *name_loc;
+       xfs_attr_leaf_name_remote_t *name_rmt;
+       int size;
+
+       entries = xfs_attr3_leaf_entryp(leaf);
+       if (entries[index].flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf, index);
+               size = xfs_attr_leaf_entsize_local(name_loc->namelen,
+                                                  be16_to_cpu(name_loc->valuelen));
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, index);
+               size = xfs_attr_leaf_entsize_remote(name_rmt->namelen);
+       }
+       return size;
+}
+
+/*
+ * Calculate the number of bytes that would be required to store the new
+ * attribute (whether local or remote, only calculate bytes in this block).
+ * This routine decides as a side effect whether the attribute will be
+ * a "local" or a "remote" attribute.
+ */
+int
+xfs_attr_leaf_newentsize(
+       struct xfs_da_args      *args,
+       int                     *local)
+{
+       int                     size;
+
+       size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen);
+       if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) {
+               if (local)
+                       *local = 1;
+               return size;
+       }
+       if (local)
+               *local = 0;
+       return xfs_attr_leaf_entsize_remote(args->namelen);
+}
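
Aside (not part of the patch): the helper above sizes the would-be local record and falls back to a remote layout when it exceeds the per-block cap. The sketch below mirrors only the decision shape; the 3-byte local header, 4-byte remote header and half-a-block cap are illustrative assumptions, not the real on-disk limits:

#include <stdio.h>

/* Decide local vs. remote for a new attribute (assumed sizes only). */
static int newentsize(int namelen, int valuelen, int blksize, int *local)
{
        int size = 3 + namelen + valuelen;      /* assumed local record */

        if (size < blksize / 2) {               /* assumed cap */
                *local = 1;
                return size;
        }
        *local = 0;
        return 4 + namelen;                     /* assumed remote record */
}

int main(void)
{
        int local;
        int size = newentsize(12, 3000, 4096, &local);

        printf("size %d, local %d\n", size, local);
        return 0;
}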
+
+
+/*========================================================================
+ * Manage the INCOMPLETE flag in a leaf entry
+ *========================================================================*/
+
+/*
+ * Clear the INCOMPLETE flag on an entry in a leaf block.
+ */
+int
+xfs_attr3_leaf_clearflag(
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       struct xfs_buf          *bp;
+       int                     error;
+#ifdef DEBUG
+       struct xfs_attr3_icleaf_hdr ichdr;
+       xfs_attr_leaf_name_local_t *name_loc;
+       int namelen;
+       char *name;
+#endif /* DEBUG */
+
+       trace_xfs_attr_leaf_clearflag(args);
+       /*
+        * Set up the operation.
+        */
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
+
+       leaf = bp->b_addr;
+       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
+       ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
+
+#ifdef DEBUG
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       ASSERT(args->index < ichdr.count);
+       ASSERT(args->index >= 0);
+
+       if (entry->flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
+               namelen = name_loc->namelen;
+               name = (char *)name_loc->nameval;
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+               namelen = name_rmt->namelen;
+               name = (char *)name_rmt->name;
+       }
+       ASSERT(be32_to_cpu(entry->hashval) == args->hashval);
+       ASSERT(namelen == args->namelen);
+       ASSERT(memcmp(name, args->name, namelen) == 0);
+#endif /* DEBUG */
+
+       entry->flags &= ~XFS_ATTR_INCOMPLETE;
+       xfs_trans_log_buf(args->trans, bp,
+                        XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+
+       if (args->rmtblkno) {
+               ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+               name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
+               name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
+               xfs_trans_log_buf(args->trans, bp,
+                        XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
+       }
+
+       /*
+        * Commit the flag value change and start the next trans in series.
+        */
+       return xfs_trans_roll(&args->trans, args->dp);
+}
+
+/*
+ * Set the INCOMPLETE flag on an entry in a leaf block.
+ */
+int
+xfs_attr3_leaf_setflag(
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf;
+       struct xfs_attr_leaf_entry *entry;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       struct xfs_buf          *bp;
+       int error;
+#ifdef DEBUG
+       struct xfs_attr3_icleaf_hdr ichdr;
+#endif
+
+       trace_xfs_attr_leaf_setflag(args);
+
+       /*
+        * Set up the operation.
+        */
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+       if (error)
+               return error;
+
+       leaf = bp->b_addr;
+#ifdef DEBUG
+       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       ASSERT(args->index < ichdr.count);
+       ASSERT(args->index >= 0);
+#endif
+       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
+
+       ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
+       entry->flags |= XFS_ATTR_INCOMPLETE;
+       xfs_trans_log_buf(args->trans, bp,
+                       XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+       if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+               name_rmt->valueblk = 0;
+               name_rmt->valuelen = 0;
+               xfs_trans_log_buf(args->trans, bp,
+                        XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
+       }
+
+       /*
+        * Commit the flag value change and start the next trans in series.
+        */
+       return xfs_trans_roll(&args->trans, args->dp);
+}
+
+/*
+ * In a single transaction, clear the INCOMPLETE flag on the leaf entry
+ * given by args->blkno/index and set the INCOMPLETE flag on the leaf
+ * entry given by args->blkno2/index2.
+ *
+ * Note that they could be in different blocks, or in the same block.
+ */
+int
+xfs_attr3_leaf_flipflags(
+       struct xfs_da_args      *args)
+{
+       struct xfs_attr_leafblock *leaf1;
+       struct xfs_attr_leafblock *leaf2;
+       struct xfs_attr_leaf_entry *entry1;
+       struct xfs_attr_leaf_entry *entry2;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+       struct xfs_buf          *bp1;
+       struct xfs_buf          *bp2;
+       int error;
+#ifdef DEBUG
+       struct xfs_attr3_icleaf_hdr ichdr1;
+       struct xfs_attr3_icleaf_hdr ichdr2;
+       xfs_attr_leaf_name_local_t *name_loc;
+       int namelen1, namelen2;
+       char *name1, *name2;
+#endif /* DEBUG */
+
+       trace_xfs_attr_leaf_flipflags(args);
+
+       /*
+        * Read the block containing the "old" attr
+        */
+       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
+       if (error)
+               return error;
+
+       /*
+        * Read the block containing the "new" attr, if it is different
+        */
+       if (args->blkno2 != args->blkno) {
+               error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
+                                          -1, &bp2);
+               if (error)
+                       return error;
+       } else {
+               bp2 = bp1;
+       }
+
+       leaf1 = bp1->b_addr;
+       entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index];
+
+       leaf2 = bp2->b_addr;
+       entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
+
+#ifdef DEBUG
+       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+       ASSERT(args->index < ichdr1.count);
+       ASSERT(args->index >= 0);
+
+       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       ASSERT(args->index2 < ichdr2.count);
+       ASSERT(args->index2 >= 0);
+
+       if (entry1->flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf1, args->index);
+               namelen1 = name_loc->namelen;
+               name1 = (char *)name_loc->nameval;
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
+               namelen1 = name_rmt->namelen;
+               name1 = (char *)name_rmt->name;
+       }
+       if (entry2->flags & XFS_ATTR_LOCAL) {
+               name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2);
+               namelen2 = name_loc->namelen;
+               name2 = (char *)name_loc->nameval;
+       } else {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
+               namelen2 = name_rmt->namelen;
+               name2 = (char *)name_rmt->name;
+       }
+       ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval));
+       ASSERT(namelen1 == namelen2);
+       ASSERT(memcmp(name1, name2, namelen1) == 0);
+#endif /* DEBUG */
+
+       ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE);
+       ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0);
+
+       entry1->flags &= ~XFS_ATTR_INCOMPLETE;
+       xfs_trans_log_buf(args->trans, bp1,
+                         XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
+       if (args->rmtblkno) {
+               ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
+               name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
+               name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
+               name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
+               xfs_trans_log_buf(args->trans, bp1,
+                        XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
+       }
+
+       entry2->flags |= XFS_ATTR_INCOMPLETE;
+       xfs_trans_log_buf(args->trans, bp2,
+                         XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
+       if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
+               name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
+               name_rmt->valueblk = 0;
+               name_rmt->valuelen = 0;
+               xfs_trans_log_buf(args->trans, bp2,
+                        XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt)));
+       }
+
+       /*
+        * Commit the flag value change and start the next trans in series.
+        */
+       error = xfs_trans_roll(&args->trans, args->dp);
+
+       return error;
+}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
new file mode 100644 (file)
index 0000000..a8bbc56
--- /dev/null
@@ -0,0 +1,628 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_buf_item.h"
+#include "xfs_error.h"
+
+#define ATTR_RMTVALUE_MAPSIZE  1       /* # of map entries at once */
+
+/*
+ * Each contiguous block has a header, so it is not just a simple attribute
+ * length to FSB conversion.
+ */
+int
+xfs_attr3_rmt_blocks(
+       struct xfs_mount *mp,
+       int             attrlen)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+               return (attrlen + buflen - 1) / buflen;
+       }
+       return XFS_B_TO_FSB(mp, attrlen);
+}
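
As a rough illustration of the block-count math in xfs_attr3_rmt_blocks() above (an annotation, not part of the commit): on a CRC filesystem each remote block loses a header's worth of space, so the count is a round-up division by the per-block payload rather than a plain byte-to-FSB conversion. The 4096-byte block and 56-byte header below are assumed, illustrative values.

    #include <stdio.h>

    int main(void)
    {
            int blocksize = 4096;  /* assumed filesystem block size */
            int hdr_size = 56;     /* assumed per-block remote attr header size */
            int buflen = blocksize - hdr_size;   /* usable payload per block */
            int attrlen = 10000;   /* example remote attribute value length */
            int blocks;

            /* round up to whole blocks of payload, as the CRC case above does */
            blocks = (attrlen + buflen - 1) / buflen;
            printf("%d byte value -> %d remote blocks (%d payload bytes each)\n",
                   attrlen, blocks, buflen);
            return 0;
    }

Without CRCs there is no per-block header, which is why the non-CRC case above falls back to a plain byte-to-FSB conversion.
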
+
+/*
+ * Checking of the remote attribute header is split into two parts. The verifier
+ * does CRC, location and bounds checking, the unpacking function checks the
+ * attribute parameters and owner.
+ */
+static bool
+xfs_attr3_rmt_hdr_ok(
+       void                    *ptr,
+       xfs_ino_t               ino,
+       uint32_t                offset,
+       uint32_t                size,
+       xfs_daddr_t             bno)
+{
+       struct xfs_attr3_rmt_hdr *rmt = ptr;
+
+       if (bno != be64_to_cpu(rmt->rm_blkno))
+               return false;
+       if (offset != be32_to_cpu(rmt->rm_offset))
+               return false;
+       if (size != be32_to_cpu(rmt->rm_bytes))
+               return false;
+       if (ino != be64_to_cpu(rmt->rm_owner))
+               return false;
+
+       /* ok */
+       return true;
+}
+
+static bool
+xfs_attr3_rmt_verify(
+       struct xfs_mount        *mp,
+       void                    *ptr,
+       int                     fsbsize,
+       xfs_daddr_t             bno)
+{
+       struct xfs_attr3_rmt_hdr *rmt = ptr;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return false;
+       if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
+               return false;
+       if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       if (be64_to_cpu(rmt->rm_blkno) != bno)
+               return false;
+       if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
+               return false;
+       if (be32_to_cpu(rmt->rm_offset) +
+                               be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
+               return false;
+       if (rmt->rm_owner == 0)
+               return false;
+
+       return true;
+}
+
+static void
+xfs_attr3_rmt_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       char            *ptr;
+       int             len;
+       xfs_daddr_t     bno;
+       int             blksize = mp->m_attr_geo->blksize;
+
+       /* no verification of non-crc buffers */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       ptr = bp->b_addr;
+       bno = bp->b_bn;
+       len = BBTOB(bp->b_length);
+       ASSERT(len >= blksize);
+
+       while (len > 0) {
+               if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
+                       xfs_buf_ioerror(bp, EFSBADCRC);
+                       break;
+               }
+               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       break;
+               }
+               len -= blksize;
+               ptr += blksize;
+               bno += BTOBB(blksize);
+       }
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+       else
+               ASSERT(len == 0);
+}
+
+static void
+xfs_attr3_rmt_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       char            *ptr;
+       int             len;
+       xfs_daddr_t     bno;
+       int             blksize = mp->m_attr_geo->blksize;
+
+       /* no verification of non-crc buffers */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       ptr = bp->b_addr;
+       bno = bp->b_bn;
+       len = BBTOB(bp->b_length);
+       ASSERT(len >= blksize);
+
+       while (len > 0) {
+               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       xfs_verifier_error(bp);
+                       return;
+               }
+               if (bip) {
+                       struct xfs_attr3_rmt_hdr *rmt;
+
+                       rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+                       rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+               }
+               xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
+
+               len -= blksize;
+               ptr += blksize;
+               bno += BTOBB(blksize);
+       }
+       ASSERT(len == 0);
+}
+
+const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
+       .verify_read = xfs_attr3_rmt_read_verify,
+       .verify_write = xfs_attr3_rmt_write_verify,
+};
+
+STATIC int
+xfs_attr3_rmt_hdr_set(
+       struct xfs_mount        *mp,
+       void                    *ptr,
+       xfs_ino_t               ino,
+       uint32_t                offset,
+       uint32_t                size,
+       xfs_daddr_t             bno)
+{
+       struct xfs_attr3_rmt_hdr *rmt = ptr;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return 0;
+
+       rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
+       rmt->rm_offset = cpu_to_be32(offset);
+       rmt->rm_bytes = cpu_to_be32(size);
+       uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
+       rmt->rm_owner = cpu_to_be64(ino);
+       rmt->rm_blkno = cpu_to_be64(bno);
+
+       return sizeof(struct xfs_attr3_rmt_hdr);
+}
+
+/*
+ * Helper functions to copy attribute data in and out of the on-disk extents.
+ */
+STATIC int
+xfs_attr_rmtval_copyout(
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp,
+       xfs_ino_t       ino,
+       int             *offset,
+       int             *valuelen,
+       __uint8_t       **dst)
+{
+       char            *src = bp->b_addr;
+       xfs_daddr_t     bno = bp->b_bn;
+       int             len = BBTOB(bp->b_length);
+       int             blksize = mp->m_attr_geo->blksize;
+
+       ASSERT(len >= blksize);
+
+       while (len > 0 && *valuelen > 0) {
+               int hdr_size = 0;
+               int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+
+               byte_cnt = min(*valuelen, byte_cnt);
+
+               if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                       if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
+                                                 byte_cnt, bno)) {
+                               xfs_alert(mp,
+"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+                                       bno, *offset, byte_cnt, ino);
+                               return EFSCORRUPTED;
+                       }
+                       hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
+               }
+
+               memcpy(*dst, src + hdr_size, byte_cnt);
+
+               /* roll buffer forwards */
+               len -= blksize;
+               src += blksize;
+               bno += BTOBB(blksize);
+
+               /* roll attribute data forwards */
+               *valuelen -= byte_cnt;
+               *dst += byte_cnt;
+               *offset += byte_cnt;
+       }
+       return 0;
+}
+
+STATIC void
+xfs_attr_rmtval_copyin(
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp,
+       xfs_ino_t       ino,
+       int             *offset,
+       int             *valuelen,
+       __uint8_t       **src)
+{
+       char            *dst = bp->b_addr;
+       xfs_daddr_t     bno = bp->b_bn;
+       int             len = BBTOB(bp->b_length);
+       int             blksize = mp->m_attr_geo->blksize;
+
+       ASSERT(len >= blksize);
+
+       while (len > 0 && *valuelen > 0) {
+               int hdr_size;
+               int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+
+               byte_cnt = min(*valuelen, byte_cnt);
+               hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
+                                                byte_cnt, bno);
+
+               memcpy(dst + hdr_size, *src, byte_cnt);
+
+               /*
+                * If this is the last block, zero the remainder of it.
+                * Check that we are actually the last block, too.
+                */
+               if (byte_cnt + hdr_size < blksize) {
+                       ASSERT(*valuelen - byte_cnt == 0);
+                       ASSERT(len == blksize);
+                       memset(dst + hdr_size + byte_cnt, 0,
+                                       blksize - hdr_size - byte_cnt);
+               }
+
+               /* roll buffer forwards */
+               len -= blksize;
+               dst += blksize;
+               bno += BTOBB(blksize);
+
+               /* roll attribute data forwards */
+               *valuelen -= byte_cnt;
+               *src += byte_cnt;
+               *offset += byte_cnt;
+       }
+}
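
Both helpers above walk the buffer with the same rolling-pointer pattern. The fragment below is an illustrative, self-contained restatement of the copy-out direction only; blksize and hdr_size are caller-supplied stand-ins for the attr geometry block size and the remote header size, and rmt_copyout_sketch() is a hypothetical name, not an XFS function.

    #include <string.h>

    /*
     * Walk the buffer one block at a time, skip the per-block header, and
     * advance the destination and remaining-length cursors by the payload
     * actually copied.
     */
    static void rmt_copyout_sketch(const char *buf, int buflen, int blksize,
                                   int hdr_size, char *dst, int valuelen)
    {
            while (buflen > 0 && valuelen > 0) {
                    int byte_cnt = blksize - hdr_size;   /* payload per block */

                    if (byte_cnt > valuelen)
                            byte_cnt = valuelen;
                    memcpy(dst, buf + hdr_size, byte_cnt);

                    /* roll buffer forwards */
                    buf += blksize;
                    buflen -= blksize;

                    /* roll attribute data forwards */
                    dst += byte_cnt;
                    valuelen -= byte_cnt;
            }
    }
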
+
+/*
+ * Read the value associated with an attribute from the out-of-line buffer
+ * that we stored it in.
+ */
+int
+xfs_attr_rmtval_get(
+       struct xfs_da_args      *args)
+{
+       struct xfs_bmbt_irec    map[ATTR_RMTVALUE_MAPSIZE];
+       struct xfs_mount        *mp = args->dp->i_mount;
+       struct xfs_buf          *bp;
+       xfs_dablk_t             lblkno = args->rmtblkno;
+       __uint8_t               *dst = args->value;
+       int                     valuelen;
+       int                     nmap;
+       int                     error;
+       int                     blkcnt = args->rmtblkcnt;
+       int                     i;
+       int                     offset = 0;
+
+       trace_xfs_attr_rmtval_get(args);
+
+       ASSERT(!(args->flags & ATTR_KERNOVAL));
+       ASSERT(args->rmtvaluelen == args->valuelen);
+
+       valuelen = args->rmtvaluelen;
+       while (valuelen > 0) {
+               nmap = ATTR_RMTVALUE_MAPSIZE;
+               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+                                      blkcnt, map, &nmap,
+                                      XFS_BMAPI_ATTRFORK);
+               if (error)
+                       return error;
+               ASSERT(nmap >= 1);
+
+               for (i = 0; (i < nmap) && (valuelen > 0); i++) {
+                       xfs_daddr_t     dblkno;
+                       int             dblkcnt;
+
+                       ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
+                              (map[i].br_startblock != HOLESTARTBLOCK));
+                       dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
+                       dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+                       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                                                  dblkno, dblkcnt, 0, &bp,
+                                                  &xfs_attr3_rmt_buf_ops);
+                       if (error)
+                               return error;
+
+                       error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
+                                                       &offset, &valuelen,
+                                                       &dst);
+                       xfs_buf_relse(bp);
+                       if (error)
+                               return error;
+
+                       /* roll attribute extent map forwards */
+                       lblkno += map[i].br_blockcount;
+                       blkcnt -= map[i].br_blockcount;
+               }
+       }
+       ASSERT(valuelen == 0);
+       return 0;
+}
+
+/*
+ * Write the value associated with an attribute into the out-of-line buffer
+ * that we have defined for it.
+ */
+int
+xfs_attr_rmtval_set(
+       struct xfs_da_args      *args)
+{
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_bmbt_irec    map;
+       xfs_dablk_t             lblkno;
+       xfs_fileoff_t           lfileoff = 0;
+       __uint8_t               *src = args->value;
+       int                     blkcnt;
+       int                     valuelen;
+       int                     nmap;
+       int                     error;
+       int                     offset = 0;
+
+       trace_xfs_attr_rmtval_set(args);
+
+       /*
+        * Find a "hole" in the attribute address space large enough for
+        * us to drop the new attribute's value into. Because CRC enabled
+        * attributes have headers, we can't just do a straight byte to FSB
+        * conversion and have to take the header space into account.
+        */
+       blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
+       error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
+                                                  XFS_ATTR_FORK);
+       if (error)
+               return error;
+
+       args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
+       args->rmtblkcnt = blkcnt;
+
+       /*
+        * Roll through the "value", allocating blocks on disk as required.
+        */
+       while (blkcnt > 0) {
+               int     committed;
+
+               /*
+                * Allocate a single extent, up to the size of the value.
+                */
+               xfs_bmap_init(args->flist, args->firstblock);
+               nmap = 1;
+               error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
+                                 blkcnt,
+                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
+                                 args->firstblock, args->total, &map, &nmap,
+                                 args->flist);
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       return error;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, dp, 0);
+
+               ASSERT(nmap == 1);
+               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+                      (map.br_startblock != HOLESTARTBLOCK));
+               lblkno += map.br_blockcount;
+               blkcnt -= map.br_blockcount;
+
+               /*
+                * Start the next trans in the chain.
+                */
+               error = xfs_trans_roll(&args->trans, dp);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Roll through the "value", copying the attribute value to the
+        * already-allocated blocks.  Blocks are written synchronously
+        * so that we can know they are all on disk before we turn off
+        * the INCOMPLETE flag.
+        */
+       lblkno = args->rmtblkno;
+       blkcnt = args->rmtblkcnt;
+       valuelen = args->rmtvaluelen;
+       while (valuelen > 0) {
+               struct xfs_buf  *bp;
+               xfs_daddr_t     dblkno;
+               int             dblkcnt;
+
+               ASSERT(blkcnt > 0);
+
+               xfs_bmap_init(args->flist, args->firstblock);
+               nmap = 1;
+               error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
+                                      blkcnt, &map, &nmap,
+                                      XFS_BMAPI_ATTRFORK);
+               if (error)
+                       return error;
+               ASSERT(nmap == 1);
+               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+                      (map.br_startblock != HOLESTARTBLOCK));
+
+               dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+               dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+
+               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
+               if (!bp)
+                       return ENOMEM;
+               bp->b_ops = &xfs_attr3_rmt_buf_ops;
+
+               xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
+                                      &valuelen, &src);
+
+               error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
+               xfs_buf_relse(bp);
+               if (error)
+                       return error;
+
+               /* roll attribute extent map forwards */
+               lblkno += map.br_blockcount;
+               blkcnt -= map.br_blockcount;
+       }
+       ASSERT(valuelen == 0);
+       return 0;
+}
+
+/*
+ * Remove the value associated with an attribute by deleting the
+ * out-of-line buffer that it is stored on.
+ */
+int
+xfs_attr_rmtval_remove(
+       struct xfs_da_args      *args)
+{
+       struct xfs_mount        *mp = args->dp->i_mount;
+       xfs_dablk_t             lblkno;
+       int                     blkcnt;
+       int                     error;
+       int                     done;
+
+       trace_xfs_attr_rmtval_remove(args);
+
+       /*
+        * Roll through the "value", invalidating the attribute value's blocks.
+        */
+       lblkno = args->rmtblkno;
+       blkcnt = args->rmtblkcnt;
+       while (blkcnt > 0) {
+               struct xfs_bmbt_irec    map;
+               struct xfs_buf          *bp;
+               xfs_daddr_t             dblkno;
+               int                     dblkcnt;
+               int                     nmap;
+
+               /*
+                * Try to remember where we decided to put the value.
+                */
+               nmap = 1;
+               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+                                      blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
+               if (error)
+                       return error;
+               ASSERT(nmap == 1);
+               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+                      (map.br_startblock != HOLESTARTBLOCK));
+
+               dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+               dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+
+               /*
+                * If the "remote" value is in the cache, remove it.
+                */
+               bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
+               if (bp) {
+                       xfs_buf_stale(bp);
+                       xfs_buf_relse(bp);
+                       bp = NULL;
+               }
+
+               lblkno += map.br_blockcount;
+               blkcnt -= map.br_blockcount;
+       }
+
+       /*
+        * Keep de-allocating extents until the remote-value region is gone.
+        */
+       lblkno = args->rmtblkno;
+       blkcnt = args->rmtblkcnt;
+       done = 0;
+       while (!done) {
+               int committed;
+
+               xfs_bmap_init(args->flist, args->firstblock);
+               error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
+                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
+                                   1, args->firstblock, args->flist,
+                                   &done);
+               if (!error) {
+                       error = xfs_bmap_finish(&args->trans, args->flist,
+                                               &committed);
+               }
+               if (error) {
+                       ASSERT(committed);
+                       args->trans = NULL;
+                       xfs_bmap_cancel(args->flist);
+                       return error;
+               }
+
+               /*
+                * bmap_finish() may have committed the last trans and started
+                * a new one.  We need the inode to be in all transactions.
+                */
+               if (committed)
+                       xfs_trans_ijoin(args->trans, args->dp, 0);
+
+               /*
+                * Close out trans and start the next one in the chain.
+                */
+               error = xfs_trans_roll(&args->trans, args->dp);
+               if (error)
+                       return error;
+       }
+       return 0;
+}
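
xfs_attr_rmtval_remove() above is structured as two passes over the same logical range: first invalidate any cached copies of the value's blocks, then keep unmapping until the whole region is reported done, rolling the transaction between iterations. The toy program below captures only that control flow; invalidate_cached() and unmap_chunk() are hypothetical stand-ins, not XFS interfaces.

    #include <stdbool.h>
    #include <stdio.h>

    /* hypothetical stand-ins for buffer invalidation and extent unmapping */
    static void invalidate_cached(unsigned long long lblk, unsigned long long cnt)
    {
            printf("invalidate %llu blocks at %llu\n", cnt, lblk);
    }

    static bool unmap_chunk(unsigned long long lblk, unsigned long long cnt)
    {
            printf("unmap up to %llu blocks at %llu\n", cnt, lblk);
            return true;            /* pretend the whole range went in one call */
    }

    int main(void)
    {
            unsigned long long lblkno = 100, blkcnt = 8;
            bool done = false;

            /* pass one: drop cached copies so stale buffers cannot be written back */
            invalidate_cached(lblkno, blkcnt);

            /* pass two: keep unmapping the range until told it is all gone */
            while (!done)
                    done = unmap_chunk(lblkno, blkcnt);

            return 0;
    }
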
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
new file mode 100644 (file)
index 0000000..b44d631
--- /dev/null
@@ -0,0 +1,5609 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_trans_space.h"
+#include "xfs_buf_item.h"
+#include "xfs_trace.h"
+#include "xfs_symlink.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_dinode.h"
+#include "xfs_filestream.h"
+
+
+kmem_zone_t            *xfs_bmap_free_item_zone;
+
+/*
+ * Miscellaneous helper functions
+ */
+
+/*
+ * Compute and fill in the value of the maximum depth of a bmap btree
+ * in this filesystem.  Done once, during mount.
+ */
+void
+xfs_bmap_compute_maxlevels(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       int             whichfork)      /* data or attr fork */
+{
+       int             level;          /* btree level */
+       uint            maxblocks;      /* max blocks at this level */
+       uint            maxleafents;    /* max leaf entries possible */
+       int             maxrootrecs;    /* max records in root block */
+       int             minleafrecs;    /* min records in leaf block */
+       int             minnoderecs;    /* min records in node block */
+       int             sz;             /* root block size */
+
+       /*
+        * The maximum number of extents in a file, hence the maximum
+        * number of leaf entries, is controlled by the type of di_nextents
+        * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
+        * (a signed 16-bit number, xfs_aextnum_t).
+        *
+        * Note that we can no longer assume that, if we are in ATTR1,
+        * the fork offset of all the inodes will be
+        * (xfs_default_attroffset(ip) >> 3) because we could have mounted
+        * with ATTR2 and then mounted back with ATTR1, keeping the
+        * di_forkoff's fixed but probably at various positions. Therefore,
+        * for both ATTR1 and ATTR2 we have to assume the worst case scenario
+        * of a minimum size available.
+        */
+       if (whichfork == XFS_DATA_FORK) {
+               maxleafents = MAXEXTNUM;
+               sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+       } else {
+               maxleafents = MAXAEXTNUM;
+               sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
+       }
+       maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
+       minleafrecs = mp->m_bmap_dmnr[0];
+       minnoderecs = mp->m_bmap_dmnr[1];
+       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+       for (level = 1; maxblocks > 1; level++) {
+               if (maxblocks <= maxrootrecs)
+                       maxblocks = 1;
+               else
+                       maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+       }
+       mp->m_bm_maxlevels[whichfork] = level;
+}
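
To get a feel for the loop in xfs_bmap_compute_maxlevels() above, the standalone program below reruns the same arithmetic with assumed, illustrative geometry numbers (they are not values from a real superblock): ceil-divide the worst-case extent count by the leaf fan-out, then by the node fan-out until the remainder fits in the inode root.

    #include <stdio.h>

    int main(void)
    {
            unsigned int maxleafents = 1u << 21;  /* assumed max extent count */
            unsigned int minleafrecs = 9;         /* assumed min records per leaf */
            unsigned int minnoderecs = 4;         /* assumed min records per node */
            unsigned int maxrootrecs = 3;         /* assumed inode-root capacity */
            unsigned int maxblocks;
            int level;

            maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
            for (level = 1; maxblocks > 1; level++) {
                    if (maxblocks <= maxrootrecs)
                            maxblocks = 1;
                    else
                            maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
            }
            printf("max bmap btree depth: %d levels\n", level);
            return 0;
    }

For these assumed numbers the loop settles on 10 levels; the real answer depends on the filesystem's block and inode geometry.
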
+
+STATIC int                             /* error */
+xfs_bmbt_lookup_eq(
+       struct xfs_btree_cur    *cur,
+       xfs_fileoff_t           off,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.b.br_startoff = off;
+       cur->bc_rec.b.br_startblock = bno;
+       cur->bc_rec.b.br_blockcount = len;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+STATIC int                             /* error */
+xfs_bmbt_lookup_ge(
+       struct xfs_btree_cur    *cur,
+       xfs_fileoff_t           off,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       int                     *stat)  /* success/failure */
+{
+       cur->bc_rec.b.br_startoff = off;
+       cur->bc_rec.b.br_startblock = bno;
+       cur->bc_rec.b.br_blockcount = len;
+       return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Check if the inode needs to be converted to btree format.
+ */
+static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
+{
+       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+               XFS_IFORK_NEXTENTS(ip, whichfork) >
+                       XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * Check if the inode should be converted to extent format.
+ */
+static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
+{
+       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+               XFS_IFORK_NEXTENTS(ip, whichfork) <=
+                       XFS_IFORK_MAXEXT(ip, whichfork);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [off, bno, len, state].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_bmbt_update(
+       struct xfs_btree_cur    *cur,
+       xfs_fileoff_t           off,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       xfs_exntst_t            state)
+{
+       union xfs_btree_rec     rec;
+
+       xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+       return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Compute the worst-case number of indirect blocks that will be used
+ * for ip's delayed extent of length "len".
+ */
+STATIC xfs_filblks_t
+xfs_bmap_worst_indlen(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_filblks_t   len)            /* delayed extent length */
+{
+       int             level;          /* btree level number */
+       int             maxrecs;        /* maximum record count at this level */
+       xfs_mount_t     *mp;            /* mount structure */
+       xfs_filblks_t   rval;           /* return value */
+
+       mp = ip->i_mount;
+       maxrecs = mp->m_bmap_dmxr[0];
+       for (level = 0, rval = 0;
+            level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
+            level++) {
+               len += maxrecs - 1;
+               do_div(len, maxrecs);
+               rval += len;
+               if (len == 1)
+                       return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+                               level - 1;
+               if (level == 0)
+                       maxrecs = mp->m_bmap_dmxr[1];
+       }
+       return rval;
+}
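
Similarly, the worst-case estimate in xfs_bmap_worst_indlen() above can be replayed with small stand-in numbers. The fan-outs and level count below are assumed for illustration only: each level ceil-divides the remaining count by its fan-out and adds the result, and once a level collapses to a single block the remaining levels contribute one block each.

    #include <stdio.h>

    int main(void)
    {
            unsigned long long len = 1000;  /* example delayed extent length */
            unsigned int maxrecs = 9;       /* assumed leaf fan-out */
            unsigned int nodrecs = 4;       /* assumed node fan-out */
            int maxlevels = 5;              /* assumed max btree depth */
            unsigned long long rval = 0;
            int level;

            for (level = 0; level < maxlevels; level++) {
                    len = (len + maxrecs - 1) / maxrecs;
                    rval += len;
                    if (len == 1) {
                            rval += maxlevels - level - 1;
                            break;
                    }
                    if (level == 0)
                            maxrecs = nodrecs;
            }
            printf("worst-case indirect blocks: %llu\n", rval);
            return 0;
    }

With these stand-in numbers a 1000-block delayed extent reserves 150 worst-case indirect blocks (112 + 28 + 7 + 2 + 1).
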
+
+/*
+ * Calculate the default attribute fork offset for newly created inodes.
+ */
+uint
+xfs_default_attroffset(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       uint                    offset;
+
+       if (mp->m_sb.sb_inodesize == 256) {
+               offset = XFS_LITINO(mp, ip->i_d.di_version) -
+                               XFS_BMDR_SPACE_CALC(MINABTPTRS);
+       } else {
+               offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
+       }
+
+       ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
+       return offset;
+}
+
+/*
+ * Helper routine to reset inode di_forkoff field when switching
+ * attribute fork from local to extent format - we reset it where
+ * possible to make space available for inline data fork extents.
+ */
+STATIC void
+xfs_bmap_forkoff_reset(
+       xfs_inode_t     *ip,
+       int             whichfork)
+{
+       if (whichfork == XFS_ATTR_FORK &&
+           ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
+           ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
+           ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
+               uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
+
+               if (dfl_forkoff > ip->i_d.di_forkoff)
+                       ip->i_d.di_forkoff = dfl_forkoff;
+       }
+}
+
+/*
+ * Debug/sanity checking code
+ */
+
+STATIC int
+xfs_bmap_sanity_check(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp,
+       int                     level)
+{
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+
+       if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
+           block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
+               return 0;
+
+       if (be16_to_cpu(block->bb_level) != level ||
+           be16_to_cpu(block->bb_numrecs) == 0 ||
+           be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+               return 0;
+
+       return 1;
+}
+
+#ifdef DEBUG
+STATIC struct xfs_buf *
+xfs_bmap_get_bp(
+       struct xfs_btree_cur    *cur,
+       xfs_fsblock_t           bno)
+{
+       struct xfs_log_item_desc *lidp;
+       int                     i;
+
+       if (!cur)
+               return NULL;
+
+       for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
+               if (!cur->bc_bufs[i])
+                       break;
+               if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
+                       return cur->bc_bufs[i];
+       }
+
+       /* Chase down all the log items to see if the bp is there */
+       list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
+               struct xfs_buf_log_item *bip;
+               bip = (struct xfs_buf_log_item *)lidp->lid_item;
+               if (bip->bli_item.li_type == XFS_LI_BUF &&
+                   XFS_BUF_ADDR(bip->bli_buf) == bno)
+                       return bip->bli_buf;
+       }
+
+       return NULL;
+}
+
+STATIC void
+xfs_check_block(
+       struct xfs_btree_block  *block,
+       xfs_mount_t             *mp,
+       int                     root,
+       short                   sz)
+{
+       int                     i, j, dmxr;
+       __be64                  *pp, *thispa;   /* pointer to block address */
+       xfs_bmbt_key_t          *prevp, *keyp;
+
+       ASSERT(be16_to_cpu(block->bb_level) > 0);
+
+       prevp = NULL;
+       for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
+               dmxr = mp->m_bmap_dmxr[0];
+               keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
+
+               if (prevp) {
+                       ASSERT(be64_to_cpu(prevp->br_startoff) <
+                              be64_to_cpu(keyp->br_startoff));
+               }
+               prevp = keyp;
+
+               /*
+                * Compare the block numbers to see if there are dups.
+                */
+               if (root)
+                       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+               else
+                       pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
+
+               for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
+                       if (root)
+                               thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+                       else
+                               thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
+                       if (*thispa == *pp) {
+                               xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
+                                       __func__, j, i,
+                                       (unsigned long long)be64_to_cpu(*thispa));
+                               panic("%s: ptrs are equal in node\n",
+                                       __func__);
+                       }
+               }
+       }
+}
+
+/*
+ * Check that the extents for the inode ip are in the right order in all
+ * btree leaves.
+ */
+
+STATIC void
+xfs_bmap_check_leaf_extents(
+       xfs_btree_cur_t         *cur,   /* btree cursor or null */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       int                     whichfork)      /* data or attr fork */
+{
+       struct xfs_btree_block  *block; /* current btree block */
+       xfs_fsblock_t           bno;    /* block # of "block" */
+       xfs_buf_t               *bp;    /* buffer for "block" */
+       int                     error;  /* error return value */
+       xfs_extnum_t            i=0, j; /* index into the extents list */
+       xfs_ifork_t             *ifp;   /* fork structure */
+       int                     level;  /* btree level, for checking */
+       xfs_mount_t             *mp;    /* file system mount structure */
+       __be64                  *pp;    /* pointer to block address */
+       xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
+       xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
+       xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
+       int                     bp_release = 0;
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
+               return;
+       }
+
+       bno = NULLFSBLOCK;
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       block = ifp->if_broot;
+       /*
+        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+        */
+       level = be16_to_cpu(block->bb_level);
+       ASSERT(level > 0);
+       xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
+       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+       bno = be64_to_cpu(*pp);
+
+       ASSERT(bno != NULLDFSBNO);
+       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+       /*
+        * Go down the tree until leaf level is reached, following the first
+        * pointer (leftmost) at each level.
+        */
+       while (level-- > 0) {
+               /* See if buf is in cur first */
+               bp_release = 0;
+               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
+               if (!bp) {
+                       bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
+                               goto error_norelse;
+               }
+               block = XFS_BUF_TO_BLOCK(bp);
+               XFS_WANT_CORRUPTED_GOTO(
+                       xfs_bmap_sanity_check(mp, bp, level),
+                       error0);
+               if (level == 0)
+                       break;
+
+               /*
+                * Check this block for basic sanity (increasing keys and
+                * no duplicate blocks).
+                */
+
+               xfs_check_block(block, mp, 0, 0);
+               pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
+               bno = be64_to_cpu(*pp);
+               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               if (bp_release) {
+                       bp_release = 0;
+                       xfs_trans_brelse(NULL, bp);
+               }
+       }
+
+       /*
+        * Here with bp and block set to the leftmost leaf node in the tree.
+        */
+       i = 0;
+
+       /*
+        * Loop over all leaf nodes checking that all extents are in the right order.
+        */
+       for (;;) {
+               xfs_fsblock_t   nextbno;
+               xfs_extnum_t    num_recs;
+
+
+               num_recs = xfs_btree_get_numrecs(block);
+
+               /*
+                * Read-ahead the next leaf block, if any.
+                */
+
+               nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+               /*
+                * Check all the extents to make sure they are OK.
+                * If we had a previous block, the last entry should
+                * conform with the first entry in this one.
+                */
+
+               ep = XFS_BMBT_REC_ADDR(mp, block, 1);
+               if (i) {
+                       ASSERT(xfs_bmbt_disk_get_startoff(&last) +
+                              xfs_bmbt_disk_get_blockcount(&last) <=
+                              xfs_bmbt_disk_get_startoff(ep));
+               }
+               for (j = 1; j < num_recs; j++) {
+                       nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+                       ASSERT(xfs_bmbt_disk_get_startoff(ep) +
+                              xfs_bmbt_disk_get_blockcount(ep) <=
+                              xfs_bmbt_disk_get_startoff(nextp));
+                       ep = nextp;
+               }
+
+               last = *ep;
+               i += num_recs;
+               if (bp_release) {
+                       bp_release = 0;
+                       xfs_trans_brelse(NULL, bp);
+               }
+               bno = nextbno;
+               /*
+                * If we've reached the end, stop.
+                */
+               if (bno == NULLFSBLOCK)
+                       break;
+
+               bp_release = 0;
+               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
+               if (!bp) {
+                       bp_release = 1;
+                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+                                               XFS_BMAP_BTREE_REF,
+                                               &xfs_bmbt_buf_ops);
+                       if (error)
+                               goto error_norelse;
+               }
+               block = XFS_BUF_TO_BLOCK(bp);
+       }
+       if (bp_release) {
+               bp_release = 0;
+               xfs_trans_brelse(NULL, bp);
+       }
+       return;
+
+error0:
+       xfs_warn(mp, "%s: at error0", __func__);
+       if (bp_release)
+               xfs_trans_brelse(NULL, bp);
+error_norelse:
+       xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
+               __func__, i);
+       panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
+       return;
+}
+
+/*
+ * Add bmap trace insert entries for all the contents of the extent records.
+ */
+void
+xfs_bmap_trace_exlist(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    cnt,            /* count of entries in the list */
+       int             whichfork,      /* data or attr fork */
+       unsigned long   caller_ip)
+{
+       xfs_extnum_t    idx;            /* extent record index */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       int             state = 0;
+
+       if (whichfork == XFS_ATTR_FORK)
+               state |= BMAP_ATTRFORK;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+       for (idx = 0; idx < cnt; idx++)
+               trace_xfs_extlist(ip, idx, whichfork, caller_ip);
+}
+
+/*
+ * Validate that the bmbt_irecs being returned from bmapi are valid
+ * given the caller's original parameters.  Specifically check the
+ * ranges of the returned irecs to ensure that they only extend beyond
+ * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
+ */
+STATIC void
+xfs_bmap_validate_ret(
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len,
+       int                     flags,
+       xfs_bmbt_irec_t         *mval,
+       int                     nmap,
+       int                     ret_nmap)
+{
+       int                     i;              /* index to map values */
+
+       ASSERT(ret_nmap <= nmap);
+
+       for (i = 0; i < ret_nmap; i++) {
+               ASSERT(mval[i].br_blockcount > 0);
+               if (!(flags & XFS_BMAPI_ENTIRE)) {
+                       ASSERT(mval[i].br_startoff >= bno);
+                       ASSERT(mval[i].br_blockcount <= len);
+                       ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
+                              bno + len);
+               } else {
+                       ASSERT(mval[i].br_startoff < bno + len);
+                       ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
+                              bno);
+               }
+               ASSERT(i == 0 ||
+                      mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
+                      mval[i].br_startoff);
+               ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
+                      mval[i].br_startblock != HOLESTARTBLOCK);
+               ASSERT(mval[i].br_state == XFS_EXT_NORM ||
+                      mval[i].br_state == XFS_EXT_UNWRITTEN);
+       }
+}
+
+#else
+#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)                do { } while (0)
+#define        xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
+#endif /* DEBUG */
+
+/*
+ * bmap free list manipulation functions
+ */
+
+/*
+ * Add the extent to the list of extents to be freed at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+void
+xfs_bmap_add_free(
+       xfs_fsblock_t           bno,            /* fs block number of extent */
+       xfs_filblks_t           len,            /* length of extent */
+       xfs_bmap_free_t         *flist,         /* list of extents */
+       xfs_mount_t             *mp)            /* mount point structure */
+{
+       xfs_bmap_free_item_t    *cur;           /* current (next) element */
+       xfs_bmap_free_item_t    *new;           /* new element */
+       xfs_bmap_free_item_t    *prev;          /* previous element */
+#ifdef DEBUG
+       xfs_agnumber_t          agno;
+       xfs_agblock_t           agbno;
+
+       ASSERT(bno != NULLFSBLOCK);
+       ASSERT(len > 0);
+       ASSERT(len <= MAXEXTLEN);
+       ASSERT(!isnullstartblock(bno));
+       agno = XFS_FSB_TO_AGNO(mp, bno);
+       agbno = XFS_FSB_TO_AGBNO(mp, bno);
+       ASSERT(agno < mp->m_sb.sb_agcount);
+       ASSERT(agbno < mp->m_sb.sb_agblocks);
+       ASSERT(len < mp->m_sb.sb_agblocks);
+       ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+#endif
+       ASSERT(xfs_bmap_free_item_zone != NULL);
+       new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+       new->xbfi_startblock = bno;
+       new->xbfi_blockcount = (xfs_extlen_t)len;
+       for (prev = NULL, cur = flist->xbf_first;
+            cur != NULL;
+            prev = cur, cur = cur->xbfi_next) {
+               if (cur->xbfi_startblock >= bno)
+                       break;
+       }
+       if (prev)
+               prev->xbfi_next = new;
+       else
+               flist->xbf_first = new;
+       new->xbfi_next = cur;
+       flist->xbf_count++;
+}
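
The insert above is a plain sorted insertion into a singly linked list keyed by starting block. A generic, self-contained sketch of the same pattern (with hypothetical names, not the XFS types) looks like this:

    struct free_item {
            unsigned long long      start;  /* stands in for xbfi_startblock */
            struct free_item        *next;  /* stands in for xbfi_next */
    };

    /* splice "new" in front of the first node whose key is >= new->start */
    static void sorted_insert(struct free_item **head, struct free_item *new)
    {
            struct free_item *prev = NULL;
            struct free_item *cur;

            for (cur = *head; cur != NULL; prev = cur, cur = cur->next) {
                    if (cur->start >= new->start)
                            break;
            }
            if (prev)
                    prev->next = new;
            else
                    *head = new;
            new->next = cur;
    }
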
+
+/*
+ * Remove the entry "free" from the free item list.  Prev points to the
+ * previous entry, unless "free" is the head of the list.
+ */
+void
+xfs_bmap_del_free(
+       xfs_bmap_free_t         *flist, /* free item list header */
+       xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
+       xfs_bmap_free_item_t    *free)  /* list item to be freed */
+{
+       if (prev)
+               prev->xbfi_next = free->xbfi_next;
+       else
+               flist->xbf_first = free->xbfi_next;
+       flist->xbf_count--;
+       kmem_zone_free(xfs_bmap_free_item_zone, free);
+}
+
+/*
+ * Free up any items left in the list.
+ */
+void
+xfs_bmap_cancel(
+       xfs_bmap_free_t         *flist) /* list of bmap_free_items */
+{
+       xfs_bmap_free_item_t    *free;  /* free list item */
+       xfs_bmap_free_item_t    *next;
+
+       if (flist->xbf_count == 0)
+               return;
+       ASSERT(flist->xbf_first != NULL);
+       for (free = flist->xbf_first; free; free = next) {
+               next = free->xbfi_next;
+               xfs_bmap_del_free(flist, NULL, free);
+       }
+       ASSERT(flist->xbf_count == 0);
+}
+
+/*
+ * Inode fork format manipulation functions
+ */
+
+/*
+ * Transform a btree format file with only one leaf node, where the
+ * extents list will fit in the inode, into an extents format file.
+ * Since the file extents are already in-core, all we have to do is
+ * give up the space for the btree root and pitch the leaf block.
+ */
+STATIC int                             /* error */
+xfs_bmap_btree_to_extents(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork)  /* data or attr fork */
+{
+       /* REFERENCED */
+       struct xfs_btree_block  *cblock;/* child btree block */
+       xfs_fsblock_t           cbno;   /* child block number */
+       xfs_buf_t               *cbp;   /* child block's buffer */
+       int                     error;  /* error return value */
+       xfs_ifork_t             *ifp;   /* inode fork data */
+       xfs_mount_t             *mp;    /* mount point structure */
+       __be64                  *pp;    /* ptr to block address */
+       struct xfs_btree_block  *rblock;/* root btree block */
+
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+       rblock = ifp->if_broot;
+       ASSERT(be16_to_cpu(rblock->bb_level) == 1);
+       ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
+       ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
+       cbno = be64_to_cpu(*pp);
+       *logflagsp = 0;
+#ifdef DEBUG
+       if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
+               return error;
+#endif
+       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+                               &xfs_bmbt_buf_ops);
+       if (error)
+               return error;
+       cblock = XFS_BUF_TO_BLOCK(cbp);
+       if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
+               return error;
+       xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
+       ip->i_d.di_nblocks--;
+       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+       xfs_trans_binval(tp, cbp);
+       if (cur->bc_bufs[0] == cbp)
+               cur->bc_bufs[0] = NULL;
+       xfs_iroot_realloc(ip, -1, whichfork);
+       ASSERT(ifp->if_broot == NULL);
+       ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+       *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
+       return 0;
+}
+
+/*
+ * Convert an extents-format file into a btree-format file.
+ * The new file will have a root block (in the inode) and a single child block.
+ */
+STATIC int                                     /* error */
+xfs_bmap_extents_to_btree(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       xfs_fsblock_t           *firstblock,    /* first-block-allocated */
+       xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
+       xfs_btree_cur_t         **curp,         /* cursor returned to caller */
+       int                     wasdel,         /* converting a delayed alloc */
+       int                     *logflagsp,     /* inode logging flags */
+       int                     whichfork)      /* data or attr fork */
+{
+       struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
+       xfs_buf_t               *abp;           /* buffer for ablock */
+       xfs_alloc_arg_t         args;           /* allocation arguments */
+       xfs_bmbt_rec_t          *arp;           /* child record pointer */
+       struct xfs_btree_block  *block;         /* btree root block */
+       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
+       xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
+       int                     error;          /* error return value */
+       xfs_extnum_t            i, cnt;         /* extent record index */
+       xfs_ifork_t             *ifp;           /* inode fork pointer */
+       xfs_bmbt_key_t          *kp;            /* root block key pointer */
+       xfs_mount_t             *mp;            /* mount structure */
+       xfs_extnum_t            nextents;       /* number of file extents */
+       xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
+
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
+
+       /*
+        * Make space in the inode incore.
+        */
+       xfs_iroot_realloc(ip, 1, whichfork);
+       ifp->if_flags |= XFS_IFBROOT;
+
+       /*
+        * Fill in the root.
+        */
+       block = ifp->if_broot;
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+                                XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
+                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+       else
+               xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
+                                XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
+                                XFS_BTREE_LONG_PTRS);
+
+       /*
+        * Need a cursor.  Can't allocate until bb_level is filled in.
+        */
+       cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+       cur->bc_private.b.firstblock = *firstblock;
+       cur->bc_private.b.flist = flist;
+       cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+       /*
+        * Convert to a btree with two levels, one record in root.
+        */
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+       memset(&args, 0, sizeof(args));
+       args.tp = tp;
+       args.mp = mp;
+       args.firstblock = *firstblock;
+       if (*firstblock == NULLFSBLOCK) {
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
+       } else if (flist->xbf_low) {
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               args.fsbno = *firstblock;
+       } else {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.fsbno = *firstblock;
+       }
+       args.minlen = args.maxlen = args.prod = 1;
+       args.wasdel = wasdel;
+       *logflagsp = 0;
+       if ((error = xfs_alloc_vextent(&args))) {
+               xfs_iroot_realloc(ip, -1, whichfork);
+               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+               return error;
+       }
+       /*
+        * Allocation can't fail, the space was reserved.
+        */
+       ASSERT(args.fsbno != NULLFSBLOCK);
+       ASSERT(*firstblock == NULLFSBLOCK ||
+              args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
+              (flist->xbf_low &&
+               args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+       *firstblock = cur->bc_private.b.firstblock = args.fsbno;
+       cur->bc_private.b.allocated++;
+       ip->i_d.di_nblocks++;
+       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+       abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+       /*
+        * Fill in the child block.
+        */
+       abp->b_ops = &xfs_bmbt_buf_ops;
+       ablock = XFS_BUF_TO_BLOCK(abp);
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+                               XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
+                               XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+       else
+               xfs_btree_init_block_int(mp, ablock, abp->b_bn,
+                               XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+                               XFS_BTREE_LONG_PTRS);
+
+       arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       for (cnt = i = 0; i < nextents; i++) {
+               ep = xfs_iext_get_ext(ifp, i);
+               if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
+                       arp->l0 = cpu_to_be64(ep->l0);
+                       arp->l1 = cpu_to_be64(ep->l1);
+                       arp++; cnt++;
+               }
+       }
+       ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
+       xfs_btree_set_numrecs(ablock, cnt);
+
+       /*
+        * Fill in the root key and pointer.
+        */
+       kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
+       arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
+       kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
+       pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
+                                               be16_to_cpu(block->bb_level)));
+       *pp = cpu_to_be64(args.fsbno);
+
+       /*
+        * Do all this logging at the end so that
+        * the root is at the right level.
+        */
+       xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
+       xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
+       ASSERT(*curp == NULL);
+       *curp = cur;
+       *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
+       return 0;
+}
+
+/*
+ * Convert a local file to an extents file.
+ * This code is out of bounds for data forks of regular files,
+ * since the file data needs to get logged so things will stay consistent.
+ * (The bmap-level manipulations are ok, though).
+ */
+void
+xfs_bmap_local_to_extents_empty(
+       struct xfs_inode        *ip,
+       int                     whichfork)
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+       ASSERT(ifp->if_bytes == 0);
+       ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+
+       xfs_bmap_forkoff_reset(ip, whichfork);
+       ifp->if_flags &= ~XFS_IFINLINE;
+       ifp->if_flags |= XFS_IFEXTENTS;
+       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+}
+
+
+STATIC int                             /* error */
+xfs_bmap_local_to_extents(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
+       xfs_extlen_t    total,          /* total blocks needed by transaction */
+       int             *logflagsp,     /* inode logging flags */
+       int             whichfork,
+       void            (*init_fn)(struct xfs_trans *tp,
+                                  struct xfs_buf *bp,
+                                  struct xfs_inode *ip,
+                                  struct xfs_ifork *ifp))
+{
+       int             error = 0;
+       int             flags;          /* logging flags returned */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       xfs_alloc_arg_t args;           /* allocation arguments */
+       xfs_buf_t       *bp;            /* buffer for extent block */
+       xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
+
+       /*
+        * We don't want to deal with the case of keeping inode data inline yet.
+        * So passing the data fork of a regular inode is invalid.
+        */
+       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+
+       if (!ifp->if_bytes) {
+               xfs_bmap_local_to_extents_empty(ip, whichfork);
+               flags = XFS_ILOG_CORE;
+               goto done;
+       }
+
+       flags = 0;
+       error = 0;
+       ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
+                                                               XFS_IFINLINE);
+       memset(&args, 0, sizeof(args));
+       args.tp = tp;
+       args.mp = ip->i_mount;
+       args.firstblock = *firstblock;
+       /*
+        * Allocate a block.  We know we need only one, since the
+        * file currently fits in an inode.
+        */
+       if (*firstblock == NULLFSBLOCK) {
+               args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
+               args.type = XFS_ALLOCTYPE_START_BNO;
+       } else {
+               args.fsbno = *firstblock;
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       }
+       args.total = total;
+       args.minlen = args.maxlen = args.prod = 1;
+       error = xfs_alloc_vextent(&args);
+       if (error)
+               goto done;
+
+       /* Can't fail, the space was reserved. */
+       ASSERT(args.fsbno != NULLFSBLOCK);
+       ASSERT(args.len == 1);
+       *firstblock = args.fsbno;
+       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+
+       /* initialise the block and copy the data */
+       init_fn(tp, bp, ip, ifp);
+
+       /* account for the change in fork size and log everything */
+       xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+       xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+       xfs_bmap_local_to_extents_empty(ip, whichfork);
+       flags |= XFS_ILOG_CORE;
+
+       xfs_iext_add(ifp, 0, 1);
+       ep = xfs_iext_get_ext(ifp, 0);
+       xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
+       trace_xfs_bmap_post_update(ip, 0,
+                       whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
+                       _THIS_IP_);
+       XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+       ip->i_d.di_nblocks = 1;
+       xfs_trans_mod_dquot_byino(tp, ip,
+               XFS_TRANS_DQ_BCOUNT, 1L);
+       flags |= xfs_ilog_fext(whichfork);
+
+done:
+       *logflagsp = flags;
+       return error;
+}
+
+/*
+ * Called from xfs_bmap_add_attrfork to handle btree format files.
+ */
+STATIC int                                     /* error */
+xfs_bmap_add_attrfork_btree(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       xfs_fsblock_t           *firstblock,    /* first block allocated */
+       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       int                     *flags)         /* inode logging flags */
+{
+       xfs_btree_cur_t         *cur;           /* btree cursor */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* file system mount struct */
+       int                     stat;           /* newroot status */
+
+       mp = ip->i_mount;
+       if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
+               *flags |= XFS_ILOG_DBROOT;
+       else {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.firstblock = *firstblock;
+               if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
+                       goto error0;
+               /* must be at least one entry */
+               XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
+               if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
+                       goto error0;
+               if (stat == 0) {
+                       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+                       return ENOSPC;
+               }
+               *firstblock = cur->bc_private.b.firstblock;
+               cur->bc_private.b.allocated = 0;
+               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       }
+       return 0;
+error0:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Called from xfs_bmap_add_attrfork to handle extents format files.
+ */
+STATIC int                                     /* error */
+xfs_bmap_add_attrfork_extents(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       xfs_fsblock_t           *firstblock,    /* first block allocated */
+       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       int                     *flags)         /* inode logging flags */
+{
+       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
+       int                     error;          /* error return value */
+
+       if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
+               return 0;
+       cur = NULL;
+       error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
+               flags, XFS_DATA_FORK);
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+       return error;
+}
+
+/*
+ * Called from xfs_bmap_add_attrfork to handle local format files. Each
+ * different data fork content type needs a different callout to do the
+ * conversion. Some are basic and only require special block initialisation
+ * callouts for the data formatting, others (directories) are so specialised they
+ * handle everything themselves.
+ *
+ * XXX (dgc): investigate whether directory conversion can use the generic
+ * formatting callout. It should be possible - it's just a very complex
+ * formatter.
+ */
+STATIC int                                     /* error */
+xfs_bmap_add_attrfork_local(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       xfs_fsblock_t           *firstblock,    /* first block allocated */
+       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
+       int                     *flags)         /* inode logging flags */
+{
+       xfs_da_args_t           dargs;          /* args for dir/attr code */
+
+       if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
+               return 0;
+
+       if (S_ISDIR(ip->i_d.di_mode)) {
+               memset(&dargs, 0, sizeof(dargs));
+               dargs.geo = ip->i_mount->m_dir_geo;
+               dargs.dp = ip;
+               dargs.firstblock = firstblock;
+               dargs.flist = flist;
+               dargs.total = dargs.geo->fsbcount;
+               dargs.whichfork = XFS_DATA_FORK;
+               dargs.trans = tp;
+               return xfs_dir2_sf_to_block(&dargs);
+       }
+
+       if (S_ISLNK(ip->i_d.di_mode))
+               return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
+                                                flags, XFS_DATA_FORK,
+                                                xfs_symlink_local_to_remote);
+
+       /* should only be called for types that support local format data */
+       ASSERT(0);
+       return EFSCORRUPTED;
+}
+
+/*
+ * Convert inode from non-attributed to attributed.
+ * Must not be in a transaction, ip must not be locked.
+ */
+int                                            /* error code */
+xfs_bmap_add_attrfork(
+       xfs_inode_t             *ip,            /* incore inode pointer */
+       int                     size,           /* space new attribute needs */
+       int                     rsvd)           /* xact may use reserved blks */
+{
+       xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
+       xfs_bmap_free_t         flist;          /* freed extent records */
+       xfs_mount_t             *mp;            /* mount structure */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       int                     blks;           /* space reservation */
+       int                     version = 1;    /* superblock attr version */
+       int                     committed;      /* xaction was committed */
+       int                     logflags;       /* logging flags */
+       int                     error;          /* error return value */
+       int                     cancel_flags = 0;
+
+       ASSERT(XFS_IFORK_Q(ip) == 0);
+
+       mp = ip->i_mount;
+       ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+       tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
+       blks = XFS_ADDAFORK_SPACE_RES(mp);
+       if (rsvd)
+               tp->t_flags |= XFS_TRANS_RESERVE;
+       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
+                       XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
+                       XFS_QMOPT_RES_REGBLKS);
+       if (error)
+               goto trans_cancel;
+       cancel_flags |= XFS_TRANS_ABORT;
+       if (XFS_IFORK_Q(ip))
+               goto trans_cancel;
+       if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
+               /*
+                * For inodes coming from pre-6.2 filesystems.
+                */
+               ASSERT(ip->i_d.di_aformat == 0);
+               ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+       }
+       ASSERT(ip->i_d.di_anextents == 0);
+
+       xfs_trans_ijoin(tp, ip, 0);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       switch (ip->i_d.di_format) {
+       case XFS_DINODE_FMT_DEV:
+               ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
+               break;
+       case XFS_DINODE_FMT_UUID:
+               ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
+               break;
+       case XFS_DINODE_FMT_LOCAL:
+       case XFS_DINODE_FMT_EXTENTS:
+       case XFS_DINODE_FMT_BTREE:
+               ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
+               if (!ip->i_d.di_forkoff)
+                       ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
+               else if (mp->m_flags & XFS_MOUNT_ATTR2)
+                       version = 2;
+               break;
+       default:
+               ASSERT(0);
+               error = EINVAL;
+               goto trans_cancel;
+       }
+
+       ASSERT(ip->i_afp == NULL);
+       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+       ip->i_afp->if_flags = XFS_IFEXTENTS;
+       logflags = 0;
+       xfs_bmap_init(&flist, &firstblock);
+       switch (ip->i_d.di_format) {
+       case XFS_DINODE_FMT_LOCAL:
+               error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
+                       &logflags);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
+                       &flist, &logflags);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
+                       &logflags);
+               break;
+       default:
+               error = 0;
+               break;
+       }
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+       if (error)
+               goto bmap_cancel;
+       if (!xfs_sb_version_hasattr(&mp->m_sb) ||
+          (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
+               __int64_t sbfields = 0;
+
+               spin_lock(&mp->m_sb_lock);
+               if (!xfs_sb_version_hasattr(&mp->m_sb)) {
+                       xfs_sb_version_addattr(&mp->m_sb);
+                       sbfields |= XFS_SB_VERSIONNUM;
+               }
+               if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
+                       xfs_sb_version_addattr2(&mp->m_sb);
+                       sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
+               }
+               if (sbfields) {
+                       spin_unlock(&mp->m_sb_lock);
+                       xfs_mod_sb(tp, sbfields);
+               } else
+                       spin_unlock(&mp->m_sb_lock);
+       }
+
+       error = xfs_bmap_finish(&tp, &flist, &committed);
+       if (error)
+               goto bmap_cancel;
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+
+bmap_cancel:
+       xfs_bmap_cancel(&flist);
+trans_cancel:
+       xfs_trans_cancel(tp, cancel_flags);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
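
The XFS_DINODE_FMT_DEV and XFS_DINODE_FMT_UUID cases above store di_forkoff in 8-byte units: the size of the embedded structure is rounded up to an 8-byte boundary and then shifted down by three. A minimal userspace sketch of that arithmetic, using stand-in sizes rather than the real xfs_dev_t and uuid_t definitions, for illustration only:

#include <stdio.h>
#include <stdint.h>

/* round x up to the next multiple of y (y must be non-zero) */
#define ROUNDUP(x, y)   ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        /* stand-in sizes; the on-disk xfs_dev_t/uuid_t sizes may differ */
        size_t dev_size = sizeof(uint32_t);
        size_t uuid_size = 16;

        /* di_forkoff is expressed in 8-byte units, hence the >> 3 */
        unsigned int dev_forkoff = ROUNDUP(dev_size, 8) >> 3;
        unsigned int uuid_forkoff = ROUNDUP(uuid_size, 8) >> 3;

        printf("dev forkoff:  %u (= %u bytes)\n", dev_forkoff, dev_forkoff * 8);
        printf("uuid forkoff: %u (= %u bytes)\n", uuid_forkoff, uuid_forkoff * 8);
        return 0;
}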
+
+/*
+ * Internal and external extent tree search functions.
+ */
+
+/*
+ * Read in the extents to if_extents.
+ * All inode fields are set up by caller, we just traverse the btree
+ * and copy the records in. If the file system cannot contain unwritten
+ * extents, the records are checked to ensure no "state" flags are set.
+ */
+int                                    /* error */
+xfs_bmap_read_extents(
+       xfs_trans_t             *tp,    /* transaction pointer */
+       xfs_inode_t             *ip,    /* incore inode */
+       int                     whichfork) /* data or attr fork */
+{
+       struct xfs_btree_block  *block; /* current btree block */
+       xfs_fsblock_t           bno;    /* block # of "block" */
+       xfs_buf_t               *bp;    /* buffer for "block" */
+       int                     error;  /* error return value */
+       xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
+       xfs_extnum_t            i, j;   /* index into the extents list */
+       xfs_ifork_t             *ifp;   /* fork structure */
+       int                     level;  /* btree level, for checking */
+       xfs_mount_t             *mp;    /* file system mount structure */
+       __be64                  *pp;    /* pointer to block address */
+       /* REFERENCED */
+       xfs_extnum_t            room;   /* number of entries there's room for */
+
+       bno = NULLFSBLOCK;
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
+                                       XFS_EXTFMT_INODE(ip);
+       block = ifp->if_broot;
+       /*
+        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+        */
+       level = be16_to_cpu(block->bb_level);
+       ASSERT(level > 0);
+       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+       bno = be64_to_cpu(*pp);
+       ASSERT(bno != NULLDFSBNO);
+       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+       /*
+        * Go down the tree until leaf level is reached, following the first
+        * pointer (leftmost) at each level.
+        */
+       while (level-- > 0) {
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
+               if (error)
+                       return error;
+               block = XFS_BUF_TO_BLOCK(bp);
+               XFS_WANT_CORRUPTED_GOTO(
+                       xfs_bmap_sanity_check(mp, bp, level),
+                       error0);
+               if (level == 0)
+                       break;
+               pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
+               bno = be64_to_cpu(*pp);
+               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               xfs_trans_brelse(tp, bp);
+       }
+       /*
+        * Here with bp and block set to the leftmost leaf node in the tree.
+        */
+       room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       i = 0;
+       /*
+        * Loop over all leaf nodes.  Copy information to the extent records.
+        */
+       for (;;) {
+               xfs_bmbt_rec_t  *frp;
+               xfs_fsblock_t   nextbno;
+               xfs_extnum_t    num_recs;
+               xfs_extnum_t    start;
+
+               num_recs = xfs_btree_get_numrecs(block);
+               if (unlikely(i + num_recs > room)) {
+                       ASSERT(i + num_recs <= room);
+                       xfs_warn(ip->i_mount,
+                               "corrupt dinode %Lu, (btree extents).",
+                               (unsigned long long) ip->i_ino);
+                       XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
+                               XFS_ERRLEVEL_LOW, ip->i_mount, block);
+                       goto error0;
+               }
+               XFS_WANT_CORRUPTED_GOTO(
+                       xfs_bmap_sanity_check(mp, bp, 0),
+                       error0);
+               /*
+                * Read-ahead the next leaf block, if any.
+                */
+               nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
+               if (nextbno != NULLFSBLOCK)
+                       xfs_btree_reada_bufl(mp, nextbno, 1,
+                                            &xfs_bmbt_buf_ops);
+               /*
+                * Copy records into the extent records.
+                */
+               frp = XFS_BMBT_REC_ADDR(mp, block, 1);
+               start = i;
+               for (j = 0; j < num_recs; j++, i++, frp++) {
+                       xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
+                       trp->l0 = be64_to_cpu(frp->l0);
+                       trp->l1 = be64_to_cpu(frp->l1);
+               }
+               if (exntf == XFS_EXTFMT_NOSTATE) {
+                       /*
+                        * Check all attribute bmap btree records and
+                        * any "older" data bmap btree records for a
+                        * set bit in the "extent flag" position.
+                        */
+                       if (unlikely(xfs_check_nostate_extents(ifp,
+                                       start, num_recs))) {
+                               XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
+                                                XFS_ERRLEVEL_LOW,
+                                                ip->i_mount);
+                               goto error0;
+                       }
+               }
+               xfs_trans_brelse(tp, bp);
+               bno = nextbno;
+               /*
+                * If we've reached the end, stop.
+                */
+               if (bno == NULLFSBLOCK)
+                       break;
+               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
+               if (error)
+                       return error;
+               block = XFS_BUF_TO_BLOCK(bp);
+       }
+       ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+       XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
+       return 0;
+error0:
+       xfs_trans_brelse(tp, bp);
+       return EFSCORRUPTED;
+}
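
xfs_bmap_read_extents() above descends the leftmost pointer at each level until it reaches the leaves, then walks the leaf chain through the right-sibling pointers while copying records out. A small in-memory sketch of that traversal pattern, using hypothetical node and record types rather than the on-disk bmap btree format:

#include <stdio.h>
#include <stddef.h>

struct node {
        int             level;          /* 0 == leaf */
        int             nrecs;
        long            recs[4];        /* leaf records (dummy payload) */
        struct node     *children[4];   /* interior: child pointers */
        struct node     *rightsib;      /* leaf: next leaf to the right */
};

static int read_all_records(struct node *root, long *out, int room)
{
        struct node *blk = root;
        int i = 0;

        /* follow the leftmost pointer down to the leaf level */
        while (blk->level > 0)
                blk = blk->children[0];

        /* walk the leaf chain, copying records into the caller's array */
        for (; blk != NULL; blk = blk->rightsib) {
                for (int j = 0; j < blk->nrecs; j++) {
                        if (i >= room)
                                return -1;      /* more records than room */
                        out[i++] = blk->recs[j];
                }
        }
        return i;
}

int main(void)
{
        struct node leaf2 = { .level = 0, .nrecs = 2, .recs = { 30, 40 } };
        struct node leaf1 = { .level = 0, .nrecs = 2, .recs = { 10, 20 },
                              .rightsib = &leaf2 };
        struct node root  = { .level = 1, .nrecs = 2,
                              .children = { &leaf1, &leaf2 } };
        long out[8];
        int n = read_all_records(&root, out, 8);

        for (int i = 0; i < n; i++)
                printf("%ld\n", out[i]);
        return 0;
}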
+
+
+/*
+ * Search the extent records for the entry containing block bno.
+ * If bno lies in a hole, point to the next entry.  If bno lies
+ * past eof, *eofp will be set, and *prevp will contain the last
+ * entry (null if none).  Else, *lastxp will be set to the index
+ * of the found entry; *gotp will contain the entry.
+ */
+STATIC xfs_bmbt_rec_host_t *           /* pointer to found extent entry */
+xfs_bmap_search_multi_extents(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_fileoff_t   bno,            /* block number searched for */
+       int             *eofp,          /* out: end of file found */
+       xfs_extnum_t    *lastxp,        /* out: last extent index */
+       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
+       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
+{
+       xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
+       xfs_extnum_t    lastx;          /* last extent index */
+
+       /*
+        * Initialize the extent entry structure to catch access to
+        * uninitialized br_startblock field.
+        */
+       gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
+       gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
+       gotp->br_state = XFS_EXT_INVALID;
+#if XFS_BIG_BLKNOS
+       gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
+#else
+       gotp->br_startblock = 0xffffa5a5;
+#endif
+       prevp->br_startoff = NULLFILEOFF;
+
+       ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
+       if (lastx > 0) {
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
+       }
+       if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+               xfs_bmbt_get_all(ep, gotp);
+               *eofp = 0;
+       } else {
+               if (lastx > 0) {
+                       *gotp = *prevp;
+               }
+               *eofp = 1;
+               ep = NULL;
+       }
+       *lastxp = lastx;
+       return ep;
+}
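
The search helper above returns the extent containing bno or, if bno falls in a hole, the next extent, and flags EOF when bno is beyond the last one. A self-contained userspace sketch of that lookup contract over a plain sorted array (hypothetical types, not the incore extent list, and without the poisoned-field initialisation):

#include <stdio.h>

struct ext {
        unsigned long   startoff;
        unsigned long   blockcount;
};

static int search_extents(const struct ext *exts, int nextents,
                          unsigned long bno, int *eof)
{
        int lo = 0, hi = nextents;

        /* binary search for the first extent ending beyond bno */
        while (lo < hi) {
                int mid = (lo + hi) / 2;

                if (exts[mid].startoff + exts[mid].blockcount <= bno)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        *eof = (lo == nextents);
        return lo;              /* index of containing or next extent */
}

int main(void)
{
        struct ext exts[] = { { 0, 4 }, { 8, 2 }, { 20, 5 } };
        unsigned long probes[] = { 2, 5, 9, 30 };
        int eof;

        for (int i = 0; i < 4; i++) {
                int idx = search_extents(exts, 3, probes[i], &eof);
                printf("bno %lu -> index %d%s\n", probes[i], idx,
                       eof ? " (past eof)" : "");
        }
        return 0;
}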
+
+/*
+ * Search the extents list for the inode, for the extent containing bno.
+ * If bno lies in a hole, point to the next entry.  If bno lies past eof,
+ * *eofp will be set, and *prevp will contain the last entry (null if none).
+ * Else, *lastxp will be set to the index of the found
+ * entry; *gotp will contain the entry.
+ */
+STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
+xfs_bmap_search_extents(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_fileoff_t   bno,            /* block number searched for */
+       int             fork,           /* data or attr fork */
+       int             *eofp,          /* out: end of file found */
+       xfs_extnum_t    *lastxp,        /* out: last extent index */
+       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
+       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
+{
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
+
+       XFS_STATS_INC(xs_look_exlist);
+       ifp = XFS_IFORK_PTR(ip, fork);
+
+       ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
+
+       if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
+                    !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
+               xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
+                               "Access to block zero in inode %llu "
+                               "start_block: %llx start_off: %llx "
+                               "blkcnt: %llx extent-state: %x lastx: %x",
+                       (unsigned long long)ip->i_ino,
+                       (unsigned long long)gotp->br_startblock,
+                       (unsigned long long)gotp->br_startoff,
+                       (unsigned long long)gotp->br_blockcount,
+                       gotp->br_state, *lastxp);
+               *lastxp = NULLEXTNUM;
+               *eofp = 1;
+               return NULL;
+       }
+       return ep;
+}
+
+/*
+ * Returns the file-relative block number of the first unused block(s)
+ * in the file with at least "len" logically contiguous blocks free.
+ * This is the lowest-address hole if the file has holes, else the first block
+ * past the end of file.
+ * Return 0 if the file is currently local (in-inode).
+ */
+int                                            /* error */
+xfs_bmap_first_unused(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_extlen_t    len,                    /* size of hole to find */
+       xfs_fileoff_t   *first_unused,          /* unused block */
+       int             whichfork)              /* data or attr fork */
+{
+       int             error;                  /* error return value */
+       int             idx;                    /* extent record index */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_fileoff_t   lastaddr;               /* last block number seen */
+       xfs_fileoff_t   lowest;                 /* lowest useful block */
+       xfs_fileoff_t   max;                    /* starting useful block */
+       xfs_fileoff_t   off;                    /* offset for this block */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
+              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               *first_unused = 0;
+               return 0;
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+       lowest = *first_unused;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
+               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
+               off = xfs_bmbt_get_startoff(ep);
+               /*
+                * See if the hole before this extent will work.
+                */
+               if (off >= lowest + len && off - max >= len) {
+                       *first_unused = max;
+                       return 0;
+               }
+               lastaddr = off + xfs_bmbt_get_blockcount(ep);
+               max = XFS_FILEOFF_MAX(lastaddr, lowest);
+       }
+       *first_unused = max;
+       return 0;
+}
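
The loop above scans the extent list in start-offset order, looking for the first gap at or beyond lowest that can hold len contiguous blocks, and falls back to the offset just past the last extent. A userspace sketch of the same scan with a hypothetical extent type:

#include <stdio.h>

struct ext {
        unsigned long   startoff;
        unsigned long   blockcount;
};

static unsigned long first_unused(const struct ext *exts, int nextents,
                                  unsigned long lowest, unsigned long len)
{
        unsigned long max = lowest;     /* candidate start of the hole */

        for (int i = 0; i < nextents; i++) {
                unsigned long off = exts[i].startoff;

                /* does the hole before this extent fit len blocks? */
                if (off >= lowest + len && off - max >= len)
                        return max;

                unsigned long end = off + exts[i].blockcount;
                if (end > max)
                        max = end;
        }
        return max;             /* no interior hole big enough: use EOF */
}

int main(void)
{
        struct ext exts[] = { { 0, 4 }, { 6, 2 }, { 12, 3 } };

        /* holes are [4,6) and [8,12); the first 2-block hole starts at 4,
         * the first 4-block hole starts at 8, a 10-block hole only at EOF (15) */
        printf("%lu %lu %lu\n",
               first_unused(exts, 3, 0, 2),
               first_unused(exts, 3, 0, 4),
               first_unused(exts, 3, 0, 10));
        return 0;
}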
+
+/*
+ * Returns the file-relative block number of the last block - 1 before
+ * last_block (input value) in the file.
+ * This is not based on i_size, it is based on the extent records.
+ * Returns 0 for local files, as they do not have extent records.
+ */
+int                                            /* error */
+xfs_bmap_last_before(
+       xfs_trans_t     *tp,                    /* transaction pointer */
+       xfs_inode_t     *ip,                    /* incore inode */
+       xfs_fileoff_t   *last_block,            /* last block */
+       int             whichfork)              /* data or attr fork */
+{
+       xfs_fileoff_t   bno;                    /* input file offset */
+       int             eof;                    /* hit end of file */
+       xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
+       int             error;                  /* error return value */
+       xfs_bmbt_irec_t got;                    /* current extent value */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_extnum_t    lastx;                  /* last extent used */
+       xfs_bmbt_irec_t prev;                   /* previous extent value */
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+              return EIO;
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               *last_block = 0;
+               return 0;
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+       bno = *last_block - 1;
+       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+       if (eof || xfs_bmbt_get_startoff(ep) > bno) {
+               if (prev.br_startoff == NULLFILEOFF)
+                       *last_block = 0;
+               else
+                       *last_block = prev.br_startoff + prev.br_blockcount;
+       }
+       /*
+        * Otherwise *last_block is already the right answer.
+        */
+       return 0;
+}
+
+int
+xfs_bmap_last_extent(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       struct xfs_bmbt_irec    *rec,
+       int                     *is_empty)
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       int                     error;
+       int                     nextents;
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *is_empty = 1;
+               return 0;
+       }
+
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
+       *is_empty = 0;
+       return 0;
+}
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ *
+ * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will be
+ * at or past the EOF.
+ */
+STATIC int
+xfs_bmap_isaeof(
+       struct xfs_bmalloca     *bma,
+       int                     whichfork)
+{
+       struct xfs_bmbt_irec    rec;
+       int                     is_empty;
+       int                     error;
+
+       bma->aeof = 0;
+       error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
+                                    &is_empty);
+       if (error)
+               return error;
+
+       if (is_empty) {
+               bma->aeof = 1;
+               return 0;
+       }
+
+       /*
+        * Check if we are allocating at or past the last extent, or at least into
+        * the last delayed allocated extent.
+        */
+       bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
+               (bma->offset >= rec.br_startoff &&
+                isnullstartblock(rec.br_startblock));
+       return 0;
+}
+
+/*
+ * Returns the file-relative block number of the first block past eof in
+ * the file.  This is not based on i_size, it is based on the extent records.
+ * Returns 0 for local files, as they do not have extent records.
+ */
+int
+xfs_bmap_last_offset(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           *last_block,
+       int                     whichfork)
+{
+       struct xfs_bmbt_irec    rec;
+       int                     is_empty;
+       int                     error;
+
+       *last_block = 0;
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
+               return 0;
+
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+              return EIO;
+
+       error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
+       if (error || is_empty)
+               return error;
+
+       *last_block = rec.br_startoff + rec.br_blockcount;
+       return 0;
+}
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not.  For the data fork we check this matches di_size,
+ * implying the file's range is 0..bsize-1.
+ */
+int                                    /* 1=>1 block, 0=>otherwise */
+xfs_bmap_one_block(
+       xfs_inode_t     *ip,            /* incore inode */
+       int             whichfork)      /* data or attr fork */
+{
+       xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
+       xfs_ifork_t     *ifp;           /* inode fork pointer */
+       int             rval;           /* return value */
+       xfs_bmbt_irec_t s;              /* internal version of extent */
+
+#ifndef DEBUG
+       if (whichfork == XFS_DATA_FORK)
+               return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
+#endif /* !DEBUG */
+       if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
+               return 0;
+       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               return 0;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       ep = xfs_iext_get_ext(ifp, 0);
+       xfs_bmbt_get_all(ep, &s);
+       rval = s.br_startoff == 0 && s.br_blockcount == 1;
+       if (rval && whichfork == XFS_DATA_FORK)
+               ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
+       return rval;
+}
+
+/*
+ * Extent tree manipulation functions used during allocation.
+ */
+
+/*
+ * Convert a delayed allocation to a real allocation.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_delay_real(
+       struct xfs_bmalloca     *bma)
+{
+       struct xfs_bmbt_irec    *new = &bma->got;
+       int                     diff;   /* temp value */
+       xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
+       int                     error;  /* error return value */
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
+       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
+                                       /* left is 0, right is 1, prev is 2 */
+       int                     rval=0; /* return value (logging flags) */
+       int                     state = 0;/* state bits, accessed thru macros */
+       xfs_filblks_t           da_new; /* new count del alloc blocks used */
+       xfs_filblks_t           da_old; /* old count del alloc blocks used */
+       xfs_filblks_t           temp=0; /* value for da_new calculations */
+       xfs_filblks_t           temp2=0;/* value for da_new calculations */
+       int                     tmp_rval;       /* partial logging flags */
+
+       ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+
+       ASSERT(bma->idx >= 0);
+       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+       XFS_STATS_INC(xs_add_exlist);
+
+#define        LEFT            r[0]
+#define        RIGHT           r[1]
+#define        PREV            r[2]
+
+       /*
+        * Set up a bunch of variables to make the tests simpler.
+        */
+       ep = xfs_iext_get_ext(ifp, bma->idx);
+       xfs_bmbt_get_all(ep, &PREV);
+       new_endoff = new->br_startoff + new->br_blockcount;
+       ASSERT(PREV.br_startoff <= new->br_startoff);
+       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+
+       da_old = startblockval(PREV.br_startblock);
+       da_new = 0;
+
+       /*
+        * Set flags determining what part of the previous delayed allocation
+        * extent is being replaced by a real allocation.
+        */
+       if (PREV.br_startoff == new->br_startoff)
+               state |= BMAP_LEFT_FILLING;
+       if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
+               state |= BMAP_RIGHT_FILLING;
+
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        */
+       if (bma->idx > 0) {
+               state |= BMAP_LEFT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
+
+               if (isnullstartblock(LEFT.br_startblock))
+                       state |= BMAP_LEFT_DELAY;
+       }
+
+       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
+           LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+           LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+           LEFT.br_state == new->br_state &&
+           LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+               state |= BMAP_LEFT_CONTIG;
+
+       /*
+        * Check and set flags if this segment has a right neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+       if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
+
+               if (isnullstartblock(RIGHT.br_startblock))
+                       state |= BMAP_RIGHT_DELAY;
+       }
+
+       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
+           new_endoff == RIGHT.br_startoff &&
+           new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
+           new->br_state == RIGHT.br_state &&
+           new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+           ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+                      BMAP_RIGHT_FILLING)) !=
+                     (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+                      BMAP_RIGHT_FILLING) ||
+            LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+                       <= MAXEXTLEN))
+               state |= BMAP_RIGHT_CONTIG;
+
+       error = 0;
+       /*
+        * Switch out based on the FILLING and CONTIG state bits.
+        */
+       switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+                        BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+            BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The left and right neighbors are both contiguous with new.
+                */
+               bma->idx--;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+                       LEFT.br_blockcount + PREV.br_blockcount +
+                       RIGHT.br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
+               bma->ip->i_d.di_nextents--;
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_btree_delete(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_btree_decrement(bma->cur, 0, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       PREV.br_blockcount +
+                                       RIGHT.br_blockcount, LEFT.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The left neighbor is contiguous, the right is not.
+                */
+               bma->idx--;
+
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+                       LEFT.br_blockcount + PREV.br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
+                                       LEFT.br_startblock, LEFT.br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       PREV.br_blockcount, LEFT.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * The right neighbor is contiguous, the left is not.
+                */
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(ep, new->br_startblock);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount + RIGHT.br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
+                                       new->br_startblock,
+                                       PREV.br_blockcount +
+                                       RIGHT.br_blockcount, PREV.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
+               /*
+                * Filling in all of a previously delayed allocation extent.
+                * Neither the left nor right neighbors are contiguous with
+                * the new one.
+                */
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(ep, new->br_startblock);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
+               /*
+                * Filling in the first part of a previous delayed allocation.
+                * The left neighbor is contiguous.
+                */
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
+                       LEFT.br_blockcount + new->br_blockcount);
+               xfs_bmbt_set_startoff(ep,
+                       PREV.br_startoff + new->br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
+
+               temp = PREV.br_blockcount - new->br_blockcount;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
+                                       LEFT.br_startblock, LEFT.br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
+                                       LEFT.br_startblock,
+                                       LEFT.br_blockcount +
+                                       new->br_blockcount,
+                                       LEFT.br_state);
+                       if (error)
+                               goto done;
+               }
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
+                       startblockval(PREV.br_startblock));
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx--;
+               break;
+
+       case BMAP_LEFT_FILLING:
+               /*
+                * Filling in the first part of a previous delayed allocation.
+                * The left neighbor is not contiguous.
+                */
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_startoff(ep, new_endoff);
+               temp = PREV.br_blockcount - new->br_blockcount;
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                                       bma->firstblock, bma->flist,
+                                       &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
+                       startblockval(PREV.br_startblock) -
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               ep = xfs_iext_get_ext(ifp, bma->idx + 1);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+               break;
+
+       case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Filling in the last part of a previous delayed allocation.
+                * The right neighbor is contiguous with the new allocation.
+                */
+               temp = PREV.br_blockcount - new->br_blockcount;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
+                       new->br_startoff, new->br_startblock,
+                       new->br_blockcount + RIGHT.br_blockcount,
+                       RIGHT.br_state);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
+                                       new->br_startblock,
+                                       new->br_blockcount +
+                                       RIGHT.br_blockcount,
+                                       RIGHT.br_state);
+                       if (error)
+                               goto done;
+               }
+
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
+                       startblockval(PREV.br_startblock));
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx++;
+               break;
+
+       case BMAP_RIGHT_FILLING:
+               /*
+                * Filling in the last part of a previous delayed allocation.
+                * The right neighbor is not contiguous.
+                */
+               temp = PREV.br_blockcount - new->br_blockcount;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur, 1,
+                               &tmp_rval, XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
+                       startblockval(PREV.br_startblock) -
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               ep = xfs_iext_get_ext(ifp, bma->idx);
+               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               bma->idx++;
+               break;
+
+       case 0:
+               /*
+                * Filling in the middle part of a previous delayed allocation.
+                * Contiguity is impossible here.
+                * This case is avoided almost all the time.
+                *
+                * We start with a delayed allocation:
+                *
+                * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
+                *  PREV @ idx
+                *
+                * and we are allocating:
+                *                     +rrrrrrrrrrrrrrrrr+
+                *                            new
+                *
+                * and we set it up for insertion as:
+                * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
+                *                            new
+                *  PREV @ idx          LEFT              RIGHT
+                *                      inserted at idx + 1
+                */
+               temp = new->br_startoff - PREV.br_startoff;
+               temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
+               LEFT = *new;
+               RIGHT.br_state = PREV.br_state;
+               RIGHT.br_startblock = nullstartblock(
+                               (int)xfs_bmap_worst_indlen(bma->ip, temp2));
+               RIGHT.br_startoff = new_endoff;
+               RIGHT.br_blockcount = temp2;
+               /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
+               xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
+               bma->ip->i_d.di_nextents++;
+               if (bma->cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+
+               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                                       bma->firstblock, bma->flist, &bma->cur,
+                                       1, &tmp_rval, XFS_DATA_FORK);
+                       rval |= tmp_rval;
+                       if (error)
+                               goto done;
+               }
+               temp = xfs_bmap_worst_indlen(bma->ip, temp);
+               temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
+               diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
+                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+               if (diff > 0) {
+                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
+                                       XFS_SBS_FDBLOCKS,
+                                       -((int64_t)diff), 0);
+                       ASSERT(!error);
+                       if (error)
+                               goto done;
+               }
+
+               ep = xfs_iext_get_ext(ifp, bma->idx);
+               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
+                       nullstartblock((int)temp2));
+               trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
+
+               bma->idx++;
+               da_new = temp + temp2;
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
+       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_LEFT_CONTIG:
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * These cases are all impossible.
+                */
+               ASSERT(0);
+       }
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(bma->cur == NULL);
+               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur,
+                               da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+               bma->logflags |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* adjust for changes in reserved delayed indirect blocks */
+       if (da_old || da_new) {
+               temp = da_new;
+               if (bma->cur)
+                       temp += bma->cur->bc_private.b.allocated;
+               ASSERT(temp <= da_old);
+               if (temp < da_old)
+                       xfs_icsb_modify_counters(bma->ip->i_mount,
+                                       XFS_SBS_FDBLOCKS,
+                                       (int64_t)(da_old - temp), 0);
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (bma->cur)
+               bma->cur->bc_private.b.allocated = 0;
+
+       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
+done:
+       bma->logflags |= rval;
+       return error;
+#undef LEFT
+#undef RIGHT
+#undef PREV
+}
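+/*
+ * Illustrative sketch only (hypothetical helper, not part of libxfs): the
+ * middle-split case of xfs_bmap_add_extent_delay_real() above re-estimates
+ * the worst-case indirect block need of the two remaining delalloc pieces
+ * with xfs_bmap_worst_indlen().  If that exceeds what PREV still had
+ * reserved plus whatever the btree cursor already allocated, the shortfall
+ * is taken out of the free block counter.  Plain integer types stand in
+ * for the xfs typedefs.
+ */
+static inline int64_t
+delay_split_extra_blocks(
+       uint64_t        left_indlen,    /* worst_indlen of left remainder */
+       uint64_t        right_indlen,   /* worst_indlen of right remainder */
+       uint64_t        old_res,        /* startblockval(PREV.br_startblock) */
+       uint64_t        cur_allocated)  /* blocks held by the btree cursor */
+{
+       /* a positive result is debited from fdblocks, as in the code above */
+       return (int64_t)(left_indlen + right_indlen) -
+              (int64_t)(old_res + cur_allocated);
+}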
+
+/*
+ * Convert an unwritten allocation to a real allocation or vice versa.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_unwritten_real(
+       struct xfs_trans        *tp,
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            *idx,   /* extent number to update/insert */
+       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
+       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
+       xfs_fsblock_t           *first, /* pointer to firstblock variable */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       int                     *logflagsp) /* inode logging flags */
+{
+       xfs_btree_cur_t         *cur;   /* btree cursor */
+       xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
+       int                     error;  /* error return value */
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
+       xfs_exntst_t            newext; /* new extent state */
+       xfs_exntst_t            oldext; /* old extent state */
+       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
+                                       /* left is 0, right is 1, prev is 2 */
+       int                     rval=0; /* return value (logging flags) */
+       int                     state = 0;/* state bits, accessed thru macros */
+
+       *logflagsp = 0;
+
+       cur = *curp;
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+
+       XFS_STATS_INC(xs_add_exlist);
+
+#define        LEFT            r[0]
+#define        RIGHT           r[1]
+#define        PREV            r[2]
+
+       /*
+        * Set up a bunch of variables to make the tests simpler.
+        */
+       error = 0;
+       ep = xfs_iext_get_ext(ifp, *idx);
+       xfs_bmbt_get_all(ep, &PREV);
+       newext = new->br_state;
+       oldext = (newext == XFS_EXT_UNWRITTEN) ?
+               XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+       ASSERT(PREV.br_state == oldext);
+       new_endoff = new->br_startoff + new->br_blockcount;
+       ASSERT(PREV.br_startoff <= new->br_startoff);
+       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+
+       /*
+        * Set flags determining what part of the previous oldext allocation
+        * extent is being replaced by a newext allocation.
+        */
+       if (PREV.br_startoff == new->br_startoff)
+               state |= BMAP_LEFT_FILLING;
+       if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
+               state |= BMAP_RIGHT_FILLING;
+
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        */
+       if (*idx > 0) {
+               state |= BMAP_LEFT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
+
+               if (isnullstartblock(LEFT.br_startblock))
+                       state |= BMAP_LEFT_DELAY;
+       }
+
+       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
+           LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+           LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+           LEFT.br_state == newext &&
+           LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+               state |= BMAP_LEFT_CONTIG;
+
+       /*
+        * Check and set flags if this segment has a right neighbor.
+        * Don't set contiguous if the combined extent would be too large.
+        * Also check for all-three-contiguous being too large.
+        */
+       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
+               if (isnullstartblock(RIGHT.br_startblock))
+                       state |= BMAP_RIGHT_DELAY;
+       }
+
+       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
+           new_endoff == RIGHT.br_startoff &&
+           new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
+           newext == RIGHT.br_state &&
+           new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+           ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+                      BMAP_RIGHT_FILLING)) !=
+                     (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
+                      BMAP_RIGHT_FILLING) ||
+            LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+                       <= MAXEXTLEN))
+               state |= BMAP_RIGHT_CONTIG;
+
+       /*
+        * Switch out based on the FILLING and CONTIG state bits.
+        */
+       switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+                        BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
+            BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left and right neighbors are both contiguous with new.
+                */
+               --*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+                       LEFT.br_blockcount + PREV.br_blockcount +
+                       RIGHT.br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               xfs_iext_remove(ip, *idx + 1, 2, state);
+               ip->i_d.di_nextents -= 2;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_delete(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_delete(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + PREV.br_blockcount +
+                               RIGHT.br_blockcount, LEFT.br_state)))
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The left neighbor is contiguous, the right is not.
+                */
+               --*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+                       LEFT.br_blockcount + PREV.br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               xfs_iext_remove(ip, *idx + 1, 1, state);
+               ip->i_d.di_nextents--;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_delete(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + PREV.br_blockcount,
+                               LEFT.br_state)))
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * The right neighbor is contiguous, the left is not.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount + RIGHT.br_blockcount);
+               xfs_bmbt_set_state(ep, newext);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               xfs_iext_remove(ip, *idx + 1, 1, state);
+               ip->i_d.di_nextents--;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+                                       RIGHT.br_startblock,
+                                       RIGHT.br_blockcount, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_delete(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock,
+                               new->br_blockcount + RIGHT.br_blockcount,
+                               newext)))
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
+               /*
+                * Setting all of a previous oldext extent to newext.
+                * Neither the left nor right neighbors are contiguous with
+                * the new one.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_state(ep, newext);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock, new->br_blockcount,
+                               newext)))
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is contiguous.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
+                       LEFT.br_blockcount + new->br_blockcount);
+               xfs_bmbt_set_startoff(ep,
+                       PREV.br_startoff + new->br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_startblock(ep,
+                       new->br_startblock + new->br_blockcount);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               --*idx;
+
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur,
+                               PREV.br_startoff + new->br_blockcount,
+                               PREV.br_startblock + new->br_blockcount,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext)))
+                               goto done;
+                       if ((error = xfs_btree_decrement(cur, 0, &i)))
+                               goto done;
+                       error = xfs_bmbt_update(cur, LEFT.br_startoff,
+                               LEFT.br_startblock,
+                               LEFT.br_blockcount + new->br_blockcount,
+                               LEFT.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_FILLING:
+               /*
+                * Setting the first part of a previous oldext extent to newext.
+                * The left neighbor is not contiguous.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
+               xfs_bmbt_set_startoff(ep, new_endoff);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               xfs_bmbt_set_startblock(ep,
+                       new->br_startblock + new->br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               xfs_iext_insert(ip, *idx, 1, new, state);
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur,
+                               PREV.br_startoff + new->br_blockcount,
+                               PREV.br_startblock + new->br_blockcount,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext)))
+                               goto done;
+                       cur->bc_rec.b = *new;
+                       if ((error = xfs_btree_insert(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               break;
+
+       case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is contiguous with the new allocation.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               ++*idx;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+                       new->br_startoff, new->br_startblock,
+                       new->br_blockcount + RIGHT.br_blockcount, newext);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               if (cur == NULL)
+                       rval = XFS_ILOG_DEXT;
+               else {
+                       rval = 0;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock,
+                                       PREV.br_blockcount, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+                               PREV.br_startblock,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext)))
+                               goto done;
+                       if ((error = xfs_btree_increment(cur, 0, &i)))
+                               goto done;
+                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
+                               new->br_startblock,
+                               new->br_blockcount + RIGHT.br_blockcount,
+                               newext)))
+                               goto done;
+               }
+               break;
+
+       case BMAP_RIGHT_FILLING:
+               /*
+                * Setting the last part of a previous oldext extent to newext.
+                * The right neighbor is not contiguous.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep,
+                       PREV.br_blockcount - new->br_blockcount);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, new, state);
+
+               ip->i_d.di_nextents++;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+                               PREV.br_startblock,
+                               PREV.br_blockcount - new->br_blockcount,
+                               oldext)))
+                               goto done;
+                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
+                       if ((error = xfs_btree_insert(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               break;
+
+       case 0:
+               /*
+                * Setting the middle part of a previous oldext extent to
+                * newext.  Contiguity is impossible here.
+                * One extent becomes three extents.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep,
+                       new->br_startoff - PREV.br_startoff);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               r[0] = *new;
+               r[1].br_startoff = new_endoff;
+               r[1].br_blockcount =
+                       PREV.br_startoff + PREV.br_blockcount - new_endoff;
+               r[1].br_startblock = new->br_startblock + new->br_blockcount;
+               r[1].br_state = oldext;
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 2, &r[0], state);
+
+               ip->i_d.di_nextents += 2;
+               if (cur == NULL)
+                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+               else {
+                       rval = XFS_ILOG_CORE;
+                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+                                       PREV.br_startblock, PREV.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       /* new right extent - oldext */
+                       if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
+                               r[1].br_startblock, r[1].br_blockcount,
+                               r[1].br_state)))
+                               goto done;
+                       /* new left extent - oldext */
+                       cur->bc_rec.b = PREV;
+                       cur->bc_rec.b.br_blockcount =
+                               new->br_startoff - PREV.br_startoff;
+                       if ((error = xfs_btree_insert(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       /*
+                        * Reset the cursor to the position of the new extent
+                        * we are about to insert as we can't trust it after
+                        * the previous insert.
+                        */
+                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                       new->br_startblock, new->br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       /* new middle extent - newext */
+                       cur->bc_rec.b.br_state = new->br_state;
+                       if ((error = xfs_btree_insert(cur, &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               break;
+
+       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
+       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+       case BMAP_LEFT_CONTIG:
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * These cases are all impossible.
+                */
+               ASSERT(0);
+       }
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
+                               0, &tmp_logflags, XFS_DATA_FORK);
+               *logflagsp |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               *curp = cur;
+       }
+
+       xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+done:
+       *logflagsp |= rval;
+       return error;
+#undef LEFT
+#undef RIGHT
+#undef PREV
+}
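+/*
+ * Worked example (illustrative numbers only) for the "middle of an extent"
+ * case of xfs_bmap_add_extent_unwritten_real() above.  Suppose PREV maps
+ * file offsets 100..199 at startblock 500 as unwritten, and offsets
+ * 140..159 are written.  Case 0 turns one record into three:
+ *
+ *   left  : off 100, block 500, len 40, XFS_EXT_UNWRITTEN  (trimmed PREV)
+ *   new   : off 140, block 540, len 20, XFS_EXT_NORM
+ *   right : off 160, block 560, len 40, XFS_EXT_UNWRITTEN
+ *
+ * so di_nextents grows by two and, if a bmap btree exists, the original
+ * record is updated to the right piece while the left and middle pieces
+ * are inserted.
+ */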
+
+/*
+ * Convert a hole to a delayed allocation.
+ */
+STATIC void
+xfs_bmap_add_extent_hole_delay(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_extnum_t            *idx,   /* extent number to update/insert */
+       xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
+{
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
+       xfs_filblks_t           newlen=0;       /* new indirect size */
+       xfs_filblks_t           oldlen=0;       /* old indirect size */
+       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
+       int                     state;  /* state bits, accessed thru macros */
+       xfs_filblks_t           temp=0; /* temp for indirect calculations */
+
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       state = 0;
+       ASSERT(isnullstartblock(new->br_startblock));
+
+       /*
+        * Check and set flags if this segment has a left neighbor
+        */
+       if (*idx > 0) {
+               state |= BMAP_LEFT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
+
+               if (isnullstartblock(left.br_startblock))
+                       state |= BMAP_LEFT_DELAY;
+       }
+
+       /*
+        * Check and set flags if the current (right) segment exists.
+        * If it doesn't exist, we're converting the hole at end-of-file.
+        */
+       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
+
+               if (isnullstartblock(right.br_startblock))
+                       state |= BMAP_RIGHT_DELAY;
+       }
+
+       /*
+        * Set contiguity flags on the left and right neighbors.
+        * Don't let extents get too large, even if the pieces are contiguous.
+        */
+       if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
+           left.br_startoff + left.br_blockcount == new->br_startoff &&
+           left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+               state |= BMAP_LEFT_CONTIG;
+
+       if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
+           new->br_startoff + new->br_blockcount == right.br_startoff &&
+           new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+           (!(state & BMAP_LEFT_CONTIG) ||
+            (left.br_blockcount + new->br_blockcount +
+             right.br_blockcount <= MAXEXTLEN)))
+               state |= BMAP_RIGHT_CONTIG;
+
+       /*
+        * Switch out based on the contiguity flags.
+        */
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * New allocation is contiguous with delayed allocations
+                * on the left and on the right.
+                * Merge all three into a single extent record.
+                */
+               --*idx;
+               temp = left.br_blockcount + new->br_blockcount +
+                       right.br_blockcount;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
+               oldlen = startblockval(left.br_startblock) +
+                       startblockval(new->br_startblock) +
+                       startblockval(right.br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
+                       nullstartblock((int)newlen));
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               xfs_iext_remove(ip, *idx + 1, 1, state);
+               break;
+
+       case BMAP_LEFT_CONTIG:
+               /*
+                * New allocation is contiguous with a delayed allocation
+                * on the left.
+                * Merge the new allocation with the left neighbor.
+                */
+               --*idx;
+               temp = left.br_blockcount + new->br_blockcount;
+
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
+               oldlen = startblockval(left.br_startblock) +
+                       startblockval(new->br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
+                       nullstartblock((int)newlen));
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * New allocation is contiguous with a delayed allocation
+                * on the right.
+                * Merge the new allocation with the right neighbor.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               temp = new->br_blockcount + right.br_blockcount;
+               oldlen = startblockval(new->br_startblock) +
+                       startblockval(right.br_startblock);
+               newlen = xfs_bmap_worst_indlen(ip, temp);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+                       new->br_startoff,
+                       nullstartblock((int)newlen), temp, right.br_state);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+
+       case 0:
+               /*
+                * New allocation is not contiguous with another
+                * delayed allocation.
+                * Insert a new entry.
+                */
+               oldlen = newlen = 0;
+               xfs_iext_insert(ip, *idx, 1, new, state);
+               break;
+       }
+       if (oldlen != newlen) {
+               ASSERT(oldlen > newlen);
+               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
+                       (int64_t)(oldlen - newlen), 0);
+               /*
+                * Nothing to do for disk quota accounting here.
+                */
+       }
+}
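+/*
+ * Illustrative sketch only (hypothetical helper, not part of libxfs): when
+ * xfs_bmap_add_extent_hole_delay() merges delalloc extents above, the
+ * worst-case indirect reservation of the merged extent (newlen) never
+ * exceeds the sum of the reservations the pieces carried (oldlen), so the
+ * surplus is handed back to the free block counter.
+ */
+static inline uint64_t
+delalloc_merge_returned_blocks(
+       uint64_t        oldlen,         /* sum of startblockval() of pieces */
+       uint64_t        newlen)         /* xfs_bmap_worst_indlen() of merge */
+{
+       return oldlen > newlen ? oldlen - newlen : 0;
+}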
+
+/*
+ * Convert a hole to a real allocation.
+ */
+STATIC int                             /* error */
+xfs_bmap_add_extent_hole_real(
+       struct xfs_bmalloca     *bma,
+       int                     whichfork)
+{
+       struct xfs_bmbt_irec    *new = &bma->got;
+       int                     error;  /* error return value */
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
+       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
+       int                     rval=0; /* return value (logging flags) */
+       int                     state;  /* state bits, accessed thru macros */
+
+       ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+
+       ASSERT(bma->idx >= 0);
+       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(!isnullstartblock(new->br_startblock));
+       ASSERT(!bma->cur ||
+              !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+       XFS_STATS_INC(xs_add_exlist);
+
+       state = 0;
+       if (whichfork == XFS_ATTR_FORK)
+               state |= BMAP_ATTRFORK;
+
+       /*
+        * Check and set flags if this segment has a left neighbor.
+        */
+       if (bma->idx > 0) {
+               state |= BMAP_LEFT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
+               if (isnullstartblock(left.br_startblock))
+                       state |= BMAP_LEFT_DELAY;
+       }
+
+       /*
+        * Check and set flags if this segment has a current value.
+        * Not true if we're inserting into the "hole" at eof.
+        */
+       if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+               state |= BMAP_RIGHT_VALID;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
+               if (isnullstartblock(right.br_startblock))
+                       state |= BMAP_RIGHT_DELAY;
+       }
+
+       /*
+        * We're inserting a real allocation between "left" and "right".
+        * Set the contiguity flags.  Don't let extents get too large.
+        */
+       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
+           left.br_startoff + left.br_blockcount == new->br_startoff &&
+           left.br_startblock + left.br_blockcount == new->br_startblock &&
+           left.br_state == new->br_state &&
+           left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
+               state |= BMAP_LEFT_CONTIG;
+
+       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
+           new->br_startoff + new->br_blockcount == right.br_startoff &&
+           new->br_startblock + new->br_blockcount == right.br_startblock &&
+           new->br_state == right.br_state &&
+           new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+           (!(state & BMAP_LEFT_CONTIG) ||
+            left.br_blockcount + new->br_blockcount +
+            right.br_blockcount <= MAXEXTLEN))
+               state |= BMAP_RIGHT_CONTIG;
+
+       error = 0;
+       /*
+        * Select which case we're in here, and implement it.
+        */
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * New allocation is contiguous with real allocations on the
+                * left and on the right.
+                * Merge all three into a single extent record.
+                */
+               --bma->idx;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+                       left.br_blockcount + new->br_blockcount +
+                       right.br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+
+               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
+               if (bma->cur == NULL) {
+                       rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
+               } else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
+                                       right.br_startblock, right.br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_btree_delete(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_btree_decrement(bma->cur, 0, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
+                                       left.br_startblock,
+                                       left.br_blockcount +
+                                               new->br_blockcount +
+                                               right.br_blockcount,
+                                       left.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_LEFT_CONTIG:
+               /*
+                * New allocation is contiguous with a real allocation
+                * on the left.
+                * Merge the new allocation with the left neighbor.
+                */
+               --bma->idx;
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
+                       left.br_blockcount + new->br_blockcount);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               if (bma->cur == NULL) {
+                       rval = xfs_ilog_fext(whichfork);
+               } else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
+                                       left.br_startblock, left.br_blockcount,
+                                       &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
+                                       left.br_startblock,
+                                       left.br_blockcount +
+                                               new->br_blockcount,
+                                       left.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * New allocation is contiguous with a real allocation
+                * on the right.
+                * Merge the new allocation with the right neighbor.
+                */
+               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
+                       new->br_startoff, new->br_startblock,
+                       new->br_blockcount + right.br_blockcount,
+                       right.br_state);
+               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+
+               if (bma->cur == NULL) {
+                       rval = xfs_ilog_fext(whichfork);
+               } else {
+                       rval = 0;
+                       error = xfs_bmbt_lookup_eq(bma->cur,
+                                       right.br_startoff,
+                                       right.br_startblock,
+                                       right.br_blockcount, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
+                                       new->br_startblock,
+                                       new->br_blockcount +
+                                               right.br_blockcount,
+                                       right.br_state);
+                       if (error)
+                               goto done;
+               }
+               break;
+
+       case 0:
+               /*
+                * New allocation is not contiguous with another
+                * real allocation.
+                * Insert a new entry.
+                */
+               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
+               if (bma->cur == NULL) {
+                       rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
+               } else {
+                       rval = XFS_ILOG_CORE;
+                       error = xfs_bmbt_lookup_eq(bma->cur,
+                                       new->br_startoff,
+                                       new->br_startblock,
+                                       new->br_blockcount, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       bma->cur->bc_rec.b.br_state = new->br_state;
+                       error = xfs_btree_insert(bma->cur, &i);
+                       if (error)
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               break;
+       }
+
+       /* convert to a btree if necessary */
+       if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
+               int     tmp_logflags;   /* partial log flag return val */
+
+               ASSERT(bma->cur == NULL);
+               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+                               bma->firstblock, bma->flist, &bma->cur,
+                               0, &tmp_logflags, whichfork);
+               bma->logflags |= tmp_logflags;
+               if (error)
+                       goto done;
+       }
+
+       /* clear out the allocated field, done with it now in any case. */
+       if (bma->cur)
+               bma->cur->bc_private.b.allocated = 0;
+
+       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
+done:
+       bma->logflags |= rval;
+       return error;
+}
+
+/*
+ * Functions used in the extent read, allocate and remove paths
+ */
+
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ */
+int
+xfs_bmap_extsize_align(
+       xfs_mount_t     *mp,
+       xfs_bmbt_irec_t *gotp,          /* next extent pointer */
+       xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
+       xfs_extlen_t    extsz,          /* align to this extent size */
+       int             rt,             /* is this a realtime inode? */
+       int             eof,            /* is extent at end-of-file? */
+       int             delay,          /* creating delalloc extent? */
+       int             convert,        /* overwriting unwritten extent? */
+       xfs_fileoff_t   *offp,          /* in/out: aligned offset */
+       xfs_extlen_t    *lenp)          /* in/out: aligned length */
+{
+       xfs_fileoff_t   orig_off;       /* original offset */
+       xfs_extlen_t    orig_alen;      /* original length */
+       xfs_fileoff_t   orig_end;       /* original off+len */
+       xfs_fileoff_t   nexto;          /* next file offset */
+       xfs_fileoff_t   prevo;          /* previous file offset */
+       xfs_fileoff_t   align_off;      /* temp for offset */
+       xfs_extlen_t    align_alen;     /* temp for length */
+       xfs_extlen_t    temp;           /* temp for calculations */
+
+       if (convert)
+               return 0;
+
+       orig_off = align_off = *offp;
+       orig_alen = align_alen = *lenp;
+       orig_end = orig_off + orig_alen;
+
+       /*
+        * If this request overlaps an existing extent, then don't
+        * attempt to perform any additional alignment.
+        */
+       if (!delay && !eof &&
+           (orig_off >= gotp->br_startoff) &&
+           (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+               return 0;
+       }
+
+       /*
+        * If the file offset is unaligned vs. the extent size
+        * we need to align it.  This will be possible unless
+        * the file was previously written with a kernel that didn't
+        * perform this alignment, or if a truncate shot us in the
+        * foot.
+        */
+       temp = do_mod(orig_off, extsz);
+       if (temp) {
+               align_alen += temp;
+               align_off -= temp;
+       }
+       /*
+        * Same adjustment for the end of the requested area.
+        */
+       if ((temp = (align_alen % extsz))) {
+               align_alen += extsz - temp;
+       }
+       /*
+        * If the previous block overlaps with this proposed allocation
+        * then move the start forward without adjusting the length.
+        */
+       if (prevp->br_startoff != NULLFILEOFF) {
+               if (prevp->br_startblock == HOLESTARTBLOCK)
+                       prevo = prevp->br_startoff;
+               else
+                       prevo = prevp->br_startoff + prevp->br_blockcount;
+       } else
+               prevo = 0;
+       if (align_off != orig_off && align_off < prevo)
+               align_off = prevo;
+       /*
+        * If the next block overlaps with this proposed allocation
+        * then move the start back without adjusting the length,
+        * but not before offset 0.
+        * This may of course make the start overlap previous block,
+        * and if we hit the offset 0 limit then the next block
+        * can still overlap too.
+        */
+       if (!eof && gotp->br_startoff != NULLFILEOFF) {
+               if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+                   (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+                       nexto = gotp->br_startoff + gotp->br_blockcount;
+               else
+                       nexto = gotp->br_startoff;
+       } else
+               nexto = NULLFILEOFF;
+       if (!eof &&
+           align_off + align_alen != orig_end &&
+           align_off + align_alen > nexto)
+               align_off = nexto > align_alen ? nexto - align_alen : 0;
+       /*
+        * If we're now overlapping the next or previous extent that
+        * means we can't fit an extsz piece in this hole.  Just move
+        * the start forward to the first valid spot and set
+        * the length so we hit the end.
+        */
+       if (align_off != orig_off && align_off < prevo)
+               align_off = prevo;
+       if (align_off + align_alen != orig_end &&
+           align_off + align_alen > nexto &&
+           nexto != NULLFILEOFF) {
+               ASSERT(nexto > prevo);
+               align_alen = nexto - align_off;
+       }
+
+       /*
+        * If realtime, and the result isn't a multiple of the realtime
+        * extent size we need to remove blocks until it is.
+        */
+       if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+               /*
+                * We're not covering the original request, or
+                * we won't be able to once we fix the length.
+                */
+               if (orig_off < align_off ||
+                   orig_end > align_off + align_alen ||
+                   align_alen - temp < orig_alen)
+                       return EINVAL;
+               /*
+                * Try to fix it by moving the start up.
+                */
+               if (align_off + temp <= orig_off) {
+                       align_alen -= temp;
+                       align_off += temp;
+               }
+               /*
+                * Try to fix it by moving the end in.
+                */
+               else if (align_off + align_alen - temp >= orig_end)
+                       align_alen -= temp;
+               /*
+                * Set the start to the minimum then trim the length.
+                */
+               else {
+                       align_alen -= orig_off - align_off;
+                       align_off = orig_off;
+                       align_alen -= align_alen % mp->m_sb.sb_rextsize;
+               }
+               /*
+                * Result doesn't cover the request, fail it.
+                */
+               if (orig_off < align_off || orig_end > align_off + align_alen)
+                       return EINVAL;
+       } else {
+               ASSERT(orig_off >= align_off);
+               ASSERT(orig_end <= align_off + align_alen);
+       }
+
+#ifdef DEBUG
+       if (!eof && gotp->br_startoff != NULLFILEOFF)
+               ASSERT(align_off + align_alen <= gotp->br_startoff);
+       if (prevp->br_startoff != NULLFILEOFF)
+               ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+
+       *lenp = align_alen;
+       *offp = align_off;
+       return 0;
+}
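+/*
+ * Illustrative sketch only (hypothetical helper, not part of libxfs): the
+ * basic extent size rounding that xfs_bmap_extsize_align() starts from,
+ * before any trimming against the neighbouring extents or the realtime
+ * extent size.  The start is pulled back to an extsz boundary and the end
+ * is rounded up to one.  For example, extsz = 16, off = 5, len = 10 gives
+ * off = 0, len = 16.
+ */
+static inline void
+extsize_align_basic(
+       uint64_t        extsz,
+       uint64_t        *off,
+       uint64_t        *len)
+{
+       uint64_t        temp;
+
+       temp = *off % extsz;            /* do_mod(orig_off, extsz) */
+       if (temp) {
+               *len += temp;
+               *off -= temp;
+       }
+       temp = *len % extsz;
+       if (temp)
+               *len += extsz - temp;
+}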
+
+#define XFS_ALLOC_GAP_UNITS    4
+
+void
+xfs_bmap_adjacent(
+       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
+{
+       xfs_fsblock_t   adjust;         /* adjustment to block numbers */
+       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
+       xfs_mount_t     *mp;            /* mount point structure */
+       int             nullfb;         /* true if ap->firstblock isn't set */
+       int             rt;             /* true if inode is realtime */
+
+#define        ISVALID(x,y)    \
+       (rt ? \
+               (x) < mp->m_sb.sb_rblocks : \
+               XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
+               XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
+               XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
+
+       mp = ap->ip->i_mount;
+       nullfb = *ap->firstblock == NULLFSBLOCK;
+       rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
+       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
+       /*
+        * If allocating at eof, and there's a previous real block,
+        * try to use its last block as our starting point.
+        */
+       if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
+           !isnullstartblock(ap->prev.br_startblock) &&
+           ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
+                   ap->prev.br_startblock)) {
+               ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
+               /*
+                * Adjust for the gap between prevp and us.
+                */
+               adjust = ap->offset -
+                       (ap->prev.br_startoff + ap->prev.br_blockcount);
+               if (adjust &&
+                   ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
+                       ap->blkno += adjust;
+       }
+       /*
+        * If not at eof, then compare the two neighbor blocks.
+        * Figure out whether either one gives us a good starting point,
+        * and pick the better one.
+        */
+       else if (!ap->eof) {
+               xfs_fsblock_t   gotbno;         /* right side block number */
+               xfs_fsblock_t   gotdiff=0;      /* right side difference */
+               xfs_fsblock_t   prevbno;        /* left side block number */
+               xfs_fsblock_t   prevdiff=0;     /* left side difference */
+
+               /*
+                * If there's a previous (left) block, select a requested
+                * start block based on it.
+                */
+               if (ap->prev.br_startoff != NULLFILEOFF &&
+                   !isnullstartblock(ap->prev.br_startblock) &&
+                   (prevbno = ap->prev.br_startblock +
+                              ap->prev.br_blockcount) &&
+                   ISVALID(prevbno, ap->prev.br_startblock)) {
+                       /*
+                        * Calculate gap to end of previous block.
+                        */
+                       adjust = prevdiff = ap->offset -
+                               (ap->prev.br_startoff +
+                                ap->prev.br_blockcount);
+                       /*
+                        * Figure the startblock based on the previous block's
+                        * end and the gap size.
+                        * Heuristic!
+                        * If the gap is large relative to the piece we're
+                        * allocating, or using it gives us an invalid block
+                        * number, then just use the end of the previous block.
+                        */
+                       if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
+                           ISVALID(prevbno + prevdiff,
+                                   ap->prev.br_startblock))
+                               prevbno += adjust;
+                       else
+                               prevdiff += adjust;
+                       /*
+                        * If the firstblock forbids it, can't use it,
+                        * must use default.
+                        */
+                       if (!rt && !nullfb &&
+                           XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
+                               prevbno = NULLFSBLOCK;
+               }
+               /*
+                * No previous block or can't follow it, just default.
+                */
+               else
+                       prevbno = NULLFSBLOCK;
+               /*
+                * If there's a following (right) block, select a requested
+                * start block based on it.
+                */
+               if (!isnullstartblock(ap->got.br_startblock)) {
+                       /*
+                        * Calculate gap to start of next block.
+                        */
+                       adjust = gotdiff = ap->got.br_startoff - ap->offset;
+                       /*
+                        * Figure the startblock based on the next block's
+                        * start and the gap size.
+                        */
+                       gotbno = ap->got.br_startblock;
+                       /*
+                        * Heuristic!
+                        * If the gap is large relative to the piece we're
+                        * allocating, or using it gives us an invalid block
+                        * number, then just use the start of the next block
+                        * offset by our length.
+                        */
+                       if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
+                           ISVALID(gotbno - gotdiff, gotbno))
+                               gotbno -= adjust;
+                       else if (ISVALID(gotbno - ap->length, gotbno)) {
+                               gotbno -= ap->length;
+                               gotdiff += adjust - ap->length;
+                       } else
+                               gotdiff += adjust;
+                       /*
+                        * If the firstblock forbids it, can't use it,
+                        * must use default.
+                        */
+                       if (!rt && !nullfb &&
+                           XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
+                               gotbno = NULLFSBLOCK;
+               }
+               /*
+                * No next block, just default.
+                */
+               else
+                       gotbno = NULLFSBLOCK;
+               /*
+                * If both valid, pick the better one, else the only good
+                * one, else ap->blkno is already set (to 0 or the inode block).
+                */
+               if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
+                       ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
+               else if (prevbno != NULLFSBLOCK)
+                       ap->blkno = prevbno;
+               else if (gotbno != NULLFSBLOCK)
+                       ap->blkno = gotbno;
+       }
+#undef ISVALID
+}
+
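+/*
+ * Update *blen with the length of the longest contiguous free extent in
+ * AG @ag, reading in the AGF with a trylock if it is not already cached.
+ * If the trylock fails, set *notinit so the caller knows *blen may
+ * understate the space that is actually available.
+ */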
+static int
+xfs_bmap_longest_free_extent(
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          ag,
+       xfs_extlen_t            *blen,
+       int                     *notinit)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_perag        *pag;
+       xfs_extlen_t            longest;
+       int                     error = 0;
+
+       pag = xfs_perag_get(mp, ag);
+       if (!pag->pagf_init) {
+               error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
+               if (error)
+                       goto out;
+
+               if (!pag->pagf_init) {
+                       *notinit = 1;
+                       goto out;
+               }
+       }
+
+       longest = xfs_alloc_longest_free_extent(mp, pag);
+       if (*blen < longest)
+               *blen = longest;
+
+out:
+       xfs_perag_put(pag);
+       return error;
+}
+
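+/*
+ * Select args->minlen from the longest free extent (*blen) the caller found
+ * while scanning AGs.  If any AGF could not be read (notinit), fall back to
+ * the caller's minimum length as there may still be enough space for the
+ * request.
+ */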
+static void
+xfs_bmap_select_minlen(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            *blen,
+       int                     notinit)
+{
+       if (notinit || *blen < ap->minlen) {
+               /*
+                * Since the AGF was only read with a trylock, it is still
+                * possible that there is space for this request.
+                */
+               args->minlen = ap->minlen;
+       } else if (*blen < args->maxlen) {
+               /*
+                * If the best seen length is less than the request length,
+                * use the best as the minimum.
+                */
+               args->minlen = *blen;
+       } else {
+               /*
+                * Otherwise we've seen an extent as big as maxlen, use that
+                * as the minimum.
+                */
+               args->minlen = args->maxlen;
+       }
+}
+
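+/*
+ * No firstblock has been set for this transaction, so scan all AGs starting
+ * at the target AG for the longest free extent and size the minimum
+ * allocation length from what we find.
+ */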
+STATIC int
+xfs_bmap_btalloc_nullfb(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            *blen)
+{
+       struct xfs_mount        *mp = ap->ip->i_mount;
+       xfs_agnumber_t          ag, startag;
+       int                     notinit = 0;
+       int                     error;
+
+       args->type = XFS_ALLOCTYPE_START_BNO;
+       args->total = ap->total;
+
+       startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
+       if (startag == NULLAGNUMBER)
+               startag = ag = 0;
+
+       while (*blen < args->maxlen) {
+               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+                                                    &notinit);
+               if (error)
+                       return error;
+
+               if (++ag == mp->m_sb.sb_agcount)
+                       ag = 0;
+               if (ag == startag)
+                       break;
+       }
+
+       xfs_bmap_select_minlen(ap, args, blen, notinit);
+       return 0;
+}
+
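+/*
+ * Filestreams variant of the above: only consider the AG the filestream is
+ * currently associated with, and move the stream to a new AG if that one
+ * does not have a large enough free extent.
+ */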
+STATIC int
+xfs_bmap_btalloc_filestreams(
+       struct xfs_bmalloca     *ap,
+       struct xfs_alloc_arg    *args,
+       xfs_extlen_t            *blen)
+{
+       struct xfs_mount        *mp = ap->ip->i_mount;
+       xfs_agnumber_t          ag;
+       int                     notinit = 0;
+       int                     error;
+
+       args->type = XFS_ALLOCTYPE_NEAR_BNO;
+       args->total = ap->total;
+
+       ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
+       if (ag == NULLAGNUMBER)
+               ag = 0;
+
+       error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
+       if (error)
+               return error;
+
+       if (*blen < args->maxlen) {
+               error = xfs_filestream_new_ag(ap, &ag);
+               if (error)
+                       return error;
+
+               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
+                                                    &notinit);
+               if (error)
+                       return error;
+       }
+
+       xfs_bmap_select_minlen(ap, args, blen, notinit);
+
+       /*
+        * Set the failure fallback case to look in the selected AG as stream
+        * may have moved.
+        */
+       ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
+       return 0;
+}
+
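+/*
+ * Allocate non-realtime file space via xfs_alloc_vextent().  This applies
+ * extent size hints and stripe alignment, picks a target AG consistent with
+ * ap->firstblock, and retries with progressively relaxed constraints if the
+ * initial allocation attempt fails.
+ */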
+STATIC int
+xfs_bmap_btalloc(
+       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
+{
+       xfs_mount_t     *mp;            /* mount point structure */
+       xfs_alloctype_t atype = 0;      /* type for allocation routines */
+       xfs_extlen_t    align;          /* minimum allocation alignment */
+       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
+       xfs_agnumber_t  ag;
+       xfs_alloc_arg_t args;
+       xfs_extlen_t    blen;
+       xfs_extlen_t    nextminlen = 0;
+       int             nullfb;         /* true if ap->firstblock isn't set */
+       int             isaligned;
+       int             tryagain;
+       int             error;
+       int             stripe_align;
+
+       ASSERT(ap->length);
+
+       mp = ap->ip->i_mount;
+
+       /* stripe alignment for allocation is determined by mount parameters */
+       stripe_align = 0;
+       if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+               stripe_align = mp->m_swidth;
+       else if (mp->m_dalign)
+               stripe_align = mp->m_dalign;
+
+       align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
+       if (unlikely(align)) {
+               error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
+                                               align, 0, ap->eof, 0, ap->conv,
+                                               &ap->offset, &ap->length);
+               ASSERT(!error);
+               ASSERT(ap->length);
+       }
+
+       nullfb = *ap->firstblock == NULLFSBLOCK;
+       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
+       if (nullfb) {
+               if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
+                       ag = xfs_filestream_lookup_ag(ap->ip);
+                       ag = (ag != NULLAGNUMBER) ? ag : 0;
+                       ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
+               } else {
+                       ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+               }
+       } else
+               ap->blkno = *ap->firstblock;
+
+       xfs_bmap_adjacent(ap);
+
+       /*
+        * If allowed, use ap->blkno; otherwise must use firstblock since
+        * it's in the right allocation group.
+        */
+       if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
+               ;
+       else
+               ap->blkno = *ap->firstblock;
+       /*
+        * Normal allocation, done through xfs_alloc_vextent.
+        */
+       tryagain = isaligned = 0;
+       memset(&args, 0, sizeof(args));
+       args.tp = ap->tp;
+       args.mp = mp;
+       args.fsbno = ap->blkno;
+
+       /* Trim the allocation back to the maximum an AG can fit. */
+       args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+       args.firstblock = *ap->firstblock;
+       blen = 0;
+       if (nullfb) {
+               /*
+                * Search for an allocation group with a single extent large
+                * enough for the request.  If one isn't found, then adjust
+                * the minimum allocation size to the largest space found.
+                */
+               if (ap->userdata && xfs_inode_is_filestream(ap->ip))
+                       error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
+               else
+                       error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
+               if (error)
+                       return error;
+       } else if (ap->flist->xbf_low) {
+               if (xfs_inode_is_filestream(ap->ip))
+                       args.type = XFS_ALLOCTYPE_FIRST_AG;
+               else
+                       args.type = XFS_ALLOCTYPE_START_BNO;
+               args.total = args.minlen = ap->minlen;
+       } else {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.total = ap->total;
+               args.minlen = ap->minlen;
+       }
+       /* apply extent size hints if obtained earlier */
+       if (unlikely(align)) {
+               args.prod = align;
+               if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
+                       args.mod = (xfs_extlen_t)(args.prod - args.mod);
+       } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
+               args.prod = 1;
+               args.mod = 0;
+       } else {
+               args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
+               if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
+                       args.mod = (xfs_extlen_t)(args.prod - args.mod);
+       }
+       /*
+        * If we are not low on available data blocks, and the
+        * underlying logical volume manager is a stripe, and
+        * the file offset is zero then try to allocate data
+        * blocks on stripe unit boundary.
+        * NOTE: ap->aeof is only set if the allocation length
+        * is >= the stripe unit and the allocation offset is
+        * at the end of file.
+        */
+       if (!ap->flist->xbf_low && ap->aeof) {
+               if (!ap->offset) {
+                       args.alignment = stripe_align;
+                       atype = args.type;
+                       isaligned = 1;
+                       /*
+                        * Adjust for alignment
+                        */
+                       if (blen > args.alignment && blen <= args.maxlen)
+                               args.minlen = blen - args.alignment;
+                       args.minalignslop = 0;
+               } else {
+                       /*
+                        * First try an exact bno allocation.
+                        * If it fails then do a near or start bno
+                        * allocation with alignment turned on.
+                        */
+                       atype = args.type;
+                       tryagain = 1;
+                       args.type = XFS_ALLOCTYPE_THIS_BNO;
+                       args.alignment = 1;
+                       /*
+                        * Compute the minlen+alignment for the
+                        * next case.  Set slop so that the value
+                        * of minlen+alignment+slop doesn't go up
+                        * between the calls.
+                        */
+                       if (blen > stripe_align && blen <= args.maxlen)
+                               nextminlen = blen - stripe_align;
+                       else
+                               nextminlen = args.minlen;
+                       if (nextminlen + stripe_align > args.minlen + 1)
+                               args.minalignslop =
+                                       nextminlen + stripe_align -
+                                       args.minlen - 1;
+                       else
+                               args.minalignslop = 0;
+               }
+       } else {
+               args.alignment = 1;
+               args.minalignslop = 0;
+       }
+       args.minleft = ap->minleft;
+       args.wasdel = ap->wasdel;
+       args.isfl = 0;
+       args.userdata = ap->userdata;
+       if ((error = xfs_alloc_vextent(&args)))
+               return error;
+       if (tryagain && args.fsbno == NULLFSBLOCK) {
+               /*
+                * Exact allocation failed. Now try with alignment
+                * turned on.
+                */
+               args.type = atype;
+               args.fsbno = ap->blkno;
+               args.alignment = stripe_align;
+               args.minlen = nextminlen;
+               args.minalignslop = 0;
+               isaligned = 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
+       if (isaligned && args.fsbno == NULLFSBLOCK) {
+               /*
+                * allocation failed, so turn off alignment and
+                * try again.
+                */
+               args.type = atype;
+               args.fsbno = ap->blkno;
+               args.alignment = 0;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
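+       /*
+        * If the allocation failed and we raised minlen above the caller's
+        * minimum, retry with the caller's minimum before falling back to
+        * the low space allocator below.
+        */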
+       if (args.fsbno == NULLFSBLOCK && nullfb &&
+           args.minlen > ap->minlen) {
+               args.minlen = ap->minlen;
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               args.fsbno = ap->blkno;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
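+       /*
+        * Still no luck: switch to a low space allocation starting at the
+        * first AG and flag the free list so the rest of the transaction
+        * knows we are now in low space mode.
+        */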
+       if (args.fsbno == NULLFSBLOCK && nullfb) {
+               args.fsbno = 0;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
+               args.total = ap->minlen;
+               args.minleft = 0;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+               ap->flist->xbf_low = 1;
+       }
+       if (args.fsbno != NULLFSBLOCK) {
+               /*
+                * check the allocation happened at the same or higher AG than
+                * the first block that was allocated.
+                */
+               ASSERT(*ap->firstblock == NULLFSBLOCK ||
+                      XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
+                      XFS_FSB_TO_AGNO(mp, args.fsbno) ||
+                      (ap->flist->xbf_low &&
+                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
+                       XFS_FSB_TO_AGNO(mp, args.fsbno)));
+
+               ap->blkno = args.fsbno;
+               if (*ap->firstblock == NULLFSBLOCK)
+                       *ap->firstblock = args.fsbno;
+               ASSERT(nullfb || fb_agno == args.agno ||
+                      (ap->flist->xbf_low && fb_agno < args.agno));
+               ap->length = args.len;
+               ap->ip->i_d.di_nblocks += args.len;
+               xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+               if (ap->wasdel)
+                       ap->ip->i_delayed_blks -= args.len;
+               /*
+                * Adjust the disk quota also. This was reserved
+                * earlier.
+                */
+               xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+                       ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
+                                       XFS_TRANS_DQ_BCOUNT,
+                       (long) args.len);
+       } else {
+               ap->blkno = NULLFSBLOCK;
+               ap->length = 0;
+       }
+       return 0;
+}
+
+/*
+ * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
+ * It figures out where to ask the underlying allocator to put the new extent.
+ */
+STATIC int
+xfs_bmap_alloc(
+       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
+{
+       if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
+               return xfs_bmap_rtalloc(ap);
+       return xfs_bmap_btalloc(ap);
+}
+
+/*
+ * Trim the returned map to the required bounds
+ */
+STATIC void
+xfs_bmapi_trim_map(
+       struct xfs_bmbt_irec    *mval,
+       struct xfs_bmbt_irec    *got,
+       xfs_fileoff_t           *bno,
+       xfs_filblks_t           len,
+       xfs_fileoff_t           obno,
+       xfs_fileoff_t           end,
+       int                     n,
+       int                     flags)
+{
+       if ((flags & XFS_BMAPI_ENTIRE) ||
+           got->br_startoff + got->br_blockcount <= obno) {
+               *mval = *got;
+               if (isnullstartblock(got->br_startblock))
+                       mval->br_startblock = DELAYSTARTBLOCK;
+               return;
+       }
+
+       if (obno > *bno)
+               *bno = obno;
+       ASSERT((*bno >= obno) || (n == 0));
+       ASSERT(*bno < end);
+       mval->br_startoff = *bno;
+       if (isnullstartblock(got->br_startblock))
+               mval->br_startblock = DELAYSTARTBLOCK;
+       else
+               mval->br_startblock = got->br_startblock +
+                                       (*bno - got->br_startoff);
+       /*
+        * Return the minimum of what we got and what we asked for as the
+        * length.  We can use the len variable here because it is
+        * modified below and we could have been there before coming
+        * here if the first part of the allocation didn't overlap what
+        * was asked for.
+        */
+       mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
+                       got->br_blockcount - (*bno - got->br_startoff));
+       mval->br_state = got->br_state;
+       ASSERT(mval->br_blockcount <= len);
+       return;
+}
+
+/*
+ * Update and validate the extent map to return
+ */
+STATIC void
+xfs_bmapi_update_map(
+       struct xfs_bmbt_irec    **map,
+       xfs_fileoff_t           *bno,
+       xfs_filblks_t           *len,
+       xfs_fileoff_t           obno,
+       xfs_fileoff_t           end,
+       int                     *n,
+       int                     flags)
+{
+       xfs_bmbt_irec_t *mval = *map;
+
+       ASSERT((flags & XFS_BMAPI_ENTIRE) ||
+              ((mval->br_startoff + mval->br_blockcount) <= end));
+       ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
+              (mval->br_startoff < obno));
+
+       *bno = mval->br_startoff + mval->br_blockcount;
+       *len = end - *bno;
+       if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
+               /* update previous map with new information */
+               ASSERT(mval->br_startblock == mval[-1].br_startblock);
+               ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
+               ASSERT(mval->br_state == mval[-1].br_state);
+               mval[-1].br_blockcount = mval->br_blockcount;
+               mval[-1].br_state = mval->br_state;
+       } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock != DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock != HOLESTARTBLOCK &&
+                  mval->br_startblock == mval[-1].br_startblock +
+                                         mval[-1].br_blockcount &&
+                  ((flags & XFS_BMAPI_IGSTATE) ||
+                       mval[-1].br_state == mval->br_state)) {
+               ASSERT(mval->br_startoff ==
+                      mval[-1].br_startoff + mval[-1].br_blockcount);
+               mval[-1].br_blockcount += mval->br_blockcount;
+       } else if (*n > 0 &&
+                  mval->br_startblock == DELAYSTARTBLOCK &&
+                  mval[-1].br_startblock == DELAYSTARTBLOCK &&
+                  mval->br_startoff ==
+                  mval[-1].br_startoff + mval[-1].br_blockcount) {
+               mval[-1].br_blockcount += mval->br_blockcount;
+               mval[-1].br_state = mval->br_state;
+       } else if (!((*n == 0) &&
+                    ((mval->br_startoff + mval->br_blockcount) <=
+                     obno))) {
+               mval++;
+               (*n)++;
+       }
+       *map = mval;
+}
+
+/*
+ * Map file blocks to filesystem blocks without allocation.
+ */
+int
+xfs_bmapi_read(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len,
+       struct xfs_bmbt_irec    *mval,
+       int                     *nmap,
+       int                     flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp;
+       struct xfs_bmbt_irec    got;
+       struct xfs_bmbt_irec    prev;
+       xfs_fileoff_t           obno;
+       xfs_fileoff_t           end;
+       xfs_extnum_t            lastx;
+       int                     error;
+       int                     eof;
+       int                     n = 0;
+       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+
+       ASSERT(*nmap >= 1);
+       ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
+                          XFS_BMAPI_IGSTATE)));
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return EIO;
+
+       XFS_STATS_INC(xs_blk_mapr);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+       end = bno + len;
+       obno = bno;
+
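+       /*
+        * Walk the extent list, reporting holes as HOLESTARTBLOCK mappings
+        * and trimming real and delalloc extents to the requested range.
+        */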
+       while (bno < end && n < *nmap) {
+               /* Reading past eof, act as though there's a hole up to end. */
+               if (eof)
+                       got.br_startoff = end;
+               if (got.br_startoff > bno) {
+                       /* Reading in a hole.  */
+                       mval->br_startoff = bno;
+                       mval->br_startblock = HOLESTARTBLOCK;
+                       mval->br_blockcount =
+                               XFS_FILBLKS_MIN(len, got.br_startoff - bno);
+                       mval->br_state = XFS_EXT_NORM;
+                       bno += mval->br_blockcount;
+                       len -= mval->br_blockcount;
+                       mval++;
+                       n++;
+                       continue;
+               }
+
+               /* set up the extent map to return. */
+               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+               /* If we're done, stop now. */
+               if (bno >= end || n >= *nmap)
+                       break;
+
+               /* Else go on to the next record. */
+               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+               else
+                       eof = 1;
+       }
+       *nmap = n;
+       return 0;
+}
+
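+/*
+ * Reserve the resources needed for a delayed allocation extent covering
+ * aoff/len: a transaction-less quota reservation and in-core block
+ * reservations for the data blocks and the worst case indirect blocks, then
+ * insert the new delalloc extent into the in-core extent list.
+ */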
+STATIC int
+xfs_bmapi_reserve_delalloc(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           aoff,
+       xfs_filblks_t           len,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *prev,
+       xfs_extnum_t            *lastx,
+       int                     eof)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       xfs_extlen_t            alen;
+       xfs_extlen_t            indlen;
+       char                    rt = XFS_IS_REALTIME_INODE(ip);
+       xfs_extlen_t            extsz;
+       int                     error;
+
+       alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
+       if (!eof)
+               alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+
+       /* Figure out the extent size, adjust alen */
+       extsz = xfs_get_extsz_hint(ip);
+       if (extsz) {
+               /*
+                * Make sure we don't exceed a single extent length when we
+                * align the extent by reducing the length we are going to
+                * allocate by the maximum amount extent size alignment may
+                * require.
+                */
+               alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
+               error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
+                                              1, 0, &aoff, &alen);
+               ASSERT(!error);
+       }
+
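+       /*
+        * For realtime inodes, convert the block count into a count of
+        * realtime extents for the free rt extent counter update below.
+        */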
+       if (rt)
+               extsz = alen / mp->m_sb.sb_rextsize;
+
+       /*
+        * Make a transaction-less quota reservation for delayed allocation
+        * blocks.  This number gets adjusted later.  On failure we simply
+        * return, as no blocks have been allocated yet.
+        */
+       error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
+                       rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       if (error)
+               return error;
+
+       /*
+        * Split the superblock counter updates for alen and indlen since
+        * they may be taken from different counters.
+        */
+       indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
+       ASSERT(indlen > 0);
+
+       if (rt) {
+               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+                                         -((int64_t)extsz), 0);
+       } else {
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                -((int64_t)alen), 0);
+       }
+
+       if (error)
+               goto out_unreserve_quota;
+
+       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                        -((int64_t)indlen), 0);
+       if (error)
+               goto out_unreserve_blocks;
+
+       ip->i_delayed_blks += alen;
+
+       got->br_startoff = aoff;
+       got->br_startblock = nullstartblock(indlen);
+       got->br_blockcount = alen;
+       got->br_state = XFS_EXT_NORM;
+       xfs_bmap_add_extent_hole_delay(ip, lastx, got);
+
+       /*
+        * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
+        * might have merged it into one of the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+
+       ASSERT(got->br_startoff <= aoff);
+       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+       ASSERT(isnullstartblock(got->br_startblock));
+       ASSERT(got->br_state == XFS_EXT_NORM);
+       return 0;
+
+out_unreserve_blocks:
+       if (rt)
+               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+       else
+               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+out_unreserve_quota:
+       if (XFS_IS_QUOTA_ON(mp))
+               xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
+                               XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       return error;
+}
+
+/*
+ * Map file blocks to filesystem blocks, adding delayed allocations as needed.
+ */
+int
+xfs_bmapi_delay(
+       struct xfs_inode        *ip,    /* incore inode */
+       xfs_fileoff_t           bno,    /* starting file offs. mapped */
+       xfs_filblks_t           len,    /* length to map in file */
+       struct xfs_bmbt_irec    *mval,  /* output: map values */
+       int                     *nmap,  /* i/o: mval size/count */
+       int                     flags)  /* XFS_BMAPI_... */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       struct xfs_bmbt_irec    got;    /* current file extent record */
+       struct xfs_bmbt_irec    prev;   /* previous file extent record */
+       xfs_fileoff_t           obno;   /* old block number (offset) */
+       xfs_fileoff_t           end;    /* end of mapped file region */
+       xfs_extnum_t            lastx;  /* last useful extent number */
+       int                     eof;    /* we've hit the end of extents */
+       int                     n = 0;  /* current extent index */
+       int                     error = 0;
+
+       ASSERT(*nmap >= 1);
+       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+       ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return EIO;
+
+       XFS_STATS_INC(xs_blk_mapw);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+               if (error)
+                       return error;
+       }
+
+       xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
+       end = bno + len;
+       obno = bno;
+
+       while (bno < end && n < *nmap) {
+               if (eof || got.br_startoff > bno) {
+                       error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
+                                                          &prev, &lastx, eof);
+                       if (error) {
+                               if (n == 0) {
+                                       *nmap = 0;
+                                       return error;
+                               }
+                               break;
+                       }
+               }
+
+               /* set up the extent map to return. */
+               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+               /* If we're done, stop now. */
+               if (bno >= end || n >= *nmap)
+                       break;
+
+               /* Else go on to the next record. */
+               prev = got;
+               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+               else
+                       eof = 1;
+       }
+
+       *nmap = n;
+       return 0;
+}
+
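+/*
+ * Allocate the space for a single mapping described by @bma: size the
+ * request, call into the allocator via xfs_bmap_alloc(), then update the
+ * in-core extent list by converting the delayed allocation (bma->wasdel) or
+ * filling the hole with a new real extent.
+ */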
+int
+__xfs_bmapi_allocate(
+       struct xfs_bmalloca     *bma)
+{
+       struct xfs_mount        *mp = bma->ip->i_mount;
+       int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       int                     tmp_logflags = 0;
+       int                     error;
+
+       ASSERT(bma->length > 0);
+
+       /*
+        * For the wasdelay case, we could also just allocate the stuff asked
+        * for in this bmap call but that wouldn't be as good.
+        */
+       if (bma->wasdel) {
+               bma->length = (xfs_extlen_t)bma->got.br_blockcount;
+               bma->offset = bma->got.br_startoff;
+               if (bma->idx != NULLEXTNUM && bma->idx) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
+                                        &bma->prev);
+               }
+       } else {
+               bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
+               if (!bma->eof)
+                       bma->length = XFS_FILBLKS_MIN(bma->length,
+                                       bma->got.br_startoff - bma->offset);
+       }
+
+       /*
+        * Indicate if this is the first user data in the file, or just any
+        * user data.
+        */
+       if (!(bma->flags & XFS_BMAPI_METADATA)) {
+               bma->userdata = (bma->offset == 0) ?
+                       XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
+       }
+
+       bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+
+       /*
+        * Only want to do the alignment at the eof if it is userdata and
+        * allocation length is larger than a stripe unit.
+        */
+       if (mp->m_dalign && bma->length >= mp->m_dalign &&
+           !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+               error = xfs_bmap_isaeof(bma, whichfork);
+               if (error)
+                       return error;
+       }
+
+       error = xfs_bmap_alloc(bma);
+       if (error)
+               return error;
+
+       if (bma->flist->xbf_low)
+               bma->minleft = 0;
+       if (bma->cur)
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+       if (bma->blkno == NULLFSBLOCK)
+               return 0;
+       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+               bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+               bma->cur->bc_private.b.flist = bma->flist;
+       }
+       /*
+        * Bump the number of extents we've allocated
+        * in this call.
+        */
+       bma->nallocs++;
+
+       if (bma->cur)
+               bma->cur->bc_private.b.flags =
+                       bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+
+       bma->got.br_startoff = bma->offset;
+       bma->got.br_startblock = bma->blkno;
+       bma->got.br_blockcount = bma->length;
+       bma->got.br_state = XFS_EXT_NORM;
+
+       /*
+        * A wasdelay extent has been initialized, so shouldn't be flagged
+        * as unwritten.
+        */
+       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+           xfs_sb_version_hasextflgbit(&mp->m_sb))
+               bma->got.br_state = XFS_EXT_UNWRITTEN;
+
+       if (bma->wasdel)
+               error = xfs_bmap_add_extent_delay_real(bma);
+       else
+               error = xfs_bmap_add_extent_hole_real(bma, whichfork);
+
+       bma->logflags |= tmp_logflags;
+       if (error)
+               return error;
+
+       /*
+        * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
+        * or xfs_bmap_add_extent_hole_real might have merged it into one of
+        * the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+       ASSERT(bma->got.br_startoff <= bma->offset);
+       ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
+              bma->offset + bma->length);
+       ASSERT(bma->got.br_state == XFS_EXT_NORM ||
+              bma->got.br_state == XFS_EXT_UNWRITTEN);
+       return 0;
+}
+
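+/*
+ * Convert the extent state of the mapping in @mval if the bmapi flags call
+ * for it: unwritten to real for ordinary writes into preallocated space, or
+ * real to unwritten when both XFS_BMAPI_PREALLOC and XFS_BMAPI_CONVERT are
+ * set.
+ */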
+STATIC int
+xfs_bmapi_convert_unwritten(
+       struct xfs_bmalloca     *bma,
+       struct xfs_bmbt_irec    *mval,
+       xfs_filblks_t           len,
+       int                     flags)
+{
+       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       int                     tmp_logflags = 0;
+       int                     error;
+
+       /* check if we need to do unwritten->real conversion */
+       if (mval->br_state == XFS_EXT_UNWRITTEN &&
+           (flags & XFS_BMAPI_PREALLOC))
+               return 0;
+
+       /* check if we need to do real->unwritten conversion */
+       if (mval->br_state == XFS_EXT_NORM &&
+           (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
+                       (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
+               return 0;
+
+       /*
+        * Toggle the extent state between written and unwritten.
+        */
+       ASSERT(mval->br_blockcount <= len);
+       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+               bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
+                                       bma->ip, whichfork);
+               bma->cur->bc_private.b.firstblock = *bma->firstblock;
+               bma->cur->bc_private.b.flist = bma->flist;
+       }
+       mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
+                               ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+
+       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
+                       &bma->cur, mval, bma->firstblock, bma->flist,
+                       &tmp_logflags);
+       bma->logflags |= tmp_logflags;
+       if (error)
+               return error;
+
+       /*
+        * Update our extent pointer, given that
+        * xfs_bmap_add_extent_unwritten_real might have merged it into one
+        * of the neighbouring ones.
+        */
+       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+       /*
+        * We may have combined previously unwritten space with written space,
+        * so generate another request.
+        */
+       if (mval->br_blockcount < len)
+               return EAGAIN;
+       return 0;
+}
+
+/*
+ * Map file blocks to filesystem blocks, and allocate blocks or convert the
+ * extent state if necessary.  Detailed behaviour is controlled by the flags
+ * parameter.  Only allocates blocks from a single allocation group, to avoid
+ * locking problems.
+ *
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ */
+int
+xfs_bmapi_write(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting file offs. mapped */
+       xfs_filblks_t           len,            /* length to map in file */
+       int                     flags,          /* XFS_BMAPI_... */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_extlen_t            total,          /* total blocks needed */
+       struct xfs_bmbt_irec    *mval,          /* output: map values */
+       int                     *nmap,          /* i/o: mval size/count */
+       struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp;
+       struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
+       xfs_fileoff_t           end;            /* end of mapped file region */
+       int                     eof;            /* after the end of extents */
+       int                     error;          /* error return */
+       int                     n;              /* current extent index */
+       xfs_fileoff_t           obno;           /* old block number (offset) */
+       int                     whichfork;      /* data or attr fork */
+       char                    inhole;         /* current location is hole in file */
+       char                    wasdelay;       /* old extent was delayed */
+
+#ifdef DEBUG
+       xfs_fileoff_t           orig_bno;       /* original block number value */
+       int                     orig_flags;     /* original flags arg value */
+       xfs_filblks_t           orig_len;       /* original value of len arg */
+       struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
+       int                     orig_nmap;      /* original value of *nmap */
+
+       orig_bno = bno;
+       orig_len = len;
+       orig_flags = flags;
+       orig_mval = mval;
+       orig_nmap = *nmap;
+#endif
+       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+               XFS_ATTR_FORK : XFS_DATA_FORK;
+
+       ASSERT(*nmap >= 1);
+       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+       ASSERT(!(flags & XFS_BMAPI_IGSTATE));
+       ASSERT(tp != NULL);
+       ASSERT(len > 0);
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return EIO;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       XFS_STATS_INC(xs_blk_mapw);
+
+       if (*firstblock == NULLFSBLOCK) {
+               if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
+                       bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+               else
+                       bma.minleft = 1;
+       } else {
+               bma.minleft = 0;
+       }
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
+                               &bma.prev);
+       n = 0;
+       end = bno + len;
+       obno = bno;
+
+       bma.tp = tp;
+       bma.ip = ip;
+       bma.total = total;
+       bma.userdata = 0;
+       bma.flist = flist;
+       bma.firstblock = firstblock;
+
+       if (flags & XFS_BMAPI_STACK_SWITCH)
+               bma.stack_switch = 1;
+
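+       /*
+        * Walk the requested range, allocating into holes and delayed
+        * allocation extents and converting unwritten extents as required,
+        * until we run out of range or output mappings.
+        */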
+       while (bno < end && n < *nmap) {
+               inhole = eof || bma.got.br_startoff > bno;
+               wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+
+               /*
+                * First, deal with the hole before the allocated space
+                * that we found, if any.
+                */
+               if (inhole || wasdelay) {
+                       bma.eof = eof;
+                       bma.conv = !!(flags & XFS_BMAPI_CONVERT);
+                       bma.wasdel = wasdelay;
+                       bma.offset = bno;
+                       bma.flags = flags;
+
+                       /*
+                        * There's a 32/64 bit type mismatch between the
+                        * allocation length request (which can be 64 bits in
+                        * length) and the bma length request, which is
+                        * xfs_extlen_t and therefore 32 bits. Hence we have to
+                        * check for 32-bit overflows and handle them here.
+                        */
+                       if (len > (xfs_filblks_t)MAXEXTLEN)
+                               bma.length = MAXEXTLEN;
+                       else
+                               bma.length = len;
+
+                       ASSERT(len > 0);
+                       ASSERT(bma.length > 0);
+                       error = xfs_bmapi_allocate(&bma);
+                       if (error)
+                               goto error0;
+                       if (bma.blkno == NULLFSBLOCK)
+                               break;
+               }
+
+               /* Deal with the allocated space we found.  */
+               xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
+                                                       end, n, flags);
+
+               /* Execute unwritten extent conversion if necessary */
+               error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
+               if (error == EAGAIN)
+                       continue;
+               if (error)
+                       goto error0;
+
+               /* update the extent map to return */
+               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+               /*
+                * If we're done, stop now.  Stop when we've allocated
+                * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
+                * the transaction may get too big.
+                */
+               if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
+                       break;
+
+               /* Else go on to the next record. */
+               bma.prev = bma.got;
+               if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
+                                        &bma.got);
+               } else
+                       eof = 1;
+       }
+       *nmap = n;
+
+       /*
+        * Transform from btree to extents, give it cur.
+        */
+       if (xfs_bmap_wants_extents(ip, whichfork)) {
+               int             tmp_logflags = 0;
+
+               ASSERT(bma.cur);
+               error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
+                       &tmp_logflags, whichfork);
+               bma.logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+
+       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
+              XFS_IFORK_NEXTENTS(ip, whichfork) >
+               XFS_IFORK_MAXEXT(ip, whichfork));
+       error = 0;
+error0:
+       /*
+        * Log everything.  Do this after conversion, there's no point in
+        * logging the extent records if we've converted to btree format.
+        */
+       if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               bma.logflags &= ~xfs_ilog_fext(whichfork);
+       else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
+                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               bma.logflags &= ~xfs_ilog_fbroot(whichfork);
+       /*
+        * Log whatever the flags say, even if error.  Otherwise we might miss
+        * detecting a case where the data is changed, there's an error,
+        * and it's not logged so we don't shut down when we should.
+        */
+       if (bma.logflags)
+               xfs_trans_log_inode(tp, ip, bma.logflags);
+
+       if (bma.cur) {
+               if (!error) {
+                       ASSERT(*firstblock == NULLFSBLOCK ||
+                              XFS_FSB_TO_AGNO(mp, *firstblock) ==
+                              XFS_FSB_TO_AGNO(mp,
+                                      bma.cur->bc_private.b.firstblock) ||
+                              (flist->xbf_low &&
+                               XFS_FSB_TO_AGNO(mp, *firstblock) <
+                               XFS_FSB_TO_AGNO(mp,
+                                       bma.cur->bc_private.b.firstblock)));
+                       *firstblock = bma.cur->bc_private.b.firstblock;
+               }
+               xfs_btree_del_cursor(bma.cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+       if (!error)
+               xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
+                       orig_nmap, *nmap);
+       return error;
+}
+
+/*
+ * Called by xfs_bmapi to update file extent records and the btree
+ * after removing space (or undoing a delayed allocation).
+ */
+STATIC int                             /* error */
+xfs_bmap_del_extent(
+       xfs_inode_t             *ip,    /* incore inode pointer */
+       xfs_trans_t             *tp,    /* current transaction pointer */
+       xfs_extnum_t            *idx,   /* extent number to update/delete */
+       xfs_bmap_free_t         *flist, /* list of extents to be freed */
+       xfs_btree_cur_t         *cur,   /* if null, not a btree */
+       xfs_bmbt_irec_t         *del,   /* data to remove from extents */
+       int                     *logflagsp, /* inode logging flags */
+       int                     whichfork) /* data or attr fork */
+{
+       xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
+       xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
+       xfs_fsblock_t           del_endblock=0; /* first block past del */
+       xfs_fileoff_t           del_endoff;     /* first offset past del */
+       int                     delay;  /* current block is delayed allocated */
+       int                     do_fx;  /* free extent at end of routine */
+       xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
+       int                     error;  /* error return value */
+       int                     flags;  /* inode logging flags */
+       xfs_bmbt_irec_t         got;    /* current extent entry */
+       xfs_fileoff_t           got_endoff;     /* first offset past got */
+       int                     i;      /* temp state */
+       xfs_ifork_t             *ifp;   /* inode fork pointer */
+       xfs_mount_t             *mp;    /* mount structure */
+       xfs_filblks_t           nblks;  /* quota/sb block count */
+       xfs_bmbt_irec_t         new;    /* new record to be inserted */
+       /* REFERENCED */
+       uint                    qfield; /* quota field to update */
+       xfs_filblks_t           temp;   /* for indirect length calculations */
+       xfs_filblks_t           temp2;  /* for indirect length calculations */
+       int                     state = 0;
+
+       XFS_STATS_INC(xs_del_exlist);
+
+       if (whichfork == XFS_ATTR_FORK)
+               state |= BMAP_ATTRFORK;
+
+       mp = ip->i_mount;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
+               (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT(del->br_blockcount > 0);
+       ep = xfs_iext_get_ext(ifp, *idx);
+       xfs_bmbt_get_all(ep, &got);
+       ASSERT(got.br_startoff <= del->br_startoff);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got.br_startoff + got.br_blockcount;
+       ASSERT(got_endoff >= del_endoff);
+       delay = isnullstartblock(got.br_startblock);
+       ASSERT(isnullstartblock(del->br_startblock) == delay);
+       flags = 0;
+       qfield = 0;
+       error = 0;
+       /*
+        * If deleting a real allocation, must free up the disk space.
+        */
+       if (!delay) {
+               flags = XFS_ILOG_CORE;
+               /*
+                * Realtime allocation.  Free it and record di_nblocks update.
+                */
+               if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+                       xfs_fsblock_t   bno;
+                       xfs_filblks_t   len;
+
+                       ASSERT(do_mod(del->br_blockcount,
+                                     mp->m_sb.sb_rextsize) == 0);
+                       ASSERT(do_mod(del->br_startblock,
+                                     mp->m_sb.sb_rextsize) == 0);
+                       bno = del->br_startblock;
+                       len = del->br_blockcount;
+                       do_div(bno, mp->m_sb.sb_rextsize);
+                       do_div(len, mp->m_sb.sb_rextsize);
+                       error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+                       if (error)
+                               goto done;
+                       do_fx = 0;
+                       nblks = len * mp->m_sb.sb_rextsize;
+                       qfield = XFS_TRANS_DQ_RTBCOUNT;
+               }
+               /*
+                * Ordinary allocation.
+                */
+               else {
+                       do_fx = 1;
+                       nblks = del->br_blockcount;
+                       qfield = XFS_TRANS_DQ_BCOUNT;
+               }
+               /*
+                * Set up del_endblock and cur for later.
+                */
+               del_endblock = del->br_startblock + del->br_blockcount;
+               if (cur) {
+                       if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                       got.br_startblock, got.br_blockcount,
+                                       &i)))
+                               goto done;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               }
+               da_old = da_new = 0;
+       } else {
+               da_old = startblockval(got.br_startblock);
+               da_new = 0;
+               nblks = 0;
+               do_fx = 0;
+       }
+       /*
+        * Set flag value to use in switch statement.
+        * Bit 1 (value 2) is set when the deletion starts at the start of
+        * the extent, bit 0 (value 1) when it ends at the end of the extent.
+        */
+       switch (((got.br_startoff == del->br_startoff) << 1) |
+               (got_endoff == del_endoff)) {
+       case 3:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1,
+                               whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
+               --*idx;
+               if (delay)
+                       break;
+
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                       XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+               flags |= XFS_ILOG_CORE;
+               if (!cur) {
+                       flags |= xfs_ilog_fext(whichfork);
+                       break;
+               }
+               if ((error = xfs_btree_delete(cur, &i)))
+                       goto done;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               break;
+
+       case 2:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_startoff(ep, del_endoff);
+               temp = got.br_blockcount - del->br_blockcount;
+               xfs_bmbt_set_blockcount(ep, temp);
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+                       da_new = temp;
+                       break;
+               }
+               xfs_bmbt_set_startblock(ep, del_endblock);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               if (!cur) {
+                       flags |= xfs_ilog_fext(whichfork);
+                       break;
+               }
+               if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
+                               got.br_blockcount - del->br_blockcount,
+                               got.br_state)))
+                       goto done;
+               break;
+
+       case 1:
+               /*
+                * Deleting the last part of the extent.
+                */
+               temp = got.br_blockcount - del->br_blockcount;
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+                       da_new = temp;
+                       break;
+               }
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               if (!cur) {
+                       flags |= xfs_ilog_fext(whichfork);
+                       break;
+               }
+               if ((error = xfs_bmbt_update(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount - del->br_blockcount,
+                               got.br_state)))
+                       goto done;
+               break;
+
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                */
+               temp = del->br_startoff - got.br_startoff;
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               new.br_startoff = del_endoff;
+               temp2 = got_endoff - del_endoff;
+               new.br_blockcount = temp2;
+               new.br_state = got.br_state;
+               if (!delay) {
+                       new.br_startblock = del_endblock;
+                       flags |= XFS_ILOG_CORE;
+                       if (cur) {
+                               if ((error = xfs_bmbt_update(cur,
+                                               got.br_startoff,
+                                               got.br_startblock, temp,
+                                               got.br_state)))
+                                       goto done;
+                               if ((error = xfs_btree_increment(cur, 0, &i)))
+                                       goto done;
+                               cur->bc_rec.b = new;
+                               error = xfs_btree_insert(cur, &i);
+                               if (error && error != ENOSPC)
+                                       goto done;
+                               /*
+                                * If we get no space back from the btree
+                                * insert, it tried a split and we have a
+                                * zero block reservation.  Fix up our state
+                                * and return the error.
+                                */
+                               if (error == ENOSPC) {
+                                       /*
+                                        * Reset the cursor, don't trust
+                                        * it after any insert operation.
+                                        */
+                                       if ((error = xfs_bmbt_lookup_eq(cur,
+                                                       got.br_startoff,
+                                                       got.br_startblock,
+                                                       temp, &i)))
+                                               goto done;
+                                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                                       /*
+                                        * Update the btree record back
+                                        * to the original value.
+                                        */
+                                       if ((error = xfs_bmbt_update(cur,
+                                                       got.br_startoff,
+                                                       got.br_startblock,
+                                                       got.br_blockcount,
+                                                       got.br_state)))
+                                               goto done;
+                                       /*
+                                        * Reset the extent record back
+                                        * to the original value.
+                                        */
+                                       xfs_bmbt_set_blockcount(ep,
+                                               got.br_blockcount);
+                                       flags = 0;
+                                       error = ENOSPC;
+                                       goto done;
+                               }
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       } else
+                               flags |= xfs_ilog_fext(whichfork);
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+               } else {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       temp = xfs_bmap_worst_indlen(ip, temp);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
+                       new.br_startblock = nullstartblock((int)temp2);
+                       da_new = temp + temp2;
+                       while (da_new > da_old) {
+                               if (temp) {
+                                       temp--;
+                                       da_new--;
+                                       xfs_bmbt_set_startblock(ep,
+                                               nullstartblock((int)temp));
+                               }
+                               if (da_new == da_old)
+                                       break;
+                               if (temp2) {
+                                       temp2--;
+                                       da_new--;
+                                       new.br_startblock =
+                                               nullstartblock((int)temp2);
+                               }
+                       }
+               }
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               xfs_iext_insert(ip, *idx + 1, 1, &new, state);
+               ++*idx;
+               break;
+       }
+       /*
+        * If we need to, add to list of extents to delete.
+        */
+       if (do_fx)
+               xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
+                       mp);
+       /*
+        * Adjust inode # blocks in the file.
+        */
+       if (nblks)
+               ip->i_d.di_nblocks -= nblks;
+       /*
+        * Adjust quota data.
+        */
+       if (qfield)
+               xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
+
+       /*
+        * Account for change in delayed indirect blocks.
+        * Nothing to do for disk quota accounting here.
+        */
+       ASSERT(da_old >= da_new);
+       if (da_old > da_new) {
+               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                       (int64_t)(da_old - da_new), 0);
+       }
+done:
+       *logflagsp = flags;
+       return error;
+}
+
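+/*
+ * Illustrative sketch only: how a deletion range appears to map onto the
+ * four switch cases handled above.  The case value reads like a two-bit
+ * mask: bit 1 set when the deletion begins at the start of the existing
+ * extent, bit 0 set when it ends at its end.  The helper below is
+ * hypothetical and is not used by the code above.
+ */
+static inline int
+bmap_del_case(
+       xfs_fileoff_t   got_startoff,   /* start of existing extent */
+       xfs_filblks_t   got_blockcount, /* length of existing extent */
+       xfs_fileoff_t   del_startoff,   /* start of range being deleted */
+       xfs_filblks_t   del_blockcount) /* length of range being deleted */
+{
+       int             state = 0;
+
+       if (del_startoff == got_startoff)
+               state |= 2;             /* front of the extent goes away */
+       if (del_startoff + del_blockcount == got_startoff + got_blockcount)
+               state |= 1;             /* back of the extent goes away */
+       return state;   /* 3 = whole, 2 = front, 1 = back, 0 = middle */
+}
+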
+/*
+ * Unmap (remove) blocks from a file.
+ * If nexts is nonzero then the number of extents to remove is limited to
+ * that value.  *done is set when the whole block range has been unmapped,
+ * and left clear if not all extents in the range could be removed.
+ */
+int                                            /* error */
+xfs_bunmapi(
+       xfs_trans_t             *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* incore inode */
+       xfs_fileoff_t           bno,            /* starting offset to unmap */
+       xfs_filblks_t           len,            /* length to unmap in file */
+       int                     flags,          /* misc flags */
+       xfs_extnum_t            nexts,          /* number of extents max */
+       xfs_fsblock_t           *firstblock,    /* first allocated block
+                                                  controls a.g. for allocs */
+       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
+       int                     *done)          /* set when unmapping is complete */
+{
+       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
+       xfs_bmbt_irec_t         del;            /* extent being deleted */
+       int                     eof;            /* is deleting at eof */
+       xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
+       int                     error;          /* error return value */
+       xfs_extnum_t            extno;          /* extent number in list */
+       xfs_bmbt_irec_t         got;            /* current extent record */
+       xfs_ifork_t             *ifp;           /* inode fork pointer */
+       int                     isrt;           /* freeing in rt area */
+       xfs_extnum_t            lastx;          /* last extent index used */
+       int                     logflags;       /* transaction logging flags */
+       xfs_extlen_t            mod;            /* rt extent offset */
+       xfs_mount_t             *mp;            /* mount structure */
+       xfs_extnum_t            nextents;       /* number of file extents */
+       xfs_bmbt_irec_t         prev;           /* previous extent record */
+       xfs_fileoff_t           start;          /* first file offset deleted */
+       int                     tmp_logflags;   /* partial logging flags */
+       int                     wasdel;         /* was a delayed alloc extent */
+       int                     whichfork;      /* data or attribute fork */
+       xfs_fsblock_t           sum;
+
+       trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
+
+       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+               XFS_ATTR_FORK : XFS_DATA_FORK;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (unlikely(
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+               XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
+                                ip->i_mount);
+               return EFSCORRUPTED;
+       }
+       mp = ip->i_mount;
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return EIO;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(len > 0);
+       ASSERT(nexts >= 0);
+
+       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+           (error = xfs_iread_extents(tp, ip, whichfork)))
+               return error;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *done = 1;
+               return 0;
+       }
+       XFS_STATS_INC(xs_blk_unmap);
+       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+       start = bno;
+       bno = start + len - 1;
+       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+               &prev);
+
+       /*
+        * Check to see if the given block number is past the end of the
+        * file; if so, back up to the last block.
+        */
+       if (eof) {
+               ep = xfs_iext_get_ext(ifp, --lastx);
+               xfs_bmbt_get_all(ep, &got);
+               bno = got.br_startoff + got.br_blockcount - 1;
+       }
+       logflags = 0;
+       if (ifp->if_flags & XFS_IFBROOT) {
+               ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else
+               cur = NULL;
+
+       if (isrt) {
+               /*
+                * Synchronize by locking the bitmap inode.
+                */
+               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+               xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+       }
+
+       extno = 0;
+       while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+              (nexts == 0 || extno < nexts)) {
+               /*
+                * Is the found extent after a hole in which bno lives?
+                * Just back up to the previous extent, if so.
+                */
+               if (got.br_startoff > bno) {
+                       if (--lastx < 0)
+                               break;
+                       ep = xfs_iext_get_ext(ifp, lastx);
+                       xfs_bmbt_get_all(ep, &got);
+               }
+               /*
+                * Is the last block of this extent before the range
+                * we're supposed to delete?  If so, we're done.
+                */
+               bno = XFS_FILEOFF_MIN(bno,
+                       got.br_startoff + got.br_blockcount - 1);
+               if (bno < start)
+                       break;
+               /*
+                * Then deal with the (possibly delayed) allocated space
+                * we found.
+                */
+               ASSERT(ep != NULL);
+               del = got;
+               wasdel = isnullstartblock(del.br_startblock);
+               if (got.br_startoff < start) {
+                       del.br_startoff = start;
+                       del.br_blockcount -= start - got.br_startoff;
+                       if (!wasdel)
+                               del.br_startblock += start - got.br_startoff;
+               }
+               if (del.br_startoff + del.br_blockcount > bno + 1)
+                       del.br_blockcount = bno + 1 - del.br_startoff;
+               sum = del.br_startblock + del.br_blockcount;
+               if (isrt &&
+                   (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
+                       /*
+                        * Realtime extent not lined up at the end.
+                        * The extent could have been split into written
+                        * and unwritten pieces, or we could just be
+                        * unmapping part of it.  But we can't really
+                        * get rid of part of a realtime extent.
+                        */
+                       if (del.br_state == XFS_EXT_UNWRITTEN ||
+                           !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+                               /*
+                                * This piece is unwritten, or we're not
+                                * using unwritten extents.  Skip over it.
+                                */
+                               ASSERT(bno >= mod);
+                               bno -= mod > del.br_blockcount ?
+                                       del.br_blockcount : mod;
+                               if (bno < got.br_startoff) {
+                                       if (--lastx >= 0)
+                                               xfs_bmbt_get_all(xfs_iext_get_ext(
+                                                       ifp, lastx), &got);
+                               }
+                               continue;
+                       }
+                       /*
+                        * It's written, turn it unwritten.
+                        * This is better than zeroing it.
+                        */
+                       ASSERT(del.br_state == XFS_EXT_NORM);
+                       ASSERT(xfs_trans_get_block_res(tp) > 0);
+                       /*
+                        * If this spans a realtime extent boundary,
+                        * chop it back to the start of the one we end at.
+                        */
+                       if (del.br_blockcount > mod) {
+                               del.br_startoff += del.br_blockcount - mod;
+                               del.br_startblock += del.br_blockcount - mod;
+                               del.br_blockcount = mod;
+                       }
+                       del.br_state = XFS_EXT_UNWRITTEN;
+                       error = xfs_bmap_add_extent_unwritten_real(tp, ip,
+                                       &lastx, &cur, &del, firstblock, flist,
+                                       &logflags);
+                       if (error)
+                               goto error0;
+                       goto nodelete;
+               }
+               if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
+                       /*
+                        * Realtime extent is lined up at the end but not
+                        * at the front.  We'll get rid of full extents if
+                        * we can.
+                        */
+                       mod = mp->m_sb.sb_rextsize - mod;
+                       if (del.br_blockcount > mod) {
+                               del.br_blockcount -= mod;
+                               del.br_startoff += mod;
+                               del.br_startblock += mod;
+                       } else if ((del.br_startoff == start &&
+                                   (del.br_state == XFS_EXT_UNWRITTEN ||
+                                    xfs_trans_get_block_res(tp) == 0)) ||
+                                  !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+                               /*
+                                * Can't make it unwritten.  There isn't
+                                * a full extent here so just skip it.
+                                */
+                               ASSERT(bno >= del.br_blockcount);
+                               bno -= del.br_blockcount;
+                               if (got.br_startoff > bno) {
+                                       if (--lastx >= 0) {
+                                               ep = xfs_iext_get_ext(ifp,
+                                                                     lastx);
+                                               xfs_bmbt_get_all(ep, &got);
+                                       }
+                               }
+                               continue;
+                       } else if (del.br_state == XFS_EXT_UNWRITTEN) {
+                               /*
+                                * This one is already unwritten.
+                                * It must have a written left neighbor.
+                                * Unwrite the killed part of that one and
+                                * try again.
+                                */
+                               ASSERT(lastx > 0);
+                               xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+                                               lastx - 1), &prev);
+                               ASSERT(prev.br_state == XFS_EXT_NORM);
+                               ASSERT(!isnullstartblock(prev.br_startblock));
+                               ASSERT(del.br_startblock ==
+                                      prev.br_startblock + prev.br_blockcount);
+                               if (prev.br_startoff < start) {
+                                       mod = start - prev.br_startoff;
+                                       prev.br_blockcount -= mod;
+                                       prev.br_startblock += mod;
+                                       prev.br_startoff = start;
+                               }
+                               prev.br_state = XFS_EXT_UNWRITTEN;
+                               lastx--;
+                               error = xfs_bmap_add_extent_unwritten_real(tp,
+                                               ip, &lastx, &cur, &prev,
+                                               firstblock, flist, &logflags);
+                               if (error)
+                                       goto error0;
+                               goto nodelete;
+                       } else {
+                               ASSERT(del.br_state == XFS_EXT_NORM);
+                               del.br_state = XFS_EXT_UNWRITTEN;
+                               error = xfs_bmap_add_extent_unwritten_real(tp,
+                                               ip, &lastx, &cur, &del,
+                                               firstblock, flist, &logflags);
+                               if (error)
+                                       goto error0;
+                               goto nodelete;
+                       }
+               }
+               if (wasdel) {
+                       ASSERT(startblockval(del.br_startblock) > 0);
+                       /* Update realtime/data freespace, unreserve quota */
+                       if (isrt) {
+                               xfs_filblks_t rtexts;
+
+                               rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
+                               do_div(rtexts, mp->m_sb.sb_rextsize);
+                               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+                                               (int64_t)rtexts, 0);
+                               (void)xfs_trans_reserve_quota_nblks(NULL,
+                                       ip, -((long)del.br_blockcount), 0,
+                                       XFS_QMOPT_RES_RTBLKS);
+                       } else {
+                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                               (int64_t)del.br_blockcount, 0);
+                               (void)xfs_trans_reserve_quota_nblks(NULL,
+                                       ip, -((long)del.br_blockcount), 0,
+                                       XFS_QMOPT_RES_REGBLKS);
+                       }
+                       ip->i_delayed_blks -= del.br_blockcount;
+                       if (cur)
+                               cur->bc_private.b.flags |=
+                                       XFS_BTCUR_BPRV_WASDEL;
+               } else if (cur)
+                       cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
+               /*
+                * If the directory code is running with no block
+                * reservation, the deleted block is in the middle of its
+                * extent, and the resulting insert of an extent would
+                * force a conversion to btree format, reject it.  The
+                * calling code will then swap blocks around instead.
+                * We have to do this now, rather than waiting for the
+                * conversion to btree format, since the transaction
+                * will be dirty.
+                */
+               if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
+                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+                       XFS_IFORK_MAXEXT(ip, whichfork) &&
+                   del.br_startoff > got.br_startoff &&
+                   del.br_startoff + del.br_blockcount <
+                   got.br_startoff + got.br_blockcount) {
+                       error = ENOSPC;
+                       goto error0;
+               }
+               error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
+                               &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+               bno = del.br_startoff - 1;
+nodelete:
+               /*
+                * If not done go on to the next (previous) record.
+                */
+               if (bno != (xfs_fileoff_t)-1 && bno >= start) {
+                       if (lastx >= 0) {
+                               ep = xfs_iext_get_ext(ifp, lastx);
+                               if (xfs_bmbt_get_startoff(ep) > bno) {
+                                       if (--lastx >= 0)
+                                               ep = xfs_iext_get_ext(ifp,
+                                                                     lastx);
+                               }
+                               xfs_bmbt_get_all(ep, &got);
+                       }
+                       extno++;
+               }
+       }
+       *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
+
+       /*
+        * Convert to a btree if necessary.
+        */
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
+                       &cur, 0, &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+       /*
+        * transform from btree to extents, give it cur
+        */
+       else if (xfs_bmap_wants_extents(ip, whichfork)) {
+               ASSERT(cur != NULL);
+               error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+                       whichfork);
+               logflags |= tmp_logflags;
+               if (error)
+                       goto error0;
+       }
+       /*
+        * transform from extents to local?
+        */
+       error = 0;
+error0:
+       /*
+        * Log everything.  Do this after conversion, there's no point in
+        * logging the extent records if we've converted to btree format.
+        */
+       if ((logflags & xfs_ilog_fext(whichfork)) &&
+           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+               logflags &= ~xfs_ilog_fext(whichfork);
+       else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
+                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+               logflags &= ~xfs_ilog_fbroot(whichfork);
+       /*
+        * Log the inode even in the error case; if the transaction
+        * is dirty we'll need to shut down the filesystem.
+        */
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+       if (cur) {
+               if (!error) {
+                       *firstblock = cur->bc_private.b.firstblock;
+                       cur->bc_private.b.allocated = 0;
+               }
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+       return error;
+}
+
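+/*
+ * Illustrative sketch only: the realtime checks in xfs_bunmapi() above
+ * reduce to asking how far the candidate range sits from realtime extent
+ * alignment at its front and at its end.  Plain modulo is used here for
+ * clarity where the code above uses do_mod(); the helper is hypothetical
+ * and is not used by the code above.
+ */
+static inline void
+bunmapi_rt_misalignment(
+       xfs_fsblock_t   startblock,     /* first block of the range */
+       xfs_filblks_t   blockcount,     /* length of the range */
+       xfs_extlen_t    rextsize,       /* realtime extent size in blocks */
+       xfs_extlen_t    *front_mod,     /* nonzero: front is not aligned */
+       xfs_extlen_t    *end_mod)       /* nonzero: end is not aligned */
+{
+       *front_mod = startblock % rextsize;
+       *end_mod = (startblock + blockcount) % rextsize;
+}
+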
+/*
+ * Shift extent records to the left to cover a hole.
+ *
+ * The maximum number of extents to be shifted in a single operation
+ * is @num_exts, and @current_ext keeps track of the current extent
+ * index we have shifted. @offset_shift_fsb is the length by which each
+ * extent is shifted. If there is no hole to shift the extents
+ * into, this is considered an invalid operation and we abort immediately.
+ */
+int
+xfs_bmap_shift_extents(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     *done,
+       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           offset_shift_fsb,
+       xfs_extnum_t            *current_ext,
+       xfs_fsblock_t           *firstblock,
+       struct xfs_bmap_free    *flist,
+       int                     num_exts)
+{
+       struct xfs_btree_cur            *cur;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            left;
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_extnum_t                    nexts = 0;
+       xfs_fileoff_t                   startoff;
+       int                             error = 0;
+       int                             i;
+       int                             whichfork = XFS_DATA_FORK;
+       int                             logflags;
+       xfs_filblks_t                   blockcount = 0;
+       int                             total_extents;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+                                XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return EIO;
+
+       ASSERT(current_ext != NULL);
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * If *current_ext is 0, we need to look up the extent from which
+        * we start shifting and store it in gotp.
+        */
+       if (!*current_ext) {
+               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+               /*
+                * gotp can be null in 2 cases: 1) if there are no extents
+                * or 2) start_fsb lies in a hole beyond which there are
+                * no extents. Either way, we are done.
+                */
+               if (!gotp) {
+                       *done = 1;
+                       return 0;
+               }
+       }
+
+       /* We are going to change core inode */
+       logflags = XFS_ILOG_CORE;
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstblock;
+               cur->bc_private.b.flist = flist;
+               cur->bc_private.b.flags = 0;
+       } else {
+               cur = NULL;
+               logflags |= XFS_ILOG_DEXT;
+       }
+
+       /*
+        * There may be delalloc extents in the data fork before the range
+        * we are collapsing out, so we cannot use the count of real extents
+        * here.  Instead we have to calculate it from the incore fork.
+        */
+       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       while (nexts++ < num_exts && *current_ext < total_extents) {
+
+               gotp = xfs_iext_get_ext(ifp, *current_ext);
+               xfs_bmbt_get_all(gotp, &got);
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Before shifting the extent into the hole, make sure that
+                * the hole is large enough to accommodate the shift.
+                */
+               if (*current_ext) {
+                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+                                               *current_ext - 1), &left);
+
+                       if (startoff < left.br_startoff + left.br_blockcount)
+                               error = EINVAL;
+               } else if (offset_shift_fsb > got.br_startoff) {
+                       /*
+                        * When the first extent is shifted,
+                        * offset_shift_fsb must not exceed the starting
+                        * offset of the first extent.
+                        */
+                       error = EINVAL;
+               }
+
+               if (error)
+                       goto del_cursor;
+
+               if (cur) {
+                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                  got.br_startblock,
+                                                  got.br_blockcount,
+                                                  &i);
+                       if (error)
+                               goto del_cursor;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+               }
+
+               /* Check if we can merge 2 adjacent extents */
+               if (*current_ext &&
+                   left.br_startoff + left.br_blockcount == startoff &&
+                   left.br_startblock + left.br_blockcount ==
+                               got.br_startblock &&
+                   left.br_state == got.br_state &&
+                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+                       blockcount = left.br_blockcount +
+                               got.br_blockcount;
+                       xfs_iext_remove(ip, *current_ext, 1, 0);
+                       if (cur) {
+                               error = xfs_btree_delete(cur, &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+                       XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
+                       xfs_bmbt_get_all(gotp, &got);
+
+                       /* Make cursor point to the extent we will update */
+                       if (cur) {
+                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                                                          got.br_startblock,
+                                                          got.br_blockcount,
+                                                          &i);
+                               if (error)
+                                       goto del_cursor;
+                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+                       }
+
+                       xfs_bmbt_set_blockcount(gotp, blockcount);
+                       got.br_blockcount = blockcount;
+               } else {
+                       /* We have to update the startoff */
+                       xfs_bmbt_set_startoff(gotp, startoff);
+                       got.br_startoff = startoff;
+               }
+
+               if (cur) {
+                       error = xfs_bmbt_update(cur, got.br_startoff,
+                                               got.br_startblock,
+                                               got.br_blockcount,
+                                               got.br_state);
+                       if (error)
+                               goto del_cursor;
+               }
+
+               (*current_ext)++;
+               total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       }
+
+       /* Check if we are done */
+       if (*current_ext == total_extents)
+               *done = 1;
+
+del_cursor:
+       if (cur)
+               xfs_btree_del_cursor(cur,
+                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+       xfs_trans_log_inode(tp, ip, logflags);
+       return error;
+}
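+
+/*
+ * Illustrative sketch only: the per-extent decision made in the loop in
+ * xfs_bmap_shift_extents() above.  An extent shifted left by
+ * offset_shift_fsb is merged into its left neighbour when the two become
+ * contiguous in both file offset and disk block, share the same state and
+ * still fit in one record; otherwise only its start offset is moved.  The
+ * helper is hypothetical and is not used by the code above.
+ */
+static inline bool
+can_merge_after_shift(
+       const struct xfs_bmbt_irec      *left,  /* left neighbour, if any */
+       const struct xfs_bmbt_irec      *got,   /* extent being shifted */
+       xfs_fileoff_t                   shift)  /* offset_shift_fsb */
+{
+       xfs_fileoff_t   new_startoff = got->br_startoff - shift;
+
+       return left->br_startoff + left->br_blockcount == new_startoff &&
+              left->br_startblock + left->br_blockcount == got->br_startblock &&
+              left->br_state == got->br_state &&
+              left->br_blockcount + got->br_blockcount <= MAXEXTLEN;
+}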
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
new file mode 100644 (file)
index 0000000..de65bb8
--- /dev/null
@@ -0,0 +1,967 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_dinode.h"
+
+/*
+ * Determine the extent state.
+ */
+/* ARGSUSED */
+STATIC xfs_exntst_t
+xfs_extent_state(
+       xfs_filblks_t           blks,
+       int                     extent_flag)
+{
+       if (extent_flag) {
+               ASSERT(blks != 0);      /* saved for DMIG */
+               return XFS_EXT_UNWRITTEN;
+       }
+       return XFS_EXT_NORM;
+}
+
+/*
+ * Convert on-disk form of btree root to in-memory form.
+ */
+void
+xfs_bmdr_to_bmbt(
+       struct xfs_inode        *ip,
+       xfs_bmdr_block_t        *dblock,
+       int                     dblocklen,
+       struct xfs_btree_block  *rblock,
+       int                     rblocklen)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     dmxr;
+       xfs_bmbt_key_t          *fkp;
+       __be64                  *fpp;
+       xfs_bmbt_key_t          *tkp;
+       __be64                  *tpp;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+                                XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
+                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
+       else
+               xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
+                                XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
+                                XFS_BTREE_LONG_PTRS);
+
+       rblock->bb_level = dblock->bb_level;
+       ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+       rblock->bb_numrecs = dblock->bb_numrecs;
+       dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
+       fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+       tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+       fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
+       tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
+       dmxr = be16_to_cpu(dblock->bb_numrecs);
+       memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+       memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
+}
+
+/*
+ * Convert a compressed bmap extent record to an uncompressed form.
+ * This code must be in sync with the routines xfs_bmbt_get_startoff,
+ * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
+ */
+STATIC void
+__xfs_bmbt_get_all(
+               __uint64_t l0,
+               __uint64_t l1,
+               xfs_bmbt_irec_t *s)
+{
+       int     ext_flag;
+       xfs_exntst_t st;
+
+       ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
+       s->br_startoff = ((xfs_fileoff_t)l0 &
+                          xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#if XFS_BIG_BLKNOS
+       s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
+                          (((xfs_fsblock_t)l1) >> 21);
+#else
+#ifdef DEBUG
+       {
+               xfs_dfsbno_t    b;
+
+               b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
+                   (((xfs_dfsbno_t)l1) >> 21);
+               ASSERT((b >> 32) == 0 || isnulldstartblock(b));
+               s->br_startblock = (xfs_fsblock_t)b;
+       }
+#else  /* !DEBUG */
+       s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_BLKNOS */
+       s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
+       /* This is xfs_extent_state() in-line */
+       if (ext_flag) {
+               ASSERT(s->br_blockcount != 0);  /* saved for DMIG */
+               st = XFS_EXT_UNWRITTEN;
+       } else
+               st = XFS_EXT_NORM;
+       s->br_state = st;
+}
+
+void
+xfs_bmbt_get_all(
+       xfs_bmbt_rec_host_t *r,
+       xfs_bmbt_irec_t *s)
+{
+       __xfs_bmbt_get_all(r->l0, r->l1, s);
+}
+
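+/*
+ * Layout reference, derived from the shifts and masks above (kept here as
+ * an illustration rather than authoritative documentation):
+ *
+ *   l0 bit  63    : extent flag (1 = unwritten)
+ *   l0 bits 62-9  : br_startoff (54 bits)
+ *   l0 bits 8-0   : high 9 bits of br_startblock
+ *   l1 bits 63-21 : low 43 bits of br_startblock
+ *   l1 bits 20-0  : br_blockcount (21 bits)
+ */
+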
+/*
+ * Extract the blockcount field from an in memory bmap extent record.
+ */
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+       xfs_bmbt_rec_host_t     *r)
+{
+       return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
+}
+
+/*
+ * Extract the startblock field from an in memory bmap extent record.
+ */
+xfs_fsblock_t
+xfs_bmbt_get_startblock(
+       xfs_bmbt_rec_host_t     *r)
+{
+#if XFS_BIG_BLKNOS
+       return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
+              (((xfs_fsblock_t)r->l1) >> 21);
+#else
+#ifdef DEBUG
+       xfs_dfsbno_t    b;
+
+       b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
+           (((xfs_dfsbno_t)r->l1) >> 21);
+       ASSERT((b >> 32) == 0 || isnulldstartblock(b));
+       return (xfs_fsblock_t)b;
+#else  /* !DEBUG */
+       return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
+#endif /* DEBUG */
+#endif /* XFS_BIG_BLKNOS */
+}
+
+/*
+ * Extract the startoff field from an in memory bmap extent record.
+ */
+xfs_fileoff_t
+xfs_bmbt_get_startoff(
+       xfs_bmbt_rec_host_t     *r)
+{
+       return ((xfs_fileoff_t)r->l0 &
+                xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+}
+
+xfs_exntst_t
+xfs_bmbt_get_state(
+       xfs_bmbt_rec_host_t     *r)
+{
+       int     ext_flag;
+
+       ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
+       return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+                               ext_flag);
+}
+
+/*
+ * Extract the blockcount field from an on disk bmap extent record.
+ */
+xfs_filblks_t
+xfs_bmbt_disk_get_blockcount(
+       xfs_bmbt_rec_t  *r)
+{
+       return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
+}
+
+/*
+ * Extract the startoff field from a disk format bmap extent record.
+ */
+xfs_fileoff_t
+xfs_bmbt_disk_get_startoff(
+       xfs_bmbt_rec_t  *r)
+{
+       return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
+                xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+}
+
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ */
+void
+xfs_bmbt_set_allf(
+       xfs_bmbt_rec_host_t     *r,
+       xfs_fileoff_t           startoff,
+       xfs_fsblock_t           startblock,
+       xfs_filblks_t           blockcount,
+       xfs_exntst_t            state)
+{
+       int             extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
+
+       ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
+       ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
+       ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+
+#if XFS_BIG_BLKNOS
+       ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+
+       r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+               ((xfs_bmbt_rec_base_t)startoff << 9) |
+               ((xfs_bmbt_rec_base_t)startblock >> 43);
+       r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
+               ((xfs_bmbt_rec_base_t)blockcount &
+               (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
+#else  /* !XFS_BIG_BLKNOS */
+       if (isnullstartblock(startblock)) {
+               r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)startoff << 9) |
+                        (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
+               r->l1 = xfs_mask64hi(11) |
+                         ((xfs_bmbt_rec_base_t)startblock << 21) |
+                         ((xfs_bmbt_rec_base_t)blockcount &
+                          (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
+       } else {
+               r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                       ((xfs_bmbt_rec_base_t)startoff << 9);
+               r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
+                        ((xfs_bmbt_rec_base_t)blockcount &
+                        (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
+       }
+#endif /* XFS_BIG_BLKNOS */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the uncompressed form.
+ */
+void
+xfs_bmbt_set_all(
+       xfs_bmbt_rec_host_t *r,
+       xfs_bmbt_irec_t *s)
+{
+       xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
+                            s->br_blockcount, s->br_state);
+}
+
+
+/*
+ * Set all the fields in a disk format bmap extent record from the arguments.
+ */
+void
+xfs_bmbt_disk_set_allf(
+       xfs_bmbt_rec_t          *r,
+       xfs_fileoff_t           startoff,
+       xfs_fsblock_t           startblock,
+       xfs_filblks_t           blockcount,
+       xfs_exntst_t            state)
+{
+       int                     extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
+
+       ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
+       ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
+       ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+
+#if XFS_BIG_BLKNOS
+       ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+
+       r->l0 = cpu_to_be64(
+               ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                ((xfs_bmbt_rec_base_t)startoff << 9) |
+                ((xfs_bmbt_rec_base_t)startblock >> 43));
+       r->l1 = cpu_to_be64(
+               ((xfs_bmbt_rec_base_t)startblock << 21) |
+                ((xfs_bmbt_rec_base_t)blockcount &
+                 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
+#else  /* !XFS_BIG_BLKNOS */
+       if (isnullstartblock(startblock)) {
+               r->l0 = cpu_to_be64(
+                       ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                        ((xfs_bmbt_rec_base_t)startoff << 9) |
+                         (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
+               r->l1 = cpu_to_be64(xfs_mask64hi(11) |
+                         ((xfs_bmbt_rec_base_t)startblock << 21) |
+                         ((xfs_bmbt_rec_base_t)blockcount &
+                          (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
+       } else {
+               r->l0 = cpu_to_be64(
+                       ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+                        ((xfs_bmbt_rec_base_t)startoff << 9));
+               r->l1 = cpu_to_be64(
+                       ((xfs_bmbt_rec_base_t)startblock << 21) |
+                        ((xfs_bmbt_rec_base_t)blockcount &
+                         (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
+       }
+#endif /* XFS_BIG_BLKNOS */
+}
+
+/*
+ * Set all the fields in a disk format bmap extent record from the
+ * uncompressed form.
+ */
+STATIC void
+xfs_bmbt_disk_set_all(
+       xfs_bmbt_rec_t  *r,
+       xfs_bmbt_irec_t *s)
+{
+       xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
+                                 s->br_blockcount, s->br_state);
+}
+
+/*
+ * Set the blockcount field in a bmap extent record.
+ */
+void
+xfs_bmbt_set_blockcount(
+       xfs_bmbt_rec_host_t *r,
+       xfs_filblks_t   v)
+{
+       ASSERT((v & xfs_mask64hi(43)) == 0);
+       r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
+                 (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
+}
+
+/*
+ * Set the startblock field in a bmap extent record.
+ */
+void
+xfs_bmbt_set_startblock(
+       xfs_bmbt_rec_host_t *r,
+       xfs_fsblock_t   v)
+{
+#if XFS_BIG_BLKNOS
+       ASSERT((v & xfs_mask64hi(12)) == 0);
+       r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
+                 (xfs_bmbt_rec_base_t)(v >> 43);
+       r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
+                 (xfs_bmbt_rec_base_t)(v << 21);
+#else  /* !XFS_BIG_BLKNOS */
+       if (isnullstartblock(v)) {
+               r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
+               r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
+                         ((xfs_bmbt_rec_base_t)v << 21) |
+                         (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
+       } else {
+               r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
+               r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
+                         (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
+       }
+#endif /* XFS_BIG_BLKNOS */
+}
+
+/*
+ * Set the startoff field in a bmap extent record.
+ */
+void
+xfs_bmbt_set_startoff(
+       xfs_bmbt_rec_host_t *r,
+       xfs_fileoff_t   v)
+{
+       ASSERT((v & xfs_mask64hi(9)) == 0);
+       r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
+               ((xfs_bmbt_rec_base_t)v << 9) |
+                 (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
+}
+
+/*
+ * Set the extent state field in a bmap extent record.
+ */
+void
+xfs_bmbt_set_state(
+       xfs_bmbt_rec_host_t *r,
+       xfs_exntst_t    v)
+{
+       ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
+       if (v == XFS_EXT_NORM)
+               r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
+       else
+               r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
+}
+
+/*
+ * Convert in-memory form of btree root to on-disk form.
+ */
+void
+xfs_bmbt_to_bmdr(
+       struct xfs_mount        *mp,
+       struct xfs_btree_block  *rblock,
+       int                     rblocklen,
+       xfs_bmdr_block_t        *dblock,
+       int                     dblocklen)
+{
+       int                     dmxr;
+       xfs_bmbt_key_t          *fkp;
+       __be64                  *fpp;
+       xfs_bmbt_key_t          *tkp;
+       __be64                  *tpp;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
+               ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
+               ASSERT(rblock->bb_u.l.bb_blkno ==
+                      cpu_to_be64(XFS_BUF_DADDR_NULL));
+       } else
+               ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
+       ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
+       ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+       ASSERT(rblock->bb_level != 0);
+       dblock->bb_level = rblock->bb_level;
+       dblock->bb_numrecs = rblock->bb_numrecs;
+       dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
+       fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
+       tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
+       fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
+       tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
+       dmxr = be16_to_cpu(dblock->bb_numrecs);
+       memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
+       memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
+}
+
+/*
+ * Check extent records, which have just been read in, for any bit set in
+ * the extent flag field.  ASSERT on debug kernels, as this condition
+ * should not occur.  Return an error condition (1) if any flags are
+ * found, otherwise return 0.
+ */
+int
+xfs_check_nostate_extents(
+       xfs_ifork_t             *ifp,
+       xfs_extnum_t            idx,
+       xfs_extnum_t            num)
+{
+       for (; num > 0; num--, idx++) {
+               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
+               if ((ep->l0 >>
+                    (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
+                       ASSERT(0);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+
+STATIC struct xfs_btree_cur *
+xfs_bmbt_dup_cursor(
+       struct xfs_btree_cur    *cur)
+{
+       struct xfs_btree_cur    *new;
+
+       new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+
+       /*
+        * Copy the firstblock, flist, and flags values,
+        * since init cursor doesn't get them.
+        */
+       new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
+       new->bc_private.b.flist = cur->bc_private.b.flist;
+       new->bc_private.b.flags = cur->bc_private.b.flags;
+
+       return new;
+}
+
+STATIC void
+xfs_bmbt_update_cursor(
+       struct xfs_btree_cur    *src,
+       struct xfs_btree_cur    *dst)
+{
+       ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
+              (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
+       ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
+
+       dst->bc_private.b.allocated += src->bc_private.b.allocated;
+       dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
+
+       src->bc_private.b.allocated = 0;
+}
+
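+/*
+ * Allocate a new bmbt block for the cursor.  In outline: on the first
+ * allocation of a transaction (firstblock still NULLFSBLOCK) the request
+ * targets the caller's start hint and carries minleft equal to the
+ * transaction's block reservation, so a later full tree split cannot be
+ * starved of space; otherwise it allocates near firstblock, or by
+ * start-bno once the lowspace algorithm is active.  If no AG can honour
+ * minleft, the allocation is retried from the first AG without it and the
+ * lowspace flag is set.  A NULLFSBLOCK result after that is returned to
+ * the caller as *stat == 0.
+ */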
+STATIC int
+xfs_bmbt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       xfs_alloc_arg_t         args;           /* block allocation args */
+       int                     error;          /* error return value */
+
+       memset(&args, 0, sizeof(args));
+       args.tp = cur->bc_tp;
+       args.mp = cur->bc_mp;
+       args.fsbno = cur->bc_private.b.firstblock;
+       args.firstblock = args.fsbno;
+
+       if (args.fsbno == NULLFSBLOCK) {
+               args.fsbno = be64_to_cpu(start->l);
+               args.type = XFS_ALLOCTYPE_START_BNO;
+               /*
+                * Make sure there is sufficient room left in the AG to
+                * complete a full tree split for an extent insert.  If
+                * we are converting the middle part of an extent then
+                * we may need space for two tree splits.
+                *
+                * We are relying on the caller to make the correct block
+                * reservation for this operation to succeed.  If the
+                * reservation amount is insufficient then we may fail a
+                * block allocation here and corrupt the filesystem.
+                */
+               args.minleft = xfs_trans_get_block_res(args.tp);
+       } else if (cur->bc_private.b.flist->xbf_low) {
+               args.type = XFS_ALLOCTYPE_START_BNO;
+       } else {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       }
+
+       args.minlen = args.maxlen = args.prod = 1;
+       args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+       if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+               error = ENOSPC;
+               goto error0;
+       }
+       error = xfs_alloc_vextent(&args);
+       if (error)
+               goto error0;
+
+       if (args.fsbno == NULLFSBLOCK && args.minleft) {
+               /*
+                * Could not find an AG with enough free space to satisfy
+                * a full btree split.  Try again without minleft and if
+                * successful activate the lowspace algorithm.
+                */
+               args.fsbno = 0;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
+               args.minleft = 0;
+               error = xfs_alloc_vextent(&args);
+               if (error)
+                       goto error0;
+               cur->bc_private.b.flist->xbf_low = 1;
+       }
+       if (args.fsbno == NULLFSBLOCK) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       cur->bc_private.b.firstblock = args.fsbno;
+       cur->bc_private.b.allocated++;
+       cur->bc_private.b.ip->i_d.di_nblocks++;
+       xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+       xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+                       XFS_TRANS_DQ_BCOUNT, 1L);
+
+       new->l = cpu_to_be64(args.fsbno);
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+ error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
+
+STATIC int
+xfs_bmbt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       struct xfs_inode        *ip = cur->bc_private.b.ip;
+       struct xfs_trans        *tp = cur->bc_tp;
+       xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
+
+       xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
+       ip->i_d.di_nblocks--;
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+       xfs_trans_binval(tp, bp);
+       return 0;
+}
+
+STATIC int
+xfs_bmbt_get_minrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       if (level == cur->bc_nlevels - 1) {
+               struct xfs_ifork        *ifp;
+
+               ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+                                   cur->bc_private.b.whichfork);
+
+               return xfs_bmbt_maxrecs(cur->bc_mp,
+                                       ifp->if_broot_bytes, level == 0) / 2;
+       }
+
+       return cur->bc_mp->m_bmap_dmnr[level != 0];
+}
+
+int
+xfs_bmbt_get_maxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       if (level == cur->bc_nlevels - 1) {
+               struct xfs_ifork        *ifp;
+
+               ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+                                   cur->bc_private.b.whichfork);
+
+               return xfs_bmbt_maxrecs(cur->bc_mp,
+                                       ifp->if_broot_bytes, level == 0);
+       }
+
+       return cur->bc_mp->m_bmap_dmxr[level != 0];
+
+}
+
+/*
+ * Get the maximum records we could store in the on-disk format.
+ *
+ * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
+ * for the root node this checks the available space in the dinode fork
+ * so that we can resize the in-memory buffer to match it.  After a
+ * resize to the maximum size this function returns the same value
+ * as xfs_bmbt_get_maxrecs for the root node, too.
+ */
+STATIC int
+xfs_bmbt_get_dmaxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       if (level != cur->bc_nlevels - 1)
+               return cur->bc_mp->m_bmap_dmxr[level != 0];
+       return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
+}
+
+STATIC void
+xfs_bmbt_init_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       key->bmbt.br_startoff =
+               cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_key(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       ASSERT(key->bmbt.br_startoff != 0);
+
+       xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
+                              0, 0, XFS_EXT_NORM);
+}
+
+STATIC void
+xfs_bmbt_init_rec_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
+}
+
+STATIC void
+xfs_bmbt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       ptr->l = 0;
+}
+
+STATIC __int64_t
+xfs_bmbt_key_diff(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+                                     cur->bc_rec.b.br_startoff;
+}
+
+static bool
+xfs_bmbt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       unsigned int            level;
+
+       switch (block->bb_magic) {
+       case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return false;
+               if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
+                       return false;
+               /*
+                * XXX: need a better way of verifying the owner here. Right now
+                * just make sure there has been one set.
+                */
+               if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
+                       return false;
+               /* fall through */
+       case cpu_to_be32(XFS_BMAP_MAGIC):
+               break;
+       default:
+               return false;
+       }
+
+       /*
+        * numrecs and level verification.
+        *
+        * We don't know what fork we belong to, so just verify that the level
+        * does not exceed the larger of the two maximums. Later checks will be
+        * more precise.
+        */
+       level = be16_to_cpu(block->bb_level);
+       if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
+               return false;
+       if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+               return false;
+
+       /* sibling pointer verification */
+       if (!block->bb_u.l.bb_leftsib ||
+           (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
+            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
+               return false;
+       if (!block->bb_u.l.bb_rightsib ||
+           (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
+            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
+               return false;
+
+       return true;
+}
+
+static void
+xfs_bmbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_btree_lblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_bmbt_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
+       }
+}
+
+static void
+xfs_bmbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_bmbt_verify(bp)) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+       xfs_btree_lblock_calc_crc(bp);
+}
+
+const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+       .verify_read = xfs_bmbt_read_verify,
+       .verify_write = xfs_bmbt_write_verify,
+};
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_bmbt_keys_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       return be64_to_cpu(k1->bmbt.br_startoff) <
+               be64_to_cpu(k2->bmbt.br_startoff);
+}
+
+STATIC int
+xfs_bmbt_recs_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *r1,
+       union xfs_btree_rec     *r2)
+{
+       return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
+               xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
+               xfs_bmbt_disk_get_startoff(&r2->bmbt);
+}
+#endif /* DEBUG || XFS_WARN */
+
+static const struct xfs_btree_ops xfs_bmbt_ops = {
+       .rec_len                = sizeof(xfs_bmbt_rec_t),
+       .key_len                = sizeof(xfs_bmbt_key_t),
+
+       .dup_cursor             = xfs_bmbt_dup_cursor,
+       .update_cursor          = xfs_bmbt_update_cursor,
+       .alloc_block            = xfs_bmbt_alloc_block,
+       .free_block             = xfs_bmbt_free_block,
+       .get_maxrecs            = xfs_bmbt_get_maxrecs,
+       .get_minrecs            = xfs_bmbt_get_minrecs,
+       .get_dmaxrecs           = xfs_bmbt_get_dmaxrecs,
+       .init_key_from_rec      = xfs_bmbt_init_key_from_rec,
+       .init_rec_from_key      = xfs_bmbt_init_rec_from_key,
+       .init_rec_from_cur      = xfs_bmbt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_bmbt_init_ptr_from_cur,
+       .key_diff               = xfs_bmbt_key_diff,
+       .buf_ops                = &xfs_bmbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_bmbt_keys_inorder,
+       .recs_inorder           = xfs_bmbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new bmap btree cursor.
+ */
+struct xfs_btree_cur *                         /* new bmap btree cursor */
+xfs_bmbt_init_cursor(
+       struct xfs_mount        *mp,            /* file system mount point */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_inode        *ip,            /* inode owning the btree */
+       int                     whichfork)      /* data or attr fork */
+{
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_btree_cur    *cur;
+
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+       cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+       cur->bc_btnum = XFS_BTNUM_BMAP;
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+       cur->bc_ops = &xfs_bmbt_ops;
+       cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+       cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
+       cur->bc_private.b.ip = ip;
+       cur->bc_private.b.firstblock = NULLFSBLOCK;
+       cur->bc_private.b.flist = NULL;
+       cur->bc_private.b.allocated = 0;
+       cur->bc_private.b.flags = 0;
+       cur->bc_private.b.whichfork = whichfork;
+
+       return cur;
+}
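+
+/*
+ * Illustrative sketch (not part of this file): callers typically bracket a
+ * btree operation with cursor creation and deletion, much as
+ * xfs_bmbt_change_owner() below does:
+ *
+ *      cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
+ *      error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
+ *      ...
+ *      xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ */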
+
+/*
+ * Calculate number of records in a bmap btree block.
+ */
+int
+xfs_bmbt_maxrecs(
+       struct xfs_mount        *mp,
+       int                     blocklen,
+       int                     leaf)
+{
+       blocklen -= XFS_BMBT_BLOCK_LEN(mp);
+
+       if (leaf)
+               return blocklen / sizeof(xfs_bmbt_rec_t);
+       return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
+}
+
+/*
+ * Calculate number of records in a bmap btree inode root.
+ */
+int
+xfs_bmdr_maxrecs(
+       int                     blocklen,
+       int                     leaf)
+{
+       blocklen -= sizeof(xfs_bmdr_block_t);
+
+       if (leaf)
+               return blocklen / sizeof(xfs_bmdr_rec_t);
+       return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
+}
+
+/*
+ * Change the owner of a btree format fork of the inode passed in to the new
+ * owner that is passed in, so that we can change owners before or after we
+ * switch forks between inodes. The operation that the caller is doing will
+ * determine whether it needs to change the owner before or after the switch.
+ *
+ * For demand paged transactional modification, the fork switch should be done
+ * after reading in all the blocks, modifying them and pinning them in the
+ * transaction. For modification when the buffers are already pinned in memory,
+ * the fork switch can be done before changing the owner as we won't need to
+ * validate the owner until the btree buffers are unpinned and writes can occur
+ * again.
+ *
+ * For recovery based ownership change, there is no transactional context and
+ * so a buffer list must be supplied so that we can record the buffers that we
+ * modified for the caller to issue IO on.
+ */
+int
+xfs_bmbt_change_owner(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_ino_t               new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       ASSERT(tp || buffer_list);
+       ASSERT(!(tp && buffer_list));
+       if (whichfork == XFS_DATA_FORK)
+               ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
+       else
+               ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+
+       cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
+       if (!cur)
+               return ENOMEM;
+
+       error = xfs_btree_change_owner(cur, new_owner, buffer_list);
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       return error;
+}
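+
+/*
+ * Illustrative sketch (not part of this file): the two calling conventions
+ * described above.  Exactly one of the transaction and the buffer list may
+ * be supplied:
+ *
+ *      // transactional modification: blocks are pinned and logged via @tp
+ *      error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, new_owner, NULL);
+ *
+ *      // recovery based change: no transaction, modified buffers are
+ *      // collected on @buffer_list for the caller to issue IO on
+ *      error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, new_owner,
+ *                                    &buffer_list);
+ */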
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
new file mode 100644 (file)
index 0000000..036b4fd
--- /dev/null
@@ -0,0 +1,3989 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_btree.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+
+/*
+ * Cursor allocation zone.
+ */
+kmem_zone_t    *xfs_btree_cur_zone;
+
+/*
+ * Btree magic numbers.
+ */
+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+       { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+         XFS_FIBT_MAGIC },
+       { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+         XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
+};
+#define xfs_btree_magic(cur) \
+       xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
+
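+/*
+ * For example (illustrative only): a bmap btree cursor on a CRC enabled (v5)
+ * filesystem has XFS_BTREE_CRC_BLOCKS set, so xfs_btree_magic(cur) resolves
+ * to xfs_magics[1][XFS_BTNUM_BMAP] == XFS_BMAP_CRC_MAGIC; on a v4 filesystem
+ * the same cursor resolves to xfs_magics[0][XFS_BTNUM_BMAP] == XFS_BMAP_MAGIC.
+ */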
+
+STATIC int                             /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lblock(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_btree_block  *block, /* btree long form block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp)    /* buffer for block, if any */
+{
+       int                     lblock_ok = 1; /* block passes checks */
+       struct xfs_mount        *mp;    /* file system mount point */
+
+       mp = cur->bc_mp;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               lblock_ok = lblock_ok &&
+                       uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+                       block->bb_u.l.bb_blkno == cpu_to_be64(
+                               bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+       }
+
+       lblock_ok = lblock_ok &&
+               be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+               be16_to_cpu(block->bb_level) == level &&
+               be16_to_cpu(block->bb_numrecs) <=
+                       cur->bc_ops->get_maxrecs(cur, level) &&
+               block->bb_u.l.bb_leftsib &&
+               (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
+                XFS_FSB_SANITY_CHECK(mp,
+                       be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+               block->bb_u.l.bb_rightsib &&
+               (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
+                XFS_FSB_SANITY_CHECK(mp,
+                       be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
+       if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
+                       XFS_ERRTAG_BTREE_CHECK_LBLOCK,
+                       XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+               if (bp)
+                       trace_xfs_btree_corrupt(bp, _RET_IP_);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+       return 0;
+}
+
+STATIC int                             /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sblock(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_btree_block  *block, /* btree short form block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp)    /* buffer containing block */
+{
+       struct xfs_mount        *mp;    /* file system mount point */
+       struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
+       struct xfs_agf          *agf;   /* ag. freespace structure */
+       xfs_agblock_t           agflen; /* native ag. freespace length */
+       int                     sblock_ok = 1; /* block passes checks */
+
+       mp = cur->bc_mp;
+       agbp = cur->bc_private.a.agbp;
+       agf = XFS_BUF_TO_AGF(agbp);
+       agflen = be32_to_cpu(agf->agf_length);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               sblock_ok = sblock_ok &&
+                       uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+                       block->bb_u.s.bb_blkno == cpu_to_be64(
+                               bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+       }
+
+       sblock_ok = sblock_ok &&
+               be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
+               be16_to_cpu(block->bb_level) == level &&
+               be16_to_cpu(block->bb_numrecs) <=
+                       cur->bc_ops->get_maxrecs(cur, level) &&
+               (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
+               block->bb_u.s.bb_leftsib &&
+               (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
+                be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
+               block->bb_u.s.bb_rightsib;
+
+       if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
+                       XFS_ERRTAG_BTREE_CHECK_SBLOCK,
+                       XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+               if (bp)
+                       trace_xfs_btree_corrupt(bp, _RET_IP_);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+       return 0;
+}
+
+/*
+ * Debug routine: check that block header is ok.
+ */
+int
+xfs_btree_check_block(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_btree_block  *block, /* generic btree block pointer */
+       int                     level,  /* level of the btree block */
+       struct xfs_buf          *bp)    /* buffer containing block, if any */
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               return xfs_btree_check_lblock(cur, block, level, bp);
+       else
+               return xfs_btree_check_sblock(cur, block, level, bp);
+}
+
+/*
+ * Check that (long) pointer is ok.
+ */
+int                                    /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_dfsbno_t            bno,    /* btree block disk address */
+       int                     level)  /* btree block level */
+{
+       XFS_WANT_CORRUPTED_RETURN(
+               level > 0 &&
+               bno != NULLDFSBNO &&
+               XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
+       return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that (short) pointer is ok.
+ */
+STATIC int                             /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_sptr(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agblock_t           bno,    /* btree block disk address */
+       int                     level)  /* btree block level */
+{
+       xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
+
+       XFS_WANT_CORRUPTED_RETURN(
+               level > 0 &&
+               bno != NULLAGBLOCK &&
+               bno != 0 &&
+               bno < agblocks);
+       return 0;
+}
+
+/*
+ * Check that block ptr is ok.
+ */
+STATIC int                             /* error (0 or EFSCORRUPTED) */
+xfs_btree_check_ptr(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       union xfs_btree_ptr     *ptr,   /* btree block disk address */
+       int                     index,  /* offset from ptr to check */
+       int                     level)  /* btree block level */
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               return xfs_btree_check_lptr(cur,
+                               be64_to_cpu((&ptr->l)[index]), level);
+       } else {
+               return xfs_btree_check_sptr(cur,
+                               be32_to_cpu((&ptr->s)[index]), level);
+       }
+}
+#endif
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * long-form btree header.
+ *
+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modification was that made
+ * it to disk.
+ */
+void
+xfs_btree_lblock_calc_crc(
+       struct xfs_buf          *bp)
+{
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+               return;
+       if (bip)
+               block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+       xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_lblock_verify_crc(
+       struct xfs_buf          *bp)
+{
+       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
+       return true;
+}
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * short-form btree header.
+ *
+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modification was that made
+ * it to disk.
+ */
+void
+xfs_btree_sblock_calc_crc(
+       struct xfs_buf          *bp)
+{
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+               return;
+       if (bip)
+               block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+       xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_sblock_verify_crc(
+       struct xfs_buf          *bp)
+{
+       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+               return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
+       return true;
+}
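+
+/*
+ * Illustrative sketch (not part of this file, error handling trimmed): a
+ * write verifier pairs its structural checks with the CRC helper above, as
+ * xfs_bmbt_write_verify() does in xfs_bmap_btree.c:
+ *
+ *      if (!xfs_bmbt_verify(bp)) {
+ *              xfs_buf_ioerror(bp, EFSCORRUPTED);
+ *              xfs_verifier_error(bp);
+ *              return;
+ *      }
+ *      xfs_btree_lblock_calc_crc(bp);
+ */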
+
+/*
+ * Delete the btree cursor.
+ */
+void
+xfs_btree_del_cursor(
+       xfs_btree_cur_t *cur,           /* btree cursor */
+       int             error)          /* del because of error */
+{
+       int             i;              /* btree level */
+
+       /*
+        * Clear the buffer pointers, and release the buffers.
+        * If we're doing this in the face of an error, we
+        * need to make sure to inspect all of the entries
+        * in the bc_bufs array for buffers to be unlocked.
+        * This is because some of the btree code works from
+        * level n down to 0, and if we get an error along
+        * the way we won't have initialized all the entries
+        * down to 0.
+        */
+       for (i = 0; i < cur->bc_nlevels; i++) {
+               if (cur->bc_bufs[i])
+                       xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+               else if (!error)
+                       break;
+       }
+       /*
+        * Can't free a bmap cursor without having dealt with the
+        * allocated indirect blocks' accounting.
+        */
+       ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+              cur->bc_private.b.allocated == 0);
+       /*
+        * Free the cursor.
+        */
+       kmem_zone_free(xfs_btree_cur_zone, cur);
+}
+
+/*
+ * Duplicate the btree cursor.
+ * Allocate a new one, copy the record, re-get the buffers.
+ */
+int                                    /* error */
+xfs_btree_dup_cursor(
+       xfs_btree_cur_t *cur,           /* input cursor */
+       xfs_btree_cur_t **ncur)         /* output cursor */
+{
+       xfs_buf_t       *bp;            /* btree block's buffer pointer */
+       int             error;          /* error return value */
+       int             i;              /* level number of btree block */
+       xfs_mount_t     *mp;            /* mount structure for filesystem */
+       xfs_btree_cur_t *new;           /* new cursor value */
+       xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
+
+       tp = cur->bc_tp;
+       mp = cur->bc_mp;
+
+       /*
+        * Allocate a new cursor like the old one.
+        */
+       new = cur->bc_ops->dup_cursor(cur);
+
+       /*
+        * Copy the record currently in the cursor.
+        */
+       new->bc_rec = cur->bc_rec;
+
+       /*
+        * For each level of the current cursor, re-get the buffer and copy
+        * the ptr value.
+        */
+       for (i = 0; i < new->bc_nlevels; i++) {
+               new->bc_ptrs[i] = cur->bc_ptrs[i];
+               new->bc_ra[i] = cur->bc_ra[i];
+               bp = cur->bc_bufs[i];
+               if (bp) {
+                       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                                  XFS_BUF_ADDR(bp), mp->m_bsize,
+                                                  0, &bp,
+                                                  cur->bc_ops->buf_ops);
+                       if (error) {
+                               xfs_btree_del_cursor(new, error);
+                               *ncur = NULL;
+                               return error;
+                       }
+               }
+               new->bc_bufs[i] = bp;
+       }
+       *ncur = new;
+       return 0;
+}
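+
+/*
+ * Illustrative sketch (not part of this file): callers duplicate a cursor
+ * before speculative work so the original position survives a failure, e.g.
+ *
+ *      error = xfs_btree_dup_cursor(cur, &tcur);
+ *      if (error)
+ *              return error;
+ *      // ... operate on tcur without disturbing cur ...
+ *      xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+ */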
+
+/*
+ * XFS btree block layout and addressing:
+ *
+ * There are two types of blocks in the btree: leaf and non-leaf blocks.
+ *
+ * A leaf block starts with a header, followed by records containing the
+ * values.  A non-leaf block also starts with the same header, and then
+ * contains lookup keys followed by an equal number of pointers to the btree
+ * blocks at the previous level.
+ *
+ *             +--------+-------+-------+-------+-------+-------+-------+
+ * Leaf:       | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
+ *             +--------+-------+-------+-------+-------+-------+-------+
+ *
+ *             +--------+-------+-------+-------+-------+-------+-------+
+ * Non-Leaf:   | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
+ *             +--------+-------+-------+-------+-------+-------+-------+
+ *
+ * The header is called struct xfs_btree_block for reasons better left unknown
+ * and comes in different versions for short (32bit) and long (64bit) block
+ * pointers.  The record and key structures are defined by the btree instances
+ * and opaque to the btree core.  The block pointers are simple disk endian
+ * integers, available in a short (32bit) and long (64bit) variant.
+ *
+ * The helpers below calculate the offset of a given record, key or pointer
+ * into a btree block (xfs_btree_*_offset) or return a pointer to the given
+ * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
+ * inside the btree block is done using indices starting at one, not zero!
+ */
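+
+/*
+ * Worked example (illustrative only, using the bmap btree sizes): with a
+ * long-format CRC header of XFS_BTREE_LBLOCK_CRC_LEN bytes, rec_len == 16
+ * and key_len == 8, record 3 of a leaf block starts at
+ *
+ *      XFS_BTREE_LBLOCK_CRC_LEN + (3 - 1) * 16
+ *
+ * and pointer 3 of a non-leaf block with get_maxrecs() == N starts at
+ *
+ *      XFS_BTREE_LBLOCK_CRC_LEN + N * 8 + (3 - 1) * 8
+ *
+ * which is exactly what xfs_btree_rec_offset() and xfs_btree_ptr_offset()
+ * below compute.
+ */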
+
+/*
+ * Return size of the btree block header for this btree instance.
+ */
+static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+                       return XFS_BTREE_LBLOCK_CRC_LEN;
+               return XFS_BTREE_LBLOCK_LEN;
+       }
+       if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+               return XFS_BTREE_SBLOCK_CRC_LEN;
+       return XFS_BTREE_SBLOCK_LEN;
+}
+
+/*
+ * Return size of btree block pointers for this btree instance.
+ */
+static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
+{
+       return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+               sizeof(__be64) : sizeof(__be32);
+}
+
+/*
+ * Calculate offset of the n-th record in a btree block.
+ */
+STATIC size_t
+xfs_btree_rec_offset(
+       struct xfs_btree_cur    *cur,
+       int                     n)
+{
+       return xfs_btree_block_len(cur) +
+               (n - 1) * cur->bc_ops->rec_len;
+}
+
+/*
+ * Calculate offset of the n-th key in a btree block.
+ */
+STATIC size_t
+xfs_btree_key_offset(
+       struct xfs_btree_cur    *cur,
+       int                     n)
+{
+       return xfs_btree_block_len(cur) +
+               (n - 1) * cur->bc_ops->key_len;
+}
+
+/*
+ * Calculate offset of the n-th block pointer in a btree block.
+ */
+STATIC size_t
+xfs_btree_ptr_offset(
+       struct xfs_btree_cur    *cur,
+       int                     n,
+       int                     level)
+{
+       return xfs_btree_block_len(cur) +
+               cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
+               (n - 1) * xfs_btree_ptr_len(cur);
+}
+
+/*
+ * Return a pointer to the n-th record in the btree block.
+ */
+STATIC union xfs_btree_rec *
+xfs_btree_rec_addr(
+       struct xfs_btree_cur    *cur,
+       int                     n,
+       struct xfs_btree_block  *block)
+{
+       return (union xfs_btree_rec *)
+               ((char *)block + xfs_btree_rec_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th key in the btree block.
+ */
+STATIC union xfs_btree_key *
+xfs_btree_key_addr(
+       struct xfs_btree_cur    *cur,
+       int                     n,
+       struct xfs_btree_block  *block)
+{
+       return (union xfs_btree_key *)
+               ((char *)block + xfs_btree_key_offset(cur, n));
+}
+
+/*
+ * Return a pointer to the n-th block pointer in the btree block.
+ */
+STATIC union xfs_btree_ptr *
+xfs_btree_ptr_addr(
+       struct xfs_btree_cur    *cur,
+       int                     n,
+       struct xfs_btree_block  *block)
+{
+       int                     level = xfs_btree_get_level(block);
+
+       ASSERT(block->bb_level != 0);
+
+       return (union xfs_btree_ptr *)
+               ((char *)block + xfs_btree_ptr_offset(cur, n, level));
+}
+
+/*
+ * Get the root block which is stored in the inode.
+ *
+ * For now this btree implementation assumes the btree root is always
+ * stored in the if_broot field of an inode fork.
+ */
+STATIC struct xfs_btree_block *
+xfs_btree_get_iroot(
+       struct xfs_btree_cur    *cur)
+{
+       struct xfs_ifork        *ifp;
+
+       ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
+       return (struct xfs_btree_block *)ifp->if_broot;
+}
+
+/*
+ * Retrieve the block pointer from the cursor at the given level.
+ * This may be an inode btree root or from a buffer.
+ */
+STATIC struct xfs_btree_block *                /* generic btree block pointer */
+xfs_btree_get_block(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in btree */
+       struct xfs_buf          **bpp)  /* buffer containing the block */
+{
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           (level == cur->bc_nlevels - 1)) {
+               *bpp = NULL;
+               return xfs_btree_get_iroot(cur);
+       }
+
+       *bpp = cur->bc_bufs[level];
+       return XFS_BUF_TO_BLOCK(*bpp);
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Long-form addressing.
+ */
+xfs_buf_t *                            /* buffer for fsbno */
+xfs_btree_get_bufl(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_fsblock_t   fsbno,          /* file system block number */
+       uint            lock)           /* lock flags for get_buf */
+{
+       xfs_daddr_t             d;              /* real disk block address */
+
+       ASSERT(fsbno != NULLFSBLOCK);
+       d = XFS_FSB_TO_DADDR(mp, fsbno);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+}
+
+/*
+ * Get a buffer for the block, return it with no data read.
+ * Short-form addressing.
+ */
+xfs_buf_t *                            /* buffer for agno/agbno */
+xfs_btree_get_bufs(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno,           /* allocation group number */
+       xfs_agblock_t   agbno,          /* allocation group block number */
+       uint            lock)           /* lock flags for get_buf */
+{
+       xfs_daddr_t             d;              /* real disk block address */
+
+       ASSERT(agno != NULLAGNUMBER);
+       ASSERT(agbno != NULLAGBLOCK);
+       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+}
+
+/*
+ * Check for the cursor referring to the last block at the given level.
+ */
+int                                    /* 1=is last block, 0=not last block */
+xfs_btree_islastblock(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to check */
+{
+       struct xfs_btree_block  *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
+       else
+               return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Change the cursor to point to the first record at the given level.
+ * Other levels are unaffected.
+ */
+STATIC int                             /* success=1, failure=0 */
+xfs_btree_firstrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to change */
+{
+       struct xfs_btree_block  *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+
+       /*
+        * Get the block pointer for this level.
+        */
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       /*
+        * It's empty, there is no such record.
+        */
+       if (!block->bb_numrecs)
+               return 0;
+       /*
+        * Set the ptr value to 1, that's the first record/key.
+        */
+       cur->bc_ptrs[level] = 1;
+       return 1;
+}
+
+/*
+ * Change the cursor to point to the last record in the current block
+ * at the given level.  Other levels are unaffected.
+ */
+STATIC int                             /* success=1, failure=0 */
+xfs_btree_lastrec(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     level)  /* level to change */
+{
+       struct xfs_btree_block  *block; /* generic btree block pointer */
+       xfs_buf_t               *bp;    /* buffer containing block */
+
+       /*
+        * Get the block pointer for this level.
+        */
+       block = xfs_btree_get_block(cur, level, &bp);
+       xfs_btree_check_block(cur, block, level, bp);
+       /*
+        * It's empty, there is no such record.
+        */
+       if (!block->bb_numrecs)
+               return 0;
+       /*
+        * Set the ptr value to numrecs, that's the last record/key.
+        */
+       cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
+       return 1;
+}
+
+/*
+ * Compute first and last byte offsets for the fields given.
+ * Interprets the offsets table, which contains struct field offsets.
+ */
+void
+xfs_btree_offsets(
+       __int64_t       fields,         /* bitmask of fields */
+       const short     *offsets,       /* table of field offsets */
+       int             nbits,          /* number of bits to inspect */
+       int             *first,         /* output: first byte offset */
+       int             *last)          /* output: last byte offset */
+{
+       int             i;              /* current bit number */
+       __int64_t       imask;          /* mask for current bit number */
+
+       ASSERT(fields != 0);
+       /*
+        * Find the lowest bit, so the first byte offset.
+        */
+       for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
+               if (imask & fields) {
+                       *first = offsets[i];
+                       break;
+               }
+       }
+       /*
+        * Find the highest bit, so the last byte offset.
+        */
+       for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
+               if (imask & fields) {
+                       *last = offsets[i + 1] - 1;
+                       break;
+               }
+       }
+}
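+
+/*
+ * Worked example (illustrative only): logging the level and numrecs fields
+ * of a short-format block passes fields == XFS_BB_LEVEL | XFS_BB_NUMRECS.
+ * The lowest set bit picks offsetof(struct xfs_btree_block, bb_level) as
+ * *first, and the highest set bit picks offsetof(..., bb_u.s.bb_leftsib) - 1
+ * as *last, so exactly the two 16-bit fields are covered.
+ */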
+
+/*
+ * Get a buffer for the block, return it read in.
+ * Long-form addressing.
+ */
+int
+xfs_btree_read_bufl(
+       struct xfs_mount        *mp,            /* file system mount point */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       xfs_fsblock_t           fsbno,          /* file system block number */
+       uint                    lock,           /* lock flags for read_buf */
+       struct xfs_buf          **bpp,          /* buffer for fsbno */
+       int                     refval,         /* ref count value for buffer */
+       const struct xfs_buf_ops *ops)
+{
+       struct xfs_buf          *bp;            /* return value */
+       xfs_daddr_t             d;              /* real disk block address */
+       int                     error;
+
+       ASSERT(fsbno != NULLFSBLOCK);
+       d = XFS_FSB_TO_DADDR(mp, fsbno);
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+                                  mp->m_bsize, lock, &bp, ops);
+       if (error)
+               return error;
+       if (bp)
+               xfs_buf_set_ref(bp, refval);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Long-form addressing.
+ */
+/* ARGSUSED */
+void
+xfs_btree_reada_bufl(
+       struct xfs_mount        *mp,            /* file system mount point */
+       xfs_fsblock_t           fsbno,          /* file system block number */
+       xfs_extlen_t            count,          /* count of filesystem blocks */
+       const struct xfs_buf_ops *ops)
+{
+       xfs_daddr_t             d;
+
+       ASSERT(fsbno != NULLFSBLOCK);
+       d = XFS_FSB_TO_DADDR(mp, fsbno);
+       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+}
+
+/*
+ * Read-ahead the block, don't wait for it, don't return a buffer.
+ * Short-form addressing.
+ */
+/* ARGSUSED */
+void
+xfs_btree_reada_bufs(
+       struct xfs_mount        *mp,            /* file system mount point */
+       xfs_agnumber_t          agno,           /* allocation group number */
+       xfs_agblock_t           agbno,          /* allocation group block number */
+       xfs_extlen_t            count,          /* count of filesystem blocks */
+       const struct xfs_buf_ops *ops)
+{
+       xfs_daddr_t             d;
+
+       ASSERT(agno != NULLAGNUMBER);
+       ASSERT(agbno != NULLAGBLOCK);
+       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+}
+
+STATIC int
+xfs_btree_readahead_lblock(
+       struct xfs_btree_cur    *cur,
+       int                     lr,
+       struct xfs_btree_block  *block)
+{
+       int                     rval = 0;
+       xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+       xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+       if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+               xfs_btree_reada_bufl(cur->bc_mp, left, 1,
+                                    cur->bc_ops->buf_ops);
+               rval++;
+       }
+
+       if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+               xfs_btree_reada_bufl(cur->bc_mp, right, 1,
+                                    cur->bc_ops->buf_ops);
+               rval++;
+       }
+
+       return rval;
+}
+
+STATIC int
+xfs_btree_readahead_sblock(
+       struct xfs_btree_cur    *cur,
+       int                     lr,
+       struct xfs_btree_block *block)
+{
+       int                     rval = 0;
+       xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
+       xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
+
+       if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
+               xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+                                    left, 1, cur->bc_ops->buf_ops);
+               rval++;
+       }
+
+       if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
+               xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+                                    right, 1, cur->bc_ops->buf_ops);
+               rval++;
+       }
+
+       return rval;
+}
+
+/*
+ * Read-ahead btree blocks, at the given level.
+ * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
+ */
+STATIC int
+xfs_btree_readahead(
+       struct xfs_btree_cur    *cur,           /* btree cursor */
+       int                     lev,            /* level in btree */
+       int                     lr)             /* left/right bits */
+{
+       struct xfs_btree_block  *block;
+
+       /*
+        * No readahead needed if we are at the root level and the
+        * btree root is stored in the inode.
+        */
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           (lev == cur->bc_nlevels - 1))
+               return 0;
+
+       if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+               return 0;
+
+       cur->bc_ra[lev] |= lr;
+       block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               return xfs_btree_readahead_lblock(cur, lr, block);
+       return xfs_btree_readahead_sblock(cur, lr, block);
+}
+
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+       } else {
+               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+                                       be32_to_cpu(ptr->s));
+       }
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * method of converting the ptr directly to a daddr available to us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       xfs_extlen_t            count)
+{
+       xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+                         xfs_btree_ptr_to_daddr(cur, ptr),
+                         cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
+/*
+ * Set the buffer for level "lev" in the cursor to bp, releasing
+ * any previous buffer.
+ */
+STATIC void
+xfs_btree_setbuf(
+       xfs_btree_cur_t         *cur,   /* btree cursor */
+       int                     lev,    /* level in btree */
+       xfs_buf_t               *bp)    /* new buffer to set */
+{
+       struct xfs_btree_block  *b;     /* btree block */
+
+       if (cur->bc_bufs[lev])
+               xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
+       cur->bc_bufs[lev] = bp;
+       cur->bc_ra[lev] = 0;
+
+       b = XFS_BUF_TO_BLOCK(bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
+                       cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+               if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
+                       cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+       } else {
+               if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
+                       cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+               if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
+                       cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+       }
+}
+
+STATIC int
+xfs_btree_ptr_is_null(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               return ptr->l == cpu_to_be64(NULLDFSBNO);
+       else
+               return ptr->s == cpu_to_be32(NULLAGBLOCK);
+}
+
+STATIC void
+xfs_btree_set_ptr_null(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               ptr->l = cpu_to_be64(NULLDFSBNO);
+       else
+               ptr->s = cpu_to_be32(NULLAGBLOCK);
+}
+
+/*
+ * Get/set/init sibling pointers
+ */
+STATIC void
+xfs_btree_get_sibling(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_ptr     *ptr,
+       int                     lr)
+{
+       ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               if (lr == XFS_BB_RIGHTSIB)
+                       ptr->l = block->bb_u.l.bb_rightsib;
+               else
+                       ptr->l = block->bb_u.l.bb_leftsib;
+       } else {
+               if (lr == XFS_BB_RIGHTSIB)
+                       ptr->s = block->bb_u.s.bb_rightsib;
+               else
+                       ptr->s = block->bb_u.s.bb_leftsib;
+       }
+}
+
+STATIC void
+xfs_btree_set_sibling(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       union xfs_btree_ptr     *ptr,
+       int                     lr)
+{
+       ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
+
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               if (lr == XFS_BB_RIGHTSIB)
+                       block->bb_u.l.bb_rightsib = ptr->l;
+               else
+                       block->bb_u.l.bb_leftsib = ptr->l;
+       } else {
+               if (lr == XFS_BB_RIGHTSIB)
+                       block->bb_u.s.bb_rightsib = ptr->s;
+               else
+                       block->bb_u.s.bb_leftsib = ptr->s;
+       }
+}
+
+void
+xfs_btree_init_block_int(
+       struct xfs_mount        *mp,
+       struct xfs_btree_block  *buf,
+       xfs_daddr_t             blkno,
+       __u32                   magic,
+       __u16                   level,
+       __u16                   numrecs,
+       __u64                   owner,
+       unsigned int            flags)
+{
+       buf->bb_magic = cpu_to_be32(magic);
+       buf->bb_level = cpu_to_be16(level);
+       buf->bb_numrecs = cpu_to_be16(numrecs);
+
+       if (flags & XFS_BTREE_LONG_PTRS) {
+               buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+               buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+               if (flags & XFS_BTREE_CRC_BLOCKS) {
+                       buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
+                       buf->bb_u.l.bb_owner = cpu_to_be64(owner);
+                       uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+                       buf->bb_u.l.bb_pad = 0;
+                       buf->bb_u.l.bb_lsn = 0;
+               }
+       } else {
+               /* owner is a 32 bit value on short blocks */
+               __u32 __owner = (__u32)owner;
+
+               buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+               buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+               if (flags & XFS_BTREE_CRC_BLOCKS) {
+                       buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
+                       buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+                       uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+                       buf->bb_u.s.bb_lsn = 0;
+               }
+       }
+}
+
+void
+xfs_btree_init_block(
+       struct xfs_mount *mp,
+       struct xfs_buf  *bp,
+       __u32           magic,
+       __u16           level,
+       __u16           numrecs,
+       __u64           owner,
+       unsigned int    flags)
+{
+       xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+                                magic, level, numrecs, owner, flags);
+}
+
+STATIC void
+xfs_btree_init_block_cur(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       int                     level,
+       int                     numrecs)
+{
+       __u64 owner;
+
+       /*
+        * we can pull the owner from the cursor right now as the different
+        * owners align directly with the pointer size of the btree. This may
+        * change in future, but is safe for current users of the generic btree
+        * code.
+        */
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               owner = cur->bc_private.b.ip->i_ino;
+       else
+               owner = cur->bc_private.a.agno;
+
+       xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+                                xfs_btree_magic(cur), level, numrecs,
+                                owner, cur->bc_flags);
+}
+
+/*
+ * Return true if ptr is the last record in the btree and
+ * we need to track updates to this record.  The decision
+ * will be further refined in the update_lastrec method.
+ */
+STATIC int
+xfs_btree_is_lastrec(
+       struct xfs_btree_cur    *cur,
+       struct xfs_btree_block  *block,
+       int                     level)
+{
+       union xfs_btree_ptr     ptr;
+
+       if (level > 0)
+               return 0;
+       if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+               return 0;
+
+       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+       if (!xfs_btree_ptr_is_null(cur, &ptr))
+               return 0;
+       return 1;
+}
+
+STATIC void
+xfs_btree_buf_to_ptr(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+                                       XFS_BUF_ADDR(bp)));
+       else {
+               ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
+                                       XFS_BUF_ADDR(bp)));
+       }
+}
+
+STATIC void
+xfs_btree_set_refs(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       switch (cur->bc_btnum) {
+       case XFS_BTNUM_BNO:
+       case XFS_BTNUM_CNT:
+               xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
+               break;
+       case XFS_BTNUM_INO:
+       case XFS_BTNUM_FINO:
+               xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
+               break;
+       case XFS_BTNUM_BMAP:
+               xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
+               break;
+       default:
+               ASSERT(0);
+       }
+}
+
+STATIC int
+xfs_btree_get_buf_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     flags,
+       struct xfs_btree_block  **block,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       xfs_daddr_t             d;
+
+       /* need to sort out how callers deal with failures first */
+       ASSERT(!(flags & XBF_TRYLOCK));
+
+       d = xfs_btree_ptr_to_daddr(cur, ptr);
+       *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
+                                mp->m_bsize, flags);
+
+       if (!*bpp)
+               return ENOMEM;
+
+       (*bpp)->b_ops = cur->bc_ops->buf_ops;
+       *block = XFS_BUF_TO_BLOCK(*bpp);
+       return 0;
+}
+
+/*
+ * Read in the buffer at the given ptr and return the buffer and
+ * the block pointer within the buffer.
+ */
+STATIC int
+xfs_btree_read_buf_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     flags,
+       struct xfs_btree_block  **block,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+       xfs_daddr_t             d;
+       int                     error;
+
+       /* need to sort out how callers deal with failures first */
+       ASSERT(!(flags & XBF_TRYLOCK));
+
+       d = xfs_btree_ptr_to_daddr(cur, ptr);
+       error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
+                                  mp->m_bsize, flags, bpp,
+                                  cur->bc_ops->buf_ops);
+       if (error)
+               return error;
+
+       xfs_btree_set_refs(cur, *bpp);
+       *block = XFS_BUF_TO_BLOCK(*bpp);
+       return 0;
+}
+
+/*
+ * Copy keys from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_keys(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *dst_key,
+       union xfs_btree_key     *src_key,
+       int                     numkeys)
+{
+       ASSERT(numkeys >= 0);
+       memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Copy records from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_recs(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *dst_rec,
+       union xfs_btree_rec     *src_rec,
+       int                     numrecs)
+{
+       ASSERT(numrecs >= 0);
+       memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Copy block pointers from one btree block to another.
+ */
+STATIC void
+xfs_btree_copy_ptrs(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *dst_ptr,
+       union xfs_btree_ptr     *src_ptr,
+       int                     numptrs)
+{
+       ASSERT(numptrs >= 0);
+       memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Shift keys one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_keys(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key,
+       int                     dir,
+       int                     numkeys)
+{
+       char                    *dst_key;
+
+       ASSERT(numkeys >= 0);
+       ASSERT(dir == 1 || dir == -1);
+
+       dst_key = (char *)key + (dir * cur->bc_ops->key_len);
+       memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
+}
+
+/*
+ * Shift records one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_recs(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec,
+       int                     dir,
+       int                     numrecs)
+{
+       char                    *dst_rec;
+
+       ASSERT(numrecs >= 0);
+       ASSERT(dir == 1 || dir == -1);
+
+       dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
+       memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
+}
+
+/*
+ * Shift block pointers one index left/right inside a single btree block.
+ */
+STATIC void
+xfs_btree_shift_ptrs(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     dir,
+       int                     numptrs)
+{
+       char                    *dst_ptr;
+
+       ASSERT(numptrs >= 0);
+       ASSERT(dir == 1 || dir == -1);
+
+       dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
+       memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
+}
+
+/*
+ * Log key values from the btree block.
+ */
+STATIC void
+xfs_btree_log_keys(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       int                     first,
+       int                     last)
+{
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+       if (bp) {
+               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
+               xfs_trans_log_buf(cur->bc_tp, bp,
+                                 xfs_btree_key_offset(cur, first),
+                                 xfs_btree_key_offset(cur, last + 1) - 1);
+       } else {
+               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                               xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log record values from the btree block.
+ */
+void
+xfs_btree_log_recs(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       int                     first,
+       int                     last)
+{
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+       xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
+       xfs_trans_log_buf(cur->bc_tp, bp,
+                         xfs_btree_rec_offset(cur, first),
+                         xfs_btree_rec_offset(cur, last + 1) - 1);
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log block pointer fields from a btree block (nonleaf).
+ */
+STATIC void
+xfs_btree_log_ptrs(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_buf          *bp,    /* buffer containing btree block */
+       int                     first,  /* index of first pointer to log */
+       int                     last)   /* index of last pointer to log */
+{
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
+
+       if (bp) {
+               struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+               int                     level = xfs_btree_get_level(block);
+
+               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
+               xfs_trans_log_buf(cur->bc_tp, bp,
+                               xfs_btree_ptr_offset(cur, first, level),
+                               xfs_btree_ptr_offset(cur, last + 1, level) - 1);
+       } else {
+               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                       xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
+
+/*
+ * Log fields from a btree block header.
+ */
+void
+xfs_btree_log_block(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       struct xfs_buf          *bp,    /* buffer containing btree block */
+       int                     fields) /* mask of fields: XFS_BB_... */
+{
+       int                     first;  /* first byte offset logged */
+       int                     last;   /* last byte offset logged */
+       static const short      soffsets[] = {  /* table of offsets (short) */
+               offsetof(struct xfs_btree_block, bb_magic),
+               offsetof(struct xfs_btree_block, bb_level),
+               offsetof(struct xfs_btree_block, bb_numrecs),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
+               offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
+               XFS_BTREE_SBLOCK_CRC_LEN
+       };
+       static const short      loffsets[] = {  /* table of offsets (long) */
+               offsetof(struct xfs_btree_block, bb_magic),
+               offsetof(struct xfs_btree_block, bb_level),
+               offsetof(struct xfs_btree_block, bb_numrecs),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
+               offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
+               XFS_BTREE_LBLOCK_CRC_LEN
+       };
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
+
+       if (bp) {
+               int nbits;
+
+               if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+                       /*
+                        * We don't log the CRC when updating a btree
+                        * block but instead recreate it during log
+                        * recovery.  As the log buffers have checksums
+                        * of their own this is safe and avoids logging a crc
+                        * update in a lot of places.
+                        */
+                       if (fields == XFS_BB_ALL_BITS)
+                               fields = XFS_BB_ALL_BITS_CRC;
+                       nbits = XFS_BB_NUM_BITS_CRC;
+               } else {
+                       nbits = XFS_BB_NUM_BITS;
+               }
+               xfs_btree_offsets(fields,
+                                 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+                                       loffsets : soffsets,
+                                 nbits, &first, &last);
+               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
+               xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+       } else {
+               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+                       xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+}
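+
+/*
+ * Illustrative note (not part of the original source): xfs_btree_offsets()
+ * turns the XFS_BB_* field mask into one contiguous byte range using the
+ * offset tables above.  Logging XFS_BB_NUMRECS on a short-pointer block,
+ * for example, resolves to the bytes from
+ * offsetof(struct xfs_btree_block, bb_numrecs) up to
+ * offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib) - 1; the trailing
+ * XFS_BTREE_SBLOCK_CRC_LEN / XFS_BTREE_LBLOCK_CRC_LEN entries bound the
+ * extent of the last field in each table.
+ */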
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                            /* error */
+xfs_btree_increment(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       struct xfs_btree_block  *block;
+       union xfs_btree_ptr     ptr;
+       struct xfs_buf          *bp;
+       int                     error;          /* error return value */
+       int                     lev;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, level);
+
+       ASSERT(level < cur->bc_nlevels);
+
+       /* Read-ahead to the right at this level. */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+       /* Get a pointer to the btree block. */
+       block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto error0;
+#endif
+
+       /* We're done if we remain in the block after the increment. */
+       if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
+               goto out1;
+
+       /* Fail if we just went off the right edge of the tree. */
+       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &ptr))
+               goto out0;
+
+       XFS_BTREE_STATS_INC(cur, increment);
+
+       /*
+        * March up the tree incrementing pointers.
+        * Stop when we don't go off the right edge of a block.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               block = xfs_btree_get_block(cur, lev, &bp);
+
+#ifdef DEBUG
+               error = xfs_btree_check_block(cur, block, lev, bp);
+               if (error)
+                       goto error0;
+#endif
+
+               if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
+                       break;
+
+               /* Read-ahead the right block for the next loop. */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+       }
+
+       /*
+        * If we went off the root then we are either seriously
+        * confused or have the tree root in an inode.
+        */
+       if (lev == cur->bc_nlevels) {
+               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+                       goto out0;
+               ASSERT(0);
+               error = EFSCORRUPTED;
+               goto error0;
+       }
+       ASSERT(lev < cur->bc_nlevels);
+
+       /*
+        * Now walk back down the tree, fixing up the cursor's buffer
+        * pointers and key numbers.
+        */
+       for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+               union xfs_btree_ptr     *ptrp;
+
+               ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+               --lev;
+               error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
+               if (error)
+                       goto error0;
+
+               xfs_btree_setbuf(cur, lev, bp);
+               cur->bc_ptrs[lev] = 1;
+       }
+out1:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
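+
+/*
+ * Illustrative usage sketch (not part of the original source), assuming the
+ * caller has already initialised the cursor's search key: a forward walk
+ * over the leaf records pairs xfs_btree_lookup() with xfs_btree_increment():
+ *
+ *     error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);
+ *     while (!error && stat) {
+ *             ... process the record at cur->bc_ptrs[0] ...
+ *             error = xfs_btree_increment(cur, 0, &stat);
+ *     }
+ *
+ * stat dropping to zero means the walk ran off the right edge of the tree.
+ */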
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int                                            /* error */
+xfs_btree_decrement(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       int                     error;          /* error return value */
+       int                     lev;
+       union xfs_btree_ptr     ptr;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, level);
+
+       ASSERT(level < cur->bc_nlevels);
+
+       /* Read-ahead to the left at this level. */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+
+       /* We're done if we remain in the block after the decrement. */
+       if (--cur->bc_ptrs[level] > 0)
+               goto out1;
+
+       /* Get a pointer to the btree block. */
+       block = xfs_btree_get_block(cur, level, &bp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto error0;
+#endif
+
+       /* Fail if we just went off the left edge of the tree. */
+       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+       if (xfs_btree_ptr_is_null(cur, &ptr))
+               goto out0;
+
+       XFS_BTREE_STATS_INC(cur, decrement);
+
+       /*
+        * March up the tree decrementing pointers.
+        * Stop when we don't go off the left edge of a block.
+        */
+       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+               if (--cur->bc_ptrs[lev] > 0)
+                       break;
+               /* Read-ahead the left block for the next loop. */
+               xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+       }
+
+       /*
+        * If we went off the root then we are either seriously confused
+        * or have the tree root in an inode.
+        */
+       if (lev == cur->bc_nlevels) {
+               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+                       goto out0;
+               ASSERT(0);
+               error = EFSCORRUPTED;
+               goto error0;
+       }
+       ASSERT(lev < cur->bc_nlevels);
+
+       /*
+        * Now walk back down the tree, fixing up the cursor's buffer
+        * pointers and key numbers.
+        */
+       for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
+               union xfs_btree_ptr     *ptrp;
+
+               ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+               --lev;
+               error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
+               if (error)
+                       goto error0;
+               xfs_btree_setbuf(cur, lev, bp);
+               cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
+       }
+out1:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
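+
+/*
+ * Illustrative note (not part of the original source): this is the mirror
+ * image of xfs_btree_increment() above.  Pairing an XFS_LOOKUP_LE lookup
+ * with repeated xfs_btree_decrement() calls walks the leaf records from
+ * right to left; *stat drops to zero once the cursor falls off the left
+ * edge of the tree.
+ */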
+
+STATIC int
+xfs_btree_lookup_get_block(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level in the btree */
+       union xfs_btree_ptr     *pp,    /* ptr to btree block */
+       struct xfs_btree_block  **blkp) /* return btree block */
+{
+       struct xfs_buf          *bp;    /* buffer pointer for btree block */
+       int                     error = 0;
+
+       /* special case the root block if in an inode */
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           (level == cur->bc_nlevels - 1)) {
+               *blkp = xfs_btree_get_iroot(cur);
+               return 0;
+       }
+
+       /*
+        * If the old buffer at this level is for the disk address we are
+        * looking for, re-use it.
+        *
+        * Otherwise throw it away and get a new one.
+        */
+       bp = cur->bc_bufs[level];
+       if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
+               *blkp = XFS_BUF_TO_BLOCK(bp);
+               return 0;
+       }
+
+       error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
+       if (error)
+               return error;
+
+       xfs_btree_setbuf(cur, level, bp);
+       return 0;
+}
+
+/*
+ * Get current search key.  For level 0 we don't actually have a key
+ * structure so we make one up from the record.  For all other levels
+ * we just return the right key.
+ */
+STATIC union xfs_btree_key *
+xfs_lookup_get_search_key(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     keyno,
+       struct xfs_btree_block  *block,
+       union xfs_btree_key     *kp)
+{
+       if (level == 0) {
+               cur->bc_ops->init_key_from_rec(kp,
+                               xfs_btree_rec_addr(cur, keyno, block));
+               return kp;
+       }
+
+       return xfs_btree_key_addr(cur, keyno, block);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ * stat is set to 0 if can't find any such record, 1 for success.
+ */
+int                                    /* error */
+xfs_btree_lookup(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_lookup_t            dir,    /* <=, ==, or >= */
+       int                     *stat)  /* success/failure */
+{
+       struct xfs_btree_block  *block; /* current btree block */
+       __int64_t               diff;   /* difference for the current key */
+       int                     error;  /* error return value */
+       int                     keyno;  /* current key number */
+       int                     level;  /* level in the btree */
+       union xfs_btree_ptr     *pp;    /* ptr to btree block */
+       union xfs_btree_ptr     ptr;    /* ptr to btree block */
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, dir);
+
+       XFS_BTREE_STATS_INC(cur, lookup);
+
+       block = NULL;
+       keyno = 0;
+
+       /* initialise start pointer from cursor */
+       cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+       pp = &ptr;
+
+       /*
+        * Iterate over each level in the btree, starting at the root.
+        * For each level above the leaves, find the key we need, based
+        * on the lookup record, then follow the corresponding block
+        * pointer down to the next level.
+        */
+       for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+               /* Get the block we need to do the lookup on. */
+               error = xfs_btree_lookup_get_block(cur, level, pp, &block);
+               if (error)
+                       goto error0;
+
+               if (diff == 0) {
+                       /*
+                        * If we already had a key match at a higher level, we
+                        * know we need to use the first entry in this block.
+                        */
+                       keyno = 1;
+               } else {
+                       /* Otherwise search this block. Do a binary search. */
+
+                       int     high;   /* high entry number */
+                       int     low;    /* low entry number */
+
+                       /* Set low and high entry numbers, 1-based. */
+                       low = 1;
+                       high = xfs_btree_get_numrecs(block);
+                       if (!high) {
+                               /* Block is empty, must be an empty leaf. */
+                               ASSERT(level == 0 && cur->bc_nlevels == 1);
+
+                               cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+                               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+                               *stat = 0;
+                               return 0;
+                       }
+
+                       /* Binary search the block. */
+                       while (low <= high) {
+                               union xfs_btree_key     key;
+                               union xfs_btree_key     *kp;
+
+                               XFS_BTREE_STATS_INC(cur, compare);
+
+                               /* keyno is average of low and high. */
+                               keyno = (low + high) >> 1;
+
+                               /* Get current search key */
+                               kp = xfs_lookup_get_search_key(cur, level,
+                                               keyno, block, &key);
+
+                               /*
+                                * Compute difference to get next direction:
+                                *  - less than, move right
+                                *  - greater than, move left
+                                *  - equal, we're done
+                                */
+                               diff = cur->bc_ops->key_diff(cur, kp);
+                               if (diff < 0)
+                                       low = keyno + 1;
+                               else if (diff > 0)
+                                       high = keyno - 1;
+                               else
+                                       break;
+                       }
+               }
+
+               /*
+                * If there are more levels, set up for the next level
+                * by getting the block number and filling in the cursor.
+                */
+               if (level > 0) {
+                       /*
+                        * If we moved left, need the previous key number,
+                        * unless there isn't one.
+                        */
+                       if (diff > 0 && --keyno < 1)
+                               keyno = 1;
+                       pp = xfs_btree_ptr_addr(cur, keyno, block);
+
+#ifdef DEBUG
+                       error = xfs_btree_check_ptr(cur, pp, 0, level);
+                       if (error)
+                               goto error0;
+#endif
+                       cur->bc_ptrs[level] = keyno;
+               }
+       }
+
+       /* Done with the search. See if we need to adjust the results. */
+       if (dir != XFS_LOOKUP_LE && diff < 0) {
+               keyno++;
+               /*
+                * If this was a GE search and we went off the end of the block,
+                * but it's not the last block, we're in the wrong block.
+                */
+               xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+               if (dir == XFS_LOOKUP_GE &&
+                   keyno > xfs_btree_get_numrecs(block) &&
+                   !xfs_btree_ptr_is_null(cur, &ptr)) {
+                       int     i;
+
+                       cur->bc_ptrs[0] = keyno;
+                       error = xfs_btree_increment(cur, 0, &i);
+                       if (error)
+                               goto error0;
+                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+                       *stat = 1;
+                       return 0;
+               }
+       } else if (dir == XFS_LOOKUP_LE && diff > 0)
+               keyno--;
+       cur->bc_ptrs[0] = keyno;
+
+       /* Return if we succeeded or not. */
+       if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
+               *stat = 0;
+       else if (dir != XFS_LOOKUP_EQ || diff == 0)
+               *stat = 1;
+       else
+               *stat = 0;
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
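+
+/*
+ * Worked example (illustrative, not part of the original source): with leaf
+ * keys { 10, 20, 30 } and the cursor's search key set to 25,
+ *
+ *     XFS_LOOKUP_LE   leaves the cursor on the 20 record, *stat = 1
+ *     XFS_LOOKUP_GE   leaves the cursor on the 30 record, *stat = 1
+ *     XFS_LOOKUP_EQ   leaves the cursor at the slot where 25 would be
+ *                     inserted and sets *stat = 0
+ *
+ * An LE search for a key smaller than every record, or a GE search for one
+ * larger than every record, also returns *stat = 0.
+ */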
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path.
+ */
+STATIC int
+xfs_btree_updkey(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *keyp,
+       int                     level)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       union xfs_btree_key     *kp;
+       int                     ptr;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
+
+       ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
+
+       /*
+        * Go up the tree from this level toward the root.
+        * At each level, update the key value to the value input.
+        * Stop when we reach a level where the cursor isn't pointing
+        * at the first entry in the block.
+        */
+       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+#ifdef DEBUG
+               int             error;
+#endif
+               block = xfs_btree_get_block(cur, level, &bp);
+#ifdef DEBUG
+               error = xfs_btree_check_block(cur, block, level, bp);
+               if (error) {
+                       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+                       return error;
+               }
+#endif
+               ptr = cur->bc_ptrs[level];
+               kp = xfs_btree_key_addr(cur, ptr, block);
+               xfs_btree_copy_keys(cur, kp, keyp, 1);
+               xfs_btree_log_keys(cur, bp, ptr, ptr);
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+}
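+
+/*
+ * Illustrative note (not part of the original source): at each level the
+ * key at the cursor's path position is rewritten, and the walk continues
+ * upward only while that position is 1, since a block's key in its parent
+ * only changes when the block's own first entry changed.  With
+ * bc_ptrs[] = { 1, 1, 3 } and a call at level 1 (as xfs_btree_update()
+ * does), the keys at levels 1 and 2 are both rewritten and the walk then
+ * stops.
+ */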
+
+/*
+ * Update the record referred to by cur to the value in the
+ * given record. This either works (return 0) or gets an
+ * EFSCORRUPTED error.
+ */
+int
+xfs_btree_update(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       int                     error;
+       int                     ptr;
+       union xfs_btree_rec     *rp;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGR(cur, rec);
+
+       /* Pick up the current block. */
+       block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, 0, bp);
+       if (error)
+               goto error0;
+#endif
+       /* Get the address of the rec to be updated. */
+       ptr = cur->bc_ptrs[0];
+       rp = xfs_btree_rec_addr(cur, ptr, block);
+
+       /* Fill in the new contents and log them. */
+       xfs_btree_copy_recs(cur, rp, rec, 1);
+       xfs_btree_log_recs(cur, bp, ptr, ptr);
+
+       /*
+        * If we are tracking the last record in the tree and
+        * we are at the far right edge of the tree, update it.
+        */
+       if (xfs_btree_is_lastrec(cur, block, 0)) {
+               cur->bc_ops->update_lastrec(cur, block, rec,
+                                           ptr, LASTREC_UPDATE);
+       }
+
+       /* Updating first rec in leaf. Pass new key value up to our parent. */
+       if (ptr == 1) {
+               union xfs_btree_key     key;
+
+               cur->bc_ops->init_key_from_rec(&key, rec);
+               error = xfs_btree_updkey(cur, &key, 1);
+               if (error)
+                       goto error0;
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
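+
+/*
+ * Illustrative usage sketch (not part of the original source), assuming the
+ * caller has initialised the cursor's search key and built rec in the
+ * btree's on-disk record format:
+ *
+ *     error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
+ *     if (!error && stat == 1)
+ *             error = xfs_btree_update(cur, &rec);
+ *
+ * Callers typically follow this lookup-then-update pattern.
+ */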
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int                                     /* error */
+xfs_btree_lshift(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       union xfs_btree_key     key;            /* btree key */
+       struct xfs_buf          *lbp;           /* left buffer pointer */
+       struct xfs_btree_block  *left;          /* left btree block */
+       int                     lrecs;          /* left record count */
+       struct xfs_buf          *rbp;           /* right buffer pointer */
+       struct xfs_btree_block  *right;         /* right btree block */
+       int                     rrecs;          /* right record count */
+       union xfs_btree_ptr     lptr;           /* left btree pointer */
+       union xfs_btree_key     *rkp = NULL;    /* right btree key */
+       union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
+       union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
+       int                     error;          /* error return value */
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, level);
+
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           level == cur->bc_nlevels - 1)
+               goto out0;
+
+       /* Set up variables for this block as "right". */
+       right = xfs_btree_get_block(cur, level, &rbp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, right, level, rbp);
+       if (error)
+               goto error0;
+#endif
+
+       /* If we've got no left sibling then we can't shift an entry left. */
+       xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+       if (xfs_btree_ptr_is_null(cur, &lptr))
+               goto out0;
+
+       /*
+        * If the cursor entry is the one that would be moved, don't
+        * do it... it's too complicated.
+        */
+       if (cur->bc_ptrs[level] <= 1)
+               goto out0;
+
+       /* Set up the left neighbor as "left". */
+       error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
+       if (error)
+               goto error0;
+
+       /* If it's full, it can't take another entry. */
+       lrecs = xfs_btree_get_numrecs(left);
+       if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
+               goto out0;
+
+       rrecs = xfs_btree_get_numrecs(right);
+
+       /*
+        * We add one entry to the left side and remove one for the right side.
+        * Account for it here, the changes will be updated on disk and logged
+        * later.
+        */
+       lrecs++;
+       rrecs--;
+
+       XFS_BTREE_STATS_INC(cur, lshift);
+       XFS_BTREE_STATS_ADD(cur, moves, 1);
+
+       /*
+        * If non-leaf, copy a key and a ptr to the left block.
+        * Log the changes to the left block.
+        */
+       if (level > 0) {
+               /* It's a non-leaf.  Move keys and pointers. */
+               union xfs_btree_key     *lkp;   /* left btree key */
+               union xfs_btree_ptr     *lpp;   /* left address pointer */
+
+               lkp = xfs_btree_key_addr(cur, lrecs, left);
+               rkp = xfs_btree_key_addr(cur, 1, right);
+
+               lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+               rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+               error = xfs_btree_check_ptr(cur, rpp, 0, level);
+               if (error)
+                       goto error0;
+#endif
+               xfs_btree_copy_keys(cur, lkp, rkp, 1);
+               xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
+
+               xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
+               xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
+
+               ASSERT(cur->bc_ops->keys_inorder(cur,
+                       xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
+       } else {
+               /* It's a leaf.  Move records.  */
+               union xfs_btree_rec     *lrp;   /* left record pointer */
+
+               lrp = xfs_btree_rec_addr(cur, lrecs, left);
+               rrp = xfs_btree_rec_addr(cur, 1, right);
+
+               xfs_btree_copy_recs(cur, lrp, rrp, 1);
+               xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
+
+               ASSERT(cur->bc_ops->recs_inorder(cur,
+                       xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
+       }
+
+       xfs_btree_set_numrecs(left, lrecs);
+       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+       xfs_btree_set_numrecs(right, rrecs);
+       xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+       /*
+        * Slide the contents of right down one entry.
+        */
+       XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
+       if (level > 0) {
+               /* It's a nonleaf. Operate on keys and ptrs. */
+#ifdef DEBUG
+               int                     i;              /* loop index */
+
+               for (i = 0; i < rrecs; i++) {
+                       error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
+                       if (error)
+                               goto error0;
+               }
+#endif
+               xfs_btree_shift_keys(cur,
+                               xfs_btree_key_addr(cur, 2, right),
+                               -1, rrecs);
+               xfs_btree_shift_ptrs(cur,
+                               xfs_btree_ptr_addr(cur, 2, right),
+                               -1, rrecs);
+
+               xfs_btree_log_keys(cur, rbp, 1, rrecs);
+               xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+       } else {
+               /* It's a leaf. Operate on records. */
+               xfs_btree_shift_recs(cur,
+                       xfs_btree_rec_addr(cur, 2, right),
+                       -1, rrecs);
+               xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+               /*
+                * If it's the first record in the block, we'll need a key
+                * structure to pass up to the next level (updkey).
+                */
+               cur->bc_ops->init_key_from_rec(&key,
+                       xfs_btree_rec_addr(cur, 1, right));
+               rkp = &key;
+       }
+
+       /* Update the parent key values of right. */
+       error = xfs_btree_updkey(cur, rkp, level + 1);
+       if (error)
+               goto error0;
+
+       /* Slide the cursor value left one. */
+       cur->bc_ptrs[level]--;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
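+
+/*
+ * Worked example (illustrative, not part of the original source): if the
+ * left sibling holds 5 records and this block holds 9, a successful left
+ * shift moves this block's first entry into the left sibling's slot 6,
+ * slides the remaining 8 entries down by one, pushes this block's new
+ * first key up to the parent, and decrements the cursor's index so that
+ * it still refers to the same record.
+ */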
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int                                     /* error */
+xfs_btree_rshift(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     *stat)          /* success/failure */
+{
+       union xfs_btree_key     key;            /* btree key */
+       struct xfs_buf          *lbp;           /* left buffer pointer */
+       struct xfs_btree_block  *left;          /* left btree block */
+       struct xfs_buf          *rbp;           /* right buffer pointer */
+       struct xfs_btree_block  *right;         /* right btree block */
+       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
+       union xfs_btree_ptr     rptr;           /* right block pointer */
+       union xfs_btree_key     *rkp;           /* right btree key */
+       int                     rrecs;          /* right record count */
+       int                     lrecs;          /* left record count */
+       int                     error;          /* error return value */
+       int                     i;              /* loop counter */
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, level);
+
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           (level == cur->bc_nlevels - 1))
+               goto out0;
+
+       /* Set up variables for this block as "left". */
+       left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, left, level, lbp);
+       if (error)
+               goto error0;
+#endif
+
+       /* If we've got no right sibling then we can't shift an entry right. */
+       xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &rptr))
+               goto out0;
+
+       /*
+        * If the cursor entry is the one that would be moved, don't
+        * do it... it's too complicated.
+        */
+       lrecs = xfs_btree_get_numrecs(left);
+       if (cur->bc_ptrs[level] >= lrecs)
+               goto out0;
+
+       /* Set up the right neighbor as "right". */
+       error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
+       if (error)
+               goto error0;
+
+       /* If it's full, it can't take another entry. */
+       rrecs = xfs_btree_get_numrecs(right);
+       if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
+               goto out0;
+
+       XFS_BTREE_STATS_INC(cur, rshift);
+       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+       /*
+        * Make a hole at the start of the right neighbor block, then
+        * copy the last left block entry to the hole.
+        */
+       if (level > 0) {
+               /* It's a nonleaf. Make a hole in the keys and ptrs. */
+               union xfs_btree_key     *lkp;
+               union xfs_btree_ptr     *lpp;
+               union xfs_btree_ptr     *rpp;
+
+               lkp = xfs_btree_key_addr(cur, lrecs, left);
+               lpp = xfs_btree_ptr_addr(cur, lrecs, left);
+               rkp = xfs_btree_key_addr(cur, 1, right);
+               rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+               for (i = rrecs - 1; i >= 0; i--) {
+                       error = xfs_btree_check_ptr(cur, rpp, i, level);
+                       if (error)
+                               goto error0;
+               }
+#endif
+
+               xfs_btree_shift_keys(cur, rkp, 1, rrecs);
+               xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
+
+#ifdef DEBUG
+               error = xfs_btree_check_ptr(cur, lpp, 0, level);
+               if (error)
+                       goto error0;
+#endif
+
+               /* Now put the new data in, and log it. */
+               xfs_btree_copy_keys(cur, rkp, lkp, 1);
+               xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
+
+               xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
+               xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
+
+               ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
+                       xfs_btree_key_addr(cur, 2, right)));
+       } else {
+               /* It's a leaf. Make a hole in the records. */
+               union xfs_btree_rec     *lrp;
+               union xfs_btree_rec     *rrp;
+
+               lrp = xfs_btree_rec_addr(cur, lrecs, left);
+               rrp = xfs_btree_rec_addr(cur, 1, right);
+
+               xfs_btree_shift_recs(cur, rrp, 1, rrecs);
+
+               /* Now put the new data in, and log it. */
+               xfs_btree_copy_recs(cur, rrp, lrp, 1);
+               xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
+
+               cur->bc_ops->init_key_from_rec(&key, rrp);
+               rkp = &key;
+
+               ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
+                       xfs_btree_rec_addr(cur, 2, right)));
+       }
+
+       /*
+        * Decrement and log left's numrecs, bump and log right's numrecs.
+        */
+       xfs_btree_set_numrecs(left, --lrecs);
+       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
+
+       xfs_btree_set_numrecs(right, ++rrecs);
+       xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
+
+       /*
+        * Using a temporary cursor, update the parent key values of the
+        * block on the right.
+        */
+       error = xfs_btree_dup_cursor(cur, &tcur);
+       if (error)
+               goto error0;
+       i = xfs_btree_lastrec(tcur, level);
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+       error = xfs_btree_increment(tcur, level, &i);
+       if (error)
+               goto error1;
+
+       error = xfs_btree_updkey(tcur, rkp, level + 1);
+       if (error)
+               goto error1;
+
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+
+error1:
+       XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
+}
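+
+/*
+ * Illustrative note (not part of the original source): unlike the left
+ * shift above, the parent key that changes here belongs to the right
+ * sibling, which is not on this cursor's path.  Hence the temporary
+ * cursor: it is positioned on the last record of this block, incremented
+ * at this level so its path runs through the right sibling, and then used
+ * to push the right sibling's new first key up via xfs_btree_updkey().
+ * The original cursor is left alone because its record did not move.
+ */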
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and the key to its first
+ * record (to be inserted into parent).
+ */
+STATIC int                                     /* error */
+xfs_btree_split(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       union xfs_btree_ptr     *ptrp,
+       union xfs_btree_key     *key,
+       struct xfs_btree_cur    **curp,
+       int                     *stat)          /* success/failure */
+{
+       union xfs_btree_ptr     lptr;           /* left sibling block ptr */
+       struct xfs_buf          *lbp;           /* left buffer pointer */
+       struct xfs_btree_block  *left;          /* left btree block */
+       union xfs_btree_ptr     rptr;           /* right sibling block ptr */
+       struct xfs_buf          *rbp;           /* right buffer pointer */
+       struct xfs_btree_block  *right;         /* right btree block */
+       union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
+       struct xfs_buf          *rrbp;          /* right-right buffer pointer */
+       struct xfs_btree_block  *rrblock;       /* right-right btree block */
+       int                     lrecs;
+       int                     rrecs;
+       int                     src_index;
+       int                     error;          /* error return value */
+#ifdef DEBUG
+       int                     i;
+#endif
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
+
+       XFS_BTREE_STATS_INC(cur, split);
+
+       /* Set up left block (current one). */
+       left = xfs_btree_get_block(cur, level, &lbp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, left, level, lbp);
+       if (error)
+               goto error0;
+#endif
+
+       xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+
+       /* Allocate the new block. If we can't do it, we're toast. Give up. */
+       error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
+       if (error)
+               goto error0;
+       if (*stat == 0)
+               goto out0;
+       XFS_BTREE_STATS_INC(cur, alloc);
+
+       /* Set up the new block as "right". */
+       error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+       if (error)
+               goto error0;
+
+       /* Fill in the btree header for the new right block. */
+       xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
+
+       /*
+        * Split the entries between the old and the new block evenly.
+        * Make sure that if there's an odd number of entries now, each
+        * new block will have the same number of entries.
+        */
+       lrecs = xfs_btree_get_numrecs(left);
+       rrecs = lrecs / 2;
+       if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
+               rrecs++;
+       src_index = (lrecs - rrecs + 1);
+
+       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+
+       /*
+        * Copy btree block entries from the left block over to the
+        * new block, the right. Update the right block and log the
+        * changes.
+        */
+       if (level > 0) {
+               /* It's a non-leaf.  Move keys and pointers. */
+               union xfs_btree_key     *lkp;   /* left btree key */
+               union xfs_btree_ptr     *lpp;   /* left address pointer */
+               union xfs_btree_key     *rkp;   /* right btree key */
+               union xfs_btree_ptr     *rpp;   /* right address pointer */
+
+               lkp = xfs_btree_key_addr(cur, src_index, left);
+               lpp = xfs_btree_ptr_addr(cur, src_index, left);
+               rkp = xfs_btree_key_addr(cur, 1, right);
+               rpp = xfs_btree_ptr_addr(cur, 1, right);
+
+#ifdef DEBUG
+               for (i = src_index; i < rrecs; i++) {
+                       error = xfs_btree_check_ptr(cur, lpp, i, level);
+                       if (error)
+                               goto error0;
+               }
+#endif
+
+               xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
+               xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
+
+               xfs_btree_log_keys(cur, rbp, 1, rrecs);
+               xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
+
+               /* Grab the keys to the entries moved to the right block */
+               xfs_btree_copy_keys(cur, key, rkp, 1);
+       } else {
+               /* It's a leaf.  Move records.  */
+               union xfs_btree_rec     *lrp;   /* left record pointer */
+               union xfs_btree_rec     *rrp;   /* right record pointer */
+
+               lrp = xfs_btree_rec_addr(cur, src_index, left);
+               rrp = xfs_btree_rec_addr(cur, 1, right);
+
+               xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
+               xfs_btree_log_recs(cur, rbp, 1, rrecs);
+
+               cur->bc_ops->init_key_from_rec(key,
+                       xfs_btree_rec_addr(cur, 1, right));
+       }
+
+       /*
+        * Find the left block number by looking in the buffer.
+        * Adjust numrecs, sibling pointers.
+        */
+       xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
+       xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
+       xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+       xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
+
+       lrecs -= rrecs;
+       xfs_btree_set_numrecs(left, lrecs);
+       xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
+
+       xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
+       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+       /*
+        * If there's a block to the new block's right, make that block
+        * point back to right instead of to left.
+        */
+       if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
+               error = xfs_btree_read_buf_block(cur, &rrptr,
+                                                       0, &rrblock, &rrbp);
+               if (error)
+                       goto error0;
+               xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
+               xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+       }
+       /*
+        * If the cursor is really in the right block, move it there.
+        * If it's just pointing past the last entry in left, then we'll
+        * insert there, so don't change anything in that case.
+        */
+       if (cur->bc_ptrs[level] > lrecs + 1) {
+               xfs_btree_setbuf(cur, level, rbp);
+               cur->bc_ptrs[level] -= lrecs;
+       }
+       /*
+        * If there are more levels, we'll need another cursor which refers
+        * to the right block, no matter where this cursor was.
+        */
+       if (level + 1 < cur->bc_nlevels) {
+               error = xfs_btree_dup_cursor(cur, curp);
+               if (error)
+                       goto error0;
+               (*curp)->bc_ptrs[level + 1]++;
+       }
+       *ptrp = rptr;
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
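+
+/*
+ * Worked example (illustrative, not part of the original source): a full
+ * block with 9 entries normally gives lrecs/2 = 4 entries (the last four)
+ * to the new right block and keeps 5 on the left.  If the cursor points at
+ * entry rrecs + 1 or lower, one extra entry moves right instead so the
+ * pending insert lands in the emptier left half.  Afterwards, if the
+ * cursor's entry ended up in the right block, the cursor is re-pointed at
+ * the right buffer and its index is reduced by the left block's new
+ * record count.
+ */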
+
+/*
+ * Copy the old inode root contents into a real block and make the
+ * broot point to it.
+ */
+int                                            /* error */
+xfs_btree_new_iroot(
+       struct xfs_btree_cur    *cur,           /* btree cursor */
+       int                     *logflags,      /* logging flags for inode */
+       int                     *stat)          /* return status - 0 fail */
+{
+       struct xfs_buf          *cbp;           /* buffer for cblock */
+       struct xfs_btree_block  *block;         /* btree block */
+       struct xfs_btree_block  *cblock;        /* child btree block */
+       union xfs_btree_key     *ckp;           /* child key pointer */
+       union xfs_btree_ptr     *cpp;           /* child ptr pointer */
+       union xfs_btree_key     *kp;            /* pointer to btree key */
+       union xfs_btree_ptr     *pp;            /* pointer to block addr */
+       union xfs_btree_ptr     nptr;           /* new block addr */
+       int                     level;          /* btree level */
+       int                     error;          /* error return code */
+#ifdef DEBUG
+       int                     i;              /* loop counter */
+#endif
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_STATS_INC(cur, newroot);
+
+       ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+
+       level = cur->bc_nlevels - 1;
+
+       block = xfs_btree_get_iroot(cur);
+       pp = xfs_btree_ptr_addr(cur, 1, block);
+
+       /* Allocate the new block. If we can't do it, we're toast. Give up. */
+       error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
+       if (error)
+               goto error0;
+       if (*stat == 0) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               return 0;
+       }
+       XFS_BTREE_STATS_INC(cur, alloc);
+
+       /* Copy the root into a real block. */
+       error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+       if (error)
+               goto error0;
+
+       /*
+        * We can't just memcpy() the root in for CRC enabled btree blocks.
+        * In that case we also have to ensure the blkno remains correct.
+        */
+       memcpy(cblock, block, xfs_btree_block_len(cur));
+       if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+               if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+                       cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+               else
+                       cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+       }
+
+       be16_add_cpu(&block->bb_level, 1);
+       xfs_btree_set_numrecs(block, 1);
+       cur->bc_nlevels++;
+       cur->bc_ptrs[level + 1] = 1;
+
+       kp = xfs_btree_key_addr(cur, 1, block);
+       ckp = xfs_btree_key_addr(cur, 1, cblock);
+       xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
+
+       cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+       for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
+               error = xfs_btree_check_ptr(cur, pp, i, level);
+               if (error)
+                       goto error0;
+       }
+#endif
+       xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
+
+#ifdef DEBUG
+       error = xfs_btree_check_ptr(cur, &nptr, 0, level);
+       if (error)
+               goto error0;
+#endif
+       xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
+
+       xfs_iroot_realloc(cur->bc_private.b.ip,
+                         1 - xfs_btree_get_numrecs(cblock),
+                         cur->bc_private.b.whichfork);
+
+       xfs_btree_setbuf(cur, level, cbp);
+
+       /*
+        * Do all this logging at the end so that
+        * the root is at the right level.
+        */
+       xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
+       xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+       xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
+
+       *logflags |=
+               XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
+       *stat = 1;
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
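+
+/*
+ * Illustrative note (not part of the original source): this is the
+ * root-in-inode analogue of xfs_btree_new_root() below.  The whole
+ * in-inode root is copied into a freshly allocated block, the inode fork
+ * root is shrunk to a single key/pointer referring to that block, and the
+ * tree grows one level deeper; the caller logs the inode with the
+ * returned logflags.
+ */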
+
+/*
+ * Allocate a new root block, fill it in.
+ */
+STATIC int                             /* error */
+xfs_btree_new_root(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     *stat)  /* success/failure */
+{
+       struct xfs_btree_block  *block; /* one half of the old root block */
+       struct xfs_buf          *bp;    /* buffer containing block */
+       int                     error;  /* error return value */
+       struct xfs_buf          *lbp;   /* left buffer pointer */
+       struct xfs_btree_block  *left;  /* left btree block */
+       struct xfs_buf          *nbp;   /* new (root) buffer */
+       struct xfs_btree_block  *new;   /* new (root) btree block */
+       int                     nptr;   /* new value for key index, 1 or 2 */
+       struct xfs_buf          *rbp;   /* right buffer pointer */
+       struct xfs_btree_block  *right; /* right btree block */
+       union xfs_btree_ptr     rptr;
+       union xfs_btree_ptr     lptr;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_STATS_INC(cur, newroot);
+
+       /* initialise our start point from the cursor */
+       cur->bc_ops->init_ptr_from_cur(cur, &rptr);
+
+       /* Allocate the new block. If we can't do it, we're toast. Give up. */
+       error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
+       if (error)
+               goto error0;
+       if (*stat == 0)
+               goto out0;
+       XFS_BTREE_STATS_INC(cur, alloc);
+
+       /* Set up the new block. */
+       error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+       if (error)
+               goto error0;
+
+       /* Set the root in the holding structure, increasing the level by 1. */
+       cur->bc_ops->set_root(cur, &lptr, 1);
+
+       /*
+        * At the previous root level there are now two blocks: the old root,
+        * and the new block generated when it was split.  We don't know which
+        * one the cursor is pointing at, so we set up variables "left" and
+        * "right" for each case.
+        */
+       block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
+       if (error)
+               goto error0;
+#endif
+
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+               /* Our block is left, pick up the right block. */
+               lbp = bp;
+               xfs_btree_buf_to_ptr(cur, lbp, &lptr);
+               left = block;
+               error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
+               if (error)
+                       goto error0;
+               bp = rbp;
+               nptr = 1;
+       } else {
+               /* Our block is right, pick up the left block. */
+               rbp = bp;
+               xfs_btree_buf_to_ptr(cur, rbp, &rptr);
+               right = block;
+               xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
+               error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
+               if (error)
+                       goto error0;
+               bp = lbp;
+               nptr = 2;
+       }
+       /* Fill in the new block's btree header and log it. */
+       xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
+       xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
+       ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
+                       !xfs_btree_ptr_is_null(cur, &rptr));
+
+       /* Fill in the key data in the new root. */
+       if (xfs_btree_get_level(left) > 0) {
+               xfs_btree_copy_keys(cur,
+                               xfs_btree_key_addr(cur, 1, new),
+                               xfs_btree_key_addr(cur, 1, left), 1);
+               xfs_btree_copy_keys(cur,
+                               xfs_btree_key_addr(cur, 2, new),
+                               xfs_btree_key_addr(cur, 1, right), 1);
+       } else {
+               cur->bc_ops->init_key_from_rec(
+                               xfs_btree_key_addr(cur, 1, new),
+                               xfs_btree_rec_addr(cur, 1, left));
+               cur->bc_ops->init_key_from_rec(
+                               xfs_btree_key_addr(cur, 2, new),
+                               xfs_btree_rec_addr(cur, 1, right));
+       }
+       xfs_btree_log_keys(cur, nbp, 1, 2);
+
+       /* Fill in the pointer data in the new root. */
+       xfs_btree_copy_ptrs(cur,
+               xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
+       xfs_btree_copy_ptrs(cur,
+               xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
+       xfs_btree_log_ptrs(cur, nbp, 1, 2);
+
+       /* Fix up the cursor. */
+       xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+       cur->bc_ptrs[cur->bc_nlevels] = nptr;
+       cur->bc_nlevels++;
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 0;
+       return 0;
+}
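+
+/*
+ * Illustrative note (not part of the original source): nptr records
+ * whether the block the cursor was already on became entry 1 (it was the
+ * left half of the old root level) or entry 2 (the right half) of the new
+ * root, so the cursor can simply be extended upward by one level instead
+ * of being re-looked-up.
+ */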
+
+STATIC int
+xfs_btree_make_block_unfull(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* btree level */
+       int                     numrecs,/* # of recs in block */
+       int                     *oindex,/* old tree index */
+       int                     *index, /* new tree index */
+       union xfs_btree_ptr     *nptr,  /* new btree ptr */
+       struct xfs_btree_cur    **ncur, /* new btree cursor */
+       union xfs_btree_rec     *nrec,  /* new record */
+       int                     *stat)
+{
+       union xfs_btree_key     key;    /* new btree key value */
+       int                     error = 0;
+
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           level == cur->bc_nlevels - 1) {
+               struct xfs_inode *ip = cur->bc_private.b.ip;
+
+               if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
+                       /* A root block that can be made bigger. */
+                       xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
+               } else {
+                       /* A root block that needs replacing */
+                       int     logflags = 0;
+
+                       error = xfs_btree_new_iroot(cur, &logflags, stat);
+                       if (error || *stat == 0)
+                               return error;
+
+                       xfs_trans_log_inode(cur->bc_tp, ip, logflags);
+               }
+
+               return 0;
+       }
+
+       /* First, try shifting an entry to the right neighbor. */
+       error = xfs_btree_rshift(cur, level, stat);
+       if (error || *stat)
+               return error;
+
+       /* Next, try shifting an entry to the left neighbor. */
+       error = xfs_btree_lshift(cur, level, stat);
+       if (error)
+               return error;
+
+       if (*stat) {
+               *oindex = *index = cur->bc_ptrs[level];
+               return 0;
+       }
+
+       /*
+        * Next, try splitting the current block in half.
+        *
+        * If this works we have to re-set our variables because we
+        * could be in a different block now.
+        */
+       error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
+       if (error || *stat == 0)
+               return error;
+
+       *index = cur->bc_ptrs[level];
+       cur->bc_ops->init_rec_from_key(&key, nrec);
+       return 0;
+}
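+
+/*
+ * Illustrative note (not part of the original source): the strategies above
+ * are tried in increasing order of cost.  An in-inode root is grown in
+ * place or pushed out via xfs_btree_new_iroot(); otherwise a record is
+ * shifted to the right sibling if possible, then to the left, and the
+ * block is split only as a last resort.  A split hands nptr/nrec/ncur back
+ * so that xfs_btree_insrec() can insert the new sibling's key one level up.
+ */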
+
+/*
+ * Insert one record/level.  Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ */
+STATIC int
+xfs_btree_insrec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       int                     level,  /* level to insert record at */
+       union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
+       union xfs_btree_rec     *recp,  /* i/o: record data inserted */
+       struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
+       int                     *stat)  /* success/failure */
+{
+       struct xfs_btree_block  *block; /* btree block */
+       struct xfs_buf          *bp;    /* buffer for block */
+       union xfs_btree_key     key;    /* btree key */
+       union xfs_btree_ptr     nptr;   /* new block ptr */
+       struct xfs_btree_cur    *ncur;  /* new btree cursor */
+       union xfs_btree_rec     nrec;   /* new record */
+       int                     optr;   /* old key/record index */
+       int                     ptr;    /* key/record index */
+       int                     numrecs;/* number of records */
+       int                     error;  /* error return value */
+#ifdef DEBUG
+       int                     i;
+#endif
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
+
+       ncur = NULL;
+
+       /*
+        * If we have an external root pointer, and we've made it to the
+        * root level, allocate a new root block and we're done.
+        */
+       if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           (level >= cur->bc_nlevels)) {
+               error = xfs_btree_new_root(cur, stat);
+               xfs_btree_set_ptr_null(cur, ptrp);
+
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               return error;
+       }
+
+       /* If we're off the left edge, return failure. */
+       ptr = cur->bc_ptrs[level];
+       if (ptr == 0) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+
+       /* Make a key out of the record data to be inserted, and save it. */
+       cur->bc_ops->init_key_from_rec(&key, recp);
+
+       optr = ptr;
+
+       XFS_BTREE_STATS_INC(cur, insrec);
+
+       /* Get pointers to the btree buffer and block. */
+       block = xfs_btree_get_block(cur, level, &bp);
+       numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto error0;
+
+       /* Check that the new entry is being inserted in the right place. */
+       if (ptr <= numrecs) {
+               if (level == 0) {
+                       ASSERT(cur->bc_ops->recs_inorder(cur, recp,
+                               xfs_btree_rec_addr(cur, ptr, block)));
+               } else {
+                       ASSERT(cur->bc_ops->keys_inorder(cur, &key,
+                               xfs_btree_key_addr(cur, ptr, block)));
+               }
+       }
+#endif
+
+       /*
+        * If the block is full, we can't insert the new entry until we
+        * make the block un-full.
+        */
+       xfs_btree_set_ptr_null(cur, &nptr);
+       if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
+               error = xfs_btree_make_block_unfull(cur, level, numrecs,
+                                       &optr, &ptr, &nptr, &ncur, &nrec, stat);
+               if (error || *stat == 0)
+                       goto error0;
+       }
+
+       /*
+        * The current block may have changed if the block was
+        * previously full and we have just made space in it.
+        */
+       block = xfs_btree_get_block(cur, level, &bp);
+       numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               return error;
+#endif
+
+       /*
+        * At this point we know there's room for our new entry in the block
+        * we're pointing at.
+        */
+       XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
+
+       if (level > 0) {
+               /* It's a nonleaf. Make a hole in the keys and ptrs. */
+               union xfs_btree_key     *kp;
+               union xfs_btree_ptr     *pp;
+
+               kp = xfs_btree_key_addr(cur, ptr, block);
+               pp = xfs_btree_ptr_addr(cur, ptr, block);
+
+#ifdef DEBUG
+               for (i = numrecs - ptr; i >= 0; i--) {
+                       error = xfs_btree_check_ptr(cur, pp, i, level);
+                       if (error)
+                               return error;
+               }
+#endif
+
+               xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
+               xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
+
+#ifdef DEBUG
+               error = xfs_btree_check_ptr(cur, ptrp, 0, level);
+               if (error)
+                       goto error0;
+#endif
+
+               /* Now put the new data in, bump numrecs and log it. */
+               xfs_btree_copy_keys(cur, kp, &key, 1);
+               xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
+               numrecs++;
+               xfs_btree_set_numrecs(block, numrecs);
+               xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
+               xfs_btree_log_keys(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+               if (ptr < numrecs) {
+                       ASSERT(cur->bc_ops->keys_inorder(cur, kp,
+                               xfs_btree_key_addr(cur, ptr + 1, block)));
+               }
+#endif
+       } else {
+               /* It's a leaf. Make a hole in the records. */
+               union xfs_btree_rec             *rp;
+
+               rp = xfs_btree_rec_addr(cur, ptr, block);
+
+               xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
+
+               /* Now put the new data in, bump numrecs and log it. */
+               xfs_btree_copy_recs(cur, rp, recp, 1);
+               xfs_btree_set_numrecs(block, ++numrecs);
+               xfs_btree_log_recs(cur, bp, ptr, numrecs);
+#ifdef DEBUG
+               if (ptr < numrecs) {
+                       ASSERT(cur->bc_ops->recs_inorder(cur, rp,
+                               xfs_btree_rec_addr(cur, ptr + 1, block)));
+               }
+#endif
+       }
+
+       /* Log the new number of records in the btree header. */
+       xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+       /* If we inserted at the start of a block, update the parents' keys. */
+       if (optr == 1) {
+               error = xfs_btree_updkey(cur, &key, level + 1);
+               if (error)
+                       goto error0;
+       }
+
+       /*
+        * If we are tracking the last record in the tree and
+        * we are at the far right edge of the tree, update it.
+        */
+       if (xfs_btree_is_lastrec(cur, block, level)) {
+               cur->bc_ops->update_lastrec(cur, block, recp,
+                                           ptr, LASTREC_INSREC);
+       }
+
+       /*
+        * Return the new block number, if any.
+        * If there is one, give back a record value and a cursor too.
+        */
+       *ptrp = nptr;
+       if (!xfs_btree_ptr_is_null(cur, &nptr)) {
+               *recp = nrec;
+               *curp = ncur;
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
+
+/*
+ * Insert the record at the point referenced by cur.
+ *
+ * A multi-level split of the tree on insert will invalidate the original
+ * cursor.  All callers of this function should assume that the cursor is
+ * no longer valid and revalidate it.
+ */
+int
+xfs_btree_insert(
+       struct xfs_btree_cur    *cur,
+       int                     *stat)
+{
+       int                     error;  /* error return value */
+       int                     i;      /* result value, 0 for failure */
+       int                     level;  /* current level number in btree */
+       union xfs_btree_ptr     nptr;   /* new block number (split result) */
+       struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
+       struct xfs_btree_cur    *pcur;  /* previous level's cursor */
+       union xfs_btree_rec     rec;    /* record to insert */
+
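+       /*
+        * Start at the leaf level with the caller's cursor.  pcur is the
+        * cursor we insert with at each level; ncur picks up any new cursor
+        * created when a block at that level is split.
+        */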
+       level = 0;
+       ncur = NULL;
+       pcur = cur;
+
+       xfs_btree_set_ptr_null(cur, &nptr);
+       cur->bc_ops->init_rec_from_cur(cur, &rec);
+
+       /*
+        * Loop going up the tree, starting at the leaf level.
+        * Stop when we don't get a split block, that must mean that
+        * the insert is finished with this level.
+        */
+       do {
+               /*
+                * Insert nrec/nptr into this level of the tree.
+                * Note if we fail, nptr will be null.
+                */
+               error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
+               if (error) {
+                       if (pcur != cur)
+                               xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+                       goto error0;
+               }
+
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               level++;
+
+               /*
+                * See if the cursor we just used is trash.
+                * Can't trash the caller's cursor, but otherwise we should
+                * if ncur is a new cursor or we're about to be done.
+                */
+               if (pcur != cur &&
+                   (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
+                       /* Save the state from the cursor before we trash it */
+                       if (cur->bc_ops->update_cursor)
+                               cur->bc_ops->update_cursor(pcur, cur);
+                       cur->bc_nlevels = pcur->bc_nlevels;
+                       xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+               }
+               /* If we got a new cursor, switch to it. */
+               if (ncur) {
+                       pcur = ncur;
+                       ncur = NULL;
+               }
+       } while (!xfs_btree_ptr_is_null(cur, &nptr));
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = i;
+       return 0;
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
+
+/*
+ * Try to merge a non-leaf block back into the inode root.
+ *
+ * Note: the killroot name comes from the fact that we're effectively
+ * killing the old root block.  But because we can't just delete the
+ * inode we have to copy the single block it was pointing to into the
+ * inode.
+ */
+STATIC int
+xfs_btree_kill_iroot(
+       struct xfs_btree_cur    *cur)
+{
+       int                     whichfork = cur->bc_private.b.whichfork;
+       struct xfs_inode        *ip = cur->bc_private.b.ip;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_btree_block  *block;
+       struct xfs_btree_block  *cblock;
+       union xfs_btree_key     *kp;
+       union xfs_btree_key     *ckp;
+       union xfs_btree_ptr     *pp;
+       union xfs_btree_ptr     *cpp;
+       struct xfs_buf          *cbp;
+       int                     level;
+       int                     index;
+       int                     numrecs;
+#ifdef DEBUG
+       union xfs_btree_ptr     ptr;
+       int                     i;
+#endif
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+       ASSERT(cur->bc_nlevels > 1);
+
+       /*
+        * Don't deal with the case where the root block needs to be a leaf.
+        * We're just going to turn the thing back into extents anyway.
+        */
+       level = cur->bc_nlevels - 1;
+       if (level == 1)
+               goto out0;
+
+       /*
+        * Give up if the root has multiple children.
+        */
+       block = xfs_btree_get_iroot(cur);
+       if (xfs_btree_get_numrecs(block) != 1)
+               goto out0;
+
+       cblock = xfs_btree_get_block(cur, level - 1, &cbp);
+       numrecs = xfs_btree_get_numrecs(cblock);
+
+       /*
+        * Only do this if the next level down will fit in the inode root.
+        * Its contents are copied up into the inode, and instead of freeing
+        * the root we free the next level down.
+        */
+       if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
+               goto out0;
+
+       XFS_BTREE_STATS_INC(cur, killroot);
+
+#ifdef DEBUG
+       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
+       ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
+       ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
+#endif
+
+       index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
+       if (index) {
+               xfs_iroot_realloc(cur->bc_private.b.ip, index,
+                                 cur->bc_private.b.whichfork);
+               block = ifp->if_broot;
+       }
+
+       be16_add_cpu(&block->bb_numrecs, index);
+       ASSERT(block->bb_numrecs == cblock->bb_numrecs);
+
+       kp = xfs_btree_key_addr(cur, 1, block);
+       ckp = xfs_btree_key_addr(cur, 1, cblock);
+       xfs_btree_copy_keys(cur, kp, ckp, numrecs);
+
+       pp = xfs_btree_ptr_addr(cur, 1, block);
+       cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+#ifdef DEBUG
+       for (i = 0; i < numrecs; i++) {
+               int             error;
+
+               error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
+               if (error) {
+                       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+                       return error;
+               }
+       }
+#endif
+       xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
+
+       cur->bc_ops->free_block(cur, cbp);
+       XFS_BTREE_STATS_INC(cur, free);
+
+       cur->bc_bufs[level - 1] = NULL;
+       be16_add_cpu(&block->bb_level, -1);
+       xfs_trans_log_inode(cur->bc_tp, ip,
+               XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+       cur->bc_nlevels--;
+out0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+}
+
+/*
+ * Kill the current root node, and replace it with its only child node.
+ */
+STATIC int
+xfs_btree_kill_root(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       int                     level,
+       union xfs_btree_ptr     *newroot)
+{
+       int                     error;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_STATS_INC(cur, killroot);
+
+       /*
+        * Update the root pointer, decreasing the level by 1 and then
+        * free the old root.
+        */
+       cur->bc_ops->set_root(cur, newroot, -1);
+
+       error = cur->bc_ops->free_block(cur, bp);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+
+       XFS_BTREE_STATS_INC(cur, free);
+
+       cur->bc_bufs[level] = NULL;
+       cur->bc_ra[level] = 0;
+       cur->bc_nlevels--;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+}
+
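+/*
+ * Step the cursor back one entry at the given level (levels above the leaf
+ * only) and report success back to the caller of the deletion.
+ */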
+STATIC int
+xfs_btree_dec_cursor(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       int                     *stat)
+{
+       int                     error;
+       int                     i;
+
+       if (level > 0) {
+               error = xfs_btree_decrement(cur, level, &i);
+               if (error)
+                       return error;
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Single level of the btree record deletion routine.
+ * Delete record pointed to by cur/level.
+ * Remove the record from its block then rebalance the tree.
+ * Return 0 for error, 1 for done, 2 to go on to the next level.
+ */
+STATIC int                                     /* error */
+xfs_btree_delrec(
+       struct xfs_btree_cur    *cur,           /* btree cursor */
+       int                     level,          /* level removing record from */
+       int                     *stat)          /* fail/done/go-on */
+{
+       struct xfs_btree_block  *block;         /* btree block */
+       union xfs_btree_ptr     cptr;           /* current block ptr */
+       struct xfs_buf          *bp;            /* buffer for block */
+       int                     error;          /* error return value */
+       int                     i;              /* loop counter */
+       union xfs_btree_key     key;            /* storage for keyp */
+       union xfs_btree_key     *keyp = &key;   /* passed to the next level */
+       union xfs_btree_ptr     lptr;           /* left sibling block ptr */
+       struct xfs_buf          *lbp;           /* left buffer pointer */
+       struct xfs_btree_block  *left;          /* left btree block */
+       int                     lrecs = 0;      /* left record count */
+       int                     ptr;            /* key/record index */
+       union xfs_btree_ptr     rptr;           /* right sibling block ptr */
+       struct xfs_buf          *rbp;           /* right buffer pointer */
+       struct xfs_btree_block  *right;         /* right btree block */
+       struct xfs_btree_block  *rrblock;       /* right-right btree block */
+       struct xfs_buf          *rrbp;          /* right-right buffer pointer */
+       int                     rrecs = 0;      /* right record count */
+       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
+       int                     numrecs;        /* temporary numrec count */
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_TRACE_ARGI(cur, level);
+
+       tcur = NULL;
+
+       /* Get the index of the entry being deleted, check for nothing there. */
+       ptr = cur->bc_ptrs[level];
+       if (ptr == 0) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+
+       /* Get the buffer & block containing the record or key/ptr. */
+       block = xfs_btree_get_block(cur, level, &bp);
+       numrecs = xfs_btree_get_numrecs(block);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto error0;
+#endif
+
+       /* Fail if we're off the end of the block. */
+       if (ptr > numrecs) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+
+       XFS_BTREE_STATS_INC(cur, delrec);
+       XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
+
+       /* Excise the entries being deleted. */
+       if (level > 0) {
+               /* It's a non-leaf. Operate on keys and ptrs. */
+               union xfs_btree_key     *lkp;
+               union xfs_btree_ptr     *lpp;
+
+               lkp = xfs_btree_key_addr(cur, ptr + 1, block);
+               lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
+
+#ifdef DEBUG
+               for (i = 0; i < numrecs - ptr; i++) {
+                       error = xfs_btree_check_ptr(cur, lpp, i, level);
+                       if (error)
+                               goto error0;
+               }
+#endif
+
+               if (ptr < numrecs) {
+                       xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
+                       xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
+                       xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
+                       xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
+               }
+
+               /*
+                * If it's the first record in the block, we'll need to pass a
+                * key up to the next level (updkey).
+                */
+               if (ptr == 1)
+                       keyp = xfs_btree_key_addr(cur, 1, block);
+       } else {
+               /* It's a leaf. Operate on records. */
+               if (ptr < numrecs) {
+                       xfs_btree_shift_recs(cur,
+                               xfs_btree_rec_addr(cur, ptr + 1, block),
+                               -1, numrecs - ptr);
+                       xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
+               }
+
+               /*
+                * If it's the first record in the block, we'll need a key
+                * structure to pass up to the next level (updkey).
+                */
+               if (ptr == 1) {
+                       cur->bc_ops->init_key_from_rec(&key,
+                                       xfs_btree_rec_addr(cur, 1, block));
+                       keyp = &key;
+               }
+       }
+
+       /*
+        * Decrement and log the number of entries in the block.
+        */
+       xfs_btree_set_numrecs(block, --numrecs);
+       xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
+
+       /*
+        * If we are tracking the last record in the tree and
+        * we are at the far right edge of the tree, update it.
+        */
+       if (xfs_btree_is_lastrec(cur, block, level)) {
+               cur->bc_ops->update_lastrec(cur, block, NULL,
+                                           ptr, LASTREC_DELREC);
+       }
+
+       /*
+        * We're at the root level.  First, shrink the root block in-memory.
+        * Try to get rid of the next level down.  If we can't then there's
+        * nothing left to do.
+        */
+       if (level == cur->bc_nlevels - 1) {
+               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+                       xfs_iroot_realloc(cur->bc_private.b.ip, -1,
+                                         cur->bc_private.b.whichfork);
+
+                       error = xfs_btree_kill_iroot(cur);
+                       if (error)
+                               goto error0;
+
+                       error = xfs_btree_dec_cursor(cur, level, stat);
+                       if (error)
+                               goto error0;
+                       *stat = 1;
+                       return 0;
+               }
+
+               /*
+                * If this is the root level, and there's only one entry left,
+                * and it's NOT the leaf level, then we can get rid of this
+                * level.
+                */
+               if (numrecs == 1 && level > 0) {
+                       union xfs_btree_ptr     *pp;
+                       /*
+                        * pp is still set to the first pointer in the block.
+                        * Make it the new root of the btree.
+                        */
+                       pp = xfs_btree_ptr_addr(cur, 1, block);
+                       error = xfs_btree_kill_root(cur, bp, level, pp);
+                       if (error)
+                               goto error0;
+               } else if (level > 0) {
+                       error = xfs_btree_dec_cursor(cur, level, stat);
+                       if (error)
+                               goto error0;
+               }
+               *stat = 1;
+               return 0;
+       }
+
+       /*
+        * If we deleted the leftmost entry in the block, update the
+        * key values above us in the tree.
+        */
+       if (ptr == 1) {
+               error = xfs_btree_updkey(cur, keyp, level + 1);
+               if (error)
+                       goto error0;
+       }
+
+       /*
+        * If the number of records remaining in the block is at least
+        * the minimum, we're done.
+        */
+       if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
+               error = xfs_btree_dec_cursor(cur, level, stat);
+               if (error)
+                       goto error0;
+               return 0;
+       }
+
+       /*
+        * Otherwise, we have to move some records around to keep the
+        * tree balanced.  Look at the left and right sibling blocks to
+        * see if we can re-balance by moving only one record.
+        */
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
+
+       if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+               /*
+                * One child of root, need to get a chance to copy its contents
+                * into the root and delete it. Can't go up to next level,
+                * there's nothing to delete there.
+                */
+               if (xfs_btree_ptr_is_null(cur, &rptr) &&
+                   xfs_btree_ptr_is_null(cur, &lptr) &&
+                   level == cur->bc_nlevels - 2) {
+                       error = xfs_btree_kill_iroot(cur);
+                       if (!error)
+                               error = xfs_btree_dec_cursor(cur, level, stat);
+                       if (error)
+                               goto error0;
+                       return 0;
+               }
+       }
+
+       ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
+              !xfs_btree_ptr_is_null(cur, &lptr));
+
+       /*
+        * Duplicate the cursor so our btree manipulations here won't
+        * disrupt the next level up.
+        */
+       error = xfs_btree_dup_cursor(cur, &tcur);
+       if (error)
+               goto error0;
+
+       /*
+        * If there's a right sibling, see if it's ok to shift an entry
+        * out of it.
+        */
+       if (!xfs_btree_ptr_is_null(cur, &rptr)) {
+               /*
+                * Move the temp cursor to the last entry in the next block.
+                * Actually any entry but the first would suffice.
+                */
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               error = xfs_btree_increment(tcur, level, &i);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               i = xfs_btree_lastrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               /* Grab a pointer to the block. */
+               right = xfs_btree_get_block(tcur, level, &rbp);
+#ifdef DEBUG
+               error = xfs_btree_check_block(tcur, right, level, rbp);
+               if (error)
+                       goto error0;
+#endif
+               /* Grab the current block number, for future use. */
+               xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
+
+               /*
+                * If right block is full enough so that removing one entry
+                * won't make it too empty, and left-shifting an entry out
+                * of right to us works, we're done.
+                */
+               if (xfs_btree_get_numrecs(right) - 1 >=
+                   cur->bc_ops->get_minrecs(tcur, level)) {
+                       error = xfs_btree_lshift(tcur, level, &i);
+                       if (error)
+                               goto error0;
+                       if (i) {
+                               ASSERT(xfs_btree_get_numrecs(block) >=
+                                      cur->bc_ops->get_minrecs(tcur, level));
+
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                               tcur = NULL;
+
+                               error = xfs_btree_dec_cursor(cur, level, stat);
+                               if (error)
+                                       goto error0;
+                               return 0;
+                       }
+               }
+
+               /*
+                * Otherwise, grab the number of records in right for
+                * future reference, and fix up the temp cursor to point
+                * to our block again (last record).
+                */
+               rrecs = xfs_btree_get_numrecs(right);
+               if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+                       i = xfs_btree_firstrec(tcur, level);
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+                       error = xfs_btree_decrement(tcur, level, &i);
+                       if (error)
+                               goto error0;
+                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               }
+       }
+
+       /*
+        * If there's a left sibling, see if it's ok to shift an entry
+        * out of it.
+        */
+       if (!xfs_btree_ptr_is_null(cur, &lptr)) {
+               /*
+                * Move the temp cursor to the first entry in the
+                * previous block.
+                */
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               error = xfs_btree_decrement(tcur, level, &i);
+               if (error)
+                       goto error0;
+               i = xfs_btree_firstrec(tcur, level);
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               /* Grab a pointer to the block. */
+               left = xfs_btree_get_block(tcur, level, &lbp);
+#ifdef DEBUG
+               error = xfs_btree_check_block(cur, left, level, lbp);
+               if (error)
+                       goto error0;
+#endif
+               /* Grab the current block number, for future use. */
+               xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
+
+               /*
+                * If left block is full enough so that removing one entry
+                * won't make it too empty, and right-shifting an entry out
+                * of left to us works, we're done.
+                */
+               if (xfs_btree_get_numrecs(left) - 1 >=
+                   cur->bc_ops->get_minrecs(tcur, level)) {
+                       error = xfs_btree_rshift(tcur, level, &i);
+                       if (error)
+                               goto error0;
+                       if (i) {
+                               ASSERT(xfs_btree_get_numrecs(block) >=
+                                      cur->bc_ops->get_minrecs(tcur, level));
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                               tcur = NULL;
+                               if (level == 0)
+                                       cur->bc_ptrs[0]++;
+                               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+                               *stat = 1;
+                               return 0;
+                       }
+               }
+
+               /*
+                * Otherwise, grab the number of records in left for
+                * future reference.
+                */
+               lrecs = xfs_btree_get_numrecs(left);
+       }
+
+       /* Delete the temp cursor, we're done with it. */
+       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+       tcur = NULL;
+
+       /* If here, we need to do a join to keep the tree balanced. */
+       ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
+
+       if (!xfs_btree_ptr_is_null(cur, &lptr) &&
+           lrecs + xfs_btree_get_numrecs(block) <=
+                       cur->bc_ops->get_maxrecs(cur, level)) {
+               /*
+                * Set "right" to be the starting block,
+                * "left" to be the left neighbor.
+                */
+               rptr = cptr;
+               right = block;
+               rbp = bp;
+               error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
+               if (error)
+                       goto error0;
+
+       /*
+        * If that won't work, see if we can join with the right neighbor block.
+        */
+       } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
+                  rrecs + xfs_btree_get_numrecs(block) <=
+                       cur->bc_ops->get_maxrecs(cur, level)) {
+               /*
+                * Set "left" to be the starting block,
+                * "right" to be the right neighbor.
+                */
+               lptr = cptr;
+               left = block;
+               lbp = bp;
+               error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
+               if (error)
+                       goto error0;
+
+       /*
+        * Otherwise, we can't fix the imbalance.
+        * Just return.  This is probably a logic error, but it's not fatal.
+        */
+       } else {
+               error = xfs_btree_dec_cursor(cur, level, stat);
+               if (error)
+                       goto error0;
+               return 0;
+       }
+
+       rrecs = xfs_btree_get_numrecs(right);
+       lrecs = xfs_btree_get_numrecs(left);
+
+       /*
+        * We're now going to join "left" and "right" by moving all the stuff
+        * in "right" to "left" and deleting "right".
+        */
+       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
+       if (level > 0) {
+               /* It's a non-leaf.  Move keys and pointers. */
+               union xfs_btree_key     *lkp;   /* left btree key */
+               union xfs_btree_ptr     *lpp;   /* left address pointer */
+               union xfs_btree_key     *rkp;   /* right btree key */
+               union xfs_btree_ptr     *rpp;   /* right address pointer */
+
+               lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
+               lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
+               rkp = xfs_btree_key_addr(cur, 1, right);
+               rpp = xfs_btree_ptr_addr(cur, 1, right);
+#ifdef DEBUG
+               for (i = 1; i < rrecs; i++) {
+                       error = xfs_btree_check_ptr(cur, rpp, i, level);
+                       if (error)
+                               goto error0;
+               }
+#endif
+               xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
+               xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
+
+               xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
+               xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
+       } else {
+               /* It's a leaf.  Move records.  */
+               union xfs_btree_rec     *lrp;   /* left record pointer */
+               union xfs_btree_rec     *rrp;   /* right record pointer */
+
+               lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
+               rrp = xfs_btree_rec_addr(cur, 1, right);
+
+               xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
+               xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
+       }
+
+       XFS_BTREE_STATS_INC(cur, join);
+
+       /*
+        * Fix up the number of records and right block pointer in the
+        * surviving block, and log it.
+        */
+       xfs_btree_set_numrecs(left, lrecs + rrecs);
+       xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
+       xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+
+       /* If there is a right sibling, point it to the remaining block. */
+       xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
+       if (!xfs_btree_ptr_is_null(cur, &cptr)) {
+               error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
+               if (error)
+                       goto error0;
+               xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
+               xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+       }
+
+       /* Free the deleted block. */
+       error = cur->bc_ops->free_block(cur, rbp);
+       if (error)
+               goto error0;
+       XFS_BTREE_STATS_INC(cur, free);
+
+       /*
+        * If we joined with the left neighbor, set the buffer in the
+        * cursor to the left block, and fix up the index.
+        */
+       if (bp != lbp) {
+               cur->bc_bufs[level] = lbp;
+               cur->bc_ptrs[level] += lrecs;
+               cur->bc_ra[level] = 0;
+       }
+       /*
+        * If we joined with the right neighbor and there's a level above
+        * us, increment the cursor at that level.
+        */
+       else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
+                  (level + 1 < cur->bc_nlevels)) {
+               error = xfs_btree_increment(cur, level + 1, &i);
+               if (error)
+                       goto error0;
+       }
+
+       /*
+        * Readjust the ptr at this level if it's not a leaf, since it's
+        * still pointing at the deletion point, which makes the cursor
+        * inconsistent.  If this makes the ptr 0, the caller fixes it up.
+        * We can't use decrement because it would change the next level up.
+        */
+       if (level > 0)
+               cur->bc_ptrs[level]--;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       /* Return value means the next level up has something to do. */
+       *stat = 2;
+       return 0;
+
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       if (tcur)
+               xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ */
+int                                    /* error */
+xfs_btree_delete(
+       struct xfs_btree_cur    *cur,
+       int                     *stat)  /* success/failure */
+{
+       int                     error;  /* error return value */
+       int                     level;
+       int                     i;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       /*
+        * Go up the tree, starting at leaf level.
+        *
+        * If 2 is returned then a join was done; go to the next level.
+        * Otherwise we are done.
+        */
+       for (level = 0, i = 2; i == 2; level++) {
+               error = xfs_btree_delrec(cur, level, &i);
+               if (error)
+                       goto error0;
+       }
+
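+       /*
+        * If the delete failed, step the cursor back at the first level above
+        * the leaf whose index has gone to zero so that the cursor still
+        * points at a valid entry.
+        */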
+       if (i == 0) {
+               for (level = 1; level < cur->bc_nlevels; level++) {
+                       if (cur->bc_ptrs[level] == 0) {
+                               error = xfs_btree_decrement(cur, level, &i);
+                               if (error)
+                                       goto error0;
+                               break;
+                       }
+               }
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       *stat = i;
+       return 0;
+error0:
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+       return error;
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_btree_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       union xfs_btree_rec     **recp, /* output: btree record */
+       int                     *stat)  /* output: success/failure */
+{
+       struct xfs_btree_block  *block; /* btree block */
+       struct xfs_buf          *bp;    /* buffer pointer */
+       int                     ptr;    /* record number */
+#ifdef DEBUG
+       int                     error;  /* error return value */
+#endif
+
+       ptr = cur->bc_ptrs[0];
+       block = xfs_btree_get_block(cur, 0, &bp);
+
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, 0, bp);
+       if (error)
+               return error;
+#endif
+
+       /*
+        * Off the right end or left end, return failure.
+        */
+       if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
+               *stat = 0;
+               return 0;
+       }
+
+       /*
+        * Point to the record and extract its data.
+        */
+       *recp = xfs_btree_rec_addr(cur, ptr, block);
+       *stat = 1;
+       return 0;
+}
+
+/*
+ * Change the owner of a btree.
+ *
+ * The mechanism we use here is ordered buffer logging. Because we don't know
+ * how many buffers we are going to need to modify, we don't really want to
+ * have to make transaction reservations for the worst case of every buffer in
+ * a full-size btree, as that may be more space than we can fit in the log....
+ *
+ * We do the btree walk in the most efficient manner possible - we have sibling
+ * pointers so we can just walk all the blocks on each level from left to right
+ * in a single pass, and then move to the next level and do the same. We can
+ * also do readahead on the sibling pointers to get IO moving more quickly,
+ * though for slow disks this is unlikely to make much difference to performance
+ * as the amount of CPU work we have to do before moving to the next block is
+ * relatively small.
+ *
+ * For each btree block that we load, modify the owner appropriately, set the
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
+ * we mark the region we change dirty so that if the buffer is relogged in
+ * a subsequent transaction the changes we make here as an ordered buffer are
+ * correctly relogged in that transaction.  If we are in recovery context, then
+ * just queue the modified buffer as delayed write buffer so the transaction
+ * recovery completion writes the changes to disk.
+ */
+static int
+xfs_btree_block_change_owner(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       union xfs_btree_ptr     rptr;
+
+       /* do right sibling readahead */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+       /* modify the owner */
+       block = xfs_btree_get_block(cur, level, &bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+       else
+               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+
+       /*
+        * If the block is a root block hosted in an inode, we might not have a
+        * buffer pointer here and we shouldn't attempt to log the change as the
+        * information is already held in the inode and discarded when the root
+        * block is formatted into the on-disk inode fork. We still change it,
+        * though, so everything is consistent in memory.
+        */
+       if (bp) {
+               if (cur->bc_tp) {
+                       xfs_trans_ordered_buf(cur->bc_tp, bp);
+                       xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
+               } else {
+                       xfs_buf_delwri_queue(bp, buffer_list);
+               }
+       } else {
+               ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+               ASSERT(level == cur->bc_nlevels - 1);
+       }
+
+       /* now read rh sibling block for next iteration */
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &rptr))
+               return ENOENT;
+
+       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+}
+
+int
+xfs_btree_change_owner(
+       struct xfs_btree_cur    *cur,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       union xfs_btree_ptr     lptr;
+       int                     level;
+       struct xfs_btree_block  *block = NULL;
+       int                     error = 0;
+
+       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+
+       /* for each level */
+       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
+               /* grab the left hand block */
+               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
+               if (error)
+                       return error;
+
+               /* readahead the left most block for the next level down */
+               if (level > 0) {
+                       union xfs_btree_ptr     *ptr;
+
+                       ptr = xfs_btree_ptr_addr(cur, 1, block);
+                       xfs_btree_readahead_ptr(cur, ptr, 1);
+
+                       /* save for the next iteration of the loop */
+                       lptr = *ptr;
+               }
+
+               /* for each buffer in the level */
+               do {
+                       error = xfs_btree_block_change_owner(cur, level,
+                                                            new_owner,
+                                                            buffer_list);
+               } while (!error);
+
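+               /*
+                * ENOENT simply means we walked off the right edge of this
+                * level; any other error is fatal.
+                */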
+               if (error != ENOENT)
+                       return error;
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
new file mode 100644 (file)
index 0000000..a1a4e3e
--- /dev/null
@@ -0,0 +1,2665 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_buf_item.h"
+
+/*
+ * xfs_da_btree.c
+ *
+ * Routines to implement directories as Btrees of hashed names.
+ */
+
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
+
+/*
+ * Routines used for growing the Btree.
+ */
+STATIC int xfs_da3_root_split(xfs_da_state_t *state,
+                                           xfs_da_state_blk_t *existing_root,
+                                           xfs_da_state_blk_t *new_child);
+STATIC int xfs_da3_node_split(xfs_da_state_t *state,
+                                           xfs_da_state_blk_t *existing_blk,
+                                           xfs_da_state_blk_t *split_blk,
+                                           xfs_da_state_blk_t *blk_to_add,
+                                           int treelevel,
+                                           int *result);
+STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
+                                        xfs_da_state_blk_t *node_blk_1,
+                                        xfs_da_state_blk_t *node_blk_2);
+STATIC void xfs_da3_node_add(xfs_da_state_t *state,
+                                  xfs_da_state_blk_t *old_node_blk,
+                                  xfs_da_state_blk_t *new_node_blk);
+
+/*
+ * Routines used for shrinking the Btree.
+ */
+STATIC int xfs_da3_root_join(xfs_da_state_t *state,
+                                          xfs_da_state_blk_t *root_blk);
+STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
+STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
+                                             xfs_da_state_blk_t *drop_blk);
+STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
+                                        xfs_da_state_blk_t *src_node_blk,
+                                        xfs_da_state_blk_t *dst_node_blk);
+
+/*
+ * Utility routines.
+ */
+STATIC int     xfs_da3_blk_unlink(xfs_da_state_t *state,
+                                 xfs_da_state_blk_t *drop_blk,
+                                 xfs_da_state_blk_t *save_blk);
+
+
+kmem_zone_t *xfs_da_state_zone;        /* anchor for state struct zone */
+
+/*
+ * Allocate a dir-state structure.
+ * We don't put them on the stack since they're large.
+ */
+xfs_da_state_t *
+xfs_da_state_alloc(void)
+{
+       return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
+}
+
+/*
+ * Kill the altpath contents of a da-state structure.
+ */
+STATIC void
+xfs_da_state_kill_altpath(xfs_da_state_t *state)
+{
+       int     i;
+
+       for (i = 0; i < state->altpath.active; i++)
+               state->altpath.blk[i].bp = NULL;
+       state->altpath.active = 0;
+}
+
+/*
+ * Free a da-state structure.
+ */
+void
+xfs_da_state_free(xfs_da_state_t *state)
+{
+       xfs_da_state_kill_altpath(state);
+#ifdef DEBUG
+       memset((char *)state, 0, sizeof(*state));
+#endif /* DEBUG */
+       kmem_zone_free(xfs_da_state_zone, state);
+}
+
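+/*
+ * Verify a da btree node block header: magic number, UUID and block number
+ * (on CRC-enabled filesystems), level and entry count.
+ */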
+static bool
+xfs_da3_node_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_da_intnode   *hdr = bp->b_addr;
+       struct xfs_da3_icnode_hdr ichdr;
+       const struct xfs_dir_ops *ops;
+
+       ops = xfs_dir_get_ops(mp, NULL);
+
+       ops->node_hdr_from_disk(&ichdr, hdr);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+
+               if (ichdr.magic != XFS_DA3_NODE_MAGIC)
+                       return false;
+
+               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (ichdr.magic != XFS_DA_NODE_MAGIC)
+                       return false;
+       }
+       if (ichdr.level == 0)
+               return false;
+       if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
+               return false;
+       if (ichdr.count == 0)
+               return false;
+
+       /*
+        * we don't know if the node is for an attribute or directory tree,
+        * so only fail if the count is outside both bounds
+        */
+       if (ichdr.count > mp->m_dir_geo->node_ents &&
+           ichdr.count > mp->m_attr_geo->node_ents)
+               return false;
+
+       /* XXX: hash order check? */
+
+       return true;
+}
+
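+/*
+ * On write, verify the node contents and, on CRC-enabled filesystems, stamp
+ * the last-modification LSN and recalculate the block CRC.
+ */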
+static void
+xfs_da3_node_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_da3_node_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
+}
+
+/*
+ * Leaf/node format detection on trees is sketchy, so a node read can be done
+ * on leaf-level blocks when detection incorrectly identifies the tree as node
+ * format. In this case, we need to swap the verifier to match the correct
+ * format of the block being read.
+ */
+static void
+xfs_da3_node_read_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_da_blkinfo   *info = bp->b_addr;
+
+       switch (be16_to_cpu(info->magic)) {
+               case XFS_DA3_NODE_MAGIC:
+                       if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
+                               xfs_buf_ioerror(bp, EFSBADCRC);
+                               break;
+                       }
+                       /* fall through */
+               case XFS_DA_NODE_MAGIC:
+                       if (!xfs_da3_node_verify(bp)) {
+                               xfs_buf_ioerror(bp, EFSCORRUPTED);
+                               break;
+                       }
+                       return;
+               case XFS_ATTR_LEAF_MAGIC:
+               case XFS_ATTR3_LEAF_MAGIC:
+                       bp->b_ops = &xfs_attr3_leaf_buf_ops;
+                       bp->b_ops->verify_read(bp);
+                       return;
+               case XFS_DIR2_LEAFN_MAGIC:
+               case XFS_DIR3_LEAFN_MAGIC:
+                       bp->b_ops = &xfs_dir3_leafn_buf_ops;
+                       bp->b_ops->verify_read(bp);
+                       return;
+               default:
+                       break;
+       }
+
+       /* corrupt block */
+       xfs_verifier_error(bp);
+}
+
+const struct xfs_buf_ops xfs_da3_node_buf_ops = {
+       .verify_read = xfs_da3_node_read_verify,
+       .verify_write = xfs_da3_node_write_verify,
+};
+
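+/*
+ * Read a da btree block.  The node buffer ops cope with leaf blocks as well,
+ * so tag the transaction buffer with the type of block that was actually
+ * found.
+ */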
+int
+xfs_da3_node_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     which_fork)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
+                                       which_fork, &xfs_da3_node_buf_ops);
+       if (!err && tp) {
+               struct xfs_da_blkinfo   *info = (*bpp)->b_addr;
+               int                     type;
+
+               switch (be16_to_cpu(info->magic)) {
+               case XFS_DA_NODE_MAGIC:
+               case XFS_DA3_NODE_MAGIC:
+                       type = XFS_BLFT_DA_NODE_BUF;
+                       break;
+               case XFS_ATTR_LEAF_MAGIC:
+               case XFS_ATTR3_LEAF_MAGIC:
+                       type = XFS_BLFT_ATTR_LEAF_BUF;
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+               case XFS_DIR3_LEAFN_MAGIC:
+                       type = XFS_BLFT_DIR_LEAFN_BUF;
+                       break;
+               default:
+                       type = 0;
+                       ASSERT(0);
+                       break;
+               }
+               xfs_trans_buf_set_type(tp, *bpp, type);
+       }
+       return err;
+}
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Create the initial contents of an intermediate node.
+ */
+int
+xfs_da3_node_create(
+       struct xfs_da_args      *args,
+       xfs_dablk_t             blkno,
+       int                     level,
+       struct xfs_buf          **bpp,
+       int                     whichfork)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_trans        *tp = args->trans;
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_da3_icnode_hdr ichdr = {0};
+       struct xfs_buf          *bp;
+       int                     error;
+       struct xfs_inode        *dp = args->dp;
+
+       trace_xfs_da_node_create(args);
+       ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
+
+       error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
+       if (error)
+               return error;
+       bp->b_ops = &xfs_da3_node_buf_ops;
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
+       node = bp->b_addr;
+
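+       /*
+        * CRC-enabled filesystems use the v3 node header and stamp the block
+        * with its location, owning inode and filesystem UUID.
+        */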
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+
+               ichdr.magic = XFS_DA3_NODE_MAGIC;
+               hdr3->info.blkno = cpu_to_be64(bp->b_bn);
+               hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
+               uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid);
+       } else {
+               ichdr.magic = XFS_DA_NODE_MAGIC;
+       }
+       ichdr.level = level;
+
+       dp->d_ops->node_hdr_to_disk(node, &ichdr);
+       xfs_trans_log_buf(tp, bp,
+               XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Split a leaf node, rebalance, then possibly split
+ * intermediate nodes, rebalance, etc.
+ */
+int                                                    /* error */
+xfs_da3_split(
+       struct xfs_da_state     *state)
+{
+       struct xfs_da_state_blk *oldblk;
+       struct xfs_da_state_blk *newblk;
+       struct xfs_da_state_blk *addblk;
+       struct xfs_da_intnode   *node;
+       struct xfs_buf          *bp;
+       int                     max;
+       int                     action = 0;
+       int                     error;
+       int                     i;
+
+       trace_xfs_da_split(state->args);
+
+       /*
+        * Walk back up the tree splitting/inserting/adjusting as necessary.
+        * If we need to insert and there isn't room, split the node, then
+        * decide which fragment to insert the new block from below into.
+        * Note that we may split the root this way, but we need more fixup.
+        */
+       max = state->path.active - 1;
+       ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
+       ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
+              state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
+
+       addblk = &state->path.blk[max];         /* initial dummy value */
+       for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
+               oldblk = &state->path.blk[i];
+               newblk = &state->altpath.blk[i];
+
+               /*
+                * If a leaf node then
+                *     Allocate a new leaf node, then rebalance across them.
+                * else if an intermediate node then
+                *     We split the layer below us; must we split this node too?
+                */
+               switch (oldblk->magic) {
+               case XFS_ATTR_LEAF_MAGIC:
+                       error = xfs_attr3_leaf_split(state, oldblk, newblk);
+                       if ((error != 0) && (error != ENOSPC)) {
+                               return error;   /* GROT: attr is inconsistent */
+                       }
+                       if (!error) {
+                               addblk = newblk;
+                               break;
+                       }
+                       /*
+                        * Entry wouldn't fit, split the leaf again.
+                        */
+                       state->extravalid = 1;
+                       if (state->inleaf) {
+                               state->extraafter = 0;  /* before newblk */
+                               trace_xfs_attr_leaf_split_before(state->args);
+                               error = xfs_attr3_leaf_split(state, oldblk,
+                                                           &state->extrablk);
+                       } else {
+                               state->extraafter = 1;  /* after newblk */
+                               trace_xfs_attr_leaf_split_after(state->args);
+                               error = xfs_attr3_leaf_split(state, newblk,
+                                                           &state->extrablk);
+                       }
+                       if (error)
+                               return error;   /* GROT: attr inconsistent */
+                       addblk = newblk;
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+                       error = xfs_dir2_leafn_split(state, oldblk, newblk);
+                       if (error)
+                               return error;
+                       addblk = newblk;
+                       break;
+               case XFS_DA_NODE_MAGIC:
+                       error = xfs_da3_node_split(state, oldblk, newblk, addblk,
+                                                        max - i, &action);
+                       addblk->bp = NULL;
+                       if (error)
+                               return error;   /* GROT: dir is inconsistent */
+                       /*
+                        * Record the newly split block for the next time thru?
+                        */
+                       if (action)
+                               addblk = newblk;
+                       else
+                               addblk = NULL;
+                       break;
+               }
+
+               /*
+                * Update the btree to show the new hashval for this child.
+                */
+               xfs_da3_fixhashpath(state, &state->path);
+       }
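+       /* If no block was left over to add to a parent, the root did not split. */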
+       if (!addblk)
+               return 0;
+
+       /*
+        * Split the root node.
+        */
+       ASSERT(state->path.active == 0);
+       oldblk = &state->path.blk[0];
+       error = xfs_da3_root_split(state, oldblk, addblk);
+       if (error) {
+               addblk->bp = NULL;
+               return error;   /* GROT: dir is inconsistent */
+       }
+
+       /*
+        * Update pointers to the node which used to be block 0 and
+        * just got bumped because of the addition of a new root node.
+        * There might be three blocks involved if a double split occurred,
+        * and the original block 0 could be at any position in the list.
+        *
+        * Note: the magic numbers and sibling pointers are in the same
+        * physical place for both v2 and v3 headers (by design). Hence it
+        * doesn't matter which version of the xfs_da_intnode structure we use
+        * here as the result will be the same using either structure.
+        */
+       node = oldblk->bp->b_addr;
+       if (node->hdr.info.forw) {
+               if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
+                       bp = addblk->bp;
+               } else {
+                       ASSERT(state->extravalid);
+                       bp = state->extrablk.bp;
+               }
+               node = bp->b_addr;
+               node->hdr.info.back = cpu_to_be32(oldblk->blkno);
+               xfs_trans_log_buf(state->args->trans, bp,
+                   XFS_DA_LOGRANGE(node, &node->hdr.info,
+                   sizeof(node->hdr.info)));
+       }
+       node = oldblk->bp->b_addr;
+       if (node->hdr.info.back) {
+               if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
+                       bp = addblk->bp;
+               } else {
+                       ASSERT(state->extravalid);
+                       bp = state->extrablk.bp;
+               }
+               node = bp->b_addr;
+               node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
+               xfs_trans_log_buf(state->args->trans, bp,
+                   XFS_DA_LOGRANGE(node, &node->hdr.info,
+                   sizeof(node->hdr.info)));
+       }
+       addblk->bp = NULL;
+       return 0;
+}
+
+/*
+ * Split the root.  We have to create a new root and point to the two
+ * parts (the split old root) that we just created.  Copy block zero to
+ * the EOF, extending the inode in the process.
+ */
+STATIC int                                             /* error */
+xfs_da3_root_split(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *blk1,
+       struct xfs_da_state_blk *blk2)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_da_intnode   *oldroot;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr nodehdr;
+       struct xfs_da_args      *args;
+       struct xfs_buf          *bp;
+       struct xfs_inode        *dp;
+       struct xfs_trans        *tp;
+       struct xfs_mount        *mp;
+       struct xfs_dir2_leaf    *leaf;
+       xfs_dablk_t             blkno;
+       int                     level;
+       int                     error;
+       int                     size;
+
+       trace_xfs_da_root_split(state->args);
+
+       /*
+        * Copy the existing (incorrect) block from the root node position
+        * to a free space somewhere.
+        */
+       args = state->args;
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error)
+               return error;
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = state->mp;
+       error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
+       if (error)
+               return error;
+       node = bp->b_addr;
+       oldroot = blk1->bp->b_addr;
+       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+           oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
+               struct xfs_da3_icnode_hdr nodehdr;
+
+               dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
+               btree = dp->d_ops->node_tree_p(oldroot);
+               size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
+               level = nodehdr.level;
+
+               /*
+                * we are about to copy oldroot to bp, so set up the type
+                * of bp while we know exactly what it will be.
+                */
+               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
+       } else {
+               struct xfs_dir3_icleaf_hdr leafhdr;
+               struct xfs_dir2_leaf_entry *ents;
+
+               leaf = (xfs_dir2_leaf_t *)oldroot;
+               dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+               ents = dp->d_ops->leaf_ents_p(leaf);
+
+               ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
+                      leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+               size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
+               level = 0;
+
+               /*
+                * we are about to copy oldroot to bp, so set up the type
+                * of bp while we know exactly what it will be.
+                */
+               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
+       }
+
+       /*
+        * we can copy most of the information in the node from one block to
+        * another, but for CRC enabled headers we have to make sure that the
+        * block specific identifiers are kept intact. We update the buffer
+        * directly for this.
+        */
+       memcpy(node, oldroot, size);
+       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
+           oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
+               struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
+
+               node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
+       }
+       xfs_trans_log_buf(tp, bp, 0, size - 1);
+
+       bp->b_ops = blk1->bp->b_ops;
+       xfs_trans_buf_copy_type(bp, blk1->bp);
+       blk1->bp = bp;
+       blk1->blkno = blkno;
+
+       /*
+        * Set up the new root node.
+        */
+       error = xfs_da3_node_create(args,
+               (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
+               level + 1, &bp, args->whichfork);
+       if (error)
+               return error;
+
+       node = bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       btree = dp->d_ops->node_tree_p(node);
+       btree[0].hashval = cpu_to_be32(blk1->hashval);
+       btree[0].before = cpu_to_be32(blk1->blkno);
+       btree[1].hashval = cpu_to_be32(blk2->hashval);
+       btree[1].before = cpu_to_be32(blk2->blkno);
+       nodehdr.count = 2;
+       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+
+#ifdef DEBUG
+       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+           oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
+               ASSERT(blk1->blkno >= args->geo->leafblk &&
+                      blk1->blkno < args->geo->freeblk);
+               ASSERT(blk2->blkno >= args->geo->leafblk &&
+                      blk2->blkno < args->geo->freeblk);
+       }
+#endif
+
+       /* Header is already logged by xfs_da_node_create */
+       xfs_trans_log_buf(tp, bp,
+               XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
+
+       return 0;
+}
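
An aside on the mechanics (illustrative sketch only, not part of the diff): the root split boils down to copying the old root's contents into a freshly allocated block and then rewriting block 0 as a two-entry node that points at the two halves. Below is a minimal userspace sketch of that idea using hypothetical simplified types, not the XFS on-disk structures.

/*
 * Illustrative sketch: a simplified in-memory "root split".  The old
 * root's entries are copied into a newly allocated block and the root
 * is rebuilt with two entries pointing at the two halves.  All types
 * and helpers here are hypothetical.
 */
#include <stdio.h>
#include <string.h>

#define MAXENT	8

struct sim_entry { unsigned int hashval; int before; /* child block # */ };
struct sim_node  { int count; struct sim_entry ent[MAXENT]; };

static struct sim_node	sim_blocks[16];		/* sim_blocks[0] is the root */
static int		sim_nextblk = 1;

static int sim_alloc_block(void) { return sim_nextblk++; }

static void sim_root_split(unsigned int blk1_hash, unsigned int blk2_hash,
			   int blk2_no)
{
	/* copy the old root's contents out of block 0 into a new block */
	int blk1_no = sim_alloc_block();

	memcpy(&sim_blocks[blk1_no], &sim_blocks[0], sizeof(struct sim_node));

	/* rebuild block 0 as a two-entry node over the two halves */
	memset(&sim_blocks[0], 0, sizeof(struct sim_node));
	sim_blocks[0].count = 2;
	sim_blocks[0].ent[0].hashval = blk1_hash;
	sim_blocks[0].ent[0].before  = blk1_no;
	sim_blocks[0].ent[1].hashval = blk2_hash;
	sim_blocks[0].ent[1].before  = blk2_no;
}

int main(void)
{
	sim_blocks[0].count = 1;
	sim_blocks[0].ent[0].hashval = 100;

	sim_root_split(100, 200, sim_alloc_block());
	printf("root has %d entries, children %d and %d\n",
	       sim_blocks[0].count,
	       sim_blocks[0].ent[0].before, sim_blocks[0].ent[1].before);
	return 0;
}
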
+
+/*
+ * Split the node, rebalance, then add the new entry.
+ */
+STATIC int                                             /* error */
+xfs_da3_node_split(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *oldblk,
+       struct xfs_da_state_blk *newblk,
+       struct xfs_da_state_blk *addblk,
+       int                     treelevel,
+       int                     *result)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_da3_icnode_hdr nodehdr;
+       xfs_dablk_t             blkno;
+       int                     newcount;
+       int                     error;
+       int                     useextra;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_split(state->args);
+
+       node = oldblk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+
+       /*
+        * With V2 dirs the extra block is data or freespace.
+        */
+       useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
+       newcount = 1 + useextra;
+       /*
+        * Do we have to split the node?
+        */
+       if (nodehdr.count + newcount > state->args->geo->node_ents) {
+               /*
+                * Allocate a new node, add to the doubly linked chain of
+                * nodes, then move some of our excess entries into it.
+                */
+               error = xfs_da_grow_inode(state->args, &blkno);
+               if (error)
+                       return error;   /* GROT: dir is inconsistent */
+
+               error = xfs_da3_node_create(state->args, blkno, treelevel,
+                                          &newblk->bp, state->args->whichfork);
+               if (error)
+                       return error;   /* GROT: dir is inconsistent */
+               newblk->blkno = blkno;
+               newblk->magic = XFS_DA_NODE_MAGIC;
+               xfs_da3_node_rebalance(state, oldblk, newblk);
+               error = xfs_da3_blk_link(state, oldblk, newblk);
+               if (error)
+                       return error;
+               *result = 1;
+       } else {
+               *result = 0;
+       }
+
+       /*
+        * Insert the new entry(s) into the correct block
+        * (updating last hashval in the process).
+        *
+        * xfs_da3_node_add() inserts BEFORE the given index,
+        * and as a result of using node_lookup_int() we always
+        * point to a valid entry (not after one), but a split
+        * operation always results in a new block whose hashvals
+        * FOLLOW the current block.
+        *
+        * If we had double-split op below us, then add the extra block too.
+        */
+       node = oldblk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       if (oldblk->index <= nodehdr.count) {
+               oldblk->index++;
+               xfs_da3_node_add(state, oldblk, addblk);
+               if (useextra) {
+                       if (state->extraafter)
+                               oldblk->index++;
+                       xfs_da3_node_add(state, oldblk, &state->extrablk);
+                       state->extravalid = 0;
+               }
+       } else {
+               newblk->index++;
+               xfs_da3_node_add(state, newblk, addblk);
+               if (useextra) {
+                       if (state->extraafter)
+                               newblk->index++;
+                       xfs_da3_node_add(state, newblk, &state->extrablk);
+                       state->extravalid = 0;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Balance the btree elements between two intermediate nodes,
+ * usually one full and one empty.
+ *
+ * NOTE: if blk2 is empty, then it will get the upper half of blk1.
+ */
+STATIC void
+xfs_da3_node_rebalance(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *blk1,
+       struct xfs_da_state_blk *blk2)
+{
+       struct xfs_da_intnode   *node1;
+       struct xfs_da_intnode   *node2;
+       struct xfs_da_intnode   *tmpnode;
+       struct xfs_da_node_entry *btree1;
+       struct xfs_da_node_entry *btree2;
+       struct xfs_da_node_entry *btree_s;
+       struct xfs_da_node_entry *btree_d;
+       struct xfs_da3_icnode_hdr nodehdr1;
+       struct xfs_da3_icnode_hdr nodehdr2;
+       struct xfs_trans        *tp;
+       int                     count;
+       int                     tmp;
+       int                     swap = 0;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_rebalance(state->args);
+
+       node1 = blk1->bp->b_addr;
+       node2 = blk2->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
+       dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
+       btree1 = dp->d_ops->node_tree_p(node1);
+       btree2 = dp->d_ops->node_tree_p(node2);
+
+       /*
+        * Figure out how many entries need to move, and in which direction.
+        * Swap the nodes around if that makes it simpler.
+        */
+       if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
+           ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
+            (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
+                       be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
+               tmpnode = node1;
+               node1 = node2;
+               node2 = tmpnode;
+               dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
+               dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
+               btree1 = dp->d_ops->node_tree_p(node1);
+               btree2 = dp->d_ops->node_tree_p(node2);
+               swap = 1;
+       }
+
+       count = (nodehdr1.count - nodehdr2.count) / 2;
+       if (count == 0)
+               return;
+       tp = state->args->trans;
+       /*
+        * Two cases: high-to-low and low-to-high.
+        */
+       if (count > 0) {
+               /*
+                * Move elements in node2 up to make a hole.
+                */
+               tmp = nodehdr2.count;
+               if (tmp > 0) {
+                       tmp *= (uint)sizeof(xfs_da_node_entry_t);
+                       btree_s = &btree2[0];
+                       btree_d = &btree2[count];
+                       memmove(btree_d, btree_s, tmp);
+               }
+
+               /*
+                * Move the req'd B-tree elements from high in node1 to
+                * low in node2.
+                */
+               nodehdr2.count += count;
+               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &btree1[nodehdr1.count - count];
+               btree_d = &btree2[0];
+               memcpy(btree_d, btree_s, tmp);
+               nodehdr1.count -= count;
+       } else {
+               /*
+                * Move the req'd B-tree elements from low in node2 to
+                * high in node1.
+                */
+               count = -count;
+               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &btree2[0];
+               btree_d = &btree1[nodehdr1.count];
+               memcpy(btree_d, btree_s, tmp);
+               nodehdr1.count += count;
+
+               xfs_trans_log_buf(tp, blk1->bp,
+                       XFS_DA_LOGRANGE(node1, btree_d, tmp));
+
+               /*
+                * Move elements in node2 down to fill the hole.
+                */
+               tmp  = nodehdr2.count - count;
+               tmp *= (uint)sizeof(xfs_da_node_entry_t);
+               btree_s = &btree2[count];
+               btree_d = &btree2[0];
+               memmove(btree_d, btree_s, tmp);
+               nodehdr2.count -= count;
+       }
+
+       /*
+        * Log header of node 1 and all current bits of node 2.
+        */
+       dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
+       xfs_trans_log_buf(tp, blk1->bp,
+               XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
+
+       dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
+       xfs_trans_log_buf(tp, blk2->bp,
+               XFS_DA_LOGRANGE(node2, &node2->hdr,
+                               dp->d_ops->node_hdr_size +
+                               (sizeof(btree2[0]) * nodehdr2.count)));
+
+       /*
+        * Record the last hashval from each block for upward propagation.
+        * (note: don't use the swapped node pointers)
+        */
+       if (swap) {
+               node1 = blk1->bp->b_addr;
+               node2 = blk2->bp->b_addr;
+               dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
+               dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
+               btree1 = dp->d_ops->node_tree_p(node1);
+               btree2 = dp->d_ops->node_tree_p(node2);
+       }
+       blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
+       blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
+
+       /*
+        * Adjust the expected index for insertion.
+        */
+       if (blk1->index >= nodehdr1.count) {
+               blk2->index = blk1->index - nodehdr1.count;
+               blk1->index = nodehdr1.count + 1;       /* make it invalid */
+       }
+}
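
Illustrative sketch (not from the XFS sources): the rebalance moves (count1 - count2) / 2 entries so the two siblings end up within one entry of each other, memmoving within the destination array to open or close the hole. The same arithmetic on plain sorted arrays, with made-up names:

/*
 * Illustrative sketch: rebalance two sorted arrays the way
 * xfs_da3_node_rebalance() balances two sibling nodes.  A positive
 * count moves entries from the tail of a1 to the head of a2; a
 * negative count moves entries from the head of a2 to the tail of a1.
 */
#include <stdio.h>
#include <string.h>

static void rebalance(unsigned int *a1, int *n1, unsigned int *a2, int *n2)
{
	int count = (*n1 - *n2) / 2;

	if (count == 0)
		return;
	if (count > 0) {
		/* open a hole at the front of a2 ... */
		memmove(&a2[count], &a2[0], *n2 * sizeof(a2[0]));
		/* ... and fill it from the tail of a1 */
		memcpy(&a2[0], &a1[*n1 - count], count * sizeof(a1[0]));
		*n1 -= count;
		*n2 += count;
	} else {
		count = -count;
		/* append the head of a2 to the tail of a1 ... */
		memcpy(&a1[*n1], &a2[0], count * sizeof(a2[0]));
		/* ... then close the hole in a2 */
		memmove(&a2[0], &a2[count], (*n2 - count) * sizeof(a2[0]));
		*n1 += count;
		*n2 -= count;
	}
}

int main(void)
{
	unsigned int a1[8] = { 10, 20, 30, 40, 50, 60 }, a2[8] = { 70, 80 };
	int n1 = 6, n2 = 2;

	rebalance(a1, &n1, a2, &n2);	/* moves 2 entries: 50, 60 */
	printf("n1=%d n2=%d a2[0]=%u a2[1]=%u\n", n1, n2, a2[0], a2[1]);
	return 0;
}
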
+
+/*
+ * Add a new entry to an intermediate node.
+ */
+STATIC void
+xfs_da3_node_add(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *oldblk,
+       struct xfs_da_state_blk *newblk)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_da3_icnode_hdr nodehdr;
+       struct xfs_da_node_entry *btree;
+       int                     tmp;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_add(state->args);
+
+       node = oldblk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       btree = dp->d_ops->node_tree_p(node);
+
+       ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
+       ASSERT(newblk->blkno != 0);
+       if (state->args->whichfork == XFS_DATA_FORK)
+               ASSERT(newblk->blkno >= state->args->geo->leafblk &&
+                      newblk->blkno < state->args->geo->freeblk);
+
+       /*
+        * We may need to make some room before we insert the new node.
+        */
+       tmp = 0;
+       if (oldblk->index < nodehdr.count) {
+               tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
+               memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
+       }
+       btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
+       btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
+       xfs_trans_log_buf(state->args->trans, oldblk->bp,
+               XFS_DA_LOGRANGE(node, &btree[oldblk->index],
+                               tmp + sizeof(*btree)));
+
+       nodehdr.count += 1;
+       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+       xfs_trans_log_buf(state->args->trans, oldblk->bp,
+               XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+
+       /*
+        * Copy the last hash value from the oldblk to propagate upwards.
+        */
+       oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
+}
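
Illustrative sketch (not part of the diff): xfs_da3_node_add() opens a hole at the insertion index with memmove and then stores the new (hashval, before) pair. A small standalone version of that insert-before-index pattern, using hypothetical types:

/*
 * Illustrative sketch: insert an entry *before* a given index in a
 * sorted array, the way xfs_da3_node_add() inserts into a node block.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

struct ent { unsigned int hashval; unsigned int before; };

static void entry_add(struct ent *tree, int *count, int index, struct ent new)
{
	assert(index >= 0 && index <= *count);

	/* make room: shift everything at or after 'index' up by one slot */
	if (index < *count)
		memmove(&tree[index + 1], &tree[index],
			(*count - index) * sizeof(*tree));

	tree[index] = new;
	(*count)++;
}

int main(void)
{
	struct ent tree[8] = { { 10, 1 }, { 30, 2 }, { 40, 3 } };
	int count = 3;
	struct ent new = { 20, 7 };

	entry_add(tree, &count, 1, new);	/* insert before old index 1 */
	printf("count=%d tree[1].hashval=%u\n", count, tree[1].hashval);
	return 0;
}
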
+
+/*========================================================================
+ * Routines used for shrinking the Btree.
+ *========================================================================*/
+
+/*
+ * Deallocate an empty leaf node, remove it from its parent,
+ * possibly deallocating that block, etc...
+ */
+int
+xfs_da3_join(
+       struct xfs_da_state     *state)
+{
+       struct xfs_da_state_blk *drop_blk;
+       struct xfs_da_state_blk *save_blk;
+       int                     action = 0;
+       int                     error;
+
+       trace_xfs_da_join(state->args);
+
+       drop_blk = &state->path.blk[ state->path.active-1 ];
+       save_blk = &state->altpath.blk[ state->path.active-1 ];
+       ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
+       ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
+              drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+
+       /*
+        * Walk back up the tree joining/deallocating as necessary.
+        * When we stop dropping blocks, break out.
+        */
+       for (  ; state->path.active >= 2; drop_blk--, save_blk--,
+                state->path.active--) {
+               /*
+                * See if we can combine the block with a neighbor.
+                *   (action == 0) => no options, just leave
+                *   (action == 1) => coalesce, then unlink
+                *   (action == 2) => block empty, unlink it
+                */
+               switch (drop_blk->magic) {
+               case XFS_ATTR_LEAF_MAGIC:
+                       error = xfs_attr3_leaf_toosmall(state, &action);
+                       if (error)
+                               return error;
+                       if (action == 0)
+                               return 0;
+                       xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+                       error = xfs_dir2_leafn_toosmall(state, &action);
+                       if (error)
+                               return error;
+                       if (action == 0)
+                               return 0;
+                       xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
+                       break;
+               case XFS_DA_NODE_MAGIC:
+                       /*
+                        * Remove the offending node, fixup hashvals,
+                        * check for a toosmall neighbor.
+                        */
+                       xfs_da3_node_remove(state, drop_blk);
+                       xfs_da3_fixhashpath(state, &state->path);
+                       error = xfs_da3_node_toosmall(state, &action);
+                       if (error)
+                               return error;
+                       if (action == 0)
+                               return 0;
+                       xfs_da3_node_unbalance(state, drop_blk, save_blk);
+                       break;
+               }
+               xfs_da3_fixhashpath(state, &state->altpath);
+               error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
+               xfs_da_state_kill_altpath(state);
+               if (error)
+                       return error;
+               error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
+                                                        drop_blk->bp);
+               drop_blk->bp = NULL;
+               if (error)
+                       return error;
+       }
+       /*
+        * We joined all the way to the top.  If it turns out that
+        * we only have one entry in the root, make the child block
+        * the new root.
+        */
+       xfs_da3_node_remove(state, drop_blk);
+       xfs_da3_fixhashpath(state, &state->path);
+       error = xfs_da3_root_join(state, &state->path.blk[0]);
+       return error;
+}
+
+#ifdef DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+       __be16  magic = blkinfo->magic;
+
+       if (level == 1) {
+               ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+                      magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
+                      magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
+                      magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
+       } else {
+               ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+                      magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
+       }
+       ASSERT(!blkinfo->forw);
+       ASSERT(!blkinfo->back);
+}
+#else  /* !DEBUG */
+#define        xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif /* !DEBUG */
+
+/*
+ * We have only one entry in the root.  Copy the only remaining child of
+ * the old root to block 0 as the new root node.
+ */
+STATIC int
+xfs_da3_root_join(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *root_blk)
+{
+       struct xfs_da_intnode   *oldroot;
+       struct xfs_da_args      *args;
+       xfs_dablk_t             child;
+       struct xfs_buf          *bp;
+       struct xfs_da3_icnode_hdr oldroothdr;
+       struct xfs_da_node_entry *btree;
+       int                     error;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_root_join(state->args);
+
+       ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
+
+       args = state->args;
+       oldroot = root_blk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
+       ASSERT(oldroothdr.forw == 0);
+       ASSERT(oldroothdr.back == 0);
+
+       /*
+        * If the root has more than one child, then don't do anything.
+        */
+       if (oldroothdr.count > 1)
+               return 0;
+
+       /*
+        * Read in the (only) child block, then copy those bytes into
+        * the root block's buffer and free the original child block.
+        */
+       btree = dp->d_ops->node_tree_p(oldroot);
+       child = be32_to_cpu(btree[0].before);
+       ASSERT(child != 0);
+       error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
+                                            args->whichfork);
+       if (error)
+               return error;
+       xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
+
+       /*
+        * This could be copying a leaf back into the root block in the case of
+        * there only being a single leaf block left in the tree. Hence we have
+        * to update the b_ops pointer as well to match the buffer type change
+        * that could occur. For dir3 blocks we also need to update the block
+        * number in the buffer header.
+        */
+       memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
+       root_blk->bp->b_ops = bp->b_ops;
+       xfs_trans_buf_copy_type(root_blk->bp, bp);
+       if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
+               struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
+               da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
+       }
+       xfs_trans_log_buf(args->trans, root_blk->bp, 0,
+                         args->geo->blksize - 1);
+       error = xfs_da_shrink_inode(args, child, bp);
+       return error;
+}
+
+/*
+ * Check a node block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ * If the current block is over 50% full, don't try to join it, return 0.
+ * If the block is empty, fill in the state structure and return 2.
+ * If it can be collapsed, fill in the state structure and return 1.
+ * If nothing can be done, return 0.
+ */
+STATIC int
+xfs_da3_node_toosmall(
+       struct xfs_da_state     *state,
+       int                     *action)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_da_state_blk *blk;
+       struct xfs_da_blkinfo   *info;
+       xfs_dablk_t             blkno;
+       struct xfs_buf          *bp;
+       struct xfs_da3_icnode_hdr nodehdr;
+       int                     count;
+       int                     forward;
+       int                     error;
+       int                     retval;
+       int                     i;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_toosmall(state->args);
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[ state->path.active-1 ];
+       info = blk->bp->b_addr;
+       node = (xfs_da_intnode_t *)info;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
+               *action = 0;    /* blk over 50%, don't try to join */
+               return 0;
+       }
+
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (nodehdr.count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = (info->forw != 0);
+               memcpy(&state->altpath, &state->path, sizeof(state->path));
+               error = xfs_da3_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+               if (error)
+                       return error;
+               if (retval) {
+                       *action = 0;
+               } else {
+                       *action = 2;
+               }
+               return 0;
+       }
+
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink a directory over time.
+        */
+       count  = state->args->geo->node_ents;
+       count -= state->args->geo->node_ents >> 2;
+       count -= nodehdr.count;
+
+       /* start with smaller blk num */
+       forward = nodehdr.forw < nodehdr.back;
+       for (i = 0; i < 2; forward = !forward, i++) {
+               struct xfs_da3_icnode_hdr thdr;
+               if (forward)
+                       blkno = nodehdr.forw;
+               else
+                       blkno = nodehdr.back;
+               if (blkno == 0)
+                       continue;
+               error = xfs_da3_node_read(state->args->trans, dp,
+                                       blkno, -1, &bp, state->args->whichfork);
+               if (error)
+                       return error;
+
+               node = bp->b_addr;
+               dp->d_ops->node_hdr_from_disk(&thdr, node);
+               xfs_trans_brelse(state->args->trans, bp);
+
+               if (count - thdr.count >= 0)
+                       break;  /* fits with at least 25% to spare */
+       }
+       if (i >= 2) {
+               *action = 0;
+               return 0;
+       }
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        */
+       memcpy(&state->altpath, &state->path, sizeof(state->path));
+       if (blkno < blk->blkno) {
+               error = xfs_da3_path_shift(state, &state->altpath, forward,
+                                                0, &retval);
+       } else {
+               error = xfs_da3_path_shift(state, &state->path, forward,
+                                                0, &retval);
+       }
+       if (error)
+               return error;
+       if (retval) {
+               *action = 0;
+               return 0;
+       }
+       *action = 1;
+       return 0;
+}
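
Illustrative sketch (not from the XFS sources): the sibling test above only coalesces when the merged block would keep at least 25% free space. It starts from 75% of node_ents, subtracts this block's count, and the sibling fits if its own count does not exceed the remainder. The same check on plain integers; the node_ents figure below is only an example:

/*
 * Illustrative sketch: the "coalesce with >= 25% to spare" test used
 * by xfs_da3_node_toosmall(), on plain integers.
 */
#include <stdbool.h>
#include <stdio.h>

static bool can_coalesce(int node_ents, int my_count, int sibling_count)
{
	int count = node_ents - (node_ents >> 2);	/* 75% of capacity */

	count -= my_count;
	return count - sibling_count >= 0;		/* sibling fits too */
}

int main(void)
{
	/* e.g. 64-entry nodes: 48 entries is the merge budget */
	printf("%d\n", can_coalesce(64, 20, 25));	/* 1: 45 <= 48 */
	printf("%d\n", can_coalesce(64, 30, 25));	/* 0: 55 >  48 */
	return 0;
}
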
+
+/*
+ * Pick up the last hashvalue from an intermediate node.
+ */
+STATIC uint
+xfs_da3_node_lasthash(
+       struct xfs_inode        *dp,
+       struct xfs_buf          *bp,
+       int                     *count)
+{
+       struct xfs_da_intnode    *node;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr nodehdr;
+
+       node = bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       if (count)
+               *count = nodehdr.count;
+       if (!nodehdr.count)
+               return 0;
+       btree = dp->d_ops->node_tree_p(node);
+       return be32_to_cpu(btree[nodehdr.count - 1].hashval);
+}
+
+/*
+ * Walk back up the tree adjusting hash values as necessary,
+ * when we stop making changes, return.
+ */
+void
+xfs_da3_fixhashpath(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_path *path)
+{
+       struct xfs_da_state_blk *blk;
+       struct xfs_da_intnode   *node;
+       struct xfs_da_node_entry *btree;
+       xfs_dahash_t            lasthash=0;
+       int                     level;
+       int                     count;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_fixhashpath(state->args);
+
+       level = path->active-1;
+       blk = &path->blk[ level ];
+       switch (blk->magic) {
+       case XFS_ATTR_LEAF_MAGIC:
+               lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       case XFS_DIR2_LEAFN_MAGIC:
+               lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       case XFS_DA_NODE_MAGIC:
+               lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
+               if (count == 0)
+                       return;
+               break;
+       }
+       for (blk--, level--; level >= 0; blk--, level--) {
+               struct xfs_da3_icnode_hdr nodehdr;
+
+               node = blk->bp->b_addr;
+               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+               btree = dp->d_ops->node_tree_p(node);
+               if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
+                       break;
+               blk->hashval = lasthash;
+               btree[blk->index].hashval = cpu_to_be32(lasthash);
+               xfs_trans_log_buf(state->args->trans, blk->bp,
+                                 XFS_DA_LOGRANGE(node, &btree[blk->index],
+                                                 sizeof(*btree)));
+
+               lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
+       }
+}
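
Illustrative sketch (not part of the diff): the loop above pushes a block's new last hashval up the path and stops as soon as a parent entry already carries that value. A simplified standalone sketch of that upward propagation, with per-level arrays instead of buffers and all names hypothetical:

/*
 * Illustrative sketch: propagate a child's new last-hashval up the
 * path until a level's entry already holds that value, mirroring
 * xfs_da3_fixhashpath().  Each level records its per-child hashvals
 * plus the index of the child we descended through.
 */
#include <stdio.h>

struct plevel { unsigned int hash[8]; int nents; int index; };

static void fixhashpath(struct plevel *path, int active, unsigned int lasthash)
{
	int level;

	for (level = active - 1; level >= 0; level--) {
		struct plevel *p = &path[level];

		if (p->hash[p->index] == lasthash)
			break;			/* nothing above changes */
		p->hash[p->index] = lasthash;
		/* the value to push up is this block's own last hashval */
		lasthash = p->hash[p->nents - 1];
	}
}

int main(void)
{
	struct plevel path[2] = {
		{ { 50, 90 }, 2, 1 },		/* root: children end at 50, 90 */
		{ { 60, 70, 90 }, 3, 2 },	/* node we descended into */
	};

	fixhashpath(path, 2, 95);	/* leaf's last hash grew to 95 */
	printf("%u %u\n", path[1].hash[2], path[0].hash[1]);	/* 95 95 */
	return 0;
}
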
+
+/*
+ * Remove an entry from an intermediate node.
+ */
+STATIC void
+xfs_da3_node_remove(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *drop_blk)
+{
+       struct xfs_da_intnode   *node;
+       struct xfs_da3_icnode_hdr nodehdr;
+       struct xfs_da_node_entry *btree;
+       int                     index;
+       int                     tmp;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_remove(state->args);
+
+       node = drop_blk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+       ASSERT(drop_blk->index < nodehdr.count);
+       ASSERT(drop_blk->index >= 0);
+
+       /*
+        * Copy over the offending entry, or just zero it out.
+        */
+       index = drop_blk->index;
+       btree = dp->d_ops->node_tree_p(node);
+       if (index < nodehdr.count - 1) {
+               tmp  = nodehdr.count - index - 1;
+               tmp *= (uint)sizeof(xfs_da_node_entry_t);
+               memmove(&btree[index], &btree[index + 1], tmp);
+               xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+                   XFS_DA_LOGRANGE(node, &btree[index], tmp));
+               index = nodehdr.count - 1;
+       }
+       memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
+       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+           XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
+       nodehdr.count -= 1;
+       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+           XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+
+       /*
+        * Copy the last hash value from the block to propagate upwards.
+        */
+       drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
+}
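
Illustrative sketch (not from the XFS sources): removal is the mirror image of xfs_da3_node_add(): memmove the tail down over the hole, then zero the now-stale last slot. A standalone version with hypothetical types:

/*
 * Illustrative sketch: remove the entry at 'index' from a sorted array
 * by shifting the tail down and zeroing the last slot, as
 * xfs_da3_node_remove() does for a node block.
 */
#include <stdio.h>
#include <string.h>

struct ent { unsigned int hashval; unsigned int before; };

static void entry_remove(struct ent *tree, int *count, int index)
{
	if (index < *count - 1)
		memmove(&tree[index], &tree[index + 1],
			(*count - index - 1) * sizeof(*tree));
	memset(&tree[*count - 1], 0, sizeof(*tree));
	(*count)--;
}

int main(void)
{
	struct ent tree[4] = { { 10, 1 }, { 20, 2 }, { 30, 3 } };
	int count = 3;

	entry_remove(tree, &count, 1);	/* drop the 20 entry */
	printf("count=%d tree[1].hashval=%u\n", count, tree[1].hashval);
	return 0;
}
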
+
+/*
+ * Unbalance the elements between two intermediate nodes,
+ * move all Btree elements from one node into another.
+ */
+STATIC void
+xfs_da3_node_unbalance(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *drop_blk,
+       struct xfs_da_state_blk *save_blk)
+{
+       struct xfs_da_intnode   *drop_node;
+       struct xfs_da_intnode   *save_node;
+       struct xfs_da_node_entry *drop_btree;
+       struct xfs_da_node_entry *save_btree;
+       struct xfs_da3_icnode_hdr drop_hdr;
+       struct xfs_da3_icnode_hdr save_hdr;
+       struct xfs_trans        *tp;
+       int                     sindex;
+       int                     tmp;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_node_unbalance(state->args);
+
+       drop_node = drop_blk->bp->b_addr;
+       save_node = save_blk->bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
+       dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
+       drop_btree = dp->d_ops->node_tree_p(drop_node);
+       save_btree = dp->d_ops->node_tree_p(save_node);
+       tp = state->args->trans;
+
+       /*
+        * If the dying block has lower hashvals, then move all the
+        * elements in the remaining block up to make a hole.
+        */
+       if ((be32_to_cpu(drop_btree[0].hashval) <
+                       be32_to_cpu(save_btree[0].hashval)) ||
+           (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
+                       be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
+               /* XXX: check this - is memmove dst correct? */
+               tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
+               memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
+
+               sindex = 0;
+               xfs_trans_log_buf(tp, save_blk->bp,
+                       XFS_DA_LOGRANGE(save_node, &save_btree[0],
+                               (save_hdr.count + drop_hdr.count) *
+                                               sizeof(xfs_da_node_entry_t)));
+       } else {
+               sindex = save_hdr.count;
+               xfs_trans_log_buf(tp, save_blk->bp,
+                       XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
+                               drop_hdr.count * sizeof(xfs_da_node_entry_t)));
+       }
+
+       /*
+        * Move all the B-tree elements from drop_blk to save_blk.
+        */
+       tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
+       memcpy(&save_btree[sindex], &drop_btree[0], tmp);
+       save_hdr.count += drop_hdr.count;
+
+       dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
+       xfs_trans_log_buf(tp, save_blk->bp,
+               XFS_DA_LOGRANGE(save_node, &save_node->hdr,
+                               dp->d_ops->node_hdr_size));
+
+       /*
+        * Save the last hashval in the remaining block for upward propagation.
+        */
+       save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
+}
+
+/*========================================================================
+ * Routines used for finding things in the Btree.
+ *========================================================================*/
+
+/*
+ * Walk down the Btree looking for a particular filename, filling
+ * in the state structure as we go.
+ *
+ * We will set the state structure to point to each of the elements
+ * in each of the nodes where either the hashval is or should be.
+ *
+ * We support duplicate hashval's so for each entry in the current
+ * node that could contain the desired hashval, descend.  This is a
+ * pruned depth-first tree search.
+ */
+int                                                    /* error */
+xfs_da3_node_lookup_int(
+       struct xfs_da_state     *state,
+       int                     *result)
+{
+       struct xfs_da_state_blk *blk;
+       struct xfs_da_blkinfo   *curr;
+       struct xfs_da_intnode   *node;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr nodehdr;
+       struct xfs_da_args      *args;
+       xfs_dablk_t             blkno;
+       xfs_dahash_t            hashval;
+       xfs_dahash_t            btreehashval;
+       int                     probe;
+       int                     span;
+       int                     max;
+       int                     error;
+       int                     retval;
+       struct xfs_inode        *dp = state->args->dp;
+
+       args = state->args;
+
+       /*
+        * Descend thru the B-tree searching each level for the right
+        * node to use, until the right hashval is found.
+        */
+       blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
+       for (blk = &state->path.blk[0], state->path.active = 1;
+                        state->path.active <= XFS_DA_NODE_MAXDEPTH;
+                        blk++, state->path.active++) {
+               /*
+                * Read the next node down in the tree.
+                */
+               blk->blkno = blkno;
+               error = xfs_da3_node_read(args->trans, args->dp, blkno,
+                                       -1, &blk->bp, args->whichfork);
+               if (error) {
+                       blk->blkno = 0;
+                       state->path.active--;
+                       return error;
+               }
+               curr = blk->bp->b_addr;
+               blk->magic = be16_to_cpu(curr->magic);
+
+               if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
+                   blk->magic == XFS_ATTR3_LEAF_MAGIC) {
+                       blk->magic = XFS_ATTR_LEAF_MAGIC;
+                       blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
+                       break;
+               }
+
+               if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
+                   blk->magic == XFS_DIR3_LEAFN_MAGIC) {
+                       blk->magic = XFS_DIR2_LEAFN_MAGIC;
+                       blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
+                                                              blk->bp, NULL);
+                       break;
+               }
+
+               blk->magic = XFS_DA_NODE_MAGIC;
+
+
+               /*
+                * Search an intermediate node for a match.
+                */
+               node = blk->bp->b_addr;
+               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+               btree = dp->d_ops->node_tree_p(node);
+
+               max = nodehdr.count;
+               blk->hashval = be32_to_cpu(btree[max - 1].hashval);
+
+               /*
+                * Binary search.  (note: small blocks will skip loop)
+                */
+               probe = span = max / 2;
+               hashval = args->hashval;
+               while (span > 4) {
+                       span /= 2;
+                       btreehashval = be32_to_cpu(btree[probe].hashval);
+                       if (btreehashval < hashval)
+                               probe += span;
+                       else if (btreehashval > hashval)
+                               probe -= span;
+                       else
+                               break;
+               }
+               ASSERT((probe >= 0) && (probe < max));
+               ASSERT((span <= 4) ||
+                       (be32_to_cpu(btree[probe].hashval) == hashval));
+
+               /*
+                * Since we may have duplicate hashval's, find the first
+                * matching hashval in the node.
+                */
+               while (probe > 0 &&
+                      be32_to_cpu(btree[probe].hashval) >= hashval) {
+                       probe--;
+               }
+               while (probe < max &&
+                      be32_to_cpu(btree[probe].hashval) < hashval) {
+                       probe++;
+               }
+
+               /*
+                * Pick the right block to descend on.
+                */
+               if (probe == max) {
+                       blk->index = max - 1;
+                       blkno = be32_to_cpu(btree[max - 1].before);
+               } else {
+                       blk->index = probe;
+                       blkno = be32_to_cpu(btree[probe].before);
+               }
+       }
+
+       /*
+        * A leaf block that ends in the hashval that we are interested in
+        * (final hashval == search hashval) means that the next block may
+        * contain more entries with the same hashval, shift upward to the
+        * next leaf and keep searching.
+        */
+       for (;;) {
+               if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
+                       retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
+                                                       &blk->index, state);
+               } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+                       retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
+                       blk->index = args->index;
+                       args->blkno = blk->blkno;
+               } else {
+                       ASSERT(0);
+                       return EFSCORRUPTED;
+               }
+               if (((retval == ENOENT) || (retval == ENOATTR)) &&
+                   (blk->hashval == args->hashval)) {
+                       error = xfs_da3_path_shift(state, &state->path, 1, 1,
+                                                        &retval);
+                       if (error)
+                               return error;
+                       if (retval == 0) {
+                               continue;
+                       } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+                               /* path_shift() gives ENOENT */
+                               retval = ENOATTR;
+                       }
+               }
+               break;
+       }
+       *result = retval;
+       return 0;
+}
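
Illustrative sketch (not part of the diff): the probe above is a coarse binary search that stops once the span is 4 or less, then walks linearly to the first entry whose hashval is greater than or equal to the search value, so duplicate hashvals are always entered at their first occurrence. A standalone version of just that probe, over a plain array with hypothetical names:

/*
 * Illustrative sketch: the binary-search-then-linear-adjust probe used
 * by xfs_da3_node_lookup_int(), returning the index of the first entry
 * with hashval >= the search value (or max - 1 if none).
 */
#include <stdio.h>

static int node_probe(const unsigned int *hashvals, int max, unsigned int want)
{
	int probe, span;

	probe = span = max / 2;
	while (span > 4) {
		span /= 2;
		if (hashvals[probe] < want)
			probe += span;
		else if (hashvals[probe] > want)
			probe -= span;
		else
			break;
	}

	/* back up to the first duplicate ... */
	while (probe > 0 && hashvals[probe] >= want)
		probe--;
	/* ... then step forward to the first entry >= want */
	while (probe < max && hashvals[probe] < want)
		probe++;

	return probe == max ? max - 1 : probe;
}

int main(void)
{
	unsigned int h[] = { 5, 9, 9, 9, 12, 20, 33, 40, 41, 57 };

	printf("%d\n", node_probe(h, 10, 9));	/* 1: first of the three 9s */
	printf("%d\n", node_probe(h, 10, 99));	/* 9: falls off the end     */
	return 0;
}
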
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Compare two intermediate nodes for "order".
+ */
+STATIC int
+xfs_da3_node_order(
+       struct xfs_inode *dp,
+       struct xfs_buf  *node1_bp,
+       struct xfs_buf  *node2_bp)
+{
+       struct xfs_da_intnode   *node1;
+       struct xfs_da_intnode   *node2;
+       struct xfs_da_node_entry *btree1;
+       struct xfs_da_node_entry *btree2;
+       struct xfs_da3_icnode_hdr node1hdr;
+       struct xfs_da3_icnode_hdr node2hdr;
+
+       node1 = node1_bp->b_addr;
+       node2 = node2_bp->b_addr;
+       dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
+       dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
+       btree1 = dp->d_ops->node_tree_p(node1);
+       btree2 = dp->d_ops->node_tree_p(node2);
+
+       if (node1hdr.count > 0 && node2hdr.count > 0 &&
+           ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
+            (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
+             be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Link a new block into a doubly linked list of blocks (of whatever type).
+ */
+int                                                    /* error */
+xfs_da3_blk_link(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *old_blk,
+       struct xfs_da_state_blk *new_blk)
+{
+       struct xfs_da_blkinfo   *old_info;
+       struct xfs_da_blkinfo   *new_info;
+       struct xfs_da_blkinfo   *tmp_info;
+       struct xfs_da_args      *args;
+       struct xfs_buf          *bp;
+       int                     before = 0;
+       int                     error;
+       struct xfs_inode        *dp = state->args->dp;
+
+       /*
+        * Set up environment.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       old_info = old_blk->bp->b_addr;
+       new_info = new_blk->bp->b_addr;
+       ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
+              old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
+              old_blk->magic == XFS_ATTR_LEAF_MAGIC);
+
+       switch (old_blk->magic) {
+       case XFS_ATTR_LEAF_MAGIC:
+               before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
+               break;
+       case XFS_DIR2_LEAFN_MAGIC:
+               before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
+               break;
+       case XFS_DA_NODE_MAGIC:
+               before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
+               break;
+       }
+
+       /*
+        * Link blocks in appropriate order.
+        */
+       if (before) {
+               /*
+                * Link new block in before existing block.
+                */
+               trace_xfs_da_link_before(args);
+               new_info->forw = cpu_to_be32(old_blk->blkno);
+               new_info->back = old_info->back;
+               if (old_info->back) {
+                       error = xfs_da3_node_read(args->trans, dp,
+                                               be32_to_cpu(old_info->back),
+                                               -1, &bp, args->whichfork);
+                       if (error)
+                               return error;
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->b_addr;
+                       ASSERT(tmp_info->magic == old_info->magic);
+                       ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
+                       tmp_info->forw = cpu_to_be32(new_blk->blkno);
+                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+               }
+               old_info->back = cpu_to_be32(new_blk->blkno);
+       } else {
+               /*
+                * Link new block in after existing block.
+                */
+               trace_xfs_da_link_after(args);
+               new_info->forw = old_info->forw;
+               new_info->back = cpu_to_be32(old_blk->blkno);
+               if (old_info->forw) {
+                       error = xfs_da3_node_read(args->trans, dp,
+                                               be32_to_cpu(old_info->forw),
+                                               -1, &bp, args->whichfork);
+                       if (error)
+                               return error;
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->b_addr;
+                       ASSERT(tmp_info->magic == old_info->magic);
+                       ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
+                       tmp_info->back = cpu_to_be32(new_blk->blkno);
+                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
+               }
+               old_info->forw = cpu_to_be32(new_blk->blkno);
+       }
+
+       xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
+       xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+       return 0;
+}
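
Illustrative sketch (not from the XFS sources): linking touches at most three blocks: the new block, the existing block, and, if present, the old neighbour whose sibling pointer must be redirected at the new block. A compact doubly linked list sketch of the before/after cases, using block numbers instead of buffers and entirely hypothetical names:

/*
 * Illustrative sketch: link a new block before or after an existing
 * one in a doubly linked chain, mirroring the two cases in
 * xfs_da3_blk_link().  Block 0 means "no sibling".
 */
#include <stdio.h>

struct blkinfo { int forw; int back; };

static struct blkinfo blocks[16];	/* indexed by block number */

static void blk_link(int old, int new, int before)
{
	if (before) {
		/* new goes between old's former back-sibling and old */
		blocks[new].forw = old;
		blocks[new].back = blocks[old].back;
		if (blocks[old].back)
			blocks[blocks[old].back].forw = new;
		blocks[old].back = new;
	} else {
		/* new goes between old and old's former forw-sibling */
		blocks[new].back = old;
		blocks[new].forw = blocks[old].forw;
		if (blocks[old].forw)
			blocks[blocks[old].forw].back = new;
		blocks[old].forw = new;
	}
}

int main(void)
{
	/* start with a two-block chain: 1 <-> 2 */
	blocks[1].forw = 2;
	blocks[2].back = 1;

	blk_link(2, 3, 1);	/* link 3 before 2: 1 <-> 3 <-> 2 */
	printf("1.forw=%d 3.back=%d 3.forw=%d 2.back=%d\n",
	       blocks[1].forw, blocks[3].back, blocks[3].forw, blocks[2].back);
	return 0;
}
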
+
+/*
+ * Unlink a block from a doubly linked list of blocks.
+ */
+STATIC int                                             /* error */
+xfs_da3_blk_unlink(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_blk *drop_blk,
+       struct xfs_da_state_blk *save_blk)
+{
+       struct xfs_da_blkinfo   *drop_info;
+       struct xfs_da_blkinfo   *save_info;
+       struct xfs_da_blkinfo   *tmp_info;
+       struct xfs_da_args      *args;
+       struct xfs_buf          *bp;
+       int                     error;
+
+       /*
+        * Set up environment.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       save_info = save_blk->bp->b_addr;
+       drop_info = drop_blk->bp->b_addr;
+       ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
+              save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
+              save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+       ASSERT(save_blk->magic == drop_blk->magic);
+       ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
+              (be32_to_cpu(save_info->back) == drop_blk->blkno));
+       ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
+              (be32_to_cpu(drop_info->back) == save_blk->blkno));
+
+       /*
+        * Unlink the leaf block from the doubly linked chain of leaves.
+        */
+       if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
+               trace_xfs_da_unlink_back(args);
+               save_info->back = drop_info->back;
+               if (drop_info->back) {
+                       error = xfs_da3_node_read(args->trans, args->dp,
+                                               be32_to_cpu(drop_info->back),
+                                               -1, &bp, args->whichfork);
+                       if (error)
+                               return error;
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->b_addr;
+                       ASSERT(tmp_info->magic == save_info->magic);
+                       ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
+                       tmp_info->forw = cpu_to_be32(save_blk->blkno);
+                       xfs_trans_log_buf(args->trans, bp, 0,
+                                                   sizeof(*tmp_info) - 1);
+               }
+       } else {
+               trace_xfs_da_unlink_forward(args);
+               save_info->forw = drop_info->forw;
+               if (drop_info->forw) {
+                       error = xfs_da3_node_read(args->trans, args->dp,
+                                               be32_to_cpu(drop_info->forw),
+                                               -1, &bp, args->whichfork);
+                       if (error)
+                               return error;
+                       ASSERT(bp != NULL);
+                       tmp_info = bp->b_addr;
+                       ASSERT(tmp_info->magic == save_info->magic);
+                       ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
+                       tmp_info->back = cpu_to_be32(save_blk->blkno);
+                       xfs_trans_log_buf(args->trans, bp, 0,
+                                                   sizeof(*tmp_info) - 1);
+               }
+       }
+
+       xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+       return 0;
+}
+
+/*
+ * Move a path "forward" or "!forward" one block at the current level.
+ *
+ * This routine will adjust a "path" to point to the next block
+ * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
+ * Btree, including updating pointers to the intermediate nodes between
+ * the new bottom and the root.
+ */
+int                                                    /* error */
+xfs_da3_path_shift(
+       struct xfs_da_state     *state,
+       struct xfs_da_state_path *path,
+       int                     forward,
+       int                     release,
+       int                     *result)
+{
+       struct xfs_da_state_blk *blk;
+       struct xfs_da_blkinfo   *info;
+       struct xfs_da_intnode   *node;
+       struct xfs_da_args      *args;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr nodehdr;
+       xfs_dablk_t             blkno = 0;
+       int                     level;
+       int                     error;
+       struct xfs_inode        *dp = state->args->dp;
+
+       trace_xfs_da_path_shift(state->args);
+
+       /*
+        * Roll up the Btree looking for the first block where our
+        * current index is not at the edge of the block.  Note that
+        * we skip the bottom layer because we want the sibling block.
+        */
+       args = state->args;
+       ASSERT(args != NULL);
+       ASSERT(path != NULL);
+       ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+       level = (path->active-1) - 1;   /* skip bottom layer in path */
+       for (blk = &path->blk[level]; level >= 0; blk--, level--) {
+               node = blk->bp->b_addr;
+               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+               btree = dp->d_ops->node_tree_p(node);
+
+               if (forward && (blk->index < nodehdr.count - 1)) {
+                       blk->index++;
+                       blkno = be32_to_cpu(btree[blk->index].before);
+                       break;
+               } else if (!forward && (blk->index > 0)) {
+                       blk->index--;
+                       blkno = be32_to_cpu(btree[blk->index].before);
+                       break;
+               }
+       }
+       if (level < 0) {
+               *result = ENOENT;       /* we're out of our tree */
+               ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+               return 0;
+       }
+
+       /*
+        * Roll down the edge of the subtree until we reach the
+        * same depth we were at originally.
+        */
+       for (blk++, level++; level < path->active; blk++, level++) {
+               /*
+                * Release the old block.
+                * (if it's dirty, trans won't actually let go)
+                */
+               if (release)
+                       xfs_trans_brelse(args->trans, blk->bp);
+
+               /*
+                * Read the next child block.
+                */
+               blk->blkno = blkno;
+               error = xfs_da3_node_read(args->trans, dp, blkno, -1,
+                                       &blk->bp, args->whichfork);
+               if (error)
+                       return error;
+               info = blk->bp->b_addr;
+               ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
+                      info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
+
+
+               /*
+                * Note: we flatten the magic number to a single type so we
+                * don't have to compare against crc/non-crc types elsewhere.
+                */
+               switch (be16_to_cpu(info->magic)) {
+               case XFS_DA_NODE_MAGIC:
+               case XFS_DA3_NODE_MAGIC:
+                       blk->magic = XFS_DA_NODE_MAGIC;
+                       node = (xfs_da_intnode_t *)info;
+                       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+                       btree = dp->d_ops->node_tree_p(node);
+                       blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
+                       if (forward)
+                               blk->index = 0;
+                       else
+                               blk->index = nodehdr.count - 1;
+                       blkno = be32_to_cpu(btree[blk->index].before);
+                       break;
+               case XFS_ATTR_LEAF_MAGIC:
+               case XFS_ATTR3_LEAF_MAGIC:
+                       blk->magic = XFS_ATTR_LEAF_MAGIC;
+                       ASSERT(level == path->active-1);
+                       blk->index = 0;
+                       blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
+                       break;
+               case XFS_DIR2_LEAFN_MAGIC:
+               case XFS_DIR3_LEAFN_MAGIC:
+                       blk->magic = XFS_DIR2_LEAFN_MAGIC;
+                       ASSERT(level == path->active-1);
+                       blk->index = 0;
+                       blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
+                                                              blk->bp, NULL);
+                       break;
+               default:
+                       ASSERT(0);
+                       break;
+               }
+       }
+       *result = 0;
+       return 0;
+}
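
Illustrative sketch (not part of the diff): the shift rolls up until some level's index can move one step in the requested direction, then rolls back down that edge resetting every lower index to 0 (forward) or count - 1 (backward). A sketch of the same walk over a simplified path of (index, count) pairs; the real code also re-reads the child block at each level on the way down:

/*
 * Illustrative sketch: shift a cursor path one leaf sideways, the way
 * xfs_da3_path_shift() does.  Each level just records an index and an
 * entry count.  Returns -1 when the tree is exhausted.
 */
#include <stdio.h>

struct plevel { int index; int count; };

static int path_shift(struct plevel *path, int active, int forward)
{
	int level;

	/* roll up: skip the leaf, find a level not at the edge */
	for (level = active - 2; level >= 0; level--) {
		if (forward && path[level].index < path[level].count - 1) {
			path[level].index++;
			break;
		} else if (!forward && path[level].index > 0) {
			path[level].index--;
			break;
		}
	}
	if (level < 0)
		return -1;	/* ran off the edge of the tree */

	/* roll down the new edge to the original depth */
	for (level++; level < active; level++)
		path[level].index = forward ? 0 : path[level].count - 1;
	return 0;
}

int main(void)
{
	/* 3-level path, sitting on the last entry of the bottom two levels */
	struct plevel path[3] = { { 0, 2 }, { 3, 4 }, { 7, 8 } };

	if (path_shift(path, 3, 1) == 0)	/* shift forward */
		printf("%d %d %d\n", path[0].index, path[1].index, path[2].index);
	return 0;
}
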
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ */
+xfs_dahash_t
+xfs_da_hashname(const __uint8_t *name, int namelen)
+{
+       xfs_dahash_t hash;
+
+       /*
+        * Do four characters at a time as long as we can.
+        */
+       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
+               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
+                      (name[3] << 0) ^ rol32(hash, 7 * 4);
+
+       /*
+        * Now do the rest of the characters.
+        */
+       switch (namelen) {
+       case 3:
+               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
+                      rol32(hash, 7 * 3);
+       case 2:
+               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
+       case 1:
+               return (name[0] << 0) ^ rol32(hash, 7 * 1);
+       default: /* case 0: */
+               return hash;
+       }
+}
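
As an illustration of the rotate-by-7/XOR scheme above, here is a minimal userspace sketch of the same hashing loop. rol32() is reimplemented locally since the kernel normally provides it through its bitops helpers, and the sample name in main() is arbitrary; this is a sketch for experimentation rather than kernel code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* userspace stand-in for the kernel's rol32() */
static uint32_t rol32(uint32_t word, unsigned int shift)
{
        return (word << shift) | (word >> ((32 - shift) & 31));
}

/* same rotate-by-7, XOR-each-byte scheme as xfs_da_hashname() above */
static uint32_t da_hashname(const uint8_t *name, int namelen)
{
        uint32_t hash = 0;

        for (; namelen >= 4; namelen -= 4, name += 4)
                hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
                       (name[3] << 0) ^ rol32(hash, 7 * 4);

        switch (namelen) {
        case 3:
                return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
                       rol32(hash, 7 * 3);
        case 2:
                return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
        case 1:
                return (name[0] << 0) ^ rol32(hash, 7 * 1);
        default:
                return hash;
        }
}

int main(void)
{
        const char *name = "lost+found";

        printf("hash(\"%s\") = 0x%08x\n", name,
               (unsigned)da_hashname((const uint8_t *)name, (int)strlen(name)));
        return 0;
}
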
+
+enum xfs_dacmp
+xfs_da_compname(
+       struct xfs_da_args *args,
+       const unsigned char *name,
+       int             len)
+{
+       return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
+                                       XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
+}
+
+static xfs_dahash_t
+xfs_default_hashname(
+       struct xfs_name *name)
+{
+       return xfs_da_hashname(name->name, name->len);
+}
+
+const struct xfs_nameops xfs_default_nameops = {
+       .hashname       = xfs_default_hashname,
+       .compname       = xfs_da_compname
+};
+
+int
+xfs_da_grow_inode_int(
+       struct xfs_da_args      *args,
+       xfs_fileoff_t           *bno,
+       int                     count)
+{
+       struct xfs_trans        *tp = args->trans;
+       struct xfs_inode        *dp = args->dp;
+       int                     w = args->whichfork;
+       xfs_drfsbno_t           nblks = dp->i_d.di_nblocks;
+       struct xfs_bmbt_irec    map, *mapp;
+       int                     nmap, error, got, i, mapi;
+
+       /*
+        * Find a spot in the file space to put the new block.
+        */
+       error = xfs_bmap_first_unused(tp, dp, count, bno, w);
+       if (error)
+               return error;
+
+       /*
+        * Try mapping it in one filesystem block.
+        */
+       nmap = 1;
+       ASSERT(args->firstblock != NULL);
+       error = xfs_bmapi_write(tp, dp, *bno, count,
+                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
+                       args->firstblock, args->total, &map, &nmap,
+                       args->flist);
+       if (error)
+               return error;
+
+       ASSERT(nmap <= 1);
+       if (nmap == 1) {
+               mapp = &map;
+               mapi = 1;
+       } else if (nmap == 0 && count > 1) {
+               xfs_fileoff_t           b;
+               int                     c;
+
+               /*
+                * If we didn't get it and the block might work if fragmented,
+                * try without the CONTIG flag.  Loop until we get it all.
+                */
+               mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+               for (b = *bno, mapi = 0; b < *bno + count; ) {
+                       nmap = MIN(XFS_BMAP_MAX_NMAP, count);
+                       c = (int)(*bno + count - b);
+                       error = xfs_bmapi_write(tp, dp, b, c,
+                                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
+                                       args->firstblock, args->total,
+                                       &mapp[mapi], &nmap, args->flist);
+                       if (error)
+                               goto out_free_map;
+                       if (nmap < 1)
+                               break;
+                       mapi += nmap;
+                       b = mapp[mapi - 1].br_startoff +
+                           mapp[mapi - 1].br_blockcount;
+               }
+       } else {
+               mapi = 0;
+               mapp = NULL;
+       }
+
+       /*
+        * Count the blocks we got, make sure it matches the total.
+        */
+       for (i = 0, got = 0; i < mapi; i++)
+               got += mapp[i].br_blockcount;
+       if (got != count || mapp[0].br_startoff != *bno ||
+           mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
+           *bno + count) {
+               error = ENOSPC;
+               goto out_free_map;
+       }
+
+       /* account for newly allocated blocks in reserved blocks total */
+       args->total -= dp->i_d.di_nblocks - nblks;
+
+out_free_map:
+       if (mapp != &map)
+               kmem_free(mapp);
+       return error;
+}
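
The allocation strategy above (try a single contiguous mapping, fall back to piecewise allocation, then check that the pieces are contiguous and cover the whole range) can be sketched in isolation. The snippet below is a loose, self-contained illustration built around an assumed alloc_fn callback and a toy allocator; it only mirrors the shape of the loop and the coverage check, not the real xfs_bmapi_write() interface.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct extent {                 /* analogous to struct xfs_bmbt_irec */
        uint64_t start;         /* file offset of the first block */
        uint64_t count;         /* number of blocks mapped */
};

/*
 * Hypothetical allocator callback: map up to 'count' blocks at file offset
 * 'off', return how many extents were filled in (0 means "no space").
 */
typedef int (*alloc_fn)(uint64_t off, uint64_t count, bool contig,
                        struct extent *out, int max_out);

/*
 * Try one contiguous allocation first; if that fails, allocate piecewise and
 * then check that the pieces are contiguous and cover [bno, bno + count).
 */
static bool grow_range(alloc_fn alloc, uint64_t bno, uint64_t count,
                       struct extent *map, int max_map)
{
        uint64_t next, got = 0;
        int nmap, i;

        nmap = alloc(bno, count, true, map, 1);
        if (nmap != 1) {
                /* fragmented fallback: keep going from where we left off */
                nmap = 0;
                for (next = bno; next < bno + count && nmap < max_map; ) {
                        int n = alloc(next, bno + count - next, false,
                                      &map[nmap], max_map - nmap);
                        if (n < 1)
                                break;
                        nmap += n;
                        next = map[nmap - 1].start + map[nmap - 1].count;
                }
        }

        /* the equivalent of the "did we get it all" check */
        for (i = 0, next = bno; i < nmap; i++) {
                if (map[i].start != next)
                        return false;
                next += map[i].count;
                got += map[i].count;
        }
        return got == count;
}

/* toy allocator: refuses contiguous requests > 2 blocks, hands out 2 at a time */
static int toy_alloc(uint64_t off, uint64_t count, bool contig,
                     struct extent *out, int max_out)
{
        if (max_out < 1 || (contig && count > 2))
                return 0;
        out[0].start = off;
        out[0].count = count > 2 ? 2 : count;
        return 1;
}

int main(void)
{
        struct extent map[8];

        printf("grew 5 blocks: %s\n",
               grow_range(toy_alloc, 100, 5, map, 8) ? "yes" : "no");
        return 0;
}
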
+
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ */
+int
+xfs_da_grow_inode(
+       struct xfs_da_args      *args,
+       xfs_dablk_t             *new_blkno)
+{
+       xfs_fileoff_t           bno;
+       int                     error;
+
+       trace_xfs_da_grow_inode(args);
+
+       bno = args->geo->leafblk;
+       error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
+       if (!error)
+               *new_blkno = (xfs_dablk_t)bno;
+       return error;
+}
+
+/*
+ * Ick.  We need to always be able to remove a btree block, even
+ * if there's no space reservation because the filesystem is full.
+ * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
+ * It swaps the target block with the last block in the file.  The
+ * last block in the file can always be removed because removing it
+ * cannot cause a bmap btree split.
+ */
+STATIC int
+xfs_da3_swap_lastblock(
+       struct xfs_da_args      *args,
+       xfs_dablk_t             *dead_blknop,
+       struct xfs_buf          **dead_bufp)
+{
+       struct xfs_da_blkinfo   *dead_info;
+       struct xfs_da_blkinfo   *sib_info;
+       struct xfs_da_intnode   *par_node;
+       struct xfs_da_intnode   *dead_node;
+       struct xfs_dir2_leaf    *dead_leaf2;
+       struct xfs_da_node_entry *btree;
+       struct xfs_da3_icnode_hdr par_hdr;
+       struct xfs_inode        *dp;
+       struct xfs_trans        *tp;
+       struct xfs_mount        *mp;
+       struct xfs_buf          *dead_buf;
+       struct xfs_buf          *last_buf;
+       struct xfs_buf          *sib_buf;
+       struct xfs_buf          *par_buf;
+       xfs_dahash_t            dead_hash;
+       xfs_fileoff_t           lastoff;
+       xfs_dablk_t             dead_blkno;
+       xfs_dablk_t             last_blkno;
+       xfs_dablk_t             sib_blkno;
+       xfs_dablk_t             par_blkno;
+       int                     error;
+       int                     w;
+       int                     entno;
+       int                     level;
+       int                     dead_level;
+
+       trace_xfs_da_swap_lastblock(args);
+
+       dead_buf = *dead_bufp;
+       dead_blkno = *dead_blknop;
+       tp = args->trans;
+       dp = args->dp;
+       w = args->whichfork;
+       ASSERT(w == XFS_DATA_FORK);
+       mp = dp->i_mount;
+       lastoff = args->geo->freeblk;
+       error = xfs_bmap_last_before(tp, dp, &lastoff, w);
+       if (error)
+               return error;
+       if (unlikely(lastoff == 0)) {
+               XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
+                                mp);
+               return EFSCORRUPTED;
+       }
+       /*
+        * Read the last block in the btree space.
+        */
+       last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
+       error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
+       if (error)
+               return error;
+       /*
+        * Copy the last block into the dead buffer and log it.
+        */
+       memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
+       xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
+       dead_info = dead_buf->b_addr;
+       /*
+        * Get values from the moved block.
+        */
+       if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+           dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
+               struct xfs_dir3_icleaf_hdr leafhdr;
+               struct xfs_dir2_leaf_entry *ents;
+
+               dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
+               dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
+               ents = dp->d_ops->leaf_ents_p(dead_leaf2);
+               dead_level = 0;
+               dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
+       } else {
+               struct xfs_da3_icnode_hdr deadhdr;
+
+               dead_node = (xfs_da_intnode_t *)dead_info;
+               dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
+               btree = dp->d_ops->node_tree_p(dead_node);
+               dead_level = deadhdr.level;
+               dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
+       }
+       sib_buf = par_buf = NULL;
+       /*
+        * If the moved block has a left sibling, fix up the pointers.
+        */
+       if ((sib_blkno = be32_to_cpu(dead_info->back))) {
+               error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+               if (error)
+                       goto done;
+               sib_info = sib_buf->b_addr;
+               if (unlikely(
+                   be32_to_cpu(sib_info->forw) != last_blkno ||
+                   sib_info->magic != dead_info->magic)) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               sib_info->forw = cpu_to_be32(dead_blkno);
+               xfs_trans_log_buf(tp, sib_buf,
+                       XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
+                                       sizeof(sib_info->forw)));
+               sib_buf = NULL;
+       }
+       /*
+        * If the moved block has a right sibling, fix up the pointers.
+        */
+       if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
+               error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+               if (error)
+                       goto done;
+               sib_info = sib_buf->b_addr;
+               if (unlikely(
+                      be32_to_cpu(sib_info->back) != last_blkno ||
+                      sib_info->magic != dead_info->magic)) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               sib_info->back = cpu_to_be32(dead_blkno);
+               xfs_trans_log_buf(tp, sib_buf,
+                       XFS_DA_LOGRANGE(sib_info, &sib_info->back,
+                                       sizeof(sib_info->back)));
+               sib_buf = NULL;
+       }
+       par_blkno = args->geo->leafblk;
+       level = -1;
+       /*
+        * Walk down the tree looking for the parent of the moved block.
+        */
+       for (;;) {
+               error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+               if (error)
+                       goto done;
+               par_node = par_buf->b_addr;
+               dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
+               if (level >= 0 && level != par_hdr.level + 1) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               level = par_hdr.level;
+               btree = dp->d_ops->node_tree_p(par_node);
+               for (entno = 0;
+                    entno < par_hdr.count &&
+                    be32_to_cpu(btree[entno].hashval) < dead_hash;
+                    entno++)
+                       continue;
+               if (entno == par_hdr.count) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               par_blkno = be32_to_cpu(btree[entno].before);
+               if (level == dead_level + 1)
+                       break;
+               xfs_trans_brelse(tp, par_buf);
+               par_buf = NULL;
+       }
+       /*
+        * We're in the right parent block.
+        * Look for the right entry.
+        */
+       for (;;) {
+               for (;
+                    entno < par_hdr.count &&
+                    be32_to_cpu(btree[entno].before) != last_blkno;
+                    entno++)
+                       continue;
+               if (entno < par_hdr.count)
+                       break;
+               par_blkno = par_hdr.forw;
+               xfs_trans_brelse(tp, par_buf);
+               par_buf = NULL;
+               if (unlikely(par_blkno == 0)) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+               if (error)
+                       goto done;
+               par_node = par_buf->b_addr;
+               dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
+               if (par_hdr.level != level) {
+                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
+                                        XFS_ERRLEVEL_LOW, mp);
+                       error = EFSCORRUPTED;
+                       goto done;
+               }
+               btree = dp->d_ops->node_tree_p(par_node);
+               entno = 0;
+       }
+       /*
+        * Update the parent entry pointing to the moved block.
+        */
+       btree[entno].before = cpu_to_be32(dead_blkno);
+       xfs_trans_log_buf(tp, par_buf,
+               XFS_DA_LOGRANGE(par_node, &btree[entno].before,
+                               sizeof(btree[entno].before)));
+       *dead_blknop = last_blkno;
+       *dead_bufp = last_buf;
+       return 0;
+done:
+       if (par_buf)
+               xfs_trans_brelse(tp, par_buf);
+       if (sib_buf)
+               xfs_trans_brelse(tp, sib_buf);
+       xfs_trans_brelse(tp, last_buf);
+       return error;
+}
+
+/*
+ * Remove a btree block from a directory or attribute.
+ */
+int
+xfs_da_shrink_inode(
+       xfs_da_args_t   *args,
+       xfs_dablk_t     dead_blkno,
+       struct xfs_buf  *dead_buf)
+{
+       xfs_inode_t *dp;
+       int done, error, w, count;
+       xfs_trans_t *tp;
+       xfs_mount_t *mp;
+
+       trace_xfs_da_shrink_inode(args);
+
+       dp = args->dp;
+       w = args->whichfork;
+       tp = args->trans;
+       mp = dp->i_mount;
+       count = args->geo->fsbcount;
+       for (;;) {
+               /*
+                * Remove extents.  If we get ENOSPC for a dir we have to move
+                * the last block to the place we want to kill.
+                */
+               error = xfs_bunmapi(tp, dp, dead_blkno, count,
+                                   xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
+                                   0, args->firstblock, args->flist, &done);
+               if (error == ENOSPC) {
+                       if (w != XFS_DATA_FORK)
+                               break;
+                       error = xfs_da3_swap_lastblock(args, &dead_blkno,
+                                                     &dead_buf);
+                       if (error)
+                               break;
+               } else {
+                       break;
+               }
+       }
+       xfs_trans_binval(tp, dead_buf);
+       return error;
+}
+
+/*
+ * See if the mapping(s) for this btree block are valid, i.e.
+ * don't contain holes, are logically contiguous, and cover the whole range.
+ */
+STATIC int
+xfs_da_map_covers_blocks(
+       int             nmap,
+       xfs_bmbt_irec_t *mapp,
+       xfs_dablk_t     bno,
+       int             count)
+{
+       int             i;
+       xfs_fileoff_t   off;
+
+       for (i = 0, off = bno; i < nmap; i++) {
+               if (mapp[i].br_startblock == HOLESTARTBLOCK ||
+                   mapp[i].br_startblock == DELAYSTARTBLOCK) {
+                       return 0;
+               }
+               if (off != mapp[i].br_startoff) {
+                       return 0;
+               }
+               off += mapp[i].br_blockcount;
+       }
+       return off == bno + count;
+}
+
+/*
+ * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
+ *
+ * For the single map case, it is assumed that the caller has provided a pointer
+ * to a valid xfs_buf_map.  For the multiple map case, this function will
+ * allocate the xfs_buf_map to hold all the maps and replace the caller's single
+ * map pointer with the allocated map.
+ */
+static int
+xfs_buf_map_from_irec(
+       struct xfs_mount        *mp,
+       struct xfs_buf_map      **mapp,
+       int                     *nmaps,
+       struct xfs_bmbt_irec    *irecs,
+       int                     nirecs)
+{
+       struct xfs_buf_map      *map;
+       int                     i;
+
+       ASSERT(*nmaps == 1);
+       ASSERT(nirecs >= 1);
+
+       if (nirecs > 1) {
+               map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
+                                 KM_SLEEP | KM_NOFS);
+               if (!map)
+                       return ENOMEM;
+               *mapp = map;
+       }
+
+       *nmaps = nirecs;
+       map = *mapp;
+       for (i = 0; i < *nmaps; i++) {
+               ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
+                      irecs[i].br_startblock != HOLESTARTBLOCK);
+               map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
+               map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
+       }
+       return 0;
+}
+
+/*
+ * Map the block we are given ready for reading. There are three possible return
+ * values:
+ *     -1 - will be returned if we land in a hole and mappedbno == -2 so the
+ *          caller knows not to execute a subsequent read.
+ *      0 - if we mapped the block successfully
+ *     >0 - positive error number if there was an error.
+ */
+static int
+xfs_dabuf_map(
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       int                     whichfork,
+       struct xfs_buf_map      **map,
+       int                     *nmaps)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       int                     nfsb;
+       int                     error = 0;
+       struct xfs_bmbt_irec    irec;
+       struct xfs_bmbt_irec    *irecs = &irec;
+       int                     nirecs;
+
+       ASSERT(map && *map);
+       ASSERT(*nmaps == 1);
+
+       if (whichfork == XFS_DATA_FORK)
+               nfsb = mp->m_dir_geo->fsbcount;
+       else
+               nfsb = mp->m_attr_geo->fsbcount;
+
+       /*
+        * Caller doesn't have a mapping.  -2 means don't complain
+        * if we land in a hole.
+        */
+       if (mappedbno == -1 || mappedbno == -2) {
+               /*
+                * Optimize the one-block case.
+                */
+               if (nfsb != 1)
+                       irecs = kmem_zalloc(sizeof(irec) * nfsb,
+                                           KM_SLEEP | KM_NOFS);
+
+               nirecs = nfsb;
+               error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
+                                      &nirecs, xfs_bmapi_aflag(whichfork));
+               if (error)
+                       goto out;
+       } else {
+               irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+               irecs->br_startoff = (xfs_fileoff_t)bno;
+               irecs->br_blockcount = nfsb;
+               irecs->br_state = 0;
+               nirecs = 1;
+       }
+
+       if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
+               error = mappedbno == -2 ? -1 : EFSCORRUPTED;
+               if (unlikely(error == EFSCORRUPTED)) {
+                       if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+                               int i;
+                               xfs_alert(mp, "%s: bno %lld dir: inode %lld",
+                                       __func__, (long long)bno,
+                                       (long long)dp->i_ino);
+                               for (i = 0; i < *nmaps; i++) {
+                                       xfs_alert(mp,
+"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
+                                               i,
+                                               (long long)irecs[i].br_startoff,
+                                               (long long)irecs[i].br_startblock,
+                                               (long long)irecs[i].br_blockcount,
+                                               irecs[i].br_state);
+                               }
+                       }
+                       XFS_ERROR_REPORT("xfs_da_do_buf(1)",
+                                        XFS_ERRLEVEL_LOW, mp);
+               }
+               goto out;
+       }
+       error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
+out:
+       if (irecs != &irec)
+               kmem_free(irecs);
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block.
+ */
+int
+xfs_da_get_buf(
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     whichfork)
+{
+       struct xfs_buf          *bp;
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
+
+       *bpp = NULL;
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
+                       error = 0;
+               goto out_free;
+       }
+
+       bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
+                                   mapp, nmap, 0);
+       error = bp ? bp->b_error : EIO;
+       if (error) {
+               xfs_trans_brelse(trans, bp);
+               goto out_free;
+       }
+
+       *bpp = bp;
+
+out_free:
+       if (mapp != &map)
+               kmem_free(mapp);
+
+       return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ */
+int
+xfs_da_read_buf(
+       struct xfs_trans        *trans,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp,
+       int                     whichfork,
+       const struct xfs_buf_ops *ops)
+{
+       struct xfs_buf          *bp;
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
+
+       *bpp = NULL;
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
+                       error = 0;
+               goto out_free;
+       }
+
+       error = xfs_trans_read_buf_map(dp->i_mount, trans,
+                                       dp->i_mount->m_ddev_targp,
+                                       mapp, nmap, 0, &bp, ops);
+       if (error)
+               goto out_free;
+
+       if (whichfork == XFS_ATTR_FORK)
+               xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
+       else
+               xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
+       *bpp = bp;
+out_free:
+       if (mapp != &map)
+               kmem_free(mapp);
+
+       return error;
+}
+
+/*
+ * Readahead the dir/attr block.
+ */
+xfs_daddr_t
+xfs_da_reada_buf(
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mappedbno,
+       int                     whichfork,
+       const struct xfs_buf_ops *ops)
+{
+       struct xfs_buf_map      map;
+       struct xfs_buf_map      *mapp;
+       int                     nmap;
+       int                     error;
+
+       mapp = &map;
+       nmap = 1;
+       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
+                               &mapp, &nmap);
+       if (error) {
+               /* mapping a hole is not an error, but we don't continue */
+               if (error == -1)
+                       error = 0;
+               goto out_free;
+       }
+
+       mappedbno = mapp[0].bm_bn;
+       xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
+
+out_free:
+       if (mapp != &map)
+               kmem_free(mapp);
+
+       if (error)
+               return -1;
+       return mappedbno;
+}
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
new file mode 100644 (file)
index 0000000..c9aee52
--- /dev/null
@@ -0,0 +1,911 @@
+/*
+ * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+
+/*
+ * Shortform directory ops
+ */
+static int
+xfs_dir2_sf_entsize(
+       struct xfs_dir2_sf_hdr  *hdr,
+       int                     len)
+{
+       int count = sizeof(struct xfs_dir2_sf_entry);   /* namelen + offset */
+
+       count += len;                                   /* name */
+       count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
+                               sizeof(xfs_dir2_ino4_t); /* ino # */
+       return count;
+}
+
+static int
+xfs_dir3_sf_entsize(
+       struct xfs_dir2_sf_hdr  *hdr,
+       int                     len)
+{
+       return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
+}
+
+static struct xfs_dir2_sf_entry *
+xfs_dir2_sf_nextentry(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep)
+{
+       return (struct xfs_dir2_sf_entry *)
+               ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
+}
+
+static struct xfs_dir2_sf_entry *
+xfs_dir3_sf_nextentry(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep)
+{
+       return (struct xfs_dir2_sf_entry *)
+               ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen));
+}
+
+
+/*
+ * For filetype enabled shortform directories, the file type field is stored at
+ * the end of the name.  Because it's only a single byte, endian conversion is
+ * not necessary. For non-filetype enabled directories, the type is always
+ * unknown and we never store the value.
+ */
+static __uint8_t
+xfs_dir2_sfe_get_ftype(
+       struct xfs_dir2_sf_entry *sfep)
+{
+       return XFS_DIR3_FT_UNKNOWN;
+}
+
+static void
+xfs_dir2_sfe_put_ftype(
+       struct xfs_dir2_sf_entry *sfep,
+       __uint8_t               ftype)
+{
+       ASSERT(ftype < XFS_DIR3_FT_MAX);
+}
+
+static __uint8_t
+xfs_dir3_sfe_get_ftype(
+       struct xfs_dir2_sf_entry *sfep)
+{
+       __uint8_t       ftype;
+
+       ftype = sfep->name[sfep->namelen];
+       if (ftype >= XFS_DIR3_FT_MAX)
+               return XFS_DIR3_FT_UNKNOWN;
+       return ftype;
+}
+
+static void
+xfs_dir3_sfe_put_ftype(
+       struct xfs_dir2_sf_entry *sfep,
+       __uint8_t               ftype)
+{
+       ASSERT(ftype < XFS_DIR3_FT_MAX);
+
+       sfep->name[sfep->namelen] = ftype;
+}
+
+/*
+ * Inode numbers in short-form directories can come in two versions,
+ * either 4 bytes or 8 bytes wide.  These helpers deal with the
+ * two forms transparently by looking at the header's i8count field.
+ *
+ * For 64-bit inode numbers the most significant byte must be zero.
+ */
+static xfs_ino_t
+xfs_dir2_sf_get_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       xfs_dir2_inou_t         *from)
+{
+       if (hdr->i8count)
+               return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
+       else
+               return get_unaligned_be32(&from->i4.i);
+}
+
+static void
+xfs_dir2_sf_put_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       xfs_dir2_inou_t         *to,
+       xfs_ino_t               ino)
+{
+       ASSERT((ino & 0xff00000000000000ULL) == 0);
+
+       if (hdr->i8count)
+               put_unaligned_be64(ino, &to->i8.i);
+       else
+               put_unaligned_be32(ino, &to->i4.i);
+}
+
+static xfs_ino_t
+xfs_dir2_sf_get_parent_ino(
+       struct xfs_dir2_sf_hdr  *hdr)
+{
+       return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
+}
+
+static void
+xfs_dir2_sf_put_parent_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       xfs_ino_t               ino)
+{
+       xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at a variable
+ * offset after the entry name. If the entry stores a filetype value, then it
+ * sits between the name and the inode number. Hence the inode numbers may only
+ * be accessed through the helpers below.
+ */
+static xfs_ino_t
+xfs_dir2_sfe_get_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep)
+{
+       return xfs_dir2_sf_get_ino(hdr,
+                               (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]);
+}
+
+static void
+xfs_dir2_sfe_put_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep,
+       xfs_ino_t               ino)
+{
+       xfs_dir2_sf_put_ino(hdr,
+                           (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino);
+}
+
+static xfs_ino_t
+xfs_dir3_sfe_get_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep)
+{
+       return xfs_dir2_sf_get_ino(hdr,
+                       (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]);
+}
+
+static void
+xfs_dir3_sfe_put_ino(
+       struct xfs_dir2_sf_hdr  *hdr,
+       struct xfs_dir2_sf_entry *sfep,
+       xfs_ino_t               ino)
+{
+       xfs_dir2_sf_put_ino(hdr,
+                       (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino);
+}
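
A self-contained sketch of the 4-byte/8-byte big-endian packing described above. The byte loops are illustrative stand-ins for the unaligned big-endian accessors (get_unaligned_be32/64 and friends) used by the real helpers, and the function names are made up for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* pack an inode number into 8 (i8count set) or 4 big-endian bytes */
static void sf_put_ino(uint8_t *dst, int i8count, uint64_t ino)
{
        int i, width = i8count ? 8 : 4;

        assert((ino & 0xff00000000000000ULL) == 0);     /* top byte must be 0 */
        for (i = 0; i < width; i++)
                dst[i] = (uint8_t)(ino >> (8 * (width - 1 - i)));
}

/* unpack again, masking the top byte for the 8-byte form */
static uint64_t sf_get_ino(const uint8_t *src, int i8count)
{
        int i, width = i8count ? 8 : 4;
        uint64_t ino = 0;

        for (i = 0; i < width; i++)
                ino = (ino << 8) | src[i];
        return i8count ? (ino & 0x00ffffffffffffffULL) : ino;
}

int main(void)
{
        uint8_t buf[8];

        sf_put_ino(buf, 1, 0x00123456789abcdeULL);
        printf("round trip: 0x%llx\n",
               (unsigned long long)sf_get_ino(buf, 1));
        return 0;
}
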
+
+
+/*
+ * Directory data block operations
+ */
+
+/*
+ * In special situations the dirent size ends up fixed because we always know
+ * the size of the entry. That's true for "." and "..", so they are a fixed
+ * size and hence their offsets are constant, as is the offset of the first
+ * real entry.
+ *
+ * Hence, this calculation is written as a macro so that it can be evaluated at
+ * compile time and certain offsets can be calculated directly in the structure
+ * initialiser via the macro. There are two macros - one for dirents with ftype
+ * and one without, so there are no unresolvable conditionals in the
+ * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
+ * of 2 and the compiler doesn't reject it (unlike roundup()).
+ */
+#define XFS_DIR2_DATA_ENTSIZE(n)                                       \
+       round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
+                sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
+
+#define XFS_DIR3_DATA_ENTSIZE(n)                                       \
+       round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
+                sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),      \
+               XFS_DIR2_DATA_ALIGN)
+
+static int
+xfs_dir2_data_entsize(
+       int                     n)
+{
+       return XFS_DIR2_DATA_ENTSIZE(n);
+}
+
+static int
+xfs_dir3_data_entsize(
+       int                     n)
+{
+       return XFS_DIR3_DATA_ENTSIZE(n);
+}
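
Worked numbers may help here. Assuming the usual fixed fields of a data entry (8-byte inumber, 1-byte namelen, 2-byte tag, plus one ftype byte in the v3 variant) and 8-byte alignment, both "." and ".." round up to 16-byte entries, which is where the fixed dot/dotdot/first offsets in the ops structures further down come from. A small sketch of the arithmetic:

#include <stdio.h>

#define DATA_ALIGN      8                               /* XFS_DIR2_DATA_ALIGN */
#define ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

/* 8-byte inumber + 1-byte namelen precede the name; a 2-byte tag follows it */
#define DIR2_ENTSIZE(n) ROUND_UP(8 + 1 + (n) + 2, DATA_ALIGN)
/* the v3 variant also stores a 1-byte ftype after the name */
#define DIR3_ENTSIZE(n) ROUND_UP(8 + 1 + (n) + 2 + 1, DATA_ALIGN)

int main(void)
{
        /* "." is one char, ".." is two: all of these come out as 16 */
        printf("dir2: . = %d, .. = %d\n", DIR2_ENTSIZE(1), DIR2_ENTSIZE(2));
        printf("dir3: . = %d, .. = %d\n", DIR3_ENTSIZE(1), DIR3_ENTSIZE(2));
        return 0;
}
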
+
+static __uint8_t
+xfs_dir2_data_get_ftype(
+       struct xfs_dir2_data_entry *dep)
+{
+       return XFS_DIR3_FT_UNKNOWN;
+}
+
+static void
+xfs_dir2_data_put_ftype(
+       struct xfs_dir2_data_entry *dep,
+       __uint8_t               ftype)
+{
+       ASSERT(ftype < XFS_DIR3_FT_MAX);
+}
+
+static __uint8_t
+xfs_dir3_data_get_ftype(
+       struct xfs_dir2_data_entry *dep)
+{
+       __uint8_t       ftype = dep->name[dep->namelen];
+
+       ASSERT(ftype < XFS_DIR3_FT_MAX);
+       if (ftype >= XFS_DIR3_FT_MAX)
+               return XFS_DIR3_FT_UNKNOWN;
+       return ftype;
+}
+
+static void
+xfs_dir3_data_put_ftype(
+       struct xfs_dir2_data_entry *dep,
+       __uint8_t               type)
+{
+       ASSERT(type < XFS_DIR3_FT_MAX);
+       ASSERT(dep->namelen != 0);
+
+       dep->name[dep->namelen] = type;
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+static __be16 *
+xfs_dir2_data_entry_tag_p(
+       struct xfs_dir2_data_entry *dep)
+{
+       return (__be16 *)((char *)dep +
+               xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
+}
+
+static __be16 *
+xfs_dir3_data_entry_tag_p(
+       struct xfs_dir2_data_entry *dep)
+{
+       return (__be16 *)((char *)dep +
+               xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
+}
+
+/*
+ * location of . and .. in data space (always block 0)
+ */
+static struct xfs_dir2_data_entry *
+xfs_dir2_data_dot_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir2_data_dotdot_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR2_DATA_ENTSIZE(1));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir2_data_first_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR2_DATA_ENTSIZE(1) +
+                               XFS_DIR2_DATA_ENTSIZE(2));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir2_ftype_data_dotdot_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir2_ftype_data_first_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1) +
+                               XFS_DIR3_DATA_ENTSIZE(2));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir3_data_dot_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir3_data_dotdot_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir3_data_first_entry_p(
+       struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1) +
+                               XFS_DIR3_DATA_ENTSIZE(2));
+}
+
+static struct xfs_dir2_data_free *
+xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return hdr->bestfree;
+}
+
+static struct xfs_dir2_data_free *
+xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
+}
+
+static struct xfs_dir2_data_unused *
+xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_unused *)
+               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
+}
+
+static struct xfs_dir2_data_entry *
+xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_entry *)
+               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
+}
+
+static struct xfs_dir2_data_unused *
+xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
+{
+       return (struct xfs_dir2_data_unused *)
+               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
+}
+
+
+/*
+ * Directory Leaf block operations
+ */
+static int
+xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
+{
+       return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
+               (uint)sizeof(struct xfs_dir2_leaf_entry);
+}
+
+static struct xfs_dir2_leaf_entry *
+xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
+{
+       return lp->__ents;
+}
+
+static int
+xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
+{
+       return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
+               (uint)sizeof(struct xfs_dir2_leaf_entry);
+}
+
+static struct xfs_dir2_leaf_entry *
+xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
+{
+       return ((struct xfs_dir3_leaf *)lp)->__ents;
+}
+
+static void
+xfs_dir2_leaf_hdr_from_disk(
+       struct xfs_dir3_icleaf_hdr      *to,
+       struct xfs_dir2_leaf            *from)
+{
+       to->forw = be32_to_cpu(from->hdr.info.forw);
+       to->back = be32_to_cpu(from->hdr.info.back);
+       to->magic = be16_to_cpu(from->hdr.info.magic);
+       to->count = be16_to_cpu(from->hdr.count);
+       to->stale = be16_to_cpu(from->hdr.stale);
+
+       ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
+              to->magic == XFS_DIR2_LEAFN_MAGIC);
+}
+
+static void
+xfs_dir2_leaf_hdr_to_disk(
+       struct xfs_dir2_leaf            *to,
+       struct xfs_dir3_icleaf_hdr      *from)
+{
+       ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
+              from->magic == XFS_DIR2_LEAFN_MAGIC);
+
+       to->hdr.info.forw = cpu_to_be32(from->forw);
+       to->hdr.info.back = cpu_to_be32(from->back);
+       to->hdr.info.magic = cpu_to_be16(from->magic);
+       to->hdr.count = cpu_to_be16(from->count);
+       to->hdr.stale = cpu_to_be16(from->stale);
+}
+
+static void
+xfs_dir3_leaf_hdr_from_disk(
+       struct xfs_dir3_icleaf_hdr      *to,
+       struct xfs_dir2_leaf            *from)
+{
+       struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
+
+       to->forw = be32_to_cpu(hdr3->info.hdr.forw);
+       to->back = be32_to_cpu(hdr3->info.hdr.back);
+       to->magic = be16_to_cpu(hdr3->info.hdr.magic);
+       to->count = be16_to_cpu(hdr3->count);
+       to->stale = be16_to_cpu(hdr3->stale);
+
+       ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
+              to->magic == XFS_DIR3_LEAFN_MAGIC);
+}
+
+static void
+xfs_dir3_leaf_hdr_to_disk(
+       struct xfs_dir2_leaf            *to,
+       struct xfs_dir3_icleaf_hdr      *from)
+{
+       struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
+
+       ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
+              from->magic == XFS_DIR3_LEAFN_MAGIC);
+
+       hdr3->info.hdr.forw = cpu_to_be32(from->forw);
+       hdr3->info.hdr.back = cpu_to_be32(from->back);
+       hdr3->info.hdr.magic = cpu_to_be16(from->magic);
+       hdr3->count = cpu_to_be16(from->count);
+       hdr3->stale = cpu_to_be16(from->stale);
+}
+
+
+/*
+ * Directory/Attribute Node block operations
+ */
+static struct xfs_da_node_entry *
+xfs_da2_node_tree_p(struct xfs_da_intnode *dap)
+{
+       return dap->__btree;
+}
+
+static struct xfs_da_node_entry *
+xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
+{
+       return ((struct xfs_da3_intnode *)dap)->__btree;
+}
+
+static void
+xfs_da2_node_hdr_from_disk(
+       struct xfs_da3_icnode_hdr       *to,
+       struct xfs_da_intnode           *from)
+{
+       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+       to->forw = be32_to_cpu(from->hdr.info.forw);
+       to->back = be32_to_cpu(from->hdr.info.back);
+       to->magic = be16_to_cpu(from->hdr.info.magic);
+       to->count = be16_to_cpu(from->hdr.__count);
+       to->level = be16_to_cpu(from->hdr.__level);
+}
+
+static void
+xfs_da2_node_hdr_to_disk(
+       struct xfs_da_intnode           *to,
+       struct xfs_da3_icnode_hdr       *from)
+{
+       ASSERT(from->magic == XFS_DA_NODE_MAGIC);
+       to->hdr.info.forw = cpu_to_be32(from->forw);
+       to->hdr.info.back = cpu_to_be32(from->back);
+       to->hdr.info.magic = cpu_to_be16(from->magic);
+       to->hdr.__count = cpu_to_be16(from->count);
+       to->hdr.__level = cpu_to_be16(from->level);
+}
+
+static void
+xfs_da3_node_hdr_from_disk(
+       struct xfs_da3_icnode_hdr       *to,
+       struct xfs_da_intnode           *from)
+{
+       struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
+
+       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
+       to->forw = be32_to_cpu(hdr3->info.hdr.forw);
+       to->back = be32_to_cpu(hdr3->info.hdr.back);
+       to->magic = be16_to_cpu(hdr3->info.hdr.magic);
+       to->count = be16_to_cpu(hdr3->__count);
+       to->level = be16_to_cpu(hdr3->__level);
+}
+
+static void
+xfs_da3_node_hdr_to_disk(
+       struct xfs_da_intnode           *to,
+       struct xfs_da3_icnode_hdr       *from)
+{
+       struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
+
+       ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
+       hdr3->info.hdr.forw = cpu_to_be32(from->forw);
+       hdr3->info.hdr.back = cpu_to_be32(from->back);
+       hdr3->info.hdr.magic = cpu_to_be16(from->magic);
+       hdr3->__count = cpu_to_be16(from->count);
+       hdr3->__level = cpu_to_be16(from->level);
+}
+
+
+/*
+ * Directory free space block operations
+ */
+static int
+xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
+{
+       return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
+               sizeof(xfs_dir2_data_off_t);
+}
+
+static __be16 *
+xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
+{
+       return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
+}
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+       return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+                       (db / xfs_dir2_free_max_bests(geo));
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static int
+xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+       return db % xfs_dir2_free_max_bests(geo);
+}
+
+static int
+xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
+{
+       return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
+               sizeof(xfs_dir2_data_off_t);
+}
+
+static __be16 *
+xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
+{
+       return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
+}
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static xfs_dir2_db_t
+xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+       return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+                       (db / xfs_dir3_free_max_bests(geo));
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static int
+xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+       return db % xfs_dir3_free_max_bests(geo);
+}
+
+static void
+xfs_dir2_free_hdr_from_disk(
+       struct xfs_dir3_icfree_hdr      *to,
+       struct xfs_dir2_free            *from)
+{
+       to->magic = be32_to_cpu(from->hdr.magic);
+       to->firstdb = be32_to_cpu(from->hdr.firstdb);
+       to->nvalid = be32_to_cpu(from->hdr.nvalid);
+       to->nused = be32_to_cpu(from->hdr.nused);
+       ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
+}
+
+static void
+xfs_dir2_free_hdr_to_disk(
+       struct xfs_dir2_free            *to,
+       struct xfs_dir3_icfree_hdr      *from)
+{
+       ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
+
+       to->hdr.magic = cpu_to_be32(from->magic);
+       to->hdr.firstdb = cpu_to_be32(from->firstdb);
+       to->hdr.nvalid = cpu_to_be32(from->nvalid);
+       to->hdr.nused = cpu_to_be32(from->nused);
+}
+
+static void
+xfs_dir3_free_hdr_from_disk(
+       struct xfs_dir3_icfree_hdr      *to,
+       struct xfs_dir2_free            *from)
+{
+       struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
+
+       to->magic = be32_to_cpu(hdr3->hdr.magic);
+       to->firstdb = be32_to_cpu(hdr3->firstdb);
+       to->nvalid = be32_to_cpu(hdr3->nvalid);
+       to->nused = be32_to_cpu(hdr3->nused);
+
+       ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
+}
+
+static void
+xfs_dir3_free_hdr_to_disk(
+       struct xfs_dir2_free            *to,
+       struct xfs_dir3_icfree_hdr      *from)
+{
+       struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
+
+       ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
+
+       hdr3->hdr.magic = cpu_to_be32(from->magic);
+       hdr3->firstdb = cpu_to_be32(from->firstdb);
+       hdr3->nvalid = cpu_to_be32(from->nvalid);
+       hdr3->nused = cpu_to_be32(from->nused);
+}
+
+static const struct xfs_dir_ops xfs_dir2_ops = {
+       .sf_entsize = xfs_dir2_sf_entsize,
+       .sf_nextentry = xfs_dir2_sf_nextentry,
+       .sf_get_ftype = xfs_dir2_sfe_get_ftype,
+       .sf_put_ftype = xfs_dir2_sfe_put_ftype,
+       .sf_get_ino = xfs_dir2_sfe_get_ino,
+       .sf_put_ino = xfs_dir2_sfe_put_ino,
+       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
+       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
+
+       .data_entsize = xfs_dir2_data_entsize,
+       .data_get_ftype = xfs_dir2_data_get_ftype,
+       .data_put_ftype = xfs_dir2_data_put_ftype,
+       .data_entry_tag_p = xfs_dir2_data_entry_tag_p,
+       .data_bestfree_p = xfs_dir2_data_bestfree_p,
+
+       .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
+       .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR2_DATA_ENTSIZE(1),
+       .data_first_offset =  sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR2_DATA_ENTSIZE(1) +
+                               XFS_DIR2_DATA_ENTSIZE(2),
+       .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
+
+       .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
+       .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
+       .data_first_entry_p = xfs_dir2_data_first_entry_p,
+       .data_entry_p = xfs_dir2_data_entry_p,
+       .data_unused_p = xfs_dir2_data_unused_p,
+
+       .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
+       .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
+       .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
+       .leaf_max_ents = xfs_dir2_max_leaf_ents,
+       .leaf_ents_p = xfs_dir2_leaf_ents_p,
+
+       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
+       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
+       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
+       .node_tree_p = xfs_da2_node_tree_p,
+
+       .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
+       .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
+       .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
+       .free_max_bests = xfs_dir2_free_max_bests,
+       .free_bests_p = xfs_dir2_free_bests_p,
+       .db_to_fdb = xfs_dir2_db_to_fdb,
+       .db_to_fdindex = xfs_dir2_db_to_fdindex,
+};
+
+static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
+       .sf_entsize = xfs_dir3_sf_entsize,
+       .sf_nextentry = xfs_dir3_sf_nextentry,
+       .sf_get_ftype = xfs_dir3_sfe_get_ftype,
+       .sf_put_ftype = xfs_dir3_sfe_put_ftype,
+       .sf_get_ino = xfs_dir3_sfe_get_ino,
+       .sf_put_ino = xfs_dir3_sfe_put_ino,
+       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
+       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
+
+       .data_entsize = xfs_dir3_data_entsize,
+       .data_get_ftype = xfs_dir3_data_get_ftype,
+       .data_put_ftype = xfs_dir3_data_put_ftype,
+       .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
+       .data_bestfree_p = xfs_dir2_data_bestfree_p,
+
+       .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
+       .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1),
+       .data_first_offset =  sizeof(struct xfs_dir2_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1) +
+                               XFS_DIR3_DATA_ENTSIZE(2),
+       .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
+
+       .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
+       .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
+       .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
+       .data_entry_p = xfs_dir2_data_entry_p,
+       .data_unused_p = xfs_dir2_data_unused_p,
+
+       .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
+       .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
+       .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
+       .leaf_max_ents = xfs_dir2_max_leaf_ents,
+       .leaf_ents_p = xfs_dir2_leaf_ents_p,
+
+       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
+       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
+       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
+       .node_tree_p = xfs_da2_node_tree_p,
+
+       .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
+       .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
+       .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
+       .free_max_bests = xfs_dir2_free_max_bests,
+       .free_bests_p = xfs_dir2_free_bests_p,
+       .db_to_fdb = xfs_dir2_db_to_fdb,
+       .db_to_fdindex = xfs_dir2_db_to_fdindex,
+};
+
+static const struct xfs_dir_ops xfs_dir3_ops = {
+       .sf_entsize = xfs_dir3_sf_entsize,
+       .sf_nextentry = xfs_dir3_sf_nextentry,
+       .sf_get_ftype = xfs_dir3_sfe_get_ftype,
+       .sf_put_ftype = xfs_dir3_sfe_put_ftype,
+       .sf_get_ino = xfs_dir3_sfe_get_ino,
+       .sf_put_ino = xfs_dir3_sfe_put_ino,
+       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
+       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
+
+       .data_entsize = xfs_dir3_data_entsize,
+       .data_get_ftype = xfs_dir3_data_get_ftype,
+       .data_put_ftype = xfs_dir3_data_put_ftype,
+       .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
+       .data_bestfree_p = xfs_dir3_data_bestfree_p,
+
+       .data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
+       .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1),
+       .data_first_offset =  sizeof(struct xfs_dir3_data_hdr) +
+                               XFS_DIR3_DATA_ENTSIZE(1) +
+                               XFS_DIR3_DATA_ENTSIZE(2),
+       .data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
+
+       .data_dot_entry_p = xfs_dir3_data_dot_entry_p,
+       .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
+       .data_first_entry_p = xfs_dir3_data_first_entry_p,
+       .data_entry_p = xfs_dir3_data_entry_p,
+       .data_unused_p = xfs_dir3_data_unused_p,
+
+       .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
+       .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
+       .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
+       .leaf_max_ents = xfs_dir3_max_leaf_ents,
+       .leaf_ents_p = xfs_dir3_leaf_ents_p,
+
+       .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
+       .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
+       .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
+       .node_tree_p = xfs_da3_node_tree_p,
+
+       .free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
+       .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
+       .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
+       .free_max_bests = xfs_dir3_free_max_bests,
+       .free_bests_p = xfs_dir3_free_bests_p,
+       .db_to_fdb = xfs_dir3_db_to_fdb,
+       .db_to_fdindex = xfs_dir3_db_to_fdindex,
+};
+
+static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
+       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
+       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
+       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
+       .node_tree_p = xfs_da2_node_tree_p,
+};
+
+static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
+       .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
+       .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
+       .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
+       .node_tree_p = xfs_da3_node_tree_p,
+};
+
+/*
+ * Return the ops structure according to the current config.  If we are passed
+ * an inode, then that overrides the default config we use, which is based on
+ * feature bits.
+ */
+const struct xfs_dir_ops *
+xfs_dir_get_ops(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *dp)
+{
+       if (dp)
+               return dp->d_ops;
+       if (mp->m_dir_inode_ops)
+               return mp->m_dir_inode_ops;
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return &xfs_dir3_ops;
+       if (xfs_sb_version_hasftype(&mp->m_sb))
+               return &xfs_dir2_ftype_ops;
+       return &xfs_dir2_ops;
+}
+
+const struct xfs_dir_ops *
+xfs_nondir_get_ops(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *dp)
+{
+       if (dp)
+               return dp->d_ops;
+       if (mp->m_nondir_inode_ops)
+               return mp->m_nondir_inode_ops;
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return &xfs_dir3_nondir_ops;
+       return &xfs_dir2_nondir_ops;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
new file mode 100644 (file)
index 0000000..a0aca73
--- /dev/null
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_dinode.h"
+
+struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
+
+
+/*
+ * ASCII case-insensitive (i.e. A-Z) support for directories that was
+ * used in IRIX.
+ */
+STATIC xfs_dahash_t
+xfs_ascii_ci_hashname(
+       struct xfs_name *name)
+{
+       xfs_dahash_t    hash;
+       int             i;
+
+       for (i = 0, hash = 0; i < name->len; i++)
+               hash = tolower(name->name[i]) ^ rol32(hash, 7);
+
+       return hash;
+}
+
+STATIC enum xfs_dacmp
+xfs_ascii_ci_compname(
+       struct xfs_da_args *args,
+       const unsigned char *name,
+       int             len)
+{
+       enum xfs_dacmp  result;
+       int             i;
+
+       if (args->namelen != len)
+               return XFS_CMP_DIFFERENT;
+
+       result = XFS_CMP_EXACT;
+       for (i = 0; i < len; i++) {
+               if (args->name[i] == name[i])
+                       continue;
+               if (tolower(args->name[i]) != tolower(name[i]))
+                       return XFS_CMP_DIFFERENT;
+               result = XFS_CMP_CASE;
+       }
+
+       return result;
+}
+
+static struct xfs_nameops xfs_ascii_ci_nameops = {
+       .hashname       = xfs_ascii_ci_hashname,
+       .compname       = xfs_ascii_ci_compname,
+};
+
+int
+xfs_da_mount(
+       struct xfs_mount        *mp)
+{
+       struct xfs_da_geometry  *dageo;
+       int                     nodehdr_size;
+
+       ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
+       ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
+              XFS_MAX_BLOCKSIZE);
+
+       mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
+       mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
+
+       nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
+       mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+                                   KM_SLEEP | KM_MAYFAIL);
+       mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
+                                    KM_SLEEP | KM_MAYFAIL);
+       if (!mp->m_dir_geo || !mp->m_attr_geo) {
+               kmem_free(mp->m_dir_geo);
+               kmem_free(mp->m_attr_geo);
+               return ENOMEM;
+       }
+
+       /* set up directory geometry */
+       dageo = mp->m_dir_geo;
+       dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
+       dageo->fsblog = mp->m_sb.sb_blocklog;
+       dageo->blksize = 1 << dageo->blklog;
+       dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
+
+       /*
+        * Now that we've set up the block conversion variables, we can
+        * calculate the segment block constants using the geometry structure.
+        */
+       dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
+       dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
+       dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
+       dageo->node_ents = (dageo->blksize - nodehdr_size) /
+                               (uint)sizeof(xfs_da_node_entry_t);
+       dageo->magicpct = (dageo->blksize * 37) / 100;
+
+       /* set up attribute geometry - single fsb only */
+       dageo = mp->m_attr_geo;
+       dageo->blklog = mp->m_sb.sb_blocklog;
+       dageo->fsblog = mp->m_sb.sb_blocklog;
+       dageo->blksize = 1 << dageo->blklog;
+       dageo->fsbcount = 1;
+       dageo->node_ents = (dageo->blksize - nodehdr_size) /
+                               (uint)sizeof(xfs_da_node_entry_t);
+       dageo->magicpct = (dageo->blksize * 37) / 100;
+
+       if (xfs_sb_version_hasasciici(&mp->m_sb))
+               mp->m_dirnameops = &xfs_ascii_ci_nameops;
+       else
+               mp->m_dirnameops = &xfs_default_nameops;
+
+       return 0;
+}
+
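+/* Tear down the geometry structures allocated in xfs_da_mount(). */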
+void
+xfs_da_unmount(
+       struct xfs_mount        *mp)
+{
+       kmem_free(mp->m_dir_geo);
+       kmem_free(mp->m_attr_geo);
+}
+
+/*
+ * Return 1 if directory contains only "." and "..".
+ */
+int
+xfs_dir_isempty(
+       xfs_inode_t     *dp)
+{
+       xfs_dir2_sf_hdr_t       *sfp;
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
+               return 1;
+       if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
+               return 0;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       return !sfp->count;
+}
+
+/*
+ * Validate a given inode number.
+ */
+int
+xfs_dir_ino_validate(
+       xfs_mount_t     *mp,
+       xfs_ino_t       ino)
+{
+       xfs_agblock_t   agblkno;
+       xfs_agino_t     agino;
+       xfs_agnumber_t  agno;
+       int             ino_ok;
+       int             ioff;
+
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agblkno = XFS_INO_TO_AGBNO(mp, ino);
+       ioff = XFS_INO_TO_OFFSET(mp, ino);
+       agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
+       ino_ok =
+               agno < mp->m_sb.sb_agcount &&
+               agblkno < mp->m_sb.sb_agblocks &&
+               agblkno != 0 &&
+               ioff < (1 << mp->m_sb.sb_inopblog) &&
+               XFS_AGINO_TO_INO(mp, agno, agino) == ino;
+       if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
+                       XFS_RANDOM_DIR_INO_VALIDATE))) {
+               xfs_warn(mp, "Invalid inode number 0x%Lx",
+                               (unsigned long long) ino);
+               XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+       return 0;
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries.
+ */
+int
+xfs_dir_init(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       xfs_inode_t     *pdp)
+{
+       struct xfs_da_args *args;
+       int             error;
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+       if (error)
+               return error;
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->geo = dp->i_mount->m_dir_geo;
+       args->dp = dp;
+       args->trans = tp;
+       error = xfs_dir2_sf_create(args, pdp->i_ino);
+       kmem_free(args);
+       return error;
+}
+
+/*
+ * Enter a name in a directory.
+ */
+int
+xfs_dir_createname(
+       xfs_trans_t             *tp,
+       xfs_inode_t             *dp,
+       struct xfs_name         *name,
+       xfs_ino_t               inum,           /* new entry inode number */
+       xfs_fsblock_t           *first,         /* bmap's firstblock */
+       xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
+       xfs_extlen_t            total)          /* bmap's total block count */
+{
+       struct xfs_da_args      *args;
+       int                     rval;
+       int                     v;              /* type-checking value */
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
+               return rval;
+       XFS_STATS_INC(xs_dir_create);
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->geo = dp->i_mount->m_dir_geo;
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(args, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(args, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
+       else
+               rval = xfs_dir2_node_addname(args);
+
+out_free:
+       kmem_free(args);
+       return rval;
+}
+
+/*
+ * If doing a CI lookup and we get a case-insensitive match, duplicate the
+ * actual name into args->value. Return EEXIST for success (i.e. name found)
+ * or an error.
+ */
+int
+xfs_dir_cilookup_result(
+       struct xfs_da_args *args,
+       const unsigned char *name,
+       int             len)
+{
+       if (args->cmpresult == XFS_CMP_DIFFERENT)
+               return ENOENT;
+       if (args->cmpresult != XFS_CMP_CASE ||
+                                       !(args->op_flags & XFS_DA_OP_CILOOKUP))
+               return EEXIST;
+
+       args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
+       if (!args->value)
+               return ENOMEM;
+
+       memcpy(args->value, name, len);
+       args->valuelen = len;
+       return EEXIST;
+}
+
+/*
+ * Look up a name in a directory; give back the inode number.
+ * If ci_name is not NULL, return the actual name in ci_name if it differs
+ * from name; ci_name->name is set to NULL for an exact match.
+ */
+int
+xfs_dir_lookup(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       struct xfs_name *name,
+       xfs_ino_t       *inum,          /* out: inode number */
+       struct xfs_name *ci_name)       /* out: actual name if CI match */
+{
+       struct xfs_da_args *args;
+       int             rval;
+       int             v;              /* type-checking value */
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       XFS_STATS_INC(xs_dir_lookup);
+
+       /*
+        * We need to use KM_NOFS here so that lockdep will not throw false
+        * positive deadlock warnings on a non-transactional lookup path. It is
+        * safe to recurse into inode reclaim in that case, but lockdep can't
+        * easily be taught about it. Hence using KM_NOFS avoids having to add
+        * a bunch of lockdep class annotations into the reclaim path for the
+        * ilock.
+        */
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       args->geo = dp->i_mount->m_dir_geo;
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_OKNOENT;
+       if (ci_name)
+               args->op_flags |= XFS_DA_OP_CILOOKUP;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isblock(args, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_lookup(args);
+               goto out_check_rval;
+       }
+
+       rval = xfs_dir2_isleaf(args, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_lookup(args);
+       else
+               rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
+       if (rval == EEXIST)
+               rval = 0;
+       if (!rval) {
+               *inum = args->inumber;
+               if (ci_name) {
+                       ci_name->name = args->value;
+                       ci_name->len = args->valuelen;
+               }
+       }
+out_free:
+       kmem_free(args);
+       return rval;
+}
+
+/*
+ * Remove an entry from a directory.
+ */
+int
+xfs_dir_removename(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       struct xfs_name *name,
+       xfs_ino_t       ino,
+       xfs_fsblock_t   *first,         /* bmap's firstblock */
+       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       xfs_extlen_t    total)          /* bmap's total block count */
+{
+       struct xfs_da_args *args;
+       int             rval;
+       int             v;              /* type-checking value */
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+       XFS_STATS_INC(xs_dir_remove);
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->geo = dp->i_mount->m_dir_geo;
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = ino;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(args, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_removename(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(args, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_removename(args);
+       else
+               rval = xfs_dir2_node_removename(args);
+out_free:
+       kmem_free(args);
+       return rval;
+}
+
+/*
+ * Replace the inode number of a directory entry.
+ */
+int
+xfs_dir_replace(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       struct xfs_name *name,          /* name of entry to replace */
+       xfs_ino_t       inum,           /* new inode number */
+       xfs_fsblock_t   *first,         /* bmap's firstblock */
+       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
+       xfs_extlen_t    total)          /* bmap's total block count */
+{
+       struct xfs_da_args *args;
+       int             rval;
+       int             v;              /* type-checking value */
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+
+       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+       if (rval)
+               return rval;
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->geo = dp->i_mount->m_dir_geo;
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->inumber = inum;
+       args->dp = dp;
+       args->firstblock = first;
+       args->flist = flist;
+       args->total = total;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(args, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_replace(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(args, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_replace(args);
+       else
+               rval = xfs_dir2_node_replace(args);
+out_free:
+       kmem_free(args);
+       return rval;
+}
+
+/*
+ * See if this entry can be added to the directory without allocating space.
+ * The check is only made when the caller could not reserve enough space
+ * (resblks == 0).
+ */
+int
+xfs_dir_canenter(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *dp,
+       struct xfs_name *name,          /* name of entry to add */
+       uint            resblks)
+{
+       struct xfs_da_args *args;
+       int             rval;
+       int             v;              /* type-checking value */
+
+       if (resblks)
+               return 0;
+
+       ASSERT(S_ISDIR(dp->i_d.di_mode));
+
+       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+       if (!args)
+               return ENOMEM;
+
+       args->geo = dp->i_mount->m_dir_geo;
+       args->name = name->name;
+       args->namelen = name->len;
+       args->filetype = name->type;
+       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+       args->dp = dp;
+       args->whichfork = XFS_DATA_FORK;
+       args->trans = tp;
+       args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+                                                       XFS_DA_OP_OKNOENT;
+
+       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               rval = xfs_dir2_sf_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isblock(args, &v);
+       if (rval)
+               goto out_free;
+       if (v) {
+               rval = xfs_dir2_block_addname(args);
+               goto out_free;
+       }
+
+       rval = xfs_dir2_isleaf(args, &v);
+       if (rval)
+               goto out_free;
+       if (v)
+               rval = xfs_dir2_leaf_addname(args);
+       else
+               rval = xfs_dir2_node_addname(args);
+out_free:
+       kmem_free(args);
+       return rval;
+}
+
+/*
+ * Utility routines.
+ */
+
+/*
+ * Add a block to the directory.
+ *
+ * This routine is for data and free blocks, not leaf/node blocks, which are
+ * handled by xfs_da_grow_inode.
+ */
+int
+xfs_dir2_grow_inode(
+       struct xfs_da_args      *args,
+       int                     space,  /* v2 dir's space XFS_DIR2_xxx_SPACE */
+       xfs_dir2_db_t           *dbp)   /* out: block number added */
+{
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       xfs_fileoff_t           bno;    /* directory offset of new block */
+       int                     count;  /* count of filesystem blocks */
+       int                     error;
+
+       trace_xfs_dir2_grow_inode(args, space);
+
+       /*
+        * Set lowest possible block in the space requested.
+        */
+       bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
+       count = args->geo->fsbcount;
+
+       error = xfs_da_grow_inode_int(args, &bno, count);
+       if (error)
+               return error;
+
+       *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);
+
+       /*
+        * Update file's size if this is the data space and it grew.
+        */
+       if (space == XFS_DIR2_DATA_SPACE) {
+               xfs_fsize_t     size;           /* directory file (data) size */
+
+               size = XFS_FSB_TO_B(mp, bno + count);
+               if (size > dp->i_d.di_size) {
+                       dp->i_d.di_size = size;
+                       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+               }
+       }
+       return 0;
+}
+
+/*
+ * See if the directory is a single-block form directory.
+ */
+int
+xfs_dir2_isblock(
+       struct xfs_da_args      *args,
+       int                     *vp)    /* out: 1 is block, 0 is not block */
+{
+       xfs_fileoff_t           last;   /* last file offset */
+       int                     rval;
+
+       if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
+               return rval;
+       rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
+       ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
+       *vp = rval;
+       return 0;
+}
+
+/*
+ * See if the directory is a single-leaf form directory.
+ */
+int
+xfs_dir2_isleaf(
+       struct xfs_da_args      *args,
+       int                     *vp)    /* out: 1 is leaf, 0 is not leaf */
+{
+       xfs_fileoff_t           last;   /* last file offset */
+       int                     rval;
+
+       if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
+               return rval;
+       *vp = last == args->geo->leafblk + args->geo->fsbcount;
+       return 0;
+}
+
+/*
+ * Remove the given block from the directory.
+ * This routine is used for data and free blocks; leaf/node blocks are
+ * handled by xfs_da_shrink_inode.
+ */
+int
+xfs_dir2_shrink_inode(
+       xfs_da_args_t   *args,
+       xfs_dir2_db_t   db,
+       struct xfs_buf  *bp)
+{
+       xfs_fileoff_t   bno;            /* directory file offset */
+       xfs_dablk_t     da;             /* directory file offset */
+       int             done;           /* bunmap is finished */
+       xfs_inode_t     *dp;
+       int             error;
+       xfs_mount_t     *mp;
+       xfs_trans_t     *tp;
+
+       trace_xfs_dir2_shrink_inode(args, db);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       da = xfs_dir2_db_to_da(args->geo, db);
+       /*
+        * Unmap the fsblock(s).
+        */
+       if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
+                       XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
+                       &done))) {
+               /*
+                * ENOSPC actually can happen if we're in a removename with
+                * no space reservation, and the resulting block removal
+                * would cause a bmap btree split or conversion from extents
+                * to btree.  This can only happen for un-fragmented
+                * directory blocks, since you need to be punching out
+                * the middle of an extent.
+                * In this case we need to leave the block in the file,
+                * and not binval it.
+                * So the block has to be in a consistent empty state
+                * and appropriately logged.
+                * We don't free up the buffer; the caller can tell it
+                * hasn't happened since it got an error back.
+                */
+               return error;
+       }
+       ASSERT(done);
+       /*
+        * Invalidate the buffer from the transaction.
+        */
+       xfs_trans_binval(tp, bp);
+       /*
+        * If it's not a data block, we're done.
+        */
+       if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))
+               return 0;
+       /*
+        * If the block isn't the last one in the directory, we're done.
+        */
+       if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))
+               return 0;
+       bno = da;
+       if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
+               /*
+                * This can't really happen unless there's kernel corruption.
+                */
+               return error;
+       }
+       if (db == args->geo->datablk)
+               ASSERT(bno == 0);
+       else
+               ASSERT(bno > 0);
+       /*
+        * Set the size to the new last block.
+        */
+       dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
+       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
new file mode 100644 (file)
index 0000000..ab0bffc
--- /dev/null
@@ -0,0 +1,1265 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_dinode.h"
+
+/*
+ * Local function prototypes.
+ */
+static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp,
+                                   int first, int last);
+static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp);
+static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp,
+                                    int *entno);
+static int xfs_dir2_block_sort(const void *a, const void *b);
+
+static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
+
+/*
+ * One-time startup routine called from xfs_init().
+ */
+void
+xfs_dir_startup(void)
+{
+       xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1);
+       xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
+}
+
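+/*
+ * Check a block-form directory buffer: verify the magic number and, on v5
+ * (CRC-enabled) filesystems, the UUID and block number, then run the common
+ * data block checks.
+ */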
+static bool
+xfs_dir3_block_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
+                       return false;
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
+                       return false;
+       }
+       if (__xfs_dir3_data_check(NULL, bp))
+               return false;
+       return true;
+}
+
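+/* Read verifier: check the CRC on v5 filesystems, then the block contents. */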
+static void
+xfs_dir3_block_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_block_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
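+/*
+ * Write verifier: re-check the block contents, then stamp the LSN from the
+ * buffer log item into the header and recalculate the CRC (v5 filesystems
+ * only).
+ */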
+static void
+xfs_dir3_block_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_dir3_block_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
+       .verify_read = xfs_dir3_block_read_verify,
+       .verify_write = xfs_dir3_block_write_verify,
+};
+
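+/*
+ * Read the single data block of a block-form directory and set the buffer
+ * type so that log recovery can identify it.
+ */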
+int
+xfs_dir3_block_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
+       if (!err && tp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
+       return err;
+}
+
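+/*
+ * Initialize the header of a freshly allocated directory block and attach
+ * the block verifier.
+ */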
+static void
+xfs_dir3_block_init(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
+       struct xfs_inode        *dp)
+{
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       bp->b_ops = &xfs_dir3_block_buf_ops;
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               memset(hdr3, 0, sizeof(*hdr3));
+               hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
+               hdr3->blkno = cpu_to_be64(bp->b_bn);
+               hdr3->owner = cpu_to_be64(dp->i_ino);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+               return;
+       }
+       hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+}
+
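+/*
+ * Work out where a new entry and its leaf slot could be placed in the block:
+ * returns the unused space that can hold the data entry (*dupp), the unused
+ * space at the end of the data section that can be split for the new leaf
+ * slot (*enddupp), and whether the existing leaf entries must be compacted
+ * first (*compact).
+ */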
+static void
+xfs_dir2_block_need_space(
+       struct xfs_inode                *dp,
+       struct xfs_dir2_data_hdr        *hdr,
+       struct xfs_dir2_block_tail      *btp,
+       struct xfs_dir2_leaf_entry      *blp,
+       __be16                          **tagpp,
+       struct xfs_dir2_data_unused     **dupp,
+       struct xfs_dir2_data_unused     **enddupp,
+       int                             *compact,
+       int                             len)
+{
+       struct xfs_dir2_data_free       *bf;
+       __be16                          *tagp = NULL;
+       struct xfs_dir2_data_unused     *dup = NULL;
+       struct xfs_dir2_data_unused     *enddup = NULL;
+
+       *compact = 0;
+       bf = dp->d_ops->data_bestfree_p(hdr);
+
+       /*
+        * If there are stale entries we'll use one for the leaf.
+        */
+       if (btp->stale) {
+               if (be16_to_cpu(bf[0].length) >= len) {
+                       /*
+                        * The biggest entry is big enough to avoid compaction.
+                        */
+                       dup = (xfs_dir2_data_unused_t *)
+                             ((char *)hdr + be16_to_cpu(bf[0].offset));
+                       goto out;
+               }
+
+               /*
+                * Will need to compact to make this work.
+                * Tag just before the first leaf entry.
+                */
+               *compact = 1;
+               tagp = (__be16 *)blp - 1;
+
+               /* Data object just before the first leaf entry.  */
+               dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+
+               /*
+                * If it's not free then the data will go where the
+                * leaf data starts now, if it works at all.
+                */
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
+                           (uint)sizeof(*blp) < len)
+                               dup = NULL;
+               } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
+                       dup = NULL;
+               else
+                       dup = (xfs_dir2_data_unused_t *)blp;
+               goto out;
+       }
+
+       /*
+        * No stale entries, so just use free space.
+        * Tag just before the first leaf entry.
+        */
+       tagp = (__be16 *)blp - 1;
+
+       /* Data object just before the first leaf entry.  */
+       enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+
+       /*
+        * If it's not free then can't do this add without cleaning up:
+        * the space before the first leaf entry needs to be free so it
+        * can be expanded to hold the pointer to the new entry.
+        */
+       if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+               /*
+                * Check out the biggest freespace and see if it's the same one.
+                */
+               dup = (xfs_dir2_data_unused_t *)
+                     ((char *)hdr + be16_to_cpu(bf[0].offset));
+               if (dup != enddup) {
+                       /*
+                        * Not the same free entry, just check its length.
+                        */
+                       if (be16_to_cpu(dup->length) < len)
+                               dup = NULL;
+                       goto out;
+               }
+
+               /*
+                * It is the biggest freespace, but is it too small to hold
+                * both the new entry and its leaf slot?
+                */
+               if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
+                       /*
+                        * Yes, use the second-largest entry instead if it works.
+                        */
+                       if (be16_to_cpu(bf[1].length) >= len)
+                               dup = (xfs_dir2_data_unused_t *)
+                                     ((char *)hdr + be16_to_cpu(bf[1].offset));
+                       else
+                               dup = NULL;
+               }
+       }
+out:
+       *tagpp = tagp;
+       *dupp = dup;
+       *enddupp = enddup;
+}
+
+/*
+ * Compact the leaf entries.
+ * Leave the highest-numbered stale entry stale.
+ * XXX should be the one closest to mid but mid is not yet computed.
+ */
+static void
+xfs_dir2_block_compact(
+       struct xfs_da_args              *args,
+       struct xfs_buf                  *bp,
+       struct xfs_dir2_data_hdr        *hdr,
+       struct xfs_dir2_block_tail      *btp,
+       struct xfs_dir2_leaf_entry      *blp,
+       int                             *needlog,
+       int                             *lfloghigh,
+       int                             *lfloglow)
+{
+       int                     fromidx;        /* source leaf index */
+       int                     toidx;          /* target leaf index */
+       int                     needscan = 0;
+       int                     highstale;      /* high stale index */
+
+       fromidx = toidx = be32_to_cpu(btp->count) - 1;
+       highstale = *lfloghigh = -1;
+       for (; fromidx >= 0; fromidx--) {
+               if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
+                       if (highstale == -1)
+                               highstale = toidx;
+                       else {
+                               if (*lfloghigh == -1)
+                                       *lfloghigh = toidx;
+                               continue;
+                       }
+               }
+               if (fromidx < toidx)
+                       blp[toidx] = blp[fromidx];
+               toidx--;
+       }
+       *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
+       *lfloghigh -= be32_to_cpu(btp->stale) - 1;
+       be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
+       xfs_dir2_data_make_free(args, bp,
+               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+               (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
+               needlog, &needscan);
+       btp->stale = cpu_to_be32(1);
+       /*
+        * If we now need to rebuild the bestfree map, do so.
+        * This needs to happen before the next call to use_free.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(args->dp, hdr, needlog);
+}
+
+/*
+ * Add an entry to a block directory.
+ */
+int                                            /* error */
+xfs_dir2_block_addname(
+       xfs_da_args_t           *args)          /* directory op arguments */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       struct xfs_buf          *bp;            /* buffer for block */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       int                     compact;        /* need to compact leaf ents */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* block unused entry */
+       int                     error;          /* error return value */
+       xfs_dir2_data_unused_t  *enddup = NULL; /* unused at end of data */
+       xfs_dahash_t            hash;           /* hash value of found entry */
+       int                     high;           /* high index for binary srch */
+       int                     highstale;      /* high stale index */
+       int                     lfloghigh = 0;  /* last final leaf to log */
+       int                     lfloglow = 0;   /* first final leaf to log */
+       int                     len;            /* length of the new entry */
+       int                     low;            /* low index for binary srch */
+       int                     lowstale;       /* low stale index */
+       int                     mid = 0;        /* midpoint for binary srch */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log header */
+       int                     needscan;       /* need to rescan freespace */
+       __be16                  *tagp;          /* pointer to tag value */
+       xfs_trans_t             *tp;            /* transaction structure */
+
+       trace_xfs_dir2_block_addname(args);
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+
+       /* Read the (one and only) directory block into bp. */
+       error = xfs_dir3_block_read(tp, dp, &bp);
+       if (error)
+               return error;
+
+       len = dp->d_ops->data_entsize(args->namelen);
+
+       /*
+        * Set up pointers to parts of the block.
+        */
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+
+       /*
+        * Find out if we can reuse stale entries or whether we need extra
+        * space for entry and new leaf.
+        */
+       xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup,
+                                 &enddup, &compact, len);
+
+       /*
+        * Done everything we need for a space check now.
+        */
+       if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
+               xfs_trans_brelse(tp, bp);
+               if (!dup)
+                       return ENOSPC;
+               return 0;
+       }
+
+       /*
+        * If we don't have space for the new entry & leaf ...
+        */
+       if (!dup) {
+               /* Don't have a space reservation: return no-space.  */
+               if (args->total == 0)
+                       return ENOSPC;
+               /*
+                * Convert to the next larger format.
+                * Then add the new entry in that format.
+                */
+               error = xfs_dir2_block_to_leaf(args, bp);
+               if (error)
+                       return error;
+               return xfs_dir2_leaf_addname(args);
+       }
+
+       needlog = needscan = 0;
+
+       /*
+        * If need to compact the leaf entries, do it now.
+        */
+       if (compact) {
+               xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,
+                                     &lfloghigh, &lfloglow);
+               /* recalculate blp post-compaction */
+               blp = xfs_dir2_block_leaf_p(btp);
+       } else if (btp->stale) {
+               /*
+                * Set leaf logging boundaries to impossible state.
+                * For the no-stale case they're set explicitly.
+                */
+               lfloglow = be32_to_cpu(btp->count);
+               lfloghigh = -1;
+       }
+
+       /*
+        * Find the slot that's first lower than our hash value, -1 if none.
+        */
+       for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
+               mid = (low + high) >> 1;
+               if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
+                       break;
+               if (hash < args->hashval)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+       }
+       while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) {
+               mid--;
+       }
+       /*
+        * No stale entries, will use enddup space to hold new leaf.
+        */
+       if (!btp->stale) {
+               /*
+                * Mark the space needed for the new leaf entry, now in use.
+                */
+               xfs_dir2_data_use_free(args, bp, enddup,
+                       (xfs_dir2_data_aoff_t)
+                       ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
+                        sizeof(*blp)),
+                       (xfs_dir2_data_aoff_t)sizeof(*blp),
+                       &needlog, &needscan);
+               /*
+                * Update the tail (entry count).
+                */
+               be32_add_cpu(&btp->count, 1);
+               /*
+                * If we now need to rebuild the bestfree map, do so.
+                * This needs to happen before the next call to use_free.
+                */
+               if (needscan) {
+                       xfs_dir2_data_freescan(dp, hdr, &needlog);
+                       needscan = 0;
+               }
+               /*
+                * Adjust pointer to the first leaf entry, we're about to move
+                * the table up one to open up space for the new leaf entry.
+                * Then adjust our index to match.
+                */
+               blp--;
+               mid++;
+               if (mid)
+                       memmove(blp, &blp[1], mid * sizeof(*blp));
+               lfloglow = 0;
+               lfloghigh = mid;
+       }
+       /*
+        * Use a stale leaf for our new entry.
+        */
+       else {
+               for (lowstale = mid;
+                    lowstale >= 0 &&
+                       blp[lowstale].address !=
+                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
+                    lowstale--)
+                       continue;
+               for (highstale = mid + 1;
+                    highstale < be32_to_cpu(btp->count) &&
+                       blp[highstale].address !=
+                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
+                       (lowstale < 0 || mid - lowstale > highstale - mid);
+                    highstale++)
+                       continue;
+               /*
+                * Move entries toward the low-numbered stale entry.
+                */
+               if (lowstale >= 0 &&
+                   (highstale == be32_to_cpu(btp->count) ||
+                    mid - lowstale <= highstale - mid)) {
+                       if (mid - lowstale)
+                               memmove(&blp[lowstale], &blp[lowstale + 1],
+                                       (mid - lowstale) * sizeof(*blp));
+                       lfloglow = MIN(lowstale, lfloglow);
+                       lfloghigh = MAX(mid, lfloghigh);
+               }
+               /*
+                * Move entries toward the high-numbered stale entry.
+                */
+               else {
+                       ASSERT(highstale < be32_to_cpu(btp->count));
+                       mid++;
+                       if (highstale - mid)
+                               memmove(&blp[mid + 1], &blp[mid],
+                                       (highstale - mid) * sizeof(*blp));
+                       lfloglow = MIN(mid, lfloglow);
+                       lfloghigh = MAX(highstale, lfloghigh);
+               }
+               be32_add_cpu(&btp->stale, -1);
+       }
+       /*
+        * Point to the new data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       /*
+        * Fill in the leaf entry.
+        */
+       blp[mid].hashval = cpu_to_be32(args->hashval);
+       blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
+                               (char *)dep - (char *)hdr));
+       xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
+       /*
+        * Mark space for the data entry used.
+        */
+       xfs_dir2_data_use_free(args, bp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+               (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+       /*
+        * Create the new data entry.
+        */
+       dep->inumber = cpu_to_be64(args->inumber);
+       dep->namelen = args->namelen;
+       memcpy(dep->name, args->name, args->namelen);
+       dp->d_ops->data_put_ftype(dep, args->filetype);
+       tagp = dp->d_ops->data_entry_tag_p(dep);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+       /*
+        * Clean up the bestfree array and log the header, tail, and entry.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       if (needlog)
+               xfs_dir2_data_log_header(args, bp);
+       xfs_dir2_block_log_tail(tp, bp);
+       xfs_dir2_data_log_entry(args, bp, dep);
+       xfs_dir3_data_check(dp, bp);
+       return 0;
+}
+
+/*
+ * Log leaf entries from the block.
+ */
+static void
+xfs_dir2_block_log_leaf(
+       xfs_trans_t             *tp,            /* transaction structure */
+       struct xfs_buf          *bp,            /* block buffer */
+       int                     first,          /* index of first logged leaf */
+       int                     last)           /* index of last logged leaf */
+{
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+       xfs_dir2_leaf_entry_t   *blp;
+       xfs_dir2_block_tail_t   *btp;
+
+       btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+               (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
+}
+
+/*
+ * Log the block tail.
+ */
+static void
+xfs_dir2_block_log_tail(
+       xfs_trans_t             *tp,            /* transaction structure */
+       struct xfs_buf          *bp)            /* block buffer */
+{
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+       xfs_dir2_block_tail_t   *btp;
+
+       btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
+       xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+               (uint)((char *)(btp + 1) - (char *)hdr - 1));
+}
+
+/*
+ * Look up an entry in the block.  This is the external routine,
+ * xfs_dir2_block_lookup_int does the real work.
+ */
+int                                            /* error */
+xfs_dir2_block_lookup(
+       xfs_da_args_t           *args)          /* dir lookup arguments */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       trace_xfs_dir2_block_lookup(args);
+
+       /*
+        * Get the buffer, look up the entry.
+        * If not found (ENOENT), return with no buffer held.
+        */
+       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent)))
+               return error;
+       dp = args->dp;
+       mp = dp->i_mount;
+       hdr = bp->b_addr;
+       xfs_dir3_data_check(dp, bp);
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       /*
+        * Get the offset from the leaf entry, to point to the data.
+        */
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+                       xfs_dir2_dataptr_to_off(args->geo,
+                                               be32_to_cpu(blp[ent].address)));
+       /*
+        * Fill in inode number, CI name if appropriate, release the block.
+        */
+       args->inumber = be64_to_cpu(dep->inumber);
+       args->filetype = dp->d_ops->data_get_ftype(dep);
+       error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+       xfs_trans_brelse(args->trans, bp);
+       return error;
+}
+
+/*
+ * Internal block lookup routine.
+ */
+static int                                     /* error */
+xfs_dir2_block_lookup_int(
+       xfs_da_args_t           *args,          /* dir lookup arguments */
+       struct xfs_buf          **bpp,          /* returned block buffer */
+       int                     *entno)         /* returned entry number */
+{
+       xfs_dir2_dataptr_t      addr;           /* data entry address */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     error;          /* error return value */
+       xfs_dahash_t            hash;           /* found hash value */
+       int                     high;           /* binary search high index */
+       int                     low;            /* binary search low index */
+       int                     mid;            /* binary search current idx */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       enum xfs_dacmp          cmp;            /* comparison result */
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+
+       error = xfs_dir3_block_read(tp, dp, &bp);
+       if (error)
+               return error;
+
+       hdr = bp->b_addr;
+       xfs_dir3_data_check(dp, bp);
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       /*
+        * Loop doing a binary search for our hash value.
+        * Find our entry, ENOENT if it's not there.
+        */
+       for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) {
+               ASSERT(low <= high);
+               mid = (low + high) >> 1;
+               if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
+                       break;
+               if (hash < args->hashval)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+               if (low > high) {
+                       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+                       xfs_trans_brelse(tp, bp);
+                       return ENOENT;
+               }
+       }
+       /*
+        * Back up to the first one with the right hash value.
+        */
+       while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) {
+               mid--;
+       }
+       /*
+        * Now loop forward through all the entries with the
+        * right hash value looking for our name.
+        */
+       do {
+               if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Get pointer to the entry from the leaf.
+                */
+               dep = (xfs_dir2_data_entry_t *)
+                       ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));
+               /*
+                * Compare name and if it's an exact match, return the index
+                * and buffer. If it's the first case-insensitive match, store
+                * the index and buffer and continue looking for an exact match.
+                */
+               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                       args->cmpresult = cmp;
+                       *bpp = bp;
+                       *entno = mid;
+                       if (cmp == XFS_CMP_EXACT)
+                               return 0;
+               }
+       } while (++mid < be32_to_cpu(btp->count) &&
+                       be32_to_cpu(blp[mid].hashval) == hash);
+
+       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+       /*
+        * Here, we can only be doing a lookup (not a rename or replace).
+        * If a case-insensitive match was found earlier, return success.
+        */
+       if (args->cmpresult == XFS_CMP_CASE)
+               return 0;
+       /*
+        * No match, release the buffer and return ENOENT.
+        */
+       xfs_trans_brelse(tp, bp);
+       return ENOENT;
+}
+
+/*
+ * Remove an entry from a block format directory.
+ * If that makes the block small enough to fit in shortform, transform it.
+ */
+int                                            /* error */
+xfs_dir2_block_removename(
+       xfs_da_args_t           *args)          /* directory operation args */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf pointer */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* block leaf entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to fixup bestfree */
+       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
+       int                     size;           /* shortform size */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       trace_xfs_dir2_block_removename(args);
+
+       /*
+        * Look up the entry in the block.  Gets the buffer and entry index.
+        * It will always be there; the vnodeops level does a lookup first.
+        */
+       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) {
+               return error;
+       }
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       /*
+        * Point to the data entry using the leaf entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+                       xfs_dir2_dataptr_to_off(args->geo,
+                                               be32_to_cpu(blp[ent].address)));
+       /*
+        * Mark the data entry's space free.
+        */
+       needlog = needscan = 0;
+       xfs_dir2_data_make_free(args, bp,
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
+               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+       /*
+        * Fix up the block tail.
+        */
+       be32_add_cpu(&btp->stale, 1);
+       xfs_dir2_block_log_tail(tp, bp);
+       /*
+        * Remove the leaf entry by marking it stale.
+        */
+       blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
+       xfs_dir2_block_log_leaf(tp, bp, ent, ent);
+       /*
+        * Fix up bestfree, log the header if necessary.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       if (needlog)
+               xfs_dir2_data_log_header(args, bp);
+       xfs_dir3_data_check(dp, bp);
+       /*
+        * See if the size as a shortform is good enough.
+        */
+       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+       if (size > XFS_IFORK_DSIZE(dp))
+               return 0;
+
+       /*
+        * If it works, do the conversion.
+        */
+       return xfs_dir2_block_to_sf(args, bp, size, &sfh);
+}
+
+/*
+ * Replace an entry in a V2 block directory.
+ * Change the inode number to the new value.
+ */
+int                                            /* error */
+xfs_dir2_block_replace(
+       xfs_da_args_t           *args)          /* directory operation args */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* block data entry */
+       xfs_inode_t             *dp;            /* incore inode */
+       int                     ent;            /* leaf entry index */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+
+       trace_xfs_dir2_block_replace(args);
+
+       /*
+        * Lookup the entry in the directory.  Get buffer and entry index.
+        * This will always succeed since the caller has already done a lookup.
+        */
+       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) {
+               return error;
+       }
+       dp = args->dp;
+       mp = dp->i_mount;
+       hdr = bp->b_addr;
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       /*
+        * Point to the data entry we need to change.
+        */
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+                       xfs_dir2_dataptr_to_off(args->geo,
+                                               be32_to_cpu(blp[ent].address)));
+       ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
+       /*
+        * Change the inode number to the new value.
+        */
+       dep->inumber = cpu_to_be64(args->inumber);
+       dp->d_ops->data_put_ftype(dep, args->filetype);
+       xfs_dir2_data_log_entry(args, bp, dep);
+       xfs_dir3_data_check(dp, bp);
+       return 0;
+}
+
+/*
+ * Qsort comparison routine for the block leaf entries.
+ */
+static int                                     /* sort order */
+xfs_dir2_block_sort(
+       const void                      *a,     /* first leaf entry */
+       const void                      *b)     /* second leaf entry */
+{
+       const xfs_dir2_leaf_entry_t     *la;    /* first leaf entry */
+       const xfs_dir2_leaf_entry_t     *lb;    /* second leaf entry */
+
+       la = a;
+       lb = b;
+       return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 :
+               (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0);
+}
+
+/*
+ * Convert a V2 leaf directory to a V2 block directory if possible.
+ */
+int                                            /* error */
+xfs_dir2_leaf_to_block(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *lbp,           /* leaf buffer */
+       struct xfs_buf          *dbp)           /* data buffer */
+{
+       __be16                  *bestsp;        /* leaf bests table */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
+       int                     error;          /* error return value */
+       int                     from;           /* leaf from index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* file system mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to scan for bestfree */
+       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
+       int                     size;           /* bytes used */
+       __be16                  *tagp;          /* end of entry (tag) */
+       int                     to;             /* block/leaf to index */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       trace_xfs_dir2_leaf_to_block(args);
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = lbp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+
+       ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+              leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
+       /*
+        * If there are data blocks other than the first one, take this
+        * opportunity to remove trailing empty data blocks that may have
+        * been left behind during no-space-reservation operations.
+        * These will show up in the leaf bests table.
+        */
+       while (dp->i_d.di_size > args->geo->blksize) {
+               int hdrsz;
+
+               hdrsz = dp->d_ops->data_entry_offset;
+               bestsp = xfs_dir2_leaf_bests_p(ltp);
+               if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
+                                           args->geo->blksize - hdrsz) {
+                       if ((error =
+                           xfs_dir2_leaf_trim_data(args, lbp,
+                                   (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
+                               return error;
+               } else
+                       return 0;
+       }
+       /*
+        * Read the data block if we don't already have it, give up if it fails.
+        */
+       if (!dbp) {
+               error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);
+               if (error)
+                       return error;
+       }
+       hdr = dbp->b_addr;
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
+
+       /*
+        * Size of the "leaf" area in the block.
+        */
+       size = (uint)sizeof(xfs_dir2_block_tail_t) +
+              (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale);
+       /*
+        * Look at the last data entry.
+        */
+       tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1;
+       dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+       /*
+        * If it's not free or is too short we can't do it.
+        */
+       if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
+           be16_to_cpu(dup->length) < size)
+               return 0;
+
+       /*
+        * Start converting it to block form.
+        */
+       xfs_dir3_block_init(mp, tp, dbp, dp);
+
+       needlog = 1;
+       needscan = 0;
+       /*
+        * Use up the space at the end of the block (blp/btp).
+        */
+       xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
+               &needlog, &needscan);
+       /*
+        * Initialize the block tail.
+        */
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
+       btp->stale = 0;
+       xfs_dir2_block_log_tail(tp, dbp);
+       /*
+        * Initialize the block leaf area.  We compact out stale entries.
+        */
+       lep = xfs_dir2_block_leaf_p(btp);
+       for (from = to = 0; from < leafhdr.count; from++) {
+               if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       continue;
+               lep[to++] = ents[from];
+       }
+       ASSERT(to == be32_to_cpu(btp->count));
+       xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
+       /*
+        * Scan the bestfree if we need it and log the data block header.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       /*
+        * Pitch the old leaf block.
+        */
+       error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp);
+       if (error)
+               return error;
+
+       /*
+        * Now see if the resulting block can be shrunken to shortform.
+        */
+       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+       if (size > XFS_IFORK_DSIZE(dp))
+               return 0;
+
+       return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
+}
+
+/*
+ * Convert the shortform directory to block form.
+ */
+int                                            /* error */
+xfs_dir2_sf_to_block(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_dir2_db_t           blkno;          /* dir-relative block # (0) */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     dummy;          /* trash */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
+       int                     endoffset;      /* end of data objects */
+       int                     error;          /* error return value */
+       int                     i;              /* index */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to scan block freespc */
+       int                     newoffset;      /* offset from current entry */
+       int                     offset;         /* target block offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* sf entry pointer */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old shortform header  */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform header  */
+       __be16                  *tagp;          /* end of data entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_name         name;
+       struct xfs_ifork        *ifp;
+
+       trace_xfs_dir2_sf_to_block(args);
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+       ASSERT(ifp->if_flags & XFS_IFINLINE);
+       /*
+        * Bomb out if the shortform directory is way too short.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+               ASSERT(XFS_FORCED_SHUTDOWN(mp));
+               return EIO;
+       }
+
+       oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
+
+       ASSERT(ifp->if_bytes == dp->i_d.di_size);
+       ASSERT(ifp->if_u1.if_data != NULL);
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
+       ASSERT(dp->i_d.di_nextents == 0);
+
+       /*
+        * Copy the directory into a temporary buffer.
+        * Then pitch the incore inode data so we can make extents.
+        */
+       sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP);
+       memcpy(sfp, oldsfp, ifp->if_bytes);
+
+       xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
+       xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
+       dp->i_d.di_size = 0;
+
+       /*
+        * Add block 0 to the inode.
+        */
+       error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
+       if (error) {
+               kmem_free(sfp);
+               return error;
+       }
+       /*
+        * Initialize the data block, then convert it to block format.
+        */
+       error = xfs_dir3_data_init(args, blkno, &bp);
+       if (error) {
+               kmem_free(sfp);
+               return error;
+       }
+       xfs_dir3_block_init(mp, tp, bp, dp);
+       hdr = bp->b_addr;
+
+       /*
+        * Compute size of block "tail" area.
+        */
+       i = (uint)sizeof(*btp) +
+           (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+       /*
+        * The whole thing is initialized to free by the init routine.
+        * Say we're using the leaf and tail area.
+        */
+       dup = dp->d_ops->data_unused_p(hdr);
+       needlog = needscan = 0;
+       xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
+                              i, &needlog, &needscan);
+       ASSERT(needscan == 0);
+       /*
+        * Fill in the tail.
+        */
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       btp->count = cpu_to_be32(sfp->count + 2);       /* ., .. */
+       btp->stale = 0;
+       blp = xfs_dir2_block_leaf_p(btp);
+       endoffset = (uint)((char *)blp - (char *)hdr);
+       /*
+        * Remove the freespace, we'll manage it.
+        */
+       xfs_dir2_data_use_free(args, bp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+               be16_to_cpu(dup->length), &needlog, &needscan);
+       /*
+        * Create entry for .
+        */
+       dep = dp->d_ops->data_dot_entry_p(hdr);
+       dep->inumber = cpu_to_be64(dp->i_ino);
+       dep->namelen = 1;
+       dep->name[0] = '.';
+       dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
+       tagp = dp->d_ops->data_entry_tag_p(dep);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+       xfs_dir2_data_log_entry(args, bp, dep);
+       blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
+       blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
+                               (char *)dep - (char *)hdr));
+       /*
+        * Create entry for ..
+        */
+       dep = dp->d_ops->data_dotdot_entry_p(hdr);
+       dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));
+       dep->namelen = 2;
+       dep->name[0] = dep->name[1] = '.';
+       dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
+       tagp = dp->d_ops->data_entry_tag_p(dep);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+       xfs_dir2_data_log_entry(args, bp, dep);
+       blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
+       blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
+                               (char *)dep - (char *)hdr));
+       offset = dp->d_ops->data_first_offset;
+       /*
+        * Loop over existing entries, stuff them in.
+        */
+       i = 0;
+       if (!sfp->count)
+               sfep = NULL;
+       else
+               sfep = xfs_dir2_sf_firstentry(sfp);
+       /*
+        * Need to preserve the existing offset values in the sf directory.
+        * Insert holes (unused entries) where necessary.
+        */
+       while (offset < endoffset) {
+               /*
+                * sfep is null when we reach the end of the list.
+                */
+               if (sfep == NULL)
+                       newoffset = endoffset;
+               else
+                       newoffset = xfs_dir2_sf_get_offset(sfep);
+               /*
+                * There should be a hole here, make one.
+                */
+               if (offset < newoffset) {
+                       dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+                       dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+                       dup->length = cpu_to_be16(newoffset - offset);
+                       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
+                               ((char *)dup - (char *)hdr));
+                       xfs_dir2_data_log_unused(args, bp, dup);
+                       xfs_dir2_data_freeinsert(hdr,
+                                                dp->d_ops->data_bestfree_p(hdr),
+                                                dup, &dummy);
+                       offset += be16_to_cpu(dup->length);
+                       continue;
+               }
+               /*
+                * Copy a real entry.
+                */
+               dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
+               dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));
+               dep->namelen = sfep->namelen;
+               dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));
+               memcpy(dep->name, sfep->name, dep->namelen);
+               tagp = dp->d_ops->data_entry_tag_p(dep);
+               *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+               xfs_dir2_data_log_entry(args, bp, dep);
+               name.name = sfep->name;
+               name.len = sfep->namelen;
+               blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
+                                                       hashname(&name));
+               blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
+                                                (char *)dep - (char *)hdr));
+               offset = (int)((char *)(tagp + 1) - (char *)hdr);
+               if (++i == sfp->count)
+                       sfep = NULL;
+               else
+                       sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+       }
+       /* Done with the temporary buffer */
+       kmem_free(sfp);
+       /*
+        * Sort the leaf entries by hash value.
+        */
+       xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort);
+       /*
+        * Log the leaf entry area and tail.
+        * Already logged the header in data_init, ignore needlog.
+        */
+       ASSERT(needscan == 0);
+       xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
+       xfs_dir2_block_log_tail(tp, bp);
+       xfs_dir3_data_check(dp, bp);
+       return 0;
+}
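Both the shortform and leaf conversions above rely on the same block-format layout: the data header and entries occupy the front of the block, while a small tail (entry and stale counts) sits in the very last bytes and the hash-sorted leaf array sits immediately before it, which is exactly what xfs_dir2_block_tail_p() and xfs_dir2_block_leaf_p() compute. The following minimal, standalone sketch mirrors that pointer arithmetic; the struct names, field widths and the 4096-byte block size are simplifications for illustration, not the kernel definitions.

	/*
	 * Standalone sketch (not the kernel code) of how the block tail and
	 * its leaf array are located from the end of a directory block.
	 */
	#include <stdint.h>
	#include <stdio.h>

	struct blk_tail {		/* stands in for xfs_dir2_block_tail_t */
		uint32_t count;		/* total leaf entries */
		uint32_t stale;		/* stale leaf entries */
	};

	struct leaf_entry {		/* stands in for xfs_dir2_leaf_entry_t */
		uint32_t hashval;
		uint32_t address;
	};

	int main(void)
	{
		char block[4096];	/* assumed directory block size */

		/* The tail lives in the last bytes of the block... */
		struct blk_tail *btp =
			(struct blk_tail *)(block + sizeof(block)) - 1;
		btp->count = 3;

		/* ...and the leaf array sits immediately before it. */
		struct leaf_entry *blp =
			(struct leaf_entry *)btp - btp->count;

		printf("leaf array starts %zu bytes into the block\n",
		       (size_t)((char *)blp - block));
		return 0;
	}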
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
new file mode 100644 (file)
index 0000000..8c2f642
--- /dev/null
@@ -0,0 +1,1050 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_cksum.h"
+
+/*
+ * Check the consistency of the data block.
+ * The input can also be a block-format directory.
+ * Return 0 if the buffer is good, otherwise an error.
+ */
+int
+__xfs_dir3_data_check(
+       struct xfs_inode        *dp,            /* incore inode pointer */
+       struct xfs_buf          *bp)            /* data block's buffer */
+{
+       xfs_dir2_dataptr_t      addr;           /* addr for leaf lookup */
+       xfs_dir2_data_free_t    *bf;            /* bestfree table */
+       xfs_dir2_block_tail_t   *btp=NULL;      /* block tail */
+       int                     count;          /* count of entries found */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_data_entry_t   *dep;           /* data entry */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry */
+       char                    *endp;          /* end of useful data */
+       int                     freeseen;       /* mask of bestfrees seen */
+       xfs_dahash_t            hash;           /* hash of current name */
+       int                     i;              /* leaf index */
+       int                     lastfree;       /* last entry was unused */
+       xfs_dir2_leaf_entry_t   *lep=NULL;      /* block leaf entries */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       char                    *p;             /* current data position */
+       int                     stale;          /* count of stale leaves */
+       struct xfs_name         name;
+       const struct xfs_dir_ops *ops;
+       struct xfs_da_geometry  *geo;
+
+       mp = bp->b_target->bt_mount;
+       geo = mp->m_dir_geo;
+
+       /*
+        * We can be passed a null dp here from a verifier, so we need to go the
+        * hard way to get the directory ops.
+        */
+       ops = xfs_dir_get_ops(mp, dp);
+
+       hdr = bp->b_addr;
+       p = (char *)ops->data_entry_p(hdr);
+
+       switch (hdr->magic) {
+       case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
+       case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
+               btp = xfs_dir2_block_tail_p(geo, hdr);
+               lep = xfs_dir2_block_leaf_p(btp);
+               endp = (char *)lep;
+
+               /*
+                * The number of leaf entries is limited by the size of the
+                * block and the amount of space used by the data entries.
+                * We don't know how much space is used by the data entries yet,
+                * so just ensure that the count falls somewhere inside the
+                * block right now.
+                */
+               XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
+                       ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
+               break;
+       case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
+       case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
+               endp = (char *)hdr + geo->blksize;
+               break;
+       default:
+               XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
+               return EFSCORRUPTED;
+       }
+
+       /*
+        * Account for zero bestfree entries.
+        */
+       bf = ops->data_bestfree_p(hdr);
+       count = lastfree = freeseen = 0;
+       if (!bf[0].length) {
+               XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
+               freeseen |= 1 << 0;
+       }
+       if (!bf[1].length) {
+               XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
+               freeseen |= 1 << 1;
+       }
+       if (!bf[2].length) {
+               XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
+               freeseen |= 1 << 2;
+       }
+
+       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
+                                               be16_to_cpu(bf[1].length));
+       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
+                                               be16_to_cpu(bf[2].length));
+       /*
+        * Loop over the data/unused entries.
+        */
+       while (p < endp) {
+               dup = (xfs_dir2_data_unused_t *)p;
+               /*
+                * If it's unused, look for the space in the bestfree table.
+                * If we find it, account for that, else make sure it
+                * doesn't need to be there.
+                */
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
+                       XFS_WANT_CORRUPTED_RETURN(
+                               be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
+                                              (char *)dup - (char *)hdr);
+                       dfp = xfs_dir2_data_freefind(hdr, bf, dup);
+                       if (dfp) {
+                               i = (int)(dfp - bf);
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       (freeseen & (1 << i)) == 0);
+                               freeseen |= 1 << i;
+                       } else {
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       be16_to_cpu(dup->length) <=
+                                               be16_to_cpu(bf[2].length));
+                       }
+                       p += be16_to_cpu(dup->length);
+                       lastfree = 1;
+                       continue;
+               }
+               /*
+                * It's a real entry.  Validate the fields.
+                * If this is a block directory then make sure it's
+                * in the leaf section of the block.
+                * The linear search is crude but this is DEBUG code.
+                */
+               dep = (xfs_dir2_data_entry_t *)p;
+               XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
+               XFS_WANT_CORRUPTED_RETURN(
+                       !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
+               XFS_WANT_CORRUPTED_RETURN(
+                       be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
+                                              (char *)dep - (char *)hdr);
+               XFS_WANT_CORRUPTED_RETURN(
+                               ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
+               count++;
+               lastfree = 0;
+               if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+                   hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
+                       addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
+                                               (xfs_dir2_data_aoff_t)
+                                               ((char *)dep - (char *)hdr));
+                       name.name = dep->name;
+                       name.len = dep->namelen;
+                       hash = mp->m_dirnameops->hashname(&name);
+                       for (i = 0; i < be32_to_cpu(btp->count); i++) {
+                               if (be32_to_cpu(lep[i].address) == addr &&
+                                   be32_to_cpu(lep[i].hashval) == hash)
+                                       break;
+                       }
+                       XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
+               }
+               p += ops->data_entsize(dep->namelen);
+       }
+       /*
+        * Need to have seen all the entries and all the bestfree slots.
+        */
+       XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+           hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
+               for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
+                       if (lep[i].address ==
+                           cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                               stale++;
+                       if (i > 0)
+                               XFS_WANT_CORRUPTED_RETURN(
+                                       be32_to_cpu(lep[i].hashval) >=
+                                               be32_to_cpu(lep[i - 1].hashval));
+               }
+               XFS_WANT_CORRUPTED_RETURN(count ==
+                       be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
+               XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
+       }
+       return 0;
+}
+
+static bool
+xfs_dir3_data_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
+                       return false;
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
+                       return false;
+       }
+       if (__xfs_dir3_data_check(NULL, bp))
+               return false;
+       return true;
+}
+
+/*
+ * Readahead of the first block of the directory when it is opened is completely
+ * oblivious to the format of the directory. Hence we can either get a block
+ * format buffer or a data format buffer on readahead.
+ */
+static void
+xfs_dir3_data_reada_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+
+       switch (hdr->magic) {
+       case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
+       case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
+               bp->b_ops = &xfs_dir3_block_buf_ops;
+               bp->b_ops->verify_read(bp);
+               return;
+       case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
+       case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
+               xfs_dir3_data_verify(bp);
+               return;
+       default:
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               break;
+       }
+}
+
+static void
+xfs_dir3_data_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+                xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_data_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_dir3_data_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_dir3_data_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
+       .verify_read = xfs_dir3_data_read_verify,
+       .verify_write = xfs_dir3_data_write_verify,
+};
+
+static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
+       .verify_read = xfs_dir3_data_reada_verify,
+       .verify_write = xfs_dir3_data_write_verify,
+};
+
+
+int
+xfs_dir3_data_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mapped_bno,
+       struct xfs_buf          **bpp)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+       if (!err && tp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
+       return err;
+}
+
+int
+xfs_dir3_data_readahead(
+       struct xfs_inode        *dp,
+       xfs_dablk_t             bno,
+       xfs_daddr_t             mapped_bno)
+{
+       return xfs_da_reada_buf(dp, bno, mapped_bno,
+                               XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
+}
+
+/*
+ * Given a data block and an unused entry from that block,
+ * return the bestfree entry if any that corresponds to it.
+ */
+xfs_dir2_data_free_t *
+xfs_dir2_data_freefind(
+       struct xfs_dir2_data_hdr *hdr,          /* data block header */
+       struct xfs_dir2_data_free *bf,          /* bestfree table pointer */
+       struct xfs_dir2_data_unused *dup)       /* unused space */
+{
+       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
+       xfs_dir2_data_aoff_t    off;            /* offset value needed */
+#ifdef DEBUG
+       int                     matched;        /* matched the value */
+       int                     seenzero;       /* saw a 0 bestfree entry */
+#endif
+
+       off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
+
+#ifdef DEBUG
+       /*
+        * Validate some consistency in the bestfree table.
+        * Check order, non-overlapping entries, and if we find the
+        * one we're looking for it has to be exact.
+        */
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+       for (dfp = &bf[0], seenzero = matched = 0;
+            dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
+            dfp++) {
+               if (!dfp->offset) {
+                       ASSERT(!dfp->length);
+                       seenzero = 1;
+                       continue;
+               }
+               ASSERT(seenzero == 0);
+               if (be16_to_cpu(dfp->offset) == off) {
+                       matched = 1;
+                       ASSERT(dfp->length == dup->length);
+               } else if (off < be16_to_cpu(dfp->offset))
+                       ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset));
+               else
+                       ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
+               ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
+               if (dfp > &bf[0])
+                       ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
+       }
+#endif
+       /*
+        * If this is smaller than the smallest bestfree entry,
+        * it can't be there since they're sorted.
+        */
+       if (be16_to_cpu(dup->length) <
+           be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
+               return NULL;
+       /*
+        * Look at the three bestfree entries for our guy.
+        */
+       for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
+               if (!dfp->offset)
+                       return NULL;
+               if (be16_to_cpu(dfp->offset) == off)
+                       return dfp;
+       }
+       /*
+        * Didn't find it.  This only happens if there are duplicate lengths.
+        */
+       return NULL;
+}
+
+/*
+ * Insert an unused-space entry into the bestfree table.
+ */
+xfs_dir2_data_free_t *                         /* entry inserted */
+xfs_dir2_data_freeinsert(
+       struct xfs_dir2_data_hdr *hdr,          /* data block pointer */
+       struct xfs_dir2_data_free *dfp,         /* bestfree table pointer */
+       struct xfs_dir2_data_unused *dup,       /* unused space */
+       int                     *loghead)       /* log the data header (out) */
+{
+       xfs_dir2_data_free_t    new;            /* new bestfree entry */
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+
+       new.length = dup->length;
+       new.offset = cpu_to_be16((char *)dup - (char *)hdr);
+
+       /*
+        * Insert at position 0, 1, or 2; or not at all.
+        */
+       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
+               dfp[2] = dfp[1];
+               dfp[1] = dfp[0];
+               dfp[0] = new;
+               *loghead = 1;
+               return &dfp[0];
+       }
+       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
+               dfp[2] = dfp[1];
+               dfp[1] = new;
+               *loghead = 1;
+               return &dfp[1];
+       }
+       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
+               dfp[2] = new;
+               *loghead = 1;
+               return &dfp[2];
+       }
+       return NULL;
+}
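The bestfree table manipulated by xfs_dir2_data_freeinsert() is a fixed array of three {offset, length} records kept sorted by descending length, so it always describes the largest free regions in the block. The host-endian sketch below shows the same slide-down insert in isolation; the names and the dropped big-endian conversions are illustrative assumptions, not the kernel code.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct bestfree {		/* stands in for xfs_dir2_data_free_t */
		uint16_t offset;	/* offset of the free region */
		uint16_t length;	/* length of the free region */
	};

	#define BESTFREE_COUNT	3	/* mirrors XFS_DIR2_DATA_FD_COUNT */

	/* Insert at slot 0, 1 or 2, or not at all; return the slot used. */
	static struct bestfree *
	bestfree_insert(struct bestfree bf[BESTFREE_COUNT], struct bestfree new)
	{
		for (int i = 0; i < BESTFREE_COUNT; i++) {
			if (new.length > bf[i].length) {
				/* Slide the smaller entries down one slot. */
				memmove(&bf[i + 1], &bf[i],
					(BESTFREE_COUNT - 1 - i) * sizeof(*bf));
				bf[i] = new;
				return &bf[i];
			}
		}
		return NULL;		/* smaller than everything tracked */
	}

	int main(void)
	{
		struct bestfree bf[BESTFREE_COUNT] = {
			{ 64, 200 }, { 300, 80 }, { 0, 0 }
		};

		bestfree_insert(bf,
			(struct bestfree){ .offset = 500, .length = 120 });
		for (int i = 0; i < BESTFREE_COUNT; i++)
			printf("slot %d: off %u len %u\n",
			       i, bf[i].offset, bf[i].length);
		return 0;
	}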
+
+/*
+ * Remove a bestfree entry from the table.
+ */
+STATIC void
+xfs_dir2_data_freeremove(
+       struct xfs_dir2_data_hdr *hdr,          /* data block header */
+       struct xfs_dir2_data_free *bf,          /* bestfree table pointer */
+       struct xfs_dir2_data_free *dfp,         /* bestfree entry pointer */
+       int                     *loghead)       /* out: log data header */
+{
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+
+       /*
+        * It's the first entry, slide the next 2 up.
+        */
+       if (dfp == &bf[0]) {
+               bf[0] = bf[1];
+               bf[1] = bf[2];
+       }
+       /*
+        * It's the second entry, slide the 3rd entry up.
+        */
+       else if (dfp == &bf[1])
+               bf[1] = bf[2];
+       /*
+        * Must be the last entry.
+        */
+       else
+               ASSERT(dfp == &bf[2]);
+       /*
+        * Clear the 3rd entry, must be zero now.
+        */
+       bf[2].length = 0;
+       bf[2].offset = 0;
+       *loghead = 1;
+}
+
+/*
+ * Given a data block, reconstruct its bestfree map.
+ */
+void
+xfs_dir2_data_freescan(
+       struct xfs_inode        *dp,
+       struct xfs_dir2_data_hdr *hdr,
+       int                     *loghead)
+{
+       xfs_dir2_block_tail_t   *btp;           /* block tail */
+       xfs_dir2_data_entry_t   *dep;           /* active data entry */
+       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
+       struct xfs_dir2_data_free *bf;
+       char                    *endp;          /* end of block's data */
+       char                    *p;             /* current entry pointer */
+       struct xfs_da_geometry  *geo = dp->i_mount->m_dir_geo;
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+
+       /*
+        * Start by clearing the table.
+        */
+       bf = dp->d_ops->data_bestfree_p(hdr);
+       memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
+       *loghead = 1;
+       /*
+        * Set up pointers.
+        */
+       p = (char *)dp->d_ops->data_entry_p(hdr);
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+           hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
+               btp = xfs_dir2_block_tail_p(geo, hdr);
+               endp = (char *)xfs_dir2_block_leaf_p(btp);
+       } else
+               endp = (char *)hdr + geo->blksize;
+       /*
+        * Loop over the block's entries.
+        */
+       while (p < endp) {
+               dup = (xfs_dir2_data_unused_t *)p;
+               /*
+                * If it's a free entry, insert it.
+                */
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       ASSERT((char *)dup - (char *)hdr ==
+                              be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+                       xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
+                       p += be16_to_cpu(dup->length);
+               }
+               /*
+                * For active entries, check their tags and skip them.
+                */
+               else {
+                       dep = (xfs_dir2_data_entry_t *)p;
+                       ASSERT((char *)dep - (char *)hdr ==
+                              be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
+                       p += dp->d_ops->data_entsize(dep->namelen);
+               }
+       }
+}
+
+/*
+ * Initialize a data block at the given block number in the directory.
+ * Give back the buffer for the created block.
+ */
+int                                            /* error */
+xfs_dir3_data_init(
+       xfs_da_args_t           *args,          /* directory operation args */
+       xfs_dir2_db_t           blkno,          /* logical dir block number */
+       struct xfs_buf          **bpp)          /* output block buffer */
+{
+       struct xfs_buf          *bp;            /* block buffer */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
+       struct xfs_dir2_data_free *bf;
+       int                     error;          /* error return value */
+       int                     i;              /* bestfree index */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       int                     t;              /* temp */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Get the buffer set up for the block.
+        */
+       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
+                              -1, &bp, XFS_DATA_FORK);
+       if (error)
+               return error;
+       bp->b_ops = &xfs_dir3_data_buf_ops;
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
+
+       /*
+        * Initialize the header.
+        */
+       hdr = bp->b_addr;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+               memset(hdr3, 0, sizeof(*hdr3));
+               hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
+               hdr3->blkno = cpu_to_be64(bp->b_bn);
+               hdr3->owner = cpu_to_be64(dp->i_ino);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+
+       } else
+               hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+
+       bf = dp->d_ops->data_bestfree_p(hdr);
+       bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
+       for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
+               bf[i].length = 0;
+               bf[i].offset = 0;
+       }
+
+       /*
+        * Set up an unused entry for the block's body.
+        */
+       dup = dp->d_ops->data_unused_p(hdr);
+       dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+
+       t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
+       bf[0].length = cpu_to_be16(t);
+       dup->length = cpu_to_be16(t);
+       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
+       /*
+        * Log it and return it.
+        */
+       xfs_dir2_data_log_header(args, bp);
+       xfs_dir2_data_log_unused(args, bp, dup);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Log an active data entry from the block.
+ */
+void
+xfs_dir2_data_log_entry(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       xfs_dir2_data_entry_t   *dep)           /* data entry pointer */
+{
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+
+       xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
+               (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
+                      (char *)hdr - 1));
+}
+
+/*
+ * Log a data block header.
+ */
+void
+xfs_dir2_data_log_header(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp)
+{
+#ifdef DEBUG
+       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+#endif
+
+       xfs_trans_log_buf(args->trans, bp, 0,
+                         args->dp->d_ops->data_entry_offset - 1);
+}
+
+/*
+ * Log a data unused entry.
+ */
+void
+xfs_dir2_data_log_unused(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       xfs_dir2_data_unused_t  *dup)           /* data unused pointer */
+{
+       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+
+       /*
+        * Log the first part of the unused entry.
+        */
+       xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
+               (uint)((char *)&dup->length + sizeof(dup->length) -
+                      1 - (char *)hdr));
+       /*
+        * Log the end (tag) of the unused entry.
+        */
+       xfs_trans_log_buf(args->trans, bp,
+               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
+               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
+                      sizeof(xfs_dir2_data_off_t) - 1));
+}
+
+/*
+ * Make a byte range in the data block unused.
+ * Its current contents are unimportant.
+ */
+void
+xfs_dir2_data_make_free(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       xfs_dir2_data_aoff_t    offset,         /* starting byte offset */
+       xfs_dir2_data_aoff_t    len,            /* length in bytes */
+       int                     *needlogp,      /* out: log header */
+       int                     *needscanp)     /* out: regen bestfree */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* data block pointer */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
+       char                    *endptr;        /* end of data area */
+       int                     needscan;       /* need to regen bestfree */
+       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
+       xfs_dir2_data_unused_t  *postdup;       /* unused entry after us */
+       xfs_dir2_data_unused_t  *prevdup;       /* unused entry before us */
+       struct xfs_dir2_data_free *bf;
+
+       hdr = bp->b_addr;
+
+       /*
+        * Figure out where the end of the data area is.
+        */
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+           hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
+               endptr = (char *)hdr + args->geo->blksize;
+       else {
+               xfs_dir2_block_tail_t   *btp;   /* block tail */
+
+               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+                       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+               btp = xfs_dir2_block_tail_p(args->geo, hdr);
+               endptr = (char *)xfs_dir2_block_leaf_p(btp);
+       }
+       /*
+        * If this isn't the start of the block, then back up to
+        * the previous entry and see if it's free.
+        */
+       if (offset > args->dp->d_ops->data_entry_offset) {
+               __be16                  *tagp;  /* tag just before us */
+
+               tagp = (__be16 *)((char *)hdr + offset) - 1;
+               prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
+               if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
+                       prevdup = NULL;
+       } else
+               prevdup = NULL;
+       /*
+        * If this isn't the end of the block, see if the entry after
+        * us is free.
+        */
+       if ((char *)hdr + offset + len < endptr) {
+               postdup =
+                       (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+               if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
+                       postdup = NULL;
+       } else
+               postdup = NULL;
+       ASSERT(*needscanp == 0);
+       needscan = 0;
+       /*
+        * Previous and following entries are both free,
+        * merge everything into a single free entry.
+        */
+       bf = args->dp->d_ops->data_bestfree_p(hdr);
+       if (prevdup && postdup) {
+               xfs_dir2_data_free_t    *dfp2;  /* another bestfree pointer */
+
+               /*
+                * See if prevdup and/or postdup are in bestfree table.
+                */
+               dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
+               dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
+               /*
+                * We need a rescan unless there are exactly 2 free entries,
+                * namely our two.  In that case we know exactly what is
+                * happening; otherwise, since the third bestfree slot is in
+                * use, there may be other free regions the table does not
+                * describe.
+                */
+               needscan = (bf[2].length != 0);
+               /*
+                * Fix up the new big freespace.
+                */
+               be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
+               *xfs_dir2_data_unused_tag_p(prevdup) =
+                       cpu_to_be16((char *)prevdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, prevdup);
+               if (!needscan) {
+                       /*
+                        * Has to be the case that entries 0 and 1 are
+                        * dfp and dfp2 (don't know which is which), and
+                        * entry 2 is empty.
+                        * Remove entry 1 first then entry 0.
+                        */
+                       ASSERT(dfp && dfp2);
+                       if (dfp == &bf[1]) {
+                               dfp = &bf[0];
+                               ASSERT(dfp2 == dfp);
+                               dfp2 = &bf[1];
+                       }
+                       xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
+                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+                       /*
+                        * Now insert the new entry.
+                        */
+                       dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
+                                                      needlogp);
+                       ASSERT(dfp == &bf[0]);
+                       ASSERT(dfp->length == prevdup->length);
+                       ASSERT(!dfp[1].length);
+                       ASSERT(!dfp[2].length);
+               }
+       }
+       /*
+        * The entry before us is free, merge with it.
+        */
+       else if (prevdup) {
+               dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
+               be16_add_cpu(&prevdup->length, len);
+               *xfs_dir2_data_unused_tag_p(prevdup) =
+                       cpu_to_be16((char *)prevdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, prevdup);
+               /*
+                * If the previous entry was in the table, the new entry
+                * is longer, so it will be in the table too.  Remove
+                * the old one and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+                       xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
+               }
+               /*
+                * Otherwise we need a scan if the new entry is big enough.
+                */
+               else {
+                       needscan = be16_to_cpu(prevdup->length) >
+                                  be16_to_cpu(bf[2].length);
+               }
+       }
+       /*
+        * The following entry is free, merge with it.
+        */
+       else if (postdup) {
+               dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+               newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
+               *xfs_dir2_data_unused_tag_p(newdup) =
+                       cpu_to_be16((char *)newdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup);
+               /*
+                * If the following entry was in the table, the new entry
+                * is longer, so it will be in the table too.  Remove
+                * the old one and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+                       xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
+               }
+               /*
+                * Otherwise we need a scan if the new entry is big enough.
+                */
+               else {
+                       needscan = be16_to_cpu(newdup->length) >
+                                  be16_to_cpu(bf[2].length);
+               }
+       }
+       /*
+        * Neither neighbor is free.  Make a new entry.
+        */
+       else {
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+               newdup->length = cpu_to_be16(len);
+               *xfs_dir2_data_unused_tag_p(newdup) =
+                       cpu_to_be16((char *)newdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup);
+               xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
+       }
+       *needscanp = needscan;
+}
+
+/*
+ * Take a byte range out of an existing unused space and make it un-free.
+ */
+void
+xfs_dir2_data_use_free(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       xfs_dir2_data_unused_t  *dup,           /* unused entry */
+       xfs_dir2_data_aoff_t    offset,         /* starting offset to use */
+       xfs_dir2_data_aoff_t    len,            /* length to use */
+       int                     *needlogp,      /* out: need to log header */
+       int                     *needscanp)     /* out: need regen bestfree */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
+       int                     matchback;      /* matches end of freespace */
+       int                     matchfront;     /* matches start of freespace */
+       int                     needscan;       /* need to regen bestfree */
+       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
+       xfs_dir2_data_unused_t  *newdup2;       /* another new unused entry */
+       int                     oldlen;         /* old unused entry's length */
+       struct xfs_dir2_data_free *bf;
+
+       hdr = bp->b_addr;
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
+       ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
+       ASSERT(offset >= (char *)dup - (char *)hdr);
+       ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
+       ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+       /*
+        * Look up the entry in the bestfree table.
+        */
+       oldlen = be16_to_cpu(dup->length);
+       bf = args->dp->d_ops->data_bestfree_p(hdr);
+       dfp = xfs_dir2_data_freefind(hdr, bf, dup);
+       ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
+       /*
+        * Check for alignment with front and back of the entry.
+        */
+       matchfront = (char *)dup - (char *)hdr == offset;
+       matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
+       ASSERT(*needscanp == 0);
+       needscan = 0;
+       /*
+        * If we matched it exactly we just need to get rid of it from
+        * the bestfree table.
+        */
+       if (matchfront && matchback) {
+               if (dfp) {
+                       needscan = (bf[2].offset != 0);
+                       if (!needscan)
+                               xfs_dir2_data_freeremove(hdr, bf, dfp,
+                                                        needlogp);
+               }
+       }
+       /*
+        * We match the first part of the entry.
+        * Make a new entry with the remaining freespace.
+        */
+       else if (matchfront) {
+               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+               newdup->length = cpu_to_be16(oldlen - len);
+               *xfs_dir2_data_unused_tag_p(newdup) =
+                       cpu_to_be16((char *)newdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup);
+               /*
+                * If it was in the table, remove it and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
+                                                      needlogp);
+                       ASSERT(dfp != NULL);
+                       ASSERT(dfp->length == newdup->length);
+                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+                       /*
+                        * If the new entry went into the last slot, we
+                        * can't tell whether a better candidate was pushed
+                        * out of the table.  Rescan.
+                        */
+                       needscan = dfp == &bf[2];
+               }
+       }
+       /*
+        * We match the last part of the entry.
+        * Trim the allocated space off the tail of the entry.
+        */
+       else if (matchback) {
+               newdup = dup;
+               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
+               *xfs_dir2_data_unused_tag_p(newdup) =
+                       cpu_to_be16((char *)newdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup);
+               /*
+                * If it was in the table, remove it and add the new one.
+                */
+               if (dfp) {
+                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
+                       dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
+                                                      needlogp);
+                       ASSERT(dfp != NULL);
+                       ASSERT(dfp->length == newdup->length);
+                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+                       /*
+                        * If we got inserted at the last slot,
+                        * that means we don't know if there was a better
+                        * choice for the last slot, or not.  Rescan.
+                        */
+                       needscan = dfp == &bf[2];
+               }
+       }
+       /*
+        * Poking out the middle of an entry.
+        * Make two new entries.
+        */
+       else {
+               newdup = dup;
+               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
+               *xfs_dir2_data_unused_tag_p(newdup) =
+                       cpu_to_be16((char *)newdup - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup);
+               newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
+               newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
+               newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
+               *xfs_dir2_data_unused_tag_p(newdup2) =
+                       cpu_to_be16((char *)newdup2 - (char *)hdr);
+               xfs_dir2_data_log_unused(args, bp, newdup2);
+               /*
+                * If the old entry was in the table, we need to rescan if
+                * the 3rd bestfree entry was valid, since the two new
+                * entries are smaller than the old one.
+                * If no rescan is needed there were only 1 or 2 entries in
+                * the table, so removing the old one and adding the 2 new
+                * ones will work.
+                */
+               if (dfp) {
+                       needscan = (bf[2].length != 0);
+                       if (!needscan) {
+                               xfs_dir2_data_freeremove(hdr, bf, dfp,
+                                                        needlogp);
+                               xfs_dir2_data_freeinsert(hdr, bf, newdup,
+                                                        needlogp);
+                               xfs_dir2_data_freeinsert(hdr, bf, newdup2,
+                                                        needlogp);
+                       }
+               }
+       }
+       *needscanp = needscan;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
new file mode 100644 (file)
index 0000000..78b411b
--- /dev/null
@@ -0,0 +1,1831 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_cksum.h"
+
+/*
+ * Local function declarations.
+ */
+static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
+                                   int *indexp, struct xfs_buf **dbpp);
+static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
+                                   struct xfs_buf *bp, int first, int last);
+static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
+                                  struct xfs_buf *bp);
+
+/*
+ * Check the internal consistency of a leaf1 block.
+ * Pop an assert if something is wrong.
+ */
+#ifdef DEBUG
+#define        xfs_dir3_leaf_check(dp, bp) \
+do { \
+       if (!xfs_dir3_leaf1_check((dp), (bp))) \
+               ASSERT(0); \
+} while (0)
+
+STATIC bool
+xfs_dir3_leaf1_check(
+       struct xfs_inode        *dp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
+               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+                       return false;
+       } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
+               return false;
+
+       return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+}
+#else
+#define        xfs_dir3_leaf_check(dp, bp)
+#endif
+
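+/*
+ * Check the internal limits of a leaf block: the entry count, that the
+ * entries don't overlap the bests table in leaf1 format, that the hash
+ * values are in order and that the stale count matches the header.
+ */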
+bool
+xfs_dir3_leaf_check_int(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *dp,
+       struct xfs_dir3_icleaf_hdr *hdr,
+       struct xfs_dir2_leaf    *leaf)
+{
+       struct xfs_dir2_leaf_entry *ents;
+       xfs_dir2_leaf_tail_t    *ltp;
+       int                     stale;
+       int                     i;
+       const struct xfs_dir_ops *ops;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_da_geometry  *geo = mp->m_dir_geo;
+
+       /*
+        * We can be passed a null dp here from a verifier, so we need to go
+        * the hard way to get the directory ops.
+        */
+       ops = xfs_dir_get_ops(mp, dp);
+
+       if (!hdr) {
+               ops->leaf_hdr_from_disk(&leafhdr, leaf);
+               hdr = &leafhdr;
+       }
+
+       ents = ops->leaf_ents_p(leaf);
+       ltp = xfs_dir2_leaf_tail_p(geo, leaf);
+
+       /*
+        * XXX (dgc): This value is not restrictive enough.
+        * Should factor in the size of the bests table as well.
+        * We can deduce a value for that from di_size.
+        */
+       if (hdr->count > ops->leaf_max_ents(geo))
+               return false;
+
+       /* Leaves and bests don't overlap in leaf format. */
+       if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+            hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
+           (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
+               return false;
+
+       /* Check hash value order, count stale entries.  */
+       for (i = stale = 0; i < hdr->count; i++) {
+               if (i + 1 < hdr->count) {
+                       if (be32_to_cpu(ents[i].hashval) >
+                                       be32_to_cpu(ents[i + 1].hashval))
+                               return false;
+               }
+               if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       stale++;
+       }
+       if (hdr->stale != stale)
+               return false;
+       return true;
+}
+
+/*
+ * We verify the magic numbers before decoding the leaf header so that on debug
+ * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
+ * to incorrect magic numbers.
+ */
+static bool
+xfs_dir3_leaf_verify(
+       struct xfs_buf          *bp,
+       __uint16_t              magic)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+
+       ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+               __uint16_t              magic3;
+
+               magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
+                                                        : XFS_DIR3_LEAFN_MAGIC;
+
+               if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
+                       return false;
+               if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (leaf->hdr.info.magic != cpu_to_be16(magic))
+                       return false;
+       }
+
+       return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
+}
+
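+/*
+ * Read verifier: on CRC-enabled filesystems a bad checksum fails the buffer
+ * immediately, otherwise the structural checks run; any failure is recorded
+ * on the buffer.
+ */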
+static void
+__read_verify(
+       struct xfs_buf  *bp,
+       __uint16_t      magic)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_leaf_verify(bp, magic))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+__write_verify(
+       struct xfs_buf  *bp,
+       __uint16_t      magic)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_dir3_leaf_verify(bp, magic)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
+}
+
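+/*
+ * Write verifier: run the structural checks, then on CRC-enabled
+ * filesystems stamp the LSN from the buffer log item (if any) and
+ * recalculate the checksum before the buffer goes to disk.
+ */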
+static void
+xfs_dir3_leaf1_read_verify(
+       struct xfs_buf  *bp)
+{
+       __read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
+}
+
+static void
+xfs_dir3_leaf1_write_verify(
+       struct xfs_buf  *bp)
+{
+       __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
+}
+
+static void
+xfs_dir3_leafn_read_verify(
+       struct xfs_buf  *bp)
+{
+       __read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
+}
+
+static void
+xfs_dir3_leafn_write_verify(
+       struct xfs_buf  *bp)
+{
+       __write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
+}
+
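+/* Verifier operations for leaf1 and leafn format blocks. */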
+const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
+       .verify_read = xfs_dir3_leaf1_read_verify,
+       .verify_write = xfs_dir3_leaf1_write_verify,
+};
+
+const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
+       .verify_read = xfs_dir3_leafn_read_verify,
+       .verify_write = xfs_dir3_leafn_write_verify,
+};
+
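+/*
+ * Read a leaf1 or leafn block through the appropriate verifier and, when
+ * the read is part of a transaction, set the matching buffer log item type.
+ */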
+static int
+xfs_dir3_leaf_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
+       if (!err && tp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
+       return err;
+}
+
+int
+xfs_dir3_leafn_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
+       if (!err && tp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
+       return err;
+}
+
+/*
+ * Initialize a new leaf block, leaf1 or leafn magic accepted.
+ */
+static void
+xfs_dir3_leaf_init(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
+       xfs_ino_t               owner,
+       __uint16_t              type)
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+
+       ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+
+               memset(leaf3, 0, sizeof(*leaf3));
+
+               leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
+                                        ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
+                                        : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
+               leaf3->info.blkno = cpu_to_be64(bp->b_bn);
+               leaf3->info.owner = cpu_to_be64(owner);
+               uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid);
+       } else {
+               memset(leaf, 0, sizeof(*leaf));
+               leaf->hdr.info.magic = cpu_to_be16(type);
+       }
+
+       /*
+        * If it's a leaf-format directory, initialize the tail.
+        * Caller is responsible for initializing the bests table.
+        */
+       if (type == XFS_DIR2_LEAF1_MAGIC) {
+               struct xfs_dir2_leaf_tail *ltp;
+
+               ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
+               ltp->bestcount = 0;
+               bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
+       } else {
+               bp->b_ops = &xfs_dir3_leafn_buf_ops;
+               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
+       }
+}
+
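+/*
+ * Get a buffer for a new leaf block in the leaf/node address range,
+ * initialize it and log the header (and the tail for leaf1 blocks).
+ */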
+int
+xfs_dir3_leaf_get_buf(
+       xfs_da_args_t           *args,
+       xfs_dir2_db_t           bno,
+       struct xfs_buf          **bpp,
+       __uint16_t              magic)
+{
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_trans        *tp = args->trans;
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *bp;
+       int                     error;
+
+       ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
+              bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
+
+       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
+                              -1, &bp, XFS_DATA_FORK);
+       if (error)
+               return error;
+
+       xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
+       xfs_dir3_leaf_log_header(args, bp);
+       if (magic == XFS_DIR2_LEAF1_MAGIC)
+               xfs_dir3_leaf_log_tail(args, bp);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Convert a block form directory to a leaf form directory.
+ */
+int                                            /* error */
+xfs_dir2_block_to_leaf(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *dbp)           /* input block's buffer */
+{
+       __be16                  *bestsp;        /* leaf's bestsp entries */
+       xfs_dablk_t             blkno;          /* leaf block's bno */
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
+       xfs_dir2_block_tail_t   *btp;           /* block's tail */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       struct xfs_buf          *lbp;           /* leaf block's buffer */
+       xfs_dir2_db_t           ldb;            /* leaf block's bno */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log block header */
+       int                     needscan;       /* need to rescan bestfree */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_data_free *bf;
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       trace_xfs_dir2_block_to_leaf(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Add the leaf block to the inode.
+        * This interface will only put blocks in the leaf/node range.
+        * Since that's empty now, we'll get the root (block 0 in range).
+        */
+       if ((error = xfs_da_grow_inode(args, &blkno))) {
+               return error;
+       }
+       ldb = xfs_dir2_da_to_db(args->geo, blkno);
+       ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
+       /*
+        * Initialize the leaf block, get a buffer for it.
+        */
+       error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
+       if (error)
+               return error;
+
+       leaf = lbp->b_addr;
+       hdr = dbp->b_addr;
+       xfs_dir3_data_check(dp, dbp);
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+       bf = dp->d_ops->data_bestfree_p(hdr);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+
+       /*
+        * Set the counts in the leaf header.
+        */
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       leafhdr.count = be32_to_cpu(btp->count);
+       leafhdr.stale = be32_to_cpu(btp->stale);
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, lbp);
+
+       /*
+        * Could compact these but I think we always do the conversion
+        * after squeezing out stale entries.
+        */
+       memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
+       xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
+       needscan = 0;
+       needlog = 1;
+       /*
+        * Make the space formerly occupied by the leaf entries and block
+        * tail be free.
+        */
+       xfs_dir2_data_make_free(args, dbp,
+               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+               (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
+                                      (char *)blp),
+               &needlog, &needscan);
+       /*
+        * Fix up the block header, make it a data block.
+        */
+       dbp->b_ops = &xfs_dir3_data_buf_ops;
+       xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
+       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
+               hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+       else
+               hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
+
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       /*
+        * Set up leaf tail and bests table.
+        */
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       ltp->bestcount = cpu_to_be32(1);
+       bestsp = xfs_dir2_leaf_bests_p(ltp);
+       bestsp[0] = bf[0].length;
+       /*
+        * Log the data header and leaf bests table.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       xfs_dir3_leaf_check(dp, lbp);
+       xfs_dir3_data_check(dp, dbp);
+       xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
+       return 0;
+}
+
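+/*
+ * Find the stale leaf entries nearest to the insertion index: the closest
+ * one below it and the closest one at or above it, stopping the upward
+ * search once using the low one would move no more entries.
+ */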
+STATIC void
+xfs_dir3_leaf_find_stale(
+       struct xfs_dir3_icleaf_hdr *leafhdr,
+       struct xfs_dir2_leaf_entry *ents,
+       int                     index,
+       int                     *lowstale,
+       int                     *highstale)
+{
+       /*
+        * Find the first stale entry before our index, if any.
+        */
+       for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
+               if (ents[*lowstale].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       break;
+       }
+
+       /*
+        * Find the first stale entry at or after our index, if any.
+        * Stop if the result would require moving more entries than using
+        * lowstale.
+        */
+       for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
+               if (ents[*highstale].address ==
+                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       break;
+               if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
+                       break;
+       }
+}
+
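+/*
+ * Return a slot in the leaf entry array for a new entry at the given index,
+ * either by opening a hole (no stale entries) or by shifting entries onto
+ * the nearest stale one, and widen the caller's logging range to cover the
+ * entries that moved.
+ */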
+struct xfs_dir2_leaf_entry *
+xfs_dir3_leaf_find_entry(
+       struct xfs_dir3_icleaf_hdr *leafhdr,
+       struct xfs_dir2_leaf_entry *ents,
+       int                     index,          /* leaf table position */
+       int                     compact,        /* need to compact leaves */
+       int                     lowstale,       /* index of prev stale leaf */
+       int                     highstale,      /* index of next stale leaf */
+       int                     *lfloglow,      /* low leaf logging index */
+       int                     *lfloghigh)     /* high leaf logging index */
+{
+       if (!leafhdr->stale) {
+               xfs_dir2_leaf_entry_t   *lep;   /* leaf entry table pointer */
+
+               /*
+                * Now we need to make room to insert the leaf entry.
+                *
+                * If there are no stale entries, just insert a hole at index.
+                */
+               lep = &ents[index];
+               if (index < leafhdr->count)
+                       memmove(lep + 1, lep,
+                               (leafhdr->count - index) * sizeof(*lep));
+
+               /*
+                * Record low and high logging indices for the leaf.
+                */
+               *lfloglow = index;
+               *lfloghigh = leafhdr->count++;
+               return lep;
+       }
+
+       /*
+        * There are stale entries.
+        *
+        * We will use one of them for the new entry.  It's probably not at
+        * the right location, so we'll have to shift some up or down first.
+        *
+        * If we didn't compact before, we need to find the nearest stale
+        * entries before and after our insertion point.
+        */
+       if (compact == 0)
+               xfs_dir3_leaf_find_stale(leafhdr, ents, index,
+                                        &lowstale, &highstale);
+
+       /*
+        * If the low one is better, use it.
+        */
+       if (lowstale >= 0 &&
+           (highstale == leafhdr->count ||
+            index - lowstale - 1 < highstale - index)) {
+               ASSERT(index - lowstale - 1 >= 0);
+               ASSERT(ents[lowstale].address ==
+                      cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+               /*
+                * Copy entries up to cover the stale entry and make room
+                * for the new entry.
+                */
+               if (index - lowstale - 1 > 0) {
+                       memmove(&ents[lowstale], &ents[lowstale + 1],
+                               (index - lowstale - 1) *
+                                       sizeof(xfs_dir2_leaf_entry_t));
+               }
+               *lfloglow = MIN(lowstale, *lfloglow);
+               *lfloghigh = MAX(index - 1, *lfloghigh);
+               leafhdr->stale--;
+               return &ents[index - 1];
+       }
+
+       /*
+        * The high one is better, so use that one.
+        */
+       ASSERT(highstale - index >= 0);
+       ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+       /*
+        * Copy entries down to cover the stale entry and make room for the
+        * new entry.
+        */
+       if (highstale - index > 0) {
+               memmove(&ents[index + 1], &ents[index],
+                       (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
+       }
+       *lfloglow = MIN(index, *lfloglow);
+       *lfloghigh = MAX(highstale, *lfloghigh);
+       leafhdr->stale--;
+       return &ents[index];
+}
+
+/*
+ * Add an entry to a leaf form directory.
+ */
+int                                            /* error */
+xfs_dir2_leaf_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       __be16                  *bestsp;        /* freespace table in leaf */
+       int                     compact;        /* need to compact leaves */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* data unused entry */
+       int                     error;          /* error return value */
+       int                     grown;          /* allocated new data block */
+       int                     highstale;      /* index of next stale leaf */
+       int                     i;              /* temporary, index */
+       int                     index;          /* leaf table position */
+       struct xfs_buf          *lbp;           /* leaf's buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     length;         /* length of new entry */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
+       int                     lfloglow;       /* low leaf logging index */
+       int                     lfloghigh;      /* high leaf logging index */
+       int                     lowstale;       /* index of prev stale leaf */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needbytes;      /* leaf block bytes needed */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data free */
+       __be16                  *tagp;          /* end of data entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       xfs_dir2_db_t           use_block;      /* data block number */
+       struct xfs_dir2_data_free *bf;          /* bestfree table */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       trace_xfs_dir2_leaf_addname(args);
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+
+       error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
+       if (error)
+               return error;
+
+       /*
+        * Look up the entry by hash value and name.
+        * We know it's not there; our caller has already done a lookup.
+        * So the index is of the entry to insert in front of.
+        * But if there are duplicate hash values the index is of the first
+        * of those.
+        */
+       index = xfs_dir2_leaf_search_hash(args, lbp);
+       leaf = lbp->b_addr;
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       bestsp = xfs_dir2_leaf_bests_p(ltp);
+       length = dp->d_ops->data_entsize(args->namelen);
+
+       /*
+        * See if there are any entries with the same hash value
+        * and space in their block for the new entry.
+        * This is good because it puts multiple same-hash value entries
+        * in a data block, improving the lookup of those entries.
+        */
+       for (use_block = -1, lep = &ents[index];
+            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
+            index++, lep++) {
+               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+               ASSERT(i < be32_to_cpu(ltp->bestcount));
+               ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
+               if (be16_to_cpu(bestsp[i]) >= length) {
+                       use_block = i;
+                       break;
+               }
+       }
+       /*
+        * Didn't find a block yet, so do a linear search of all the data
+        * blocks.
+        */
+       if (use_block == -1) {
+               for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
+                       /*
+                        * Remember a block we see that's missing.
+                        */
+                       if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
+                           use_block == -1)
+                               use_block = i;
+                       else if (be16_to_cpu(bestsp[i]) >= length) {
+                               use_block = i;
+                               break;
+                       }
+               }
+       }
+       /*
+        * How many bytes do we need in the leaf block?
+        */
+       needbytes = 0;
+       if (!leafhdr.stale)
+               needbytes += sizeof(xfs_dir2_leaf_entry_t);
+       if (use_block == -1)
+               needbytes += sizeof(xfs_dir2_data_off_t);
+
+       /*
+        * Now kill use_block if it refers to a missing block, so we
+        * can use it as an indication of allocation needed.
+        */
+       if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
+               use_block = -1;
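+       /*
+        * The free space available for new leaf entries is the gap between
+        * the end of the entry array and the bests table, which grows
+        * backwards from the block tail.
+        */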
+       /*
+        * If we don't have enough free bytes but we can make enough
+        * by compacting out stale entries, we'll do that.
+        */
+       if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
+           leafhdr.stale > 1)
+               compact = 1;
+
+       /*
+        * Otherwise if we don't have enough free bytes we need to
+        * convert to node form.
+        */
+       else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
+               /*
+                * Just checking or no space reservation, give up.
+                */
+               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+                                                       args->total == 0) {
+                       xfs_trans_brelse(tp, lbp);
+                       return ENOSPC;
+               }
+               /*
+                * Convert to node form.
+                */
+               error = xfs_dir2_leaf_to_node(args, lbp);
+               if (error)
+                       return error;
+               /*
+                * Then add the new entry.
+                */
+               return xfs_dir2_node_addname(args);
+       }
+       /*
+        * Otherwise it will fit without compaction.
+        */
+       else
+               compact = 0;
+       /*
+        * If just checking, then it will fit unless we needed to allocate
+        * a new data block.
+        */
+       if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
+               xfs_trans_brelse(tp, lbp);
+               return use_block == -1 ? ENOSPC : 0;
+       }
+       /*
+        * If no allocations are allowed, return now before we've
+        * changed anything.
+        */
+       if (args->total == 0 && use_block == -1) {
+               xfs_trans_brelse(tp, lbp);
+               return ENOSPC;
+       }
+       /*
+        * Need to compact the leaf entries, removing stale ones.
+        * Leave one stale entry behind - the one closest to our
+        * insertion index - and we'll shift that one to our insertion
+        * point later.
+        */
+       if (compact) {
+               xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
+                       &highstale, &lfloglow, &lfloghigh);
+       }
+       /*
+        * There are stale entries but we aren't compacting, so seed the
+        * log-low and log-high indices with impossibly bad values that any
+        * real index will override later.
+        */
+       else if (leafhdr.stale) {
+               lfloglow = leafhdr.count;
+               lfloghigh = -1;
+       }
+       /*
+        * If there was no data block space found, we need to allocate
+        * a new one.
+        */
+       if (use_block == -1) {
+               /*
+                * Add the new data block.
+                */
+               if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
+                               &use_block))) {
+                       xfs_trans_brelse(tp, lbp);
+                       return error;
+               }
+               /*
+                * Initialize the block.
+                */
+               if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
+                       xfs_trans_brelse(tp, lbp);
+                       return error;
+               }
+               /*
+                * If we're adding a new data block on the end we need to
+                * extend the bests table.  Copy it up one entry.
+                */
+               if (use_block >= be32_to_cpu(ltp->bestcount)) {
+                       bestsp--;
+                       memmove(&bestsp[0], &bestsp[1],
+                               be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
+                       be32_add_cpu(&ltp->bestcount, 1);
+                       xfs_dir3_leaf_log_tail(args, lbp);
+                       xfs_dir3_leaf_log_bests(args, lbp, 0,
+                                               be32_to_cpu(ltp->bestcount) - 1);
+               }
+               /*
+                * If we're filling in a previously empty block just log it.
+                */
+               else
+                       xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
+               hdr = dbp->b_addr;
+               bf = dp->d_ops->data_bestfree_p(hdr);
+               bestsp[use_block] = bf[0].length;
+               grown = 1;
+       } else {
+               /*
+                * Already had space in some data block.
+                * Just read that one in.
+                */
+               error = xfs_dir3_data_read(tp, dp,
+                                  xfs_dir2_db_to_da(args->geo, use_block),
+                                  -1, &dbp);
+               if (error) {
+                       xfs_trans_brelse(tp, lbp);
+                       return error;
+               }
+               hdr = dbp->b_addr;
+               bf = dp->d_ops->data_bestfree_p(hdr);
+               grown = 0;
+       }
+       /*
+        * Point to the biggest freespace in our data block.
+        */
+       dup = (xfs_dir2_data_unused_t *)
+             ((char *)hdr + be16_to_cpu(bf[0].offset));
+       ASSERT(be16_to_cpu(dup->length) >= length);
+       needscan = needlog = 0;
+       /*
+        * Mark the initial part of our freespace in use for the new entry.
+        */
+       xfs_dir2_data_use_free(args, dbp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
+               &needlog, &needscan);
+       /*
+        * Initialize our new entry (at last).
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       dep->inumber = cpu_to_be64(args->inumber);
+       dep->namelen = args->namelen;
+       memcpy(dep->name, args->name, dep->namelen);
+       dp->d_ops->data_put_ftype(dep, args->filetype);
+       tagp = dp->d_ops->data_entry_tag_p(dep);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+       /*
+        * Need to rescan and fix up the bestfree table.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       /*
+        * Need to log the data block's header.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       xfs_dir2_data_log_entry(args, dbp, dep);
+       /*
+        * If the bests table needs to be changed, do it.
+        * Log the change unless we've already done that.
+        */
+       if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
+               bestsp[use_block] = bf[0].length;
+               if (!grown)
+                       xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
+       }
+
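+       /*
+        * Grab a slot in the leaf for the new entry, reusing the stale entry
+        * nearest the insertion point if one is available.
+        */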
+       lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
+                                      highstale, &lfloglow, &lfloghigh);
+
+       /*
+        * Fill in the new leaf entry.
+        */
+       lep->hashval = cpu_to_be32(args->hashval);
+       lep->address = cpu_to_be32(
+                               xfs_dir2_db_off_to_dataptr(args->geo, use_block,
+                               be16_to_cpu(*tagp)));
+       /*
+        * Log the leaf fields and give up the buffers.
+        */
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, lbp);
+       xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
+       xfs_dir3_leaf_check(dp, lbp);
+       xfs_dir3_data_check(dp, dbp);
+       return 0;
+}
+
+/*
+ * Compact out any stale entries in the leaf.
+ * Log the header and changed leaf entries, if any.
+ */
+void
+xfs_dir3_leaf_compact(
+       xfs_da_args_t   *args,          /* operation arguments */
+       struct xfs_dir3_icleaf_hdr *leafhdr,
+       struct xfs_buf  *bp)            /* leaf buffer */
+{
+       int             from;           /* source leaf index */
+       xfs_dir2_leaf_t *leaf;          /* leaf structure */
+       int             loglow;         /* first leaf entry to log */
+       int             to;             /* target leaf index */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_inode *dp = args->dp;
+
+       leaf = bp->b_addr;
+       if (!leafhdr->stale)
+               return;
+
+       /*
+        * Compress out the stale entries in place.
+        */
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
+               if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                       continue;
+               /*
+                * Only actually copy the entries that are different.
+                */
+               if (from > to) {
+                       if (loglow == -1)
+                               loglow = to;
+                       ents[to] = ents[from];
+               }
+               to++;
+       }
+       /*
+        * Update and log the header, log the leaf entries.
+        */
+       ASSERT(leafhdr->stale == from - to);
+       leafhdr->count -= leafhdr->stale;
+       leafhdr->stale = 0;
+
+       dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
+       xfs_dir3_leaf_log_header(args, bp);
+       if (loglow != -1)
+               xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
+}
+
+/*
+ * Compact the leaf entries, removing stale ones.
+ * Leave one stale entry behind - the one closest to our
+ * insertion index - and the caller will shift that one to our insertion
+ * point later.
+ * Return new insertion index, where the remaining stale entry is,
+ * and leaf logging indices.
+ */
+void
+xfs_dir3_leaf_compact_x1(
+       struct xfs_dir3_icleaf_hdr *leafhdr,
+       struct xfs_dir2_leaf_entry *ents,
+       int             *indexp,        /* insertion index */
+       int             *lowstalep,     /* out: stale entry before us */
+       int             *highstalep,    /* out: stale entry after us */
+       int             *lowlogp,       /* out: low log index */
+       int             *highlogp)      /* out: high log index */
+{
+       int             from;           /* source copy index */
+       int             highstale;      /* stale entry at/after index */
+       int             index;          /* insertion index */
+       int             keepstale;      /* source index of kept stale */
+       int             lowstale;       /* stale entry before index */
+       int             newindex = 0;   /* new insertion index */
+       int             to;             /* destination copy index */
+
+       ASSERT(leafhdr->stale > 1);
+       index = *indexp;
+
+       xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
+
+       /*
+        * Pick the better of lowstale and highstale.
+        */
+       if (lowstale >= 0 &&
+           (highstale == leafhdr->count ||
+            index - lowstale <= highstale - index))
+               keepstale = lowstale;
+       else
+               keepstale = highstale;
+       /*
+        * Copy the entries in place, removing all the stale entries
+        * except keepstale.
+        */
+       for (from = to = 0; from < leafhdr->count; from++) {
+               /*
+                * Notice the new value of index.
+                */
+               if (index == from)
+                       newindex = to;
+               if (from != keepstale &&
+                   ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
+                       if (from == to)
+                               *lowlogp = to;
+                       continue;
+               }
+               /*
+                * Record the new keepstale value for the insertion.
+                */
+               if (from == keepstale)
+                       lowstale = highstale = to;
+               /*
+                * Copy only the entries that have moved.
+                */
+               if (from > to)
+                       ents[to] = ents[from];
+               to++;
+       }
+       ASSERT(from > to);
+       /*
+        * If the insertion point was past the last entry,
+        * set the new insertion point accordingly.
+        */
+       if (index == from)
+               newindex = to;
+       *indexp = newindex;
+       /*
+        * Adjust the leaf header values.
+        */
+       leafhdr->count -= from - to;
+       leafhdr->stale = 1;
+       /*
+        * Remember the low/high stale value only in the "right"
+        * direction.
+        */
+       if (lowstale >= newindex)
+               lowstale = -1;
+       else
+               highstale = leafhdr->count;
+       *highlogp = leafhdr->count - 1;
+       *lowstalep = lowstale;
+       *highstalep = highstale;
+}
+
+/*
+ * Log the bests entries indicated from a leaf1 block.
+ */
+static void
+xfs_dir3_leaf_log_bests(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,            /* leaf buffer */
+       int                     first,          /* first entry to log */
+       int                     last)           /* last entry to log */
+{
+       __be16                  *firstb;        /* pointer to first entry */
+       __be16                  *lastb;         /* pointer to last entry */
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
+
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       firstb = xfs_dir2_leaf_bests_p(ltp) + first;
+       lastb = xfs_dir2_leaf_bests_p(ltp) + last;
+       xfs_trans_log_buf(args->trans, bp,
+               (uint)((char *)firstb - (char *)leaf),
+               (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
+}
+
+/*
+ * Log the leaf entries indicated from a leaf1 or leafn block.
+ */
+void
+xfs_dir3_leaf_log_ents(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       int                     first,
+       int                     last)
+{
+       xfs_dir2_leaf_entry_t   *firstlep;      /* pointer to first entry */
+       xfs_dir2_leaf_entry_t   *lastlep;       /* pointer to last entry */
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       struct xfs_dir2_leaf_entry *ents;
+
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
+
+       ents = args->dp->d_ops->leaf_ents_p(leaf);
+       firstlep = &ents[first];
+       lastlep = &ents[last];
+       xfs_trans_log_buf(args->trans, bp,
+               (uint)((char *)firstlep - (char *)leaf),
+               (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
+}
+
+/*
+ * Log the header of the leaf1 or leafn block.
+ */
+void
+xfs_dir3_leaf_log_header(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
+
+       xfs_trans_log_buf(args->trans, bp,
+                         (uint)((char *)&leaf->hdr - (char *)leaf),
+                         args->dp->d_ops->leaf_hdr_size - 1);
+}
+
+/*
+ * Log the tail of the leaf1 block.
+ */
+STATIC void
+xfs_dir3_leaf_log_tail(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+
+       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
+
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
+               (uint)(args->geo->blksize - 1));
+}
+
+/*
+ * Look up the entry referred to by args in the leaf format directory.
+ * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
+ * is also used by the node-format code.
+ */
+int
+xfs_dir2_leaf_lookup(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* found entry index */
+       struct xfs_buf          *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_leaf_entry *ents;
+
+       trace_xfs_dir2_leaf_lookup(args);
+
+       /*
+        * Look up name in the leaf block, returning both buffers and index.
+        */
+       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+               return error;
+       }
+       tp = args->trans;
+       dp = args->dp;
+       xfs_dir3_leaf_check(dp, lbp);
+       leaf = lbp->b_addr;
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       /*
+        * Get to the leaf entry and contained data entry address.
+        */
+       lep = &ents[index];
+
+       /*
+        * Point to the data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)dbp->b_addr +
+              xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
+       /*
+        * Return the found inode number & CI name if appropriate
+        */
+       args->inumber = be64_to_cpu(dep->inumber);
+       args->filetype = dp->d_ops->data_get_ftype(dep);
+       error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+       xfs_trans_brelse(tp, dbp);
+       xfs_trans_brelse(tp, lbp);
+       return error;
+}
+
+/*
+ * Look up name/hash in the leaf block.
+ * Fill in indexp with the found index, and dbpp with the data buffer.
+ * If not found dbpp will be NULL, and ENOENT comes back.
+ * lbpp will always be filled in with the leaf buffer unless there's an error.
+ */
+static int                                     /* error */
+xfs_dir2_leaf_lookup_int(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          **lbpp,         /* out: leaf buffer */
+       int                     *indexp,        /* out: index in leaf block */
+       struct xfs_buf          **dbpp)         /* out: data buffer */
+{
+       xfs_dir2_db_t           curdb = -1;     /* current data block number */
+       struct xfs_buf          *dbp = NULL;    /* data buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* index in leaf block */
+       struct xfs_buf          *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_db_t           newdb;          /* new data block number */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       xfs_dir2_db_t           cidb = -1;      /* case match data block no. */
+       enum xfs_dacmp          cmp;            /* name compare result */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+
+       error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
+       if (error)
+               return error;
+
+       *lbpp = lbp;
+       leaf = lbp->b_addr;
+       xfs_dir3_leaf_check(dp, lbp);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       /*
+        * Look for the first leaf entry with our hash value.
+        */
+       index = xfs_dir2_leaf_search_hash(args, lbp);
+       /*
+        * Loop over all the entries with the right hash value
+        * looking to match the name.
+        */
+       for (lep = &ents[index];
+            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
+            lep++, index++) {
+               /*
+                * Skip over stale leaf entries.
+                */
+               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Get the new data block number.
+                */
+               newdb = xfs_dir2_dataptr_to_db(args->geo,
+                                              be32_to_cpu(lep->address));
+               /*
+                * If it's not the same as the old data block number,
+                * need to pitch the old one and read the new one.
+                */
+               if (newdb != curdb) {
+                       if (dbp)
+                               xfs_trans_brelse(tp, dbp);
+                       error = xfs_dir3_data_read(tp, dp,
+                                          xfs_dir2_db_to_da(args->geo, newdb),
+                                          -1, &dbp);
+                       if (error) {
+                               xfs_trans_brelse(tp, lbp);
+                               return error;
+                       }
+                       curdb = newdb;
+               }
+               /*
+                * Point to the data entry.
+                */
+               dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
+                       xfs_dir2_dataptr_to_off(args->geo,
+                                               be32_to_cpu(lep->address)));
+               /*
+                * Compare name and if it's an exact match, return the index
+                * and buffer. If it's the first case-insensitive match, store
+                * the index and buffer and continue looking for an exact match.
+                */
+               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                       args->cmpresult = cmp;
+                       *indexp = index;
+                       /* case exact match: return the current buffer. */
+                       if (cmp == XFS_CMP_EXACT) {
+                               *dbpp = dbp;
+                               return 0;
+                       }
+                       cidb = curdb;
+               }
+       }
+       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+       /*
+        * Here, we can only be doing a lookup (not a rename or remove).
+        * If a case-insensitive match was found earlier, re-read the
+        * appropriate data block if required and return it.
+        */
+       if (args->cmpresult == XFS_CMP_CASE) {
+               ASSERT(cidb != -1);
+               if (cidb != curdb) {
+                       xfs_trans_brelse(tp, dbp);
+                       error = xfs_dir3_data_read(tp, dp,
+                                          xfs_dir2_db_to_da(args->geo, cidb),
+                                          -1, &dbp);
+                       if (error) {
+                               xfs_trans_brelse(tp, lbp);
+                               return error;
+                       }
+               }
+               *dbpp = dbp;
+               return 0;
+       }
+       /*
+        * No match found, return ENOENT.
+        */
+       ASSERT(cidb == -1);
+       if (dbp)
+               xfs_trans_brelse(tp, dbp);
+       xfs_trans_brelse(tp, lbp);
+       return ENOENT;
+}
+
+/*
+ * Remove an entry from a leaf format directory.
+ */
+int                                            /* error */
+xfs_dir2_leaf_removename(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       __be16                  *bestsp;        /* leaf block best freespace */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_db_t           db;             /* data block number */
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry structure */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dir2_db_t           i;              /* temporary data block # */
+       int                     index;          /* index into leaf entries */
+       struct xfs_buf          *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       xfs_dir2_data_off_t     oldbest;        /* old value of best free */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_data_free *bf;          /* bestfree table */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       trace_xfs_dir2_leaf_removename(args);
+
+       /*
+        * Lookup the leaf entry, get the leaf and data blocks read in.
+        */
+       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+               return error;
+       }
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = lbp->b_addr;
+       hdr = dbp->b_addr;
+       xfs_dir3_data_check(dp, dbp);
+       bf = dp->d_ops->data_bestfree_p(hdr);
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       /*
+        * Point to the leaf entry, use that to point to the data entry.
+        */
+       lep = &ents[index];
+       db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+               xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
+       needscan = needlog = 0;
+       oldbest = be16_to_cpu(bf[0].length);
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       bestsp = xfs_dir2_leaf_bests_p(ltp);
+       ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
+       /*
+        * Mark the former data entry unused.
+        */
+       xfs_dir2_data_make_free(args, dbp,
+               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
+               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+       /*
+        * We just mark the leaf entry stale by putting a null in it.
+        */
+       leafhdr.stale++;
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, lbp);
+
+       lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
+       xfs_dir3_leaf_log_ents(args, lbp, index, index);
+
+       /*
+        * Scan the freespace in the data block again if necessary,
+        * log the data block header if necessary.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       /*
+        * If the longest freespace in the data block has changed,
+        * put the new value in the bests table and log that.
+        */
+       if (be16_to_cpu(bf[0].length) != oldbest) {
+               bestsp[db] = bf[0].length;
+               xfs_dir3_leaf_log_bests(args, lbp, db, db);
+       }
+       xfs_dir3_data_check(dp, dbp);
+       /*
+        * If the data block is now empty then get rid of the data block.
+        */
+       if (be16_to_cpu(bf[0].length) ==
+                       args->geo->blksize - dp->d_ops->data_entry_offset) {
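+               /*
+                * Data block 0 always holds the "." and ".." entries, so it
+                * can never be the block that has just gone empty.
+                */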
+               ASSERT(db != args->geo->datablk);
+               if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
+                       /*
+                        * We can't get rid of the data block: freeing it
+                        * would have needed a bmap btree block allocation
+                        * and there is no space reservation for one.
+                        * Just return success and leave the empty block
+                        * in place.
+                        */
+                       if (error == ENOSPC && args->total == 0)
+                               error = 0;
+                       xfs_dir3_leaf_check(dp, lbp);
+                       return error;
+               }
+               dbp = NULL;
+               /*
+                * If this is the last data block then compact the
+                * bests table by getting rid of entries.
+                */
+               if (db == be32_to_cpu(ltp->bestcount) - 1) {
+                       /*
+                        * Look for the last active entry (i).
+                        */
+                       for (i = db - 1; i > 0; i--) {
+                               if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
+                                       break;
+                       }
+                       /*
+                        * Copy the table down so inactive entries at the
+                        * end are removed.
+                        */
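+                       /*
+                        * The bests table ends right at the leaf tail, so
+                        * dropping the (db - i) trailing entries means the
+                        * surviving entries slide up by that many slots.
+                        */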
+                       memmove(&bestsp[db - i], bestsp,
+                               (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
+                       be32_add_cpu(&ltp->bestcount, -(db - i));
+                       xfs_dir3_leaf_log_tail(args, lbp);
+                       xfs_dir3_leaf_log_bests(args, lbp, 0,
+                                               be32_to_cpu(ltp->bestcount) - 1);
+               } else
+                       bestsp[db] = cpu_to_be16(NULLDATAOFF);
+       }
+       /*
+        * If the data block was not the first one, drop it.
+        */
+       else if (db != args->geo->datablk)
+               dbp = NULL;
+
+       xfs_dir3_leaf_check(dp, lbp);
+       /*
+        * See if we can convert to block form.
+        */
+       return xfs_dir2_leaf_to_block(args, lbp, dbp);
+}
+
+/*
+ * Replace the inode number in a leaf format directory entry.
+ */
+int                                            /* error */
+xfs_dir2_leaf_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       int                     index;          /* index of leaf entry */
+       struct xfs_buf          *lbp;           /* leaf buffer */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_leaf_entry *ents;
+
+       trace_xfs_dir2_leaf_replace(args);
+
+       /*
+        * Look up the entry.
+        */
+       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+               return error;
+       }
+       dp = args->dp;
+       leaf = lbp->b_addr;
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       /*
+        * Point to the leaf entry, get data address from it.
+        */
+       lep = &ents[index];
+       /*
+        * Point to the data entry.
+        */
+       dep = (xfs_dir2_data_entry_t *)
+             ((char *)dbp->b_addr +
+              xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
+       ASSERT(args->inumber != be64_to_cpu(dep->inumber));
+       /*
+        * Put the new inode number in, log it.
+        */
+       dep->inumber = cpu_to_be64(args->inumber);
+       dp->d_ops->data_put_ftype(dep, args->filetype);
+       tp = args->trans;
+       xfs_dir2_data_log_entry(args, dbp, dep);
+       xfs_dir3_leaf_check(dp, lbp);
+       xfs_trans_brelse(tp, lbp);
+       return 0;
+}
+
+/*
+ * Return index in the leaf block (lbp) which is either the first
+ * one with this hash value, or if there are none, the insert point
+ * for that hash value.
+ */
+int                                            /* index value */
+xfs_dir2_leaf_search_hash(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *lbp)           /* leaf buffer */
+{
+       xfs_dahash_t            hash=0;         /* hash from this entry */
+       xfs_dahash_t            hashwant;       /* hash value looking for */
+       int                     high;           /* high leaf index */
+       int                     low;            /* low leaf index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     mid=0;          /* current leaf index */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       leaf = lbp->b_addr;
+       ents = args->dp->d_ops->leaf_ents_p(leaf);
+       args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       /*
+        * Note, the table cannot be empty, so we have to go through the loop.
+        * Binary search the leaf entries looking for our hash value.
+        */
+       for (lep = ents, low = 0, high = leafhdr.count - 1,
+               hashwant = args->hashval;
+            low <= high; ) {
+               mid = (low + high) >> 1;
+               if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
+                       break;
+               if (hash < hashwant)
+                       low = mid + 1;
+               else
+                       high = mid - 1;
+       }
+       /*
+        * Found one, back up through all the equal hash values.
+        */
+       if (hash == hashwant) {
+               while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
+                       mid--;
+               }
+       }
+       /*
+        * Need to point to an entry higher than ours.
+        */
+       else if (hash < hashwant)
+               mid++;
+       return mid;
+}
+
+/*
+ * Trim off a trailing data block.  We know it's empty since the leaf
+ * freespace table says so.
+ */
+int                                            /* error */
+xfs_dir2_leaf_trim_data(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *lbp,           /* leaf buffer */
+       xfs_dir2_db_t           db)             /* data block number */
+{
+       __be16                  *bestsp;        /* leaf bests table */
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Read the offending data block.  We need its buffer.
+        */
+       error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
+                                  -1, &dbp);
+       if (error)
+               return error;
+
+       leaf = lbp->b_addr;
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+
+#ifdef DEBUG
+{
+       struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
+       struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);
+
+       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
+       ASSERT(be16_to_cpu(bf[0].length) ==
+              args->geo->blksize - dp->d_ops->data_entry_offset);
+       ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
+}
+#endif
+
+       /*
+        * Get rid of the data block.
+        */
+       if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
+               ASSERT(error != ENOSPC);
+               xfs_trans_brelse(tp, dbp);
+               return error;
+       }
+       /*
+        * Eliminate the last bests entry from the table.
+        */
+       bestsp = xfs_dir2_leaf_bests_p(ltp);
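+       /*
+        * The bests table ends right at the leaf tail, so dropping the last
+        * entry means sliding the remaining entries up one slot.
+        */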
+       be32_add_cpu(&ltp->bestcount, -1);
+       memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
+       xfs_dir3_leaf_log_tail(args, lbp);
+       xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+       return 0;
+}
+
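+/*
+ * Size in bytes of a leaf1 block holding this header's non-stale entries,
+ * a bests table with 'counts' entries, and the leaf tail.
+ */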
+static inline size_t
+xfs_dir3_leaf_size(
+       struct xfs_dir3_icleaf_hdr      *hdr,
+       int                             counts)
+{
+       int     entries;
+       int     hdrsize;
+
+       entries = hdr->count - hdr->stale;
+       if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
+           hdr->magic == XFS_DIR2_LEAFN_MAGIC)
+               hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
+       else
+               hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
+
+       return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
+                      + counts * sizeof(xfs_dir2_data_off_t)
+                      + sizeof(xfs_dir2_leaf_tail_t);
+}
+
+/*
+ * Convert node form directory to leaf form directory.
+ * The root of the node form dir needs to already be a LEAFN block.
+ * Just return if we can't do anything.
+ */
+int                                            /* error */
+xfs_dir2_node_to_leaf(
+       xfs_da_state_t          *state)         /* directory operation state */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       struct xfs_buf          *fbp;           /* buffer for freespace block */
+       xfs_fileoff_t           fo;             /* freespace file offset */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       struct xfs_buf          *lbp;           /* buffer for leaf block */
+       xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     rval;           /* successful free trim? */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_dir3_icfree_hdr freehdr;
+
+       /*
+        * There's more than a leaf level in the btree, so there must
+        * be multiple leafn blocks.  Give up.
+        */
+       if (state->path.active > 1)
+               return 0;
+       args = state->args;
+
+       trace_xfs_dir2_node_to_leaf(args);
+
+       mp = state->mp;
+       dp = args->dp;
+       tp = args->trans;
+       /*
+        * Get the last offset in the file.
+        */
+       if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
+               return error;
+       }
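+       /* Step back from the first unused offset to the start of the last block. */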
+       fo -= args->geo->fsbcount;
+       /*
+        * If there are freespace blocks other than the first one,
+        * take this opportunity to remove trailing empty freespace blocks
+        * that may have been left behind during no-space-reservation
+        * operations.
+        */
+       while (fo > args->geo->freeblk) {
+               if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
+                       return error;
+               }
+               if (rval)
+                       fo -= args->geo->fsbcount;
+               else
+                       return 0;
+       }
+       /*
+        * Now find the block just before the freespace block.
+        */
+       if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
+               return error;
+       }
+       /*
+        * If it's not the single leaf block, give up.
+        */
+       if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
+               return 0;
+       lbp = state->path.blk[0].bp;
+       leaf = lbp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
+              leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+
+       /*
+        * Read the freespace block.
+        */
+       error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);
+       if (error)
+               return error;
+       free = fbp->b_addr;
+       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
+       ASSERT(!freehdr.firstdb);
+
+       /*
+        * Now see if the leafn and free data will fit in a leaf1.
+        * If not, release the buffer and give up.
+        */
+       if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
+               xfs_trans_brelse(tp, fbp);
+               return 0;
+       }
+
+       /*
+        * If the leaf has any stale entries in it, compress them out.
+        */
+       if (leafhdr.stale)
+               xfs_dir3_leaf_compact(args, &leafhdr, lbp);
+
+       lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
+       xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
+       leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
+                                       ? XFS_DIR2_LEAF1_MAGIC
+                                       : XFS_DIR3_LEAF1_MAGIC;
+
+       /*
+        * Set up the leaf tail from the freespace block.
+        */
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       ltp->bestcount = cpu_to_be32(freehdr.nvalid);
+
+       /*
+        * Set up the leaf bests table.
+        */
+       memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),
+               freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
+
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, lbp);
+       xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
+       xfs_dir3_leaf_log_tail(args, lbp);
+       xfs_dir3_leaf_check(dp, lbp);
+
+       /*
+        * Get rid of the freespace block.
+        */
+       error = xfs_dir2_shrink_inode(args,
+                       xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
+                       fbp);
+       if (error) {
+               /*
+                * ENOSPC can't happen here because it only occurs when
+                * punching out the middle of an extent, and this is an
+                * isolated block.
+                */
+               ASSERT(error != ENOSPC);
+               return error;
+       }
+       fbp = NULL;
+       /*
+        * Now see if we can convert the single-leaf directory
+        * down to a block form directory.
+        * This routine always kills the dabuf for the leaf, so
+        * eliminate it from the path.
+        */
+       error = xfs_dir2_leaf_to_block(args, lbp, NULL);
+       state->path.blk[0].bp = NULL;
+       return error;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
new file mode 100644 (file)
index 0000000..4cf8b99
--- /dev/null
@@ -0,0 +1,2284 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_cksum.h"
+
+/*
+ * Function declarations.
+ */
+static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
+                             int index);
+static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
+                                    xfs_da_state_blk_t *blk1,
+                                    xfs_da_state_blk_t *blk2);
+static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
+                                int index, xfs_da_state_blk_t *dblk,
+                                int *rval);
+static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
+                                    xfs_da_state_blk_t *fblk);
+
+/*
+ * Check internal consistency of a leafn block.
+ */
+#ifdef DEBUG
+#define        xfs_dir3_leaf_check(dp, bp) \
+do { \
+       if (!xfs_dir3_leafn_check((dp), (bp))) \
+               ASSERT(0); \
+} while (0);
+
+static bool
+xfs_dir3_leafn_check(
+       struct xfs_inode        *dp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
+               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
+                       return false;
+       } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
+               return false;
+
+       return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+}
+#else
+#define        xfs_dir3_leaf_check(dp, bp)
+#endif
+
+static bool
+xfs_dir3_free_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dir2_free_hdr *hdr = bp->b_addr;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+               if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
+                       return false;
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
+                       return false;
+       } else {
+               if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
+                       return false;
+       }
+
+       /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
+
+       return true;
+}
+
+static void
+xfs_dir3_free_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dir3_free_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_dir3_free_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+
+       if (!xfs_dir3_free_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+       xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+       .verify_read = xfs_dir3_free_read_verify,
+       .verify_write = xfs_dir3_free_write_verify,
+};
+
+
+static int
+__xfs_dir3_free_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       xfs_daddr_t             mappedbno,
+       struct xfs_buf          **bpp)
+{
+       int                     err;
+
+       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+
+       /* a try-read returns with neither an error nor a *bpp buffer if it lands in a hole */
+       if (!err && tp && *bpp)
+               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
+       return err;
+}
+
+int
+xfs_dir2_free_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       struct xfs_buf          **bpp)
+{
+       return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp);
+}
+
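+/*
+ * Speculative read of a freespace block: a mapped block number of -2 lets
+ * the read succeed with a NULL *bpp if the block maps into a hole.
+ */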
+static int
+xfs_dir2_free_try_read(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       struct xfs_buf          **bpp)
+{
+       return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
+}
+
+static int
+xfs_dir3_free_get_buf(
+       xfs_da_args_t           *args,
+       xfs_dir2_db_t           fbno,
+       struct xfs_buf          **bpp)
+{
+       struct xfs_trans        *tp = args->trans;
+       struct xfs_inode        *dp = args->dp;
+       struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *bp;
+       int                     error;
+       struct xfs_dir3_icfree_hdr hdr;
+
+       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),
+                                  -1, &bp, XFS_DATA_FORK);
+       if (error)
+               return error;
+
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF);
+       bp->b_ops = &xfs_dir3_free_buf_ops;
+
+       /*
+        * Initialize the new block to be empty, and remember
+        * its first slot as our empty slot.
+        */
+       memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
+       memset(&hdr, 0, sizeof(hdr));
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+
+               hdr.magic = XFS_DIR3_FREE_MAGIC;
+
+               hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
+               hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
+               uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
+       } else
+               hdr.magic = XFS_DIR2_FREE_MAGIC;
+       dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Log entries from a freespace block.
+ */
+STATIC void
+xfs_dir2_free_log_bests(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp,
+       int                     first,          /* first entry to log */
+       int                     last)           /* last entry to log */
+{
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       __be16                  *bests;
+
+       free = bp->b_addr;
+       bests = args->dp->d_ops->free_bests_p(free);
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
+              free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
+       xfs_trans_log_buf(args->trans, bp,
+               (uint)((char *)&bests[first] - (char *)free),
+               (uint)((char *)&bests[last] - (char *)free +
+                      sizeof(bests[0]) - 1));
+}
+
+/*
+ * Log header from a freespace block.
+ */
+static void
+xfs_dir2_free_log_header(
+       struct xfs_da_args      *args,
+       struct xfs_buf          *bp)
+{
+#ifdef DEBUG
+       xfs_dir2_free_t         *free;          /* freespace structure */
+
+       free = bp->b_addr;
+       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
+              free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
+#endif
+       xfs_trans_log_buf(args->trans, bp, 0,
+                         args->dp->d_ops->free_hdr_size - 1);
+}
+
+/*
+ * Convert a leaf-format directory to a node-format directory.
+ * We need to change the magic number of the leaf block, and copy
+ * the freespace table out of the leaf block into its own block.
+ */
+int                                            /* error */
+xfs_dir2_leaf_to_node(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *lbp)           /* leaf buffer */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       struct xfs_buf          *fbp;           /* freespace buffer */
+       xfs_dir2_db_t           fdb;            /* freespace block number */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       __be16                  *from;          /* pointer to freespace entry */
+       int                     i;              /* leaf freespace index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     n;              /* count of live freespc ents */
+       xfs_dir2_data_off_t     off;            /* freespace entry value */
+       __be16                  *to;            /* pointer to freespace entry */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir3_icfree_hdr freehdr;
+
+       trace_xfs_dir2_leaf_to_node(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Add a freespace block to the directory.
+        */
+       if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {
+               return error;
+       }
+       ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
+       /*
+        * Get the buffer for the new freespace block.
+        */
+       error = xfs_dir3_free_get_buf(args, fdb, &fbp);
+       if (error)
+               return error;
+
+       free = fbp->b_addr;
+       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+       leaf = lbp->b_addr;
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       ASSERT(be32_to_cpu(ltp->bestcount) <=
+                               (uint)dp->i_d.di_size / args->geo->blksize);
+
+       /*
+        * Copy freespace entries from the leaf block to the new block.
+        * Count active entries.
+        */
+       from = xfs_dir2_leaf_bests_p(ltp);
+       to = dp->d_ops->free_bests_p(free);
+       for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
+               if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
+                       n++;
+               *to = cpu_to_be16(off);
+       }
+
+       /*
+        * Now initialize the freespace block header.
+        */
+       freehdr.nused = n;
+       freehdr.nvalid = be32_to_cpu(ltp->bestcount);
+
+       dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
+       xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1);
+       xfs_dir2_free_log_header(args, fbp);
+
+       /*
+        * Converting the leaf to a leafnode is just a matter of changing the
+        * magic number and the ops. Do the change directly to the buffer as
+        * it's less work (and less code) than decoding the header to host
+        * format and back again.
+        */
+       if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC))
+               leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
+       else
+               leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
+       lbp->b_ops = &xfs_dir3_leafn_buf_ops;
+       xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
+       xfs_dir3_leaf_log_header(args, lbp);
+       xfs_dir3_leaf_check(dp, lbp);
+       return 0;
+}
+
+/*
+ * Add a leaf entry to a leaf block in a node-form directory.
+ * The other work necessary is done from the caller.
+ */
+static int                                     /* error */
+xfs_dir2_leafn_add(
+       struct xfs_buf          *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     index)          /* insertion pt for new entry */
+{
+       int                     compact;        /* compacting stale leaves */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     highstale;      /* next stale entry */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     lfloghigh;      /* high leaf entry logging */
+       int                     lfloglow;       /* low leaf entry logging */
+       int                     lowstale;       /* previous stale entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_dir2_leaf_entry *ents;
+
+       trace_xfs_dir2_leafn_add(args, index);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       leaf = bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+
+       /*
+        * Quick check just to make sure we are not going to index
+        * into other people's memory.
+        */
+       if (index < 0)
+               return EFSCORRUPTED;
+
+       /*
+        * If the block already holds the maximum number of leaf entries
+        * and none of them are stale, the new entry won't fit and the
+        * caller will do a split.  If there are stale entries we'll do
+        * a compact.
+        */
+
+       if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
+               if (!leafhdr.stale)
+                       return ENOSPC;
+               compact = leafhdr.stale > 1;
+       } else
+               compact = 0;
+       ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval);
+       ASSERT(index == leafhdr.count ||
+              be32_to_cpu(ents[index].hashval) >= args->hashval);
+
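+       /* A JUSTCHECK op only wants to know if the entry would fit; don't add it. */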
+       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
+               return 0;
+
+       /*
+        * Compact out all but one stale leaf entry.  Leaves behind
+        * the entry closest to index.
+        */
+       if (compact)
+               xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
+                                        &highstale, &lfloglow, &lfloghigh);
+       else if (leafhdr.stale) {
+               /*
+                * Set impossible logging indices for this case;
+                * xfs_dir3_leaf_find_entry() will widen them to cover
+                * just the entries it actually touches.
+                */
+               lfloglow = leafhdr.count;
+               lfloghigh = -1;
+       }
+
+       /*
+        * Insert the new entry, log everything.
+        */
+       lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
+                                      highstale, &lfloglow, &lfloghigh);
+
+       lep->hashval = cpu_to_be32(args->hashval);
+       lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,
+                               args->blkno, args->index));
+
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, bp);
+       xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh);
+       xfs_dir3_leaf_check(dp, bp);
+       return 0;
+}
+
+#ifdef DEBUG
+static void
+xfs_dir2_free_hdr_check(
+       struct xfs_inode *dp,
+       struct xfs_buf  *bp,
+       xfs_dir2_db_t   db)
+{
+       struct xfs_dir3_icfree_hdr hdr;
+
+       dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
+
+       ASSERT((hdr.firstdb %
+               dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);
+       ASSERT(hdr.firstdb <= db);
+       ASSERT(db < hdr.firstdb + hdr.nvalid);
+}
+#else
+#define xfs_dir2_free_hdr_check(dp, bp, db)
+#endif /* DEBUG */
+
+/*
+ * Return the last hash value in the leaf.
+ * Stale entries are ok.
+ */
+xfs_dahash_t                                   /* hash value */
+xfs_dir2_leafn_lasthash(
+       struct xfs_inode *dp,
+       struct xfs_buf  *bp,                    /* leaf buffer */
+       int             *count)                 /* count of entries in leaf */
+{
+       struct xfs_dir2_leaf    *leaf = bp->b_addr;
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+
+       ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
+              leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+
+       if (count)
+               *count = leafhdr.count;
+       if (!leafhdr.count)
+               return 0;
+
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       return be32_to_cpu(ents[leafhdr.count - 1].hashval);
+}
+
+/*
+ * Look up a leaf entry for space to add a name in a node-format leaf block.
+ * The extrablk in state is a freespace block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_addname(
+       struct xfs_buf          *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     *indexp,        /* out: leaf entry index */
+       xfs_da_state_t          *state)         /* state to fill in */
+{
+       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
+       xfs_dir2_db_t           curdb = -1;     /* current data block number */
+       xfs_dir2_db_t           curfdb = -1;    /* current free block number */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       int                     fi;             /* free entry index */
+       xfs_dir2_free_t         *free = NULL;   /* free block structure */
+       int                     index;          /* leaf entry index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     length;         /* length of new data entry */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_db_t           newdb;          /* new data block number */
+       xfs_dir2_db_t           newfdb;         /* new free block number */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+
+       xfs_dir3_leaf_check(dp, bp);
+       ASSERT(leafhdr.count > 0);
+
+       /*
+        * Look up the hash value in the leaf entries.
+        */
+       index = xfs_dir2_leaf_search_hash(args, bp);
+       /*
+        * Do we have a buffer coming in?
+        */
+       if (state->extravalid) {
+               /* If so, it's a free block buffer, get the block number. */
+               curbp = state->extrablk.bp;
+               curfdb = state->extrablk.blkno;
+               free = curbp->b_addr;
+               ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
+                      free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
+       }
+       length = dp->d_ops->data_entsize(args->namelen);
+       /*
+        * Loop over leaf entries with the right hash value.
+        */
+       for (lep = &ents[index];
+            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
+            lep++, index++) {
+               /*
+                * Skip stale leaf entries.
+                */
+               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Pull the data block number from the entry.
+                */
+               newdb = xfs_dir2_dataptr_to_db(args->geo,
+                                              be32_to_cpu(lep->address));
+               /*
+                * For addname, we're looking for a place to put the new entry.
+                * We want to use a data block with an entry of equal
+                * hash value to ours if there is one with room.
+                *
+                * If this block isn't the data block we already have
+                * in hand, take a look at it.
+                */
+               if (newdb != curdb) {
+                       __be16 *bests;
+
+                       curdb = newdb;
+                       /*
+                        * Convert the data block to the free block
+                        * holding its freespace information.
+                        */
+                       newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);
+                       /*
+                        * If it's not the one we have in hand, read it in.
+                        */
+                       if (newfdb != curfdb) {
+                               /*
+                                * If we had one before, drop it.
+                                */
+                               if (curbp)
+                                       xfs_trans_brelse(tp, curbp);
+
+                               error = xfs_dir2_free_read(tp, dp,
+                                               xfs_dir2_db_to_da(args->geo,
+                                                                 newfdb),
+                                               &curbp);
+                               if (error)
+                                       return error;
+                               free = curbp->b_addr;
+
+                               xfs_dir2_free_hdr_check(dp, curbp, curdb);
+                       }
+                       /*
+                        * Get the index for our entry.
+                        */
+                       fi = dp->d_ops->db_to_fdindex(args->geo, curdb);
+                       /*
+                        * If it has room, return it.
+                        */
+                       bests = dp->d_ops->free_bests_p(free);
+                       if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
+                               XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
+                                                       XFS_ERRLEVEL_LOW, mp);
+                               if (curfdb != newfdb)
+                                       xfs_trans_brelse(tp, curbp);
+                               return EFSCORRUPTED;
+                       }
+                       curfdb = newfdb;
+                       if (be16_to_cpu(bests[fi]) >= length)
+                               goto out;
+               }
+       }
+       /* Didn't find any space */
+       fi = -1;
+out:
+       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+       if (curbp) {
+               /* Giving back a free block. */
+               state->extravalid = 1;
+               state->extrablk.bp = curbp;
+               state->extrablk.index = fi;
+               state->extrablk.blkno = curfdb;
+
+               /*
+                * Important: this magic number is not in the buffer - it's for
+                * buffer type information and therefore only the free/data type
+                * matters here, not whether CRCs are enabled or not.
+                */
+               state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+       } else {
+               state->extravalid = 0;
+       }
+       /*
+        * Return the index, that will be the insertion point.
+        */
+       *indexp = index;
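+       /* ENOENT just means the name was not found, which is what addname expects. */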
+       return ENOENT;
+}
+
+/*
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * The extrablk in state is a data block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_entry(
+       struct xfs_buf          *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     *indexp,        /* out: leaf entry index */
+       xfs_da_state_t          *state)         /* state to fill in */
+{
+       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
+       xfs_dir2_db_t           curdb = -1;     /* current data block number */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       int                     index;          /* leaf entry index */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_db_t           newdb;          /* new data block number */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       enum xfs_dacmp          cmp;            /* comparison result */
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_dir3_icleaf_hdr leafhdr;
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+
+       xfs_dir3_leaf_check(dp, bp);
+       ASSERT(leafhdr.count > 0);
+
+       /*
+        * Look up the hash value in the leaf entries.
+        */
+       index = xfs_dir2_leaf_search_hash(args, bp);
+       /*
+        * Do we have a buffer coming in?
+        */
+       if (state->extravalid) {
+               curbp = state->extrablk.bp;
+               curdb = state->extrablk.blkno;
+       }
+       /*
+        * Loop over leaf entries with the right hash value.
+        */
+       for (lep = &ents[index];
+            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
+            lep++, index++) {
+               /*
+                * Skip stale leaf entries.
+                */
+               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Pull the data block number from the entry.
+                */
+               newdb = xfs_dir2_dataptr_to_db(args->geo,
+                                              be32_to_cpu(lep->address));
+               /*
+                * Not adding a new entry, so we really want to find
+                * the name given to us.
+                *
+                * If it's a different data block, go get it.
+                */
+               if (newdb != curdb) {
+                       /*
+                        * If we had a block before and it isn't the one
+                        * saved for a CI name match, drop it.
+                        */
+                       if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
+                                               curdb != state->extrablk.blkno))
+                               xfs_trans_brelse(tp, curbp);
+                       /*
+                        * If we need the block that is saved with a CI match,
+                        * use it; otherwise read in the new data block.
+                        */
+                       if (args->cmpresult != XFS_CMP_DIFFERENT &&
+                                       newdb == state->extrablk.blkno) {
+                               ASSERT(state->extravalid);
+                               curbp = state->extrablk.bp;
+                       } else {
+                               error = xfs_dir3_data_read(tp, dp,
+                                               xfs_dir2_db_to_da(args->geo,
+                                                                 newdb),
+                                               -1, &curbp);
+                               if (error)
+                                       return error;
+                       }
+                       xfs_dir3_data_check(dp, curbp);
+                       curdb = newdb;
+               }
+               /*
+                * Point to the data entry.
+                */
+               dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
+                       xfs_dir2_dataptr_to_off(args->geo,
+                                               be32_to_cpu(lep->address)));
+               /*
+                * Compare the entry and if it's an exact match, return
+                * EEXIST immediately. If it's the first case-insensitive
+                * match, store the block & inode number and continue looking.
+                */
+               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                       /* If there is a CI match block, drop it */
+                       if (args->cmpresult != XFS_CMP_DIFFERENT &&
+                                               curdb != state->extrablk.blkno)
+                               xfs_trans_brelse(tp, state->extrablk.bp);
+                       args->cmpresult = cmp;
+                       args->inumber = be64_to_cpu(dep->inumber);
+                       args->filetype = dp->d_ops->data_get_ftype(dep);
+                       *indexp = index;
+                       state->extravalid = 1;
+                       state->extrablk.bp = curbp;
+                       state->extrablk.blkno = curdb;
+                       state->extrablk.index = (int)((char *)dep -
+                                                       (char *)curbp->b_addr);
+                       state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                       curbp->b_ops = &xfs_dir3_data_buf_ops;
+                       xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
+                       if (cmp == XFS_CMP_EXACT)
+                               return EEXIST;
+               }
+       }
+       ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
+       if (curbp) {
+               if (args->cmpresult == XFS_CMP_DIFFERENT) {
+                       /* Giving back last used data block. */
+                       state->extravalid = 1;
+                       state->extrablk.bp = curbp;
+                       state->extrablk.index = -1;
+                       state->extrablk.blkno = curdb;
+                       state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                       curbp->b_ops = &xfs_dir3_data_buf_ops;
+                       xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
+               } else {
+                       /* If the curbp is not the CI match block, drop it */
+                       if (state->extrablk.bp != curbp)
+                               xfs_trans_brelse(tp, curbp);
+               }
+       } else {
+               state->extravalid = 0;
+       }
+       *indexp = index;
+       return ENOENT;
+}
+
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+       struct xfs_buf          *bp,            /* leaf buffer */
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     *indexp,        /* out: leaf entry index */
+       xfs_da_state_t          *state)         /* state to fill in */
+{
+       if (args->op_flags & XFS_DA_OP_ADDNAME)
+               return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
+                                                       state);
+       return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
+}
+
+/*
+ * Move count leaf entries from source to destination leaf.
+ * Log entries and headers.  Stale entries are preserved.
+ */
+static void
+xfs_dir3_leafn_moveents(
+       xfs_da_args_t                   *args,  /* operation arguments */
+       struct xfs_buf                  *bp_s,  /* source */
+       struct xfs_dir3_icleaf_hdr      *shdr,
+       struct xfs_dir2_leaf_entry      *sents,
+       int                             start_s,/* source leaf index */
+       struct xfs_buf                  *bp_d,  /* destination */
+       struct xfs_dir3_icleaf_hdr      *dhdr,
+       struct xfs_dir2_leaf_entry      *dents,
+       int                             start_d,/* destination leaf index */
+       int                             count)  /* count of leaves to copy */
+{
+       int                             stale;  /* count stale leaves copied */
+
+       trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
+
+       /*
+        * Silently return if nothing to do.
+        */
+       if (count == 0)
+               return;
+
+       /*
+        * If the destination index is not the end of the current
+        * destination leaf entries, open up a hole in the destination
+        * to hold the new entries.
+        */
+       if (start_d < dhdr->count) {
+               memmove(&dents[start_d + count], &dents[start_d],
+                       (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
+               xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,
+                                      count + dhdr->count - 1);
+       }
+       /*
+        * If the source has stale leaves, count the ones in the copy range
+        * so we can update the header correctly.
+        */
+       if (shdr->stale) {
+               int     i;                      /* temp leaf index */
+
+               for (i = start_s, stale = 0; i < start_s + count; i++) {
+                       if (sents[i].address ==
+                                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+                               stale++;
+               }
+       } else
+               stale = 0;
+       /*
+        * Copy the leaf entries from source to destination.
+        */
+       memcpy(&dents[start_d], &sents[start_s],
+               count * sizeof(xfs_dir2_leaf_entry_t));
+       xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);
+
+       /*
+        * If there are source entries after the ones we copied,
+        * delete the ones we copied by sliding the next ones down.
+        */
+       if (start_s + count < shdr->count) {
+               memmove(&sents[start_s], &sents[start_s + count],
+                       count * sizeof(xfs_dir2_leaf_entry_t));
+               xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);
+       }
+
+       /*
+        * Update the headers and log them.
+        */
+       shdr->count -= count;
+       shdr->stale -= stale;
+       dhdr->count += count;
+       dhdr->stale += stale;
+}
+
+/*
+ * Determine the sort order of two leaf blocks.
+ * Returns 1 if both are valid and leaf2 should be before leaf1, else 0.
+ */
+int                                            /* sort order */
+xfs_dir2_leafn_order(
+       struct xfs_inode        *dp,
+       struct xfs_buf          *leaf1_bp,              /* leaf1 buffer */
+       struct xfs_buf          *leaf2_bp)              /* leaf2 buffer */
+{
+       struct xfs_dir2_leaf    *leaf1 = leaf1_bp->b_addr;
+       struct xfs_dir2_leaf    *leaf2 = leaf2_bp->b_addr;
+       struct xfs_dir2_leaf_entry *ents1;
+       struct xfs_dir2_leaf_entry *ents2;
+       struct xfs_dir3_icleaf_hdr hdr1;
+       struct xfs_dir3_icleaf_hdr hdr2;
+
+       dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
+       dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
+       ents1 = dp->d_ops->leaf_ents_p(leaf1);
+       ents2 = dp->d_ops->leaf_ents_p(leaf2);
+
+       if (hdr1.count > 0 && hdr2.count > 0 &&
+           (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
+            be32_to_cpu(ents2[hdr2.count - 1].hashval) <
+                               be32_to_cpu(ents1[hdr1.count - 1].hashval)))
+               return 1;
+       return 0;
+}
+
+/*
+ * Rebalance leaf entries between two leaf blocks.
+ * This is actually only called when the second block is new,
+ * though the code deals with the general case.
+ * A new entry will be inserted in one of the blocks, and that
+ * entry is taken into account when balancing.
+ */
+static void
+xfs_dir2_leafn_rebalance(
+       xfs_da_state_t          *state,         /* btree cursor */
+       xfs_da_state_blk_t      *blk1,          /* first btree block */
+       xfs_da_state_blk_t      *blk2)          /* second btree block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       int                     count;          /* count (& direction) leaves */
+       int                     isleft;         /* new goes in left leaf */
+       xfs_dir2_leaf_t         *leaf1;         /* first leaf structure */
+       xfs_dir2_leaf_t         *leaf2;         /* second leaf structure */
+       int                     mid;            /* midpoint leaf index */
+#if defined(DEBUG) || defined(XFS_WARN)
+       int                     oldstale;       /* old count of stale leaves */
+#endif
+       int                     oldsum;         /* old total leaf count */
+       int                     swap;           /* swapped leaf blocks */
+       struct xfs_dir2_leaf_entry *ents1;
+       struct xfs_dir2_leaf_entry *ents2;
+       struct xfs_dir3_icleaf_hdr hdr1;
+       struct xfs_dir3_icleaf_hdr hdr2;
+       struct xfs_inode        *dp = state->args->dp;
+
+       args = state->args;
+       /*
+        * If the block order is wrong, swap the arguments.
+        */
+       if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) {
+               xfs_da_state_blk_t      *tmp;   /* temp for block swap */
+
+               tmp = blk1;
+               blk1 = blk2;
+               blk2 = tmp;
+       }
+       leaf1 = blk1->bp->b_addr;
+       leaf2 = blk2->bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
+       dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
+       ents1 = dp->d_ops->leaf_ents_p(leaf1);
+       ents2 = dp->d_ops->leaf_ents_p(leaf2);
+
+       oldsum = hdr1.count + hdr2.count;
+#if defined(DEBUG) || defined(XFS_WARN)
+       oldstale = hdr1.stale + hdr2.stale;
+#endif
+       mid = oldsum >> 1;
+
+       /*
+        * If the old leaf count was odd then the new one will be even,
+        * so we need to divide the new count evenly.
+        */
+       if (oldsum & 1) {
+               xfs_dahash_t    midhash;        /* middle entry hash value */
+
+               if (mid >= hdr1.count)
+                       midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval);
+               else
+                       midhash = be32_to_cpu(ents1[mid].hashval);
+               isleft = args->hashval <= midhash;
+       }
+       /*
+        * If the old count is even then the new count is odd, so there's
+        * no preferred side for the new entry.
+        * Pick the left one.
+        */
+       else
+               isleft = 1;
+       /*
+        * Calculate moved entry count.  Positive means left-to-right,
+        * negative means right-to-left.  Then move the entries.
+        */
+       count = hdr1.count - mid + (isleft == 0);
+       if (count > 0)
+               xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1,
+                                       hdr1.count - count, blk2->bp,
+                                       &hdr2, ents2, 0, count);
+       else if (count < 0)
+               xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0,
+                                       blk1->bp, &hdr1, ents1,
+                                       hdr1.count, count);
+
+       ASSERT(hdr1.count + hdr2.count == oldsum);
+       ASSERT(hdr1.stale + hdr2.stale == oldstale);
+
+       /* log the changes made when moving the entries */
+       dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
+       dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
+       xfs_dir3_leaf_log_header(args, blk1->bp);
+       xfs_dir3_leaf_log_header(args, blk2->bp);
+
+       xfs_dir3_leaf_check(dp, blk1->bp);
+       xfs_dir3_leaf_check(dp, blk2->bp);
+
+       /*
+        * Mark whether we're inserting into the old or new leaf.
+        */
+       if (hdr1.count < hdr2.count)
+               state->inleaf = swap;
+       else if (hdr1.count > hdr2.count)
+               state->inleaf = !swap;
+       else
+               state->inleaf = swap ^ (blk1->index <= hdr1.count);
+       /*
+        * Adjust the expected index for insertion.
+        */
+       if (!state->inleaf)
+               blk2->index = blk1->index - hdr1.count;
+
+       /*
+        * Finally, sanity check to make sure we are not returning a
+        * negative index.
+        */
+       if (blk2->index < 0) {
+               state->inleaf = 1;
+               blk2->index = 0;
+               xfs_alert(dp->i_mount,
+       "%s: picked the wrong leaf? reverting original leaf: blk1->index %d",
+                       __func__, blk1->index);
+       }
+}
+
+static int
+xfs_dir3_data_block_free(
+       xfs_da_args_t           *args,
+       struct xfs_dir2_data_hdr *hdr,
+       struct xfs_dir2_free    *free,
+       xfs_dir2_db_t           fdb,
+       int                     findex,
+       struct xfs_buf          *fbp,
+       int                     longest)
+{
+       int                     logfree = 0;
+       __be16                  *bests;
+       struct xfs_dir3_icfree_hdr freehdr;
+       struct xfs_inode        *dp = args->dp;
+
+       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+       bests = dp->d_ops->free_bests_p(free);
+       if (hdr) {
+               /*
+                * Data block is not empty, just set the free entry to the new
+                * value.
+                */
+               bests[findex] = cpu_to_be16(longest);
+               xfs_dir2_free_log_bests(args, fbp, findex, findex);
+               return 0;
+       }
+
+       /* One less used entry in the free table. */
+       freehdr.nused--;
+
+       /*
+        * If this was the last entry in the table, we can trim the table size
+        * back.  There might be other entries at the end referring to
+        * non-existent data blocks; trim those off too.
+        */
+       if (findex == freehdr.nvalid - 1) {
+               int     i;              /* free entry index */
+
+               for (i = findex - 1; i >= 0; i--) {
+                       if (bests[i] != cpu_to_be16(NULLDATAOFF))
+                               break;
+               }
+               freehdr.nvalid = i + 1;
+               logfree = 0;
+       } else {
+               /* Not the last entry, just punch it out.  */
+               bests[findex] = cpu_to_be16(NULLDATAOFF);
+               logfree = 1;
+       }
+
+       dp->d_ops->free_hdr_to_disk(free, &freehdr);
+       xfs_dir2_free_log_header(args, fbp);
+
+       /*
+        * If there are no useful entries left in the block, get rid of the
+        * block if we can.
+        */
+       if (!freehdr.nused) {
+               int error;
+
+               error = xfs_dir2_shrink_inode(args, fdb, fbp);
+               if (error == 0) {
+                       fbp = NULL;
+                       logfree = 0;
+               } else if (error != ENOSPC || args->total != 0)
+                       return error;
+               /*
+                * It's possible to get ENOSPC if there is no space
+                * reservation.  In that case someone else will
+                * eventually get rid of this block.
+                */
+       }
+
+       /* Log the free entry that changed, unless we got rid of it.  */
+       if (logfree)
+               xfs_dir2_free_log_bests(args, fbp, findex, findex);
+       return 0;
+}
+
+/*
+ * Remove an entry from a node directory.
+ * This removes the leaf entry and the data entry,
+ * and updates the free block if necessary.
+ */
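+/*
+ * The leaf entry is simply marked stale; the data entry's space goes back
+ * on the data block's free list, and if that leaves the data block empty
+ * it is punched out where possible and the corresponding freespace entry
+ * updated.
+ */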
+static int                                     /* error */
+xfs_dir2_leafn_remove(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *bp,            /* leaf buffer */
+       int                     index,          /* leaf entry index */
+       xfs_da_state_blk_t      *dblk,          /* data block */
+       int                     *rval)          /* resulting block needs join */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_db_t           db;             /* data block number */
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data block entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+       int                     longest;        /* longest data free entry */
+       int                     off;            /* data block entry offset */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir2_data_free *bf;          /* bestfree table */
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_dir2_leaf_entry *ents;
+
+       trace_xfs_dir2_leafn_remove(args, index);
+
+       dp = args->dp;
+       tp = args->trans;
+       mp = dp->i_mount;
+       leaf = bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+
+       /*
+        * Point to the entry we're removing.
+        */
+       lep = &ents[index];
+
+       /*
+        * Extract the data block and offset from the entry.
+        */
+       db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+       ASSERT(dblk->blkno == db);
+       off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));
+       ASSERT(dblk->index == off);
+
+       /*
+        * Kill the leaf entry by marking it stale.
+        * Log the leaf block changes.
+        */
+       leafhdr.stale++;
+       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+       xfs_dir3_leaf_log_header(args, bp);
+
+       lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
+       xfs_dir3_leaf_log_ents(args, bp, index, index);
+
+       /*
+        * Make the data entry free.  Keep track of the longest freespace
+        * in the data block in case it changes.
+        */
+       dbp = dblk->bp;
+       hdr = dbp->b_addr;
+       dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
+       bf = dp->d_ops->data_bestfree_p(hdr);
+       longest = be16_to_cpu(bf[0].length);
+       needlog = needscan = 0;
+       xfs_dir2_data_make_free(args, dbp, off,
+               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+       /*
+        * Rescan the data block freespaces for bestfree.
+        * Log the data block header if needed.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       xfs_dir3_data_check(dp, dbp);
+       /*
+        * If the longest data block freespace changes, need to update
+        * the corresponding freeblock entry.
+        */
+       if (longest < be16_to_cpu(bf[0].length)) {
+               int             error;          /* error return value */
+               struct xfs_buf  *fbp;           /* freeblock buffer */
+               xfs_dir2_db_t   fdb;            /* freeblock block number */
+               int             findex;         /* index in freeblock entries */
+               xfs_dir2_free_t *free;          /* freeblock structure */
+
+               /*
+                * Convert the data block number to a free block,
+                * read in the free block.
+                */
+               fdb = dp->d_ops->db_to_fdb(args->geo, db);
+               error = xfs_dir2_free_read(tp, dp,
+                                          xfs_dir2_db_to_da(args->geo, fdb),
+                                          &fbp);
+               if (error)
+                       return error;
+               free = fbp->b_addr;
+#ifdef DEBUG
+       {
+               struct xfs_dir3_icfree_hdr freehdr;
+               dp->d_ops->free_hdr_from_disk(&freehdr, free);
+               ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) *
+                       (fdb - xfs_dir2_byte_to_db(args->geo,
+                                                  XFS_DIR2_FREE_OFFSET)));
+       }
+#endif
+               /*
+                * Calculate which entry we need to fix.
+                */
+               findex = dp->d_ops->db_to_fdindex(args->geo, db);
+               longest = be16_to_cpu(bf[0].length);
+               /*
+                * If the data block is now empty we can get rid of it
+                * (usually).
+                */
+               if (longest == args->geo->blksize -
+                              dp->d_ops->data_entry_offset) {
+                       /*
+                        * Try to punch out the data block.
+                        */
+                       error = xfs_dir2_shrink_inode(args, db, dbp);
+                       if (error == 0) {
+                               dblk->bp = NULL;
+                               hdr = NULL;
+                       }
+                       /*
+                        * We can get ENOSPC if there's no space reservation.
+                        * In that case just drop the buffer and someone else
+                        * will eventually get rid of the empty block.
+                        */
+                       else if (!(error == ENOSPC && args->total == 0))
+                               return error;
+               }
+               /*
+                * If we got rid of the data block, we can eliminate that entry
+                * in the free block.
+                */
+               error = xfs_dir3_data_block_free(args, hdr, free,
+                                                fdb, findex, fbp, longest);
+               if (error)
+                       return error;
+       }
+
+       xfs_dir3_leaf_check(dp, bp);
+       /*
+        * Return indication of whether this leaf block is empty enough
+        * to justify trying to join it with a neighbor.
+        */
+       *rval = (dp->d_ops->leaf_hdr_size +
+                (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
+               args->geo->magicpct;
+       return 0;
+}
+
+/*
+ * Split the leaf entries in the old block into old and new blocks.
+ */
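+/*
+ * In outline: allocate a new leaf block, rebalance the existing entries
+ * across the two blocks, link the new block into the sibling chain and
+ * then add the new entry to whichever block the rebalance selected
+ * (state->inleaf).
+ */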
+int                                            /* error */
+xfs_dir2_leafn_split(
+       xfs_da_state_t          *state,         /* btree cursor */
+       xfs_da_state_blk_t      *oldblk,        /* original block */
+       xfs_da_state_blk_t      *newblk)        /* newly created block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_dablk_t             blkno;          /* new leaf block number */
+       int                     error;          /* error return value */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       struct xfs_inode        *dp;
+
+       /*
+        * Allocate space for a new leaf node.
+        */
+       args = state->args;
+       dp = args->dp;
+       mp = dp->i_mount;
+       ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
+       error = xfs_da_grow_inode(args, &blkno);
+       if (error)
+               return error;
+       /*
+        * Initialize the new leaf block.
+        */
+       error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno),
+                                     &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
+       if (error)
+               return error;
+
+       newblk->blkno = blkno;
+       newblk->magic = XFS_DIR2_LEAFN_MAGIC;
+       /*
+        * Rebalance the entries across the two leaves, link the new
+        * block into the leaves.
+        */
+       xfs_dir2_leafn_rebalance(state, oldblk, newblk);
+       error = xfs_da3_blk_link(state, oldblk, newblk);
+       if (error)
+               return error;
+       /*
+        * Insert the new entry in the correct block.
+        */
+       if (state->inleaf)
+               error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index);
+       else
+               error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index);
+       /*
+        * Update last hashval in each block since we added the name.
+        */
+       oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
+       newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
+       xfs_dir3_leaf_check(dp, oldblk->bp);
+       xfs_dir3_leaf_check(dp, newblk->bp);
+       return error;
+}
+
+/*
+ * Check a leaf block and its neighbors to see if the block should be
+ * collapsed into one or the other neighbor.  Always keep the block
+ * with the smaller block number.
+ * If the current block is over 50% full, don't try to join it: set
+ * *action to 0.
+ * If the block is empty, fill in the state structure and set *action to 2.
+ * If it can be collapsed, fill in the state structure and set *action to 1.
+ * If nothing can be done, set *action to 0.
+ */
+int                                            /* error */
+xfs_dir2_leafn_toosmall(
+       xfs_da_state_t          *state,         /* btree cursor */
+       int                     *action)        /* resulting action to take */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block */
+       xfs_dablk_t             blkno;          /* leaf block number */
+       struct xfs_buf          *bp;            /* leaf buffer */
+       int                     bytes;          /* bytes in use */
+       int                     count;          /* leaf live entry count */
+       int                     error;          /* error return value */
+       int                     forward;        /* sibling block direction */
+       int                     i;              /* sibling counter */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       int                     rval;           /* result from path_shift */
+       struct xfs_dir3_icleaf_hdr leafhdr;
+       struct xfs_dir2_leaf_entry *ents;
+       struct xfs_inode        *dp = state->args->dp;
+
+       /*
+        * Check for the degenerate case of the block being over 50% full.
+        * If so, it's not worth even looking to see if we might be able
+        * to coalesce with a sibling.
+        */
+       blk = &state->path.blk[state->path.active - 1];
+       leaf = blk->bp->b_addr;
+       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+       ents = dp->d_ops->leaf_ents_p(leaf);
+       xfs_dir3_leaf_check(dp, blk->bp);
+
+       count = leafhdr.count - leafhdr.stale;
+       bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
+       if (bytes > (state->args->geo->blksize >> 1)) {
+               /*
+                * Blk over 50%, don't try to join.
+                */
+               *action = 0;
+               return 0;
+       }
+       /*
+        * Check for the degenerate case of the block being empty.
+        * If the block is empty, we'll simply delete it, no need to
+        * coalesce it with a sibling block.  We choose (arbitrarily)
+        * to merge with the forward block unless it is NULL.
+        */
+       if (count == 0) {
+               /*
+                * Make altpath point to the block we want to keep and
+                * path point to the block we want to drop (this one).
+                */
+               forward = (leafhdr.forw != 0);
+               memcpy(&state->altpath, &state->path, sizeof(state->path));
+               error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
+                       &rval);
+               if (error)
+                       return error;
+               *action = rval ? 2 : 0;
+               return 0;
+       }
+       /*
+        * Examine each sibling block to see if we can coalesce with
+        * at least 25% free space to spare.  We need to figure out
+        * whether to merge with the forward or the backward block.
+        * We prefer coalescing with the lower numbered sibling so as
+        * to shrink a directory over time.
+        */
+       forward = leafhdr.forw < leafhdr.back;
+       for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
+               struct xfs_dir3_icleaf_hdr hdr2;
+
+               blkno = forward ? leafhdr.forw : leafhdr.back;
+               if (blkno == 0)
+                       continue;
+               /*
+                * Read the sibling leaf block.
+                */
+               error = xfs_dir3_leafn_read(state->args->trans, dp,
+                                           blkno, -1, &bp);
+               if (error)
+                       return error;
+
+               /*
+                * Count bytes in the two blocks combined.
+                */
+               count = leafhdr.count - leafhdr.stale;
+               bytes = state->args->geo->blksize -
+                       (state->args->geo->blksize >> 2);
+
+               leaf = bp->b_addr;
+               dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
+               ents = dp->d_ops->leaf_ents_p(leaf);
+               count += hdr2.count - hdr2.stale;
+               bytes -= count * sizeof(ents[0]);
+
+               /*
+                * Fits with at least 25% to spare.
+                */
+               if (bytes >= 0)
+                       break;
+               xfs_trans_brelse(state->args->trans, bp);
+       }
+       /*
+        * Didn't like either block, give up.
+        */
+       if (i >= 2) {
+               *action = 0;
+               return 0;
+       }
+
+       /*
+        * Make altpath point to the block we want to keep (the lower
+        * numbered block) and path point to the block we want to drop.
+        */
+       memcpy(&state->altpath, &state->path, sizeof(state->path));
+       if (blkno < blk->blkno)
+               error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
+                       &rval);
+       else
+               error = xfs_da3_path_shift(state, &state->path, forward, 0,
+                       &rval);
+       if (error)
+               return error;
+       *action = rval ? 0 : 1;
+       return 0;
+}
+
+/*
+ * Move all the leaf entries from drop_blk to save_blk.
+ * This is done as part of a join operation.
+ */
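+/*
+ * Stale entries are purged from both blocks first, then the live entries
+ * from drop_blk are appended to whichever end of save_blk preserves the
+ * hash ordering.
+ */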
+void
+xfs_dir2_leafn_unbalance(
+       xfs_da_state_t          *state,         /* cursor */
+       xfs_da_state_blk_t      *drop_blk,      /* dead block */
+       xfs_da_state_blk_t      *save_blk)      /* surviving block */
+{
+       xfs_da_args_t           *args;          /* operation arguments */
+       xfs_dir2_leaf_t         *drop_leaf;     /* dead leaf structure */
+       xfs_dir2_leaf_t         *save_leaf;     /* surviving leaf structure */
+       struct xfs_dir3_icleaf_hdr savehdr;
+       struct xfs_dir3_icleaf_hdr drophdr;
+       struct xfs_dir2_leaf_entry *sents;
+       struct xfs_dir2_leaf_entry *dents;
+       struct xfs_inode        *dp = state->args->dp;
+
+       args = state->args;
+       ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       drop_leaf = drop_blk->bp->b_addr;
+       save_leaf = save_blk->bp->b_addr;
+
+       dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf);
+       dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf);
+       sents = dp->d_ops->leaf_ents_p(save_leaf);
+       dents = dp->d_ops->leaf_ents_p(drop_leaf);
+
+       /*
+        * If there are any stale leaf entries, take this opportunity
+        * to purge them.
+        */
+       if (drophdr.stale)
+               xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp);
+       if (savehdr.stale)
+               xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp);
+
+       /*
+        * Move the entries from drop to the appropriate end of save.
+        */
+       drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval);
+       if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp))
+               xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
+                                       save_blk->bp, &savehdr, sents, 0,
+                                       drophdr.count);
+       else
+               xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
+                                       save_blk->bp, &savehdr, sents,
+                                       savehdr.count, drophdr.count);
+       save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
+
+       /* log the changes made when moving the entries */
+       dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
+       dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
+       xfs_dir3_leaf_log_header(args, save_blk->bp);
+       xfs_dir3_leaf_log_header(args, drop_blk->bp);
+
+       xfs_dir3_leaf_check(dp, save_blk->bp);
+       xfs_dir3_leaf_check(dp, drop_blk->bp);
+}
+
+/*
+ * Top-level node form directory addname routine.
+ */
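+/*
+ * This looks up the insertion point, adds the data entry via
+ * xfs_dir2_node_addname_int(), then adds the leaf entry, splitting the
+ * leaf block if it has no room left.
+ */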
+int                                            /* error */
+xfs_dir2_node_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block for insert */
+       int                     error;          /* error return value */
+       int                     rval;           /* sub-return value */
+       xfs_da_state_t          *state;         /* btree cursor */
+
+       trace_xfs_dir2_node_addname(args);
+
+       /*
+        * Allocate and initialize the state (btree cursor).
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       /*
+        * Look up the name.  We're not supposed to find it, but
+        * this gives us the insertion point.
+        */
+       error = xfs_da3_node_lookup_int(state, &rval);
+       if (error)
+               rval = error;
+       if (rval != ENOENT)
+               goto done;
+       /*
+        * Add the data entry to a data block.
+        * state->extravalid is set if lookup found a freespace block for us.
+        */
+       rval = xfs_dir2_node_addname_int(args,
+               state->extravalid ? &state->extrablk : NULL);
+       if (rval)
+               goto done;
+       blk = &state->path.blk[state->path.active - 1];
+       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       /*
+        * Add the new leaf entry.
+        */
+       rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
+       if (rval == 0) {
+               /*
+                * It worked, fix the hash values up the btree.
+                */
+               if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
+                       xfs_da3_fixhashpath(state, &state->path);
+       } else {
+               /*
+                * It didn't work, we need to split the leaf block.
+                */
+               if (args->total == 0) {
+                       ASSERT(rval == ENOSPC);
+                       goto done;
+               }
+               /*
+                * Split the leaf block and insert the new entry.
+                */
+               rval = xfs_da3_split(state);
+       }
+done:
+       xfs_da_state_free(state);
+       return rval;
+}
+
+/*
+ * Add the data entry for a node-format directory name addition.
+ * The leaf entry is added in xfs_dir2_leafn_add.
+ * We may enter with a freespace block that the lookup found.
+ */
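+/*
+ * In outline: find a data block with enough free space, scanning the
+ * freespace blocks if the caller's hint (fblk) doesn't supply one;
+ * allocate a new data block (and, if necessary, a new freespace block)
+ * when nothing suitable exists; then carve the new entry out of the data
+ * block's free space and keep the freespace entry for that block in sync.
+ */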
+static int                                     /* error */
+xfs_dir2_node_addname_int(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_da_state_blk_t      *fblk)          /* optional freespace block */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_db_t           dbno;           /* data block number */
+       struct xfs_buf          *dbp;           /* data block buffer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* data unused entry pointer */
+       int                     error;          /* error return value */
+       xfs_dir2_db_t           fbno;           /* freespace block number */
+       struct xfs_buf          *fbp;           /* freespace buffer */
+       int                     findex;         /* freespace entry index */
+       xfs_dir2_free_t         *free=NULL;     /* freespace block structure */
+       xfs_dir2_db_t           ifbno;          /* initial freespace block no */
+       xfs_dir2_db_t           lastfbno=0;     /* highest freespace block no */
+       int                     length;         /* length of the new entry */
+       int                     logfree;        /* need to log free entry */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       int                     needlog;        /* need to log data header */
+       int                     needscan;       /* need to rescan data frees */
+       __be16                  *tagp;          /* data entry tag pointer */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       __be16                  *bests;
+       struct xfs_dir3_icfree_hdr freehdr;
+       struct xfs_dir2_data_free *bf;
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       length = dp->d_ops->data_entsize(args->namelen);
+       /*
+        * If we came in with a freespace block, lookup found an entry
+        * with our hash value and this is the freespace block for that
+        * data entry.
+        */
+       if (fblk) {
+               fbp = fblk->bp;
+               /*
+                * Remember initial freespace block number.
+                */
+               ifbno = fblk->blkno;
+               free = fbp->b_addr;
+               findex = fblk->index;
+               bests = dp->d_ops->free_bests_p(free);
+               dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
+               /*
+                * This means the free entry showed that the data block had
+                * space for our entry, so we remembered it.
+                * Use that data block.
+                */
+               if (findex >= 0) {
+                       ASSERT(findex < freehdr.nvalid);
+                       ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
+                       ASSERT(be16_to_cpu(bests[findex]) >= length);
+                       dbno = freehdr.firstdb + findex;
+               } else {
+                       /*
+                        * The data block we looked at didn't have enough room.
+                        * We'll start at the beginning of the freespace entries.
+                        */
+                       dbno = -1;
+                       findex = 0;
+               }
+       } else {
+               /*
+                * Didn't come in with a freespace block, so no data block.
+                */
+               ifbno = dbno = -1;
+               fbp = NULL;
+               findex = 0;
+       }
+
+       /*
+        * If we don't have a data block yet, we're going to scan the
+        * freespace blocks looking for one.  Figure out what the
+        * highest freespace block number is.
+        */
+       if (dbno == -1) {
+               xfs_fileoff_t   fo;             /* freespace block number */
+
+               if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
+                       return error;
+               lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
+               fbno = ifbno;
+       }
+       /*
+        * While we haven't identified a data block, search the freeblock
+        * data for a good data block.  If we find a null freeblock entry,
+        * indicating a hole in the data blocks, remember that.
+        */
+       while (dbno == -1) {
+               /*
+                * If we don't have a freeblock in hand, get the next one.
+                */
+               if (fbp == NULL) {
+                       /*
+                        * Happens the first time through unless lookup gave
+                        * us a freespace block to start with.
+                        */
+                       if (++fbno == 0)
+                               fbno = xfs_dir2_byte_to_db(args->geo,
+                                                       XFS_DIR2_FREE_OFFSET);
+                       /*
+                        * If it's ifbno we already looked at it.
+                        */
+                       if (fbno == ifbno)
+                               fbno++;
+                       /*
+                        * If it's off the end we're done.
+                        */
+                       if (fbno >= lastfbno)
+                               break;
+                       /*
+                        * Read the block.  There can be holes in the
+                        * freespace blocks, so this might not succeed.
+                        * Holes should be really rare, though, so there is
+                        * no reason to try to avoid the read.
+                        */
+                       error = xfs_dir2_free_try_read(tp, dp,
+                                       xfs_dir2_db_to_da(args->geo, fbno),
+                                       &fbp);
+                       if (error)
+                               return error;
+                       if (!fbp)
+                               continue;
+                       free = fbp->b_addr;
+                       findex = 0;
+               }
+               /*
+                * Look at the current free entry.  Is it good enough?
+                *
+                * The bests initialisation should be where the buffer is read
+                * in the branch above, but gcc can't see that bests and the
+                * freehdr would be initialised on every path if they were
+                * placed there, so we do it here to avoid the warnings. Blech.
+                */
+               bests = dp->d_ops->free_bests_p(free);
+               dp->d_ops->free_hdr_from_disk(&freehdr, free);
+               if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
+                   be16_to_cpu(bests[findex]) >= length)
+                       dbno = freehdr.firstdb + findex;
+               else {
+                       /*
+                        * Are we done with the freeblock?
+                        */
+                       if (++findex == freehdr.nvalid) {
+                               /*
+                                * Drop the block.
+                                */
+                               xfs_trans_brelse(tp, fbp);
+                               fbp = NULL;
+                               if (fblk && fblk->bp)
+                                       fblk->bp = NULL;
+                       }
+               }
+       }
+       /*
+        * If we don't have a data block, we need to allocate one and make
+        * the freespace entries refer to it.
+        */
+       if (unlikely(dbno == -1)) {
+               /*
+                * Not allowed to allocate, return failure.
+                */
+               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
+                       return ENOSPC;
+
+               /*
+                * Allocate and initialize the new data block.
+                */
+               if (unlikely((error = xfs_dir2_grow_inode(args,
+                                                        XFS_DIR2_DATA_SPACE,
+                                                        &dbno)) ||
+                   (error = xfs_dir3_data_init(args, dbno, &dbp))))
+                       return error;
+
+               /*
+                * If (somehow) we have a freespace block, get rid of it.
+                */
+               if (fbp)
+                       xfs_trans_brelse(tp, fbp);
+               if (fblk && fblk->bp)
+                       fblk->bp = NULL;
+
+               /*
+                * Get the freespace block corresponding to the data block
+                * that was just allocated.
+                */
+               fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
+               error = xfs_dir2_free_try_read(tp, dp,
+                                      xfs_dir2_db_to_da(args->geo, fbno),
+                                      &fbp);
+               if (error)
+                       return error;
+
+               /*
+                * If there wasn't a freespace block, the read will
+                * return a NULL fbp.  Allocate and initialize a new one.
+                */
+               if (!fbp) {
+                       error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
+                                                   &fbno);
+                       if (error)
+                               return error;
+
+                       if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
+                               xfs_alert(mp,
+                       "%s: dir ino %llu needed freesp block %lld for\n"
+                       "  data block %lld, got %lld ifbno %llu lastfbno %d",
+                                       __func__, (unsigned long long)dp->i_ino,
+                                       (long long)dp->d_ops->db_to_fdb(
+                                                               args->geo, dbno),
+                                       (long long)dbno, (long long)fbno,
+                                       (unsigned long long)ifbno, lastfbno);
+                               if (fblk) {
+                                       xfs_alert(mp,
+                               " fblk 0x%p blkno %llu index %d magic 0x%x",
+                                               fblk,
+                                               (unsigned long long)fblk->blkno,
+                                               fblk->index,
+                                               fblk->magic);
+                               } else {
+                                       xfs_alert(mp, " ... fblk is NULL");
+                               }
+                               XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
+                                                XFS_ERRLEVEL_LOW, mp);
+                               return EFSCORRUPTED;
+                       }
+
+                       /*
+                        * Get a buffer for the new block.
+                        */
+                       error = xfs_dir3_free_get_buf(args, fbno, &fbp);
+                       if (error)
+                               return error;
+                       free = fbp->b_addr;
+                       bests = dp->d_ops->free_bests_p(free);
+                       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
+                       /*
+                        * Remember the first slot as our empty slot.
+                        */
+                       freehdr.firstdb =
+                               (fbno - xfs_dir2_byte_to_db(args->geo,
+                                                       XFS_DIR2_FREE_OFFSET)) *
+                                       dp->d_ops->free_max_bests(args->geo);
+               } else {
+                       free = fbp->b_addr;
+                       bests = dp->d_ops->free_bests_p(free);
+                       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+               }
+
+               /*
+                * Set the freespace block index from the data block number.
+                */
+               findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
+               /*
+                * If it's after the end of the current entries in the
+                * freespace block, extend that table.
+                */
+               if (findex >= freehdr.nvalid) {
+                       ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
+                       freehdr.nvalid = findex + 1;
+                       /*
+                        * Tag new entry so nused will go up.
+                        */
+                       bests[findex] = cpu_to_be16(NULLDATAOFF);
+               }
+               /*
+                * If this entry was for an empty data block
+                * (this should always be true) then update the header.
+                */
+               if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
+                       freehdr.nused++;
+                       dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
+                       xfs_dir2_free_log_header(args, fbp);
+               }
+               /*
+                * Update the real value in the table.
+                * We haven't allocated the data entry yet so this will
+                * change again.
+                */
+               hdr = dbp->b_addr;
+               bf = dp->d_ops->data_bestfree_p(hdr);
+               bests[findex] = bf[0].length;
+               logfree = 1;
+       }
+       /*
+        * We had a data block so we don't have to make a new one.
+        */
+       else {
+               /*
+                * If just checking, we succeeded.
+                */
+               if (args->op_flags & XFS_DA_OP_JUSTCHECK)
+                       return 0;
+
+               /*
+                * Read the data block in.
+                */
+               error = xfs_dir3_data_read(tp, dp,
+                                          xfs_dir2_db_to_da(args->geo, dbno),
+                                          -1, &dbp);
+               if (error)
+                       return error;
+               hdr = dbp->b_addr;
+               bf = dp->d_ops->data_bestfree_p(hdr);
+               logfree = 0;
+       }
+       ASSERT(be16_to_cpu(bf[0].length) >= length);
+       /*
+        * Point to the existing unused space.
+        */
+       dup = (xfs_dir2_data_unused_t *)
+             ((char *)hdr + be16_to_cpu(bf[0].offset));
+       needscan = needlog = 0;
+       /*
+        * Mark the first part of the unused space, inuse for us.
+        */
+       xfs_dir2_data_use_free(args, dbp, dup,
+               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
+               &needlog, &needscan);
+       /*
+        * Fill in the new entry and log it.
+        */
+       dep = (xfs_dir2_data_entry_t *)dup;
+       dep->inumber = cpu_to_be64(args->inumber);
+       dep->namelen = args->namelen;
+       memcpy(dep->name, args->name, dep->namelen);
+       dp->d_ops->data_put_ftype(dep, args->filetype);
+       tagp = dp->d_ops->data_entry_tag_p(dep);
+       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+       xfs_dir2_data_log_entry(args, dbp, dep);
+       /*
+        * Rescan the block for bestfree if needed.
+        */
+       if (needscan)
+               xfs_dir2_data_freescan(dp, hdr, &needlog);
+       /*
+        * Log the data block header if needed.
+        */
+       if (needlog)
+               xfs_dir2_data_log_header(args, dbp);
+       /*
+        * If the freespace entry is now wrong, update it.
+        */
+       bests = dp->d_ops->free_bests_p(free); /* recompute for gcc's benefit */
+       if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
+               bests[findex] = bf[0].length;
+               logfree = 1;
+       }
+       /*
+        * Log the freespace entry if needed.
+        */
+       if (logfree)
+               xfs_dir2_free_log_bests(args, fbp, findex, findex);
+       /*
+        * Return the data block and offset in args, then drop the data block.
+        */
+       args->blkno = (xfs_dablk_t)dbno;
+       args->index = be16_to_cpu(*tagp);
+       return 0;
+}
+
+/*
+ * Lookup an entry in a node-format directory.
+ * All the real work happens in xfs_da3_node_lookup_int.
+ * The only real output is the inode number of the entry.
+ */
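+/*
+ * For a case-insensitive match the on-disk name is passed back to the
+ * caller through xfs_dir_cilookup_result() before the buffers are
+ * released.
+ */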
+int                                            /* error */
+xfs_dir2_node_lookup(
+       xfs_da_args_t   *args)                  /* operation arguments */
+{
+       int             error;                  /* error return value */
+       int             i;                      /* btree level */
+       int             rval;                   /* operation return value */
+       xfs_da_state_t  *state;                 /* btree cursor */
+
+       trace_xfs_dir2_node_lookup(args);
+
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       /*
+        * Fill in the path to the entry in the cursor.
+        */
+       error = xfs_da3_node_lookup_int(state, &rval);
+       if (error)
+               rval = error;
+       else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
+               /* If a CI match, dup the actual name and return EEXIST */
+               xfs_dir2_data_entry_t   *dep;
+
+               dep = (xfs_dir2_data_entry_t *)
+                       ((char *)state->extrablk.bp->b_addr +
+                                                state->extrablk.index);
+               rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+       }
+       /*
+        * Release the btree blocks and leaf block.
+        */
+       for (i = 0; i < state->path.active; i++) {
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+       /*
+        * Release the data block if we have it.
+        */
+       if (state->extravalid && state->extrablk.bp) {
+               xfs_trans_brelse(args->trans, state->extrablk.bp);
+               state->extrablk.bp = NULL;
+       }
+       xfs_da_state_free(state);
+       return rval;
+}
+
+/*
+ * Remove an entry from a node-format directory.
+ */
+int                                            /* error */
+xfs_dir2_node_removename(
+       struct xfs_da_args      *args)          /* operation arguments */
+{
+       struct xfs_da_state_blk *blk;           /* leaf block */
+       int                     error;          /* error return value */
+       int                     rval;           /* operation return value */
+       struct xfs_da_state     *state;         /* btree cursor */
+
+       trace_xfs_dir2_node_removename(args);
+
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+
+       /* Look up the entry we're deleting, set up the cursor. */
+       error = xfs_da3_node_lookup_int(state, &rval);
+       if (error)
+               goto out_free;
+
+       /* Didn't find it, upper layer screwed up. */
+       if (rval != EEXIST) {
+               error = rval;
+               goto out_free;
+       }
+
+       blk = &state->path.blk[state->path.active - 1];
+       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+       ASSERT(state->extravalid);
+       /*
+        * Remove the leaf and data entries.
+        * Extrablk refers to the data block.
+        */
+       error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
+               &state->extrablk, &rval);
+       if (error)
+               goto out_free;
+       /*
+        * Fix the hash values up the btree.
+        */
+       xfs_da3_fixhashpath(state, &state->path);
+       /*
+        * If we need to join leaf blocks, do it.
+        */
+       if (rval && state->path.active > 1)
+               error = xfs_da3_join(state);
+       /*
+        * If no errors so far, try conversion to leaf format.
+        */
+       if (!error)
+               error = xfs_dir2_node_to_leaf(state);
+out_free:
+       xfs_da_state_free(state);
+       return error;
+}
+
+/*
+ * Replace an entry's inode number in a node-format directory.
+ */
+int                                            /* error */
+xfs_dir2_node_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_da_state_blk_t      *blk;           /* leaf block */
+       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
+       xfs_dir2_data_entry_t   *dep;           /* data entry changed */
+       int                     error;          /* error return value */
+       int                     i;              /* btree level */
+       xfs_ino_t               inum;           /* new inode number */
+       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry being changed */
+       int                     rval;           /* internal return value */
+       xfs_da_state_t          *state;         /* btree cursor */
+
+       trace_xfs_dir2_node_replace(args);
+
+       /*
+        * Allocate and initialize the btree cursor.
+        */
+       state = xfs_da_state_alloc();
+       state->args = args;
+       state->mp = args->dp->i_mount;
+       inum = args->inumber;
+       /*
+        * Lookup the entry to change in the btree.
+        */
+       error = xfs_da3_node_lookup_int(state, &rval);
+       if (error)
+               rval = error;
+       /*
+        * It should be found, since the vnodeops layer has looked it up
+        * and locked it.  But paranoia is good.
+        */
+       if (rval == EEXIST) {
+               struct xfs_dir2_leaf_entry *ents;
+               /*
+                * Find the leaf entry.
+                */
+               blk = &state->path.blk[state->path.active - 1];
+               ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+               leaf = blk->bp->b_addr;
+               ents = args->dp->d_ops->leaf_ents_p(leaf);
+               lep = &ents[blk->index];
+               ASSERT(state->extravalid);
+               /*
+                * Point to the data entry.
+                */
+               hdr = state->extrablk.bp->b_addr;
+               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+                      hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
+               dep = (xfs_dir2_data_entry_t *)
+                     ((char *)hdr +
+                      xfs_dir2_dataptr_to_off(args->geo,
+                                              be32_to_cpu(lep->address)));
+               ASSERT(inum != be64_to_cpu(dep->inumber));
+               /*
+                * Fill in the new inode number and log the entry.
+                */
+               dep->inumber = cpu_to_be64(inum);
+               args->dp->d_ops->data_put_ftype(dep, args->filetype);
+               xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
+               rval = 0;
+       }
+       /*
+        * Didn't find it, and we're holding a data block.  Drop it.
+        */
+       else if (state->extravalid) {
+               xfs_trans_brelse(args->trans, state->extrablk.bp);
+               state->extrablk.bp = NULL;
+       }
+       /*
+        * Release all the buffers in the cursor.
+        */
+       for (i = 0; i < state->path.active; i++) {
+               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
+               state->path.blk[i].bp = NULL;
+       }
+       xfs_da_state_free(state);
+       return rval;
+}
+
+/*
+ * Trim off a trailing empty freespace block.
+ * Return (in rvalp) 1 if we did it, 0 if not.
+ */
+int                                            /* error */
+xfs_dir2_node_trim_free(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_fileoff_t           fo,             /* free block number */
+       int                     *rvalp)         /* out: did something */
+{
+       struct xfs_buf          *bp;            /* freespace buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return code */
+       xfs_dir2_free_t         *free;          /* freespace structure */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_trans_t             *tp;            /* transaction pointer */
+       struct xfs_dir3_icfree_hdr freehdr;
+
+       dp = args->dp;
+       mp = dp->i_mount;
+       tp = args->trans;
+       /*
+        * Read the freespace block.
+        */
+       error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
+       if (error)
+               return error;
+       /*
+        * There can be holes in freespace.  If fo is a hole, there's
+        * nothing to do.
+        */
+       if (!bp)
+               return 0;
+       free = bp->b_addr;
+       dp->d_ops->free_hdr_from_disk(&freehdr, free);
+
+       /*
+        * If there are used entries, there's nothing to do.
+        */
+       if (freehdr.nused > 0) {
+               xfs_trans_brelse(tp, bp);
+               *rvalp = 0;
+               return 0;
+       }
+       /*
+        * Blow the block away.
+        */
+       error = xfs_dir2_shrink_inode(args,
+                       xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp);
+       if (error) {
+               /*
+                * Can't fail with ENOSPC since that only happens with no
+                * space reservation, when breaking up an extent into two
+                * pieces.  This is the last block of an extent.
+                */
+               ASSERT(error != ENOSPC);
+               xfs_trans_brelse(tp, bp);
+               return error;
+       }
+       /*
+        * Return that we succeeded.
+        */
+       *rvalp = 1;
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
new file mode 100644 (file)
index 0000000..27ce079
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DIR2_PRIV_H__
+#define __XFS_DIR2_PRIV_H__
+
+struct dir_context;
+
+/*
+ * Directory offset/block conversion functions.
+ *
+ * DB blocks here are logical directory block numbers, not filesystem blocks.
+ */
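+/*
+ * A dataptr packs a (directory block, byte offset) pair into a single
+ * xfs_dir2_dataptr_t by shifting the byte offset within the directory's
+ * file space right by XFS_DIR2_DATA_ALIGN_LOG.  For example, with 4k
+ * directory blocks (geo->blklog == 12), block 2 offset 64 is byte
+ * (2 << 12) + 64 = 8256, giving dataptr 8256 >> XFS_DIR2_DATA_ALIGN_LOG.
+ */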
+
+/*
+ * Convert dataptr to byte in file space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
+{
+       return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
+}
+
+/*
+ * Convert byte in file space to dataptr.  It had better be aligned.
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
+{
+       return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
+}
+
+/*
+ * Convert byte in space to (DB) block
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+       return (xfs_dir2_db_t)(by >> geo->blklog);
+}
+
+/*
+ * Convert dataptr to a block number
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+       return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert byte in space to offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+       return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1));
+}
+
+/*
+ * Convert dataptr to a byte offset in a block
+ */
+static inline xfs_dir2_data_aoff_t
+xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
+{
+       return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp));
+}
+
+/*
+ * Convert block and offset to byte in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+                       xfs_dir2_data_aoff_t o)
+{
+       return ((xfs_dir2_off_t)db << geo->blklog) + o;
+}
+
+/*
+ * Convert block (DB) to block (dablk)
+ */
+static inline xfs_dablk_t
+xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+       return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert byte in space to (DA) block
+ */
+static inline xfs_dablk_t
+xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
+{
+       return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by));
+}
+
+/*
+ * Convert block and offset to dataptr
+ */
+static inline xfs_dir2_dataptr_t
+xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
+                          xfs_dir2_data_aoff_t o)
+{
+       return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o));
+}
+
+/*
+ * Convert block (dablk) to block (DB)
+ */
+static inline xfs_dir2_db_t
+xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+       return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog));
+}
+
+/*
+ * Convert block (dablk) to byte offset in space
+ */
+static inline xfs_dir2_off_t
+xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da)
+{
+       return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0);
+}
+
+/*
+ * Directory tail pointer accessor functions. Based on block geometry.
+ */
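+/*
+ * Both tails are stored at the very end of the block: the block tail is
+ * the last struct xfs_dir2_block_tail in the block, the leaf tail the
+ * last struct xfs_dir2_leaf_tail.
+ */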
+static inline struct xfs_dir2_block_tail *
+xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr)
+{
+       return ((struct xfs_dir2_block_tail *)
+               ((char *)hdr + geo->blksize)) - 1;
+}
+
+static inline struct xfs_dir2_leaf_tail *
+xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
+{
+       return (struct xfs_dir2_leaf_tail *)
+               ((char *)lp + geo->blksize -
+                 sizeof(struct xfs_dir2_leaf_tail));
+}
+
+/* xfs_dir2.c */
+extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+                               xfs_dir2_db_t *dbp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
+                               const unsigned char *name, int len);
+
+#define S_SHIFT 12
+extern const unsigned char xfs_mode_to_ftype[];
+
+extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp,
+                                       __uint8_t filetype);
+
+
+/* xfs_dir2_block.c */
+extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
+                              struct xfs_buf **bpp);
+extern int xfs_dir2_block_addname(struct xfs_da_args *args);
+extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_block_removename(struct xfs_da_args *args);
+extern int xfs_dir2_block_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
+               struct xfs_buf *lbp, struct xfs_buf *dbp);
+
+/* xfs_dir2_data.c */
+#ifdef DEBUG
+#define        xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
+#else
+#define        xfs_dir3_data_check(dp,bp)
+#endif
+
+extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
+extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
+extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
+               xfs_daddr_t mapped_bno);
+
+extern struct xfs_dir2_data_free *
+xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
+               struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup,
+               int *loghead);
+extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
+               struct xfs_buf **bpp);
+
+/* xfs_dir2_leaf.c */
+extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
+extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
+               struct xfs_buf *dbp);
+extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
+extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
+               struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
+extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
+               struct xfs_dir2_leaf_entry *ents, int *indexp,
+               int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
+extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
+               struct xfs_buf **bpp, __uint16_t magic);
+extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
+               struct xfs_buf *bp, int first, int last);
+extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
+               struct xfs_buf *bp);
+extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
+               struct xfs_buf *lbp);
+extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
+               struct xfs_buf *lbp, xfs_dir2_db_t db);
+extern struct xfs_dir2_leaf_entry *
+xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
+               struct xfs_dir2_leaf_entry *ents, int index, int compact,
+               int lowstale, int highstale, int *lfloglow, int *lfloghigh);
+extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
+
+extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
+               struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
+
+/* xfs_dir2_node.c */
+extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
+               struct xfs_buf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
+               struct xfs_buf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
+               struct xfs_da_args *args, int *indexp,
+               struct xfs_da_state *state);
+extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp,
+               struct xfs_buf *leaf2_bp);
+extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
+       struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
+extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
+extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
+               struct xfs_da_state_blk *drop_blk,
+               struct xfs_da_state_blk *save_blk);
+extern int xfs_dir2_node_addname(struct xfs_da_args *args);
+extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_node_removename(struct xfs_da_args *args);
+extern int xfs_dir2_node_replace(struct xfs_da_args *args);
+extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
+               int *rvalp);
+extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
+               xfs_dablk_t fbno, struct xfs_buf **bpp);
+
+/* xfs_dir2_sf.c */
+extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
+               struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
+               int size, xfs_dir2_sf_hdr_t *sfhp);
+extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
+extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
+extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
+extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
+extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+
+/* xfs_dir2_readdir.c */
+extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
+                      size_t bufsize);
+
+#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
new file mode 100644 (file)
index 0000000..ab3563b
--- /dev/null
@@ -0,0 +1,1184 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_error.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_trace.h"
+#include "xfs_dinode.h"
+
+/*
+ * Prototypes for internal functions.
+ */
+static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args,
+                                    xfs_dir2_sf_entry_t *sfep,
+                                    xfs_dir2_data_aoff_t offset,
+                                    int new_isize);
+static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange,
+                                    int new_isize);
+static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
+                                   xfs_dir2_sf_entry_t **sfepp,
+                                   xfs_dir2_data_aoff_t *offsetp);
+#ifdef DEBUG
+static void xfs_dir2_sf_check(xfs_da_args_t *args);
+#else
+#define        xfs_dir2_sf_check(args)
+#endif /* DEBUG */
+#if XFS_BIG_INUMS
+static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
+static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
+#endif /* XFS_BIG_INUMS */
+
+/*
+ * Given a block directory (dp/block), calculate its size as a shortform (sf)
+ * directory and a header for the sf directory, if it will fit in the
+ * space currently present in the inode.  If it won't fit, the output
+ * size is too big (but not accurate).
+ */
+int                                            /* size for sf form */
+xfs_dir2_block_sfsize(
+       xfs_inode_t             *dp,            /* incore inode pointer */
+       xfs_dir2_data_hdr_t     *hdr,           /* block directory data */
+       xfs_dir2_sf_hdr_t       *sfhp)          /* output: header for sf form */
+{
+       xfs_dir2_dataptr_t      addr;           /* data entry address */
+       xfs_dir2_leaf_entry_t   *blp;           /* leaf area of the block */
+       xfs_dir2_block_tail_t   *btp;           /* tail area of the block */
+       int                     count;          /* shortform entry count */
+       xfs_dir2_data_entry_t   *dep;           /* data entry in the block */
+       int                     i;              /* block entry index */
+       int                     i8count;        /* count of big-inode entries */
+       int                     isdot;          /* entry is "." */
+       int                     isdotdot;       /* entry is ".." */
+       xfs_mount_t             *mp;            /* mount structure pointer */
+       int                     namelen;        /* total name bytes */
+       xfs_ino_t               parent = 0;     /* parent inode number */
+       int                     size=0;         /* total computed size */
+       int                     has_ftype;
+       struct xfs_da_geometry  *geo;
+
+       mp = dp->i_mount;
+       geo = mp->m_dir_geo;
+
+       /*
+        * if there is a filetype field, add the extra byte to the namelen
+        * for each entry that we see.
+        */
+       has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
+
+       count = i8count = namelen = 0;
+       btp = xfs_dir2_block_tail_p(geo, hdr);
+       blp = xfs_dir2_block_leaf_p(btp);
+
+       /*
+        * Iterate over the block's data entries by using the leaf pointers.
+        */
+       for (i = 0; i < be32_to_cpu(btp->count); i++) {
+               if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR)
+                       continue;
+               /*
+                * Calculate the pointer to the entry at hand.
+                */
+               dep = (xfs_dir2_data_entry_t *)((char *)hdr +
+                               xfs_dir2_dataptr_to_off(geo, addr));
+               /*
+                * Detect . and .., so we can special-case them.
+                * . is not included in sf directories.
+                * .. is included by just the parent inode number.
+                */
+               isdot = dep->namelen == 1 && dep->name[0] == '.';
+               isdotdot =
+                       dep->namelen == 2 &&
+                       dep->name[0] == '.' && dep->name[1] == '.';
+#if XFS_BIG_INUMS
+               if (!isdot)
+                       i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
+#endif
+               /* take into account the file type field */
+               if (!isdot && !isdotdot) {
+                       count++;
+                       namelen += dep->namelen + has_ftype;
+               } else if (isdotdot)
+                       parent = be64_to_cpu(dep->inumber);
+               /*
+                * Calculate the new size, see if we should give up yet.
+                */
+               size = xfs_dir2_sf_hdr_size(i8count) +          /* header */
+                      count +                                  /* namelen */
+                      count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
+                      namelen +                                /* name */
+                      (i8count ?                               /* inumber */
+                               (uint)sizeof(xfs_dir2_ino8_t) * count :
+                               (uint)sizeof(xfs_dir2_ino4_t) * count);
+               if (size > XFS_IFORK_DSIZE(dp))
+                       return size;            /* size value is a failure */
+       }
+       /*
+        * Create the output header, if it worked.
+        */
+       sfhp->count = count;
+       sfhp->i8count = i8count;
+       dp->d_ops->sf_put_parent_ino(sfhp, parent);
+       return size;
+}
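
To make the size accounting in the loop above concrete, here is a minimal standalone sketch of the same formula. Every input value is an assumption chosen for the example (three ordinary entries, 20 total name bytes, no filetype byte, 4-byte inode numbers, and a 6-byte shortform header); none of them come from this file.

    #include <stdio.h>

    int main(void)
    {
            /* assumed inputs, mirroring the variables in xfs_dir2_block_sfsize() */
            unsigned int count = 3;         /* entries other than "." and ".." */
            unsigned int namelen = 20;      /* total name bytes, no filetype byte */
            unsigned int i8count = 0;       /* every inode number fits in 4 bytes */
            unsigned int hdr_size = 6;      /* assumed xfs_dir2_sf_hdr_size(0) */
            unsigned int off_size = 2;      /* assumed sizeof(xfs_dir2_sf_off_t) */
            unsigned int ino_size = i8count ? 8 : 4;

            unsigned int size = hdr_size +          /* header */
                                count +             /* one namelen byte per entry */
                                count * off_size +  /* one offset per entry */
                                namelen +           /* the names themselves */
                                count * ino_size;   /* one inode number per entry */

            printf("shortform size = %u bytes\n", size);    /* 6 + 3 + 6 + 20 + 12 = 47 */
            return 0;
    }

If the computed size ever exceeds XFS_IFORK_DSIZE(dp), the real function simply returns that over-large value, so the caller only has to compare the result against the inode literal area.
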
+
+/*
+ * Convert a block format directory to shortform.
+ * Caller has already checked that it will fit, and built us a header.
+ */
+int                                            /* error */
+xfs_dir2_block_to_sf(
+       xfs_da_args_t           *args,          /* operation arguments */
+       struct xfs_buf          *bp,
+       int                     size,           /* shortform directory size */
+       xfs_dir2_sf_hdr_t       *sfhp)          /* shortform directory hdr */
+{
+       xfs_dir2_data_hdr_t     *hdr;           /* block header */
+       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
+       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_data_unused_t  *dup;           /* unused data pointer */
+       char                    *endptr;        /* end of data entries */
+       int                     error;          /* error return value */
+       int                     logflags;       /* inode logging flags */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       char                    *ptr;           /* current data pointer */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform directory header */
+       xfs_dir2_sf_hdr_t       *dst;           /* temporary data buffer */
+
+       trace_xfs_dir2_block_to_sf(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       /*
+        * allocate a temporary destination buffer the size of the inode
+        * to format the data into. Once we have formatted the data, we
+        * can free the block and copy the formatted data into the inode literal
+        * area.
+        */
+       dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP);
+       hdr = bp->b_addr;
+
+       /*
+        * Copy the header into the newly allocated local space.
+        */
+       sfp = (xfs_dir2_sf_hdr_t *)dst;
+       memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
+
+       /*
+        * Set up to loop over the block's entries.
+        */
+       btp = xfs_dir2_block_tail_p(args->geo, hdr);
+       ptr = (char *)dp->d_ops->data_entry_p(hdr);
+       endptr = (char *)xfs_dir2_block_leaf_p(btp);
+       sfep = xfs_dir2_sf_firstentry(sfp);
+       /*
+        * Loop over the active and unused entries.
+        * Stop when we reach the leaf/tail portion of the block.
+        */
+       while (ptr < endptr) {
+               /*
+                * If it's unused, just skip over it.
+                */
+               dup = (xfs_dir2_data_unused_t *)ptr;
+               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+                       ptr += be16_to_cpu(dup->length);
+                       continue;
+               }
+               dep = (xfs_dir2_data_entry_t *)ptr;
+               /*
+                * Skip .
+                */
+               if (dep->namelen == 1 && dep->name[0] == '.')
+                       ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino);
+               /*
+                * Skip .., but make sure the inode number is right.
+                */
+               else if (dep->namelen == 2 &&
+                        dep->name[0] == '.' && dep->name[1] == '.')
+                       ASSERT(be64_to_cpu(dep->inumber) ==
+                              dp->d_ops->sf_get_parent_ino(sfp));
+               /*
+                * Normal entry, copy it into shortform.
+                */
+               else {
+                       sfep->namelen = dep->namelen;
+                       xfs_dir2_sf_put_offset(sfep,
+                               (xfs_dir2_data_aoff_t)
+                               ((char *)dep - (char *)hdr));
+                       memcpy(sfep->name, dep->name, dep->namelen);
+                       dp->d_ops->sf_put_ino(sfp, sfep,
+                                             be64_to_cpu(dep->inumber));
+                       dp->d_ops->sf_put_ftype(sfep,
+                                       dp->d_ops->data_get_ftype(dep));
+
+                       sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+               }
+               ptr += dp->d_ops->data_entsize(dep->namelen);
+       }
+       ASSERT((char *)sfep - (char *)sfp == size);
+
+       /* now we are done with the block, we can shrink the inode */
+       logflags = XFS_ILOG_CORE;
+       error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
+       if (error) {
+               ASSERT(error != ENOSPC);
+               goto out;
+       }
+
+       /*
+        * The buffer is now unconditionally gone, whether
+        * xfs_dir2_shrink_inode worked or not.
+        *
+        * Convert the inode to local format and copy the data in.
+        */
+       dp->i_df.if_flags &= ~XFS_IFEXTENTS;
+       dp->i_df.if_flags |= XFS_IFINLINE;
+       dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+       ASSERT(dp->i_df.if_bytes == 0);
+       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+
+       logflags |= XFS_ILOG_DDATA;
+       memcpy(dp->i_df.if_u1.if_data, dst, size);
+       dp->i_d.di_size = size;
+       xfs_dir2_sf_check(args);
+out:
+       xfs_trans_log_inode(args->trans, dp, logflags);
+       kmem_free(dst);
+       return error;
+}
+
+/*
+ * Add a name to a shortform directory.
+ * There are two algorithms, "easy" and "hard" which we decide on
+ * before changing anything.
+ * Convert to block form if necessary, if the new entry won't fit.
+ */
+int                                            /* error */
+xfs_dir2_sf_addname(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     error;          /* error return value */
+       int                     incr_isize;     /* total change in size */
+       int                     new_isize;      /* di_size after adding name */
+       int                     objchange;      /* changing to 8-byte inodes */
+       xfs_dir2_data_aoff_t    offset = 0;     /* offset for new entry */
+       int                     pick;           /* which algorithm to use */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+       xfs_dir2_sf_entry_t     *sfep = NULL;   /* shortform entry */
+
+       trace_xfs_dir2_sf_addname(args);
+
+       ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
+       dp = args->dp;
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Make sure the shortform value has some of its header.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return EIO;
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+       /*
+        * Compute entry (and change in) size.
+        */
+       incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
+       objchange = 0;
+#if XFS_BIG_INUMS
+       /*
+        * Do we have to change to 8 byte inodes?
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
+               /*
+                * Yes, adjust the inode size.  old count + (parent + new)
+                */
+               incr_isize +=
+                       (sfp->count + 2) *
+                       ((uint)sizeof(xfs_dir2_ino8_t) -
+                        (uint)sizeof(xfs_dir2_ino4_t));
+               objchange = 1;
+       }
+#endif
+       new_isize = (int)dp->i_d.di_size + incr_isize;
+       /*
+        * Won't fit as shortform any more (due to size),
+        * or the pick routine says it won't (due to offset values).
+        */
+       if (new_isize > XFS_IFORK_DSIZE(dp) ||
+           (pick =
+            xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) {
+               /*
+                * Just checking or no space reservation, it doesn't fit.
+                */
+               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
+                       return ENOSPC;
+               /*
+                * Convert to block form then add the name.
+                */
+               error = xfs_dir2_sf_to_block(args);
+               if (error)
+                       return error;
+               return xfs_dir2_block_addname(args);
+       }
+       /*
+        * Just checking, it fits.
+        */
+       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
+               return 0;
+       /*
+        * Do it the easy way - just add it at the end.
+        */
+       if (pick == 1)
+               xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
+       /*
+        * Do it the hard way - look for a place to insert the new entry.
+        * Convert to 8 byte inode numbers first if necessary.
+        */
+       else {
+               ASSERT(pick == 2);
+#if XFS_BIG_INUMS
+               if (objchange)
+                       xfs_dir2_sf_toino8(args);
+#endif
+               xfs_dir2_sf_addname_hard(args, objchange, new_isize);
+       }
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Add the new entry the "easy" way.
+ * This is copying the old directory and adding the new entry at the end.
+ * Since it's sorted by "offset" we need room after the last offset
+ * that's already there, and then room to convert to a block directory.
+ * This is already checked by the pick routine.
+ */
+static void
+xfs_dir2_sf_addname_easy(
+       xfs_da_args_t           *args,          /* operation arguments */
+       xfs_dir2_sf_entry_t     *sfep,          /* pointer to new entry */
+       xfs_dir2_data_aoff_t    offset,         /* offset to use for new ent */
+       int                     new_isize)      /* new directory size */
+{
+       int                     byteoff;        /* byte offset in sf dir */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+
+       dp = args->dp;
+
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       byteoff = (int)((char *)sfep - (char *)sfp);
+       /*
+        * Grow the in-inode space.
+        */
+       xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen),
+                         XFS_DATA_FORK);
+       /*
+        * Need to set up again due to realloc of the inode data.
+        */
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
+       /*
+        * Fill in the new entry.
+        */
+       sfep->namelen = args->namelen;
+       xfs_dir2_sf_put_offset(sfep, offset);
+       memcpy(sfep->name, args->name, sfep->namelen);
+       dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
+       dp->d_ops->sf_put_ftype(sfep, args->filetype);
+
+       /*
+        * Update the header and inode.
+        */
+       sfp->count++;
+#if XFS_BIG_INUMS
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
+               sfp->i8count++;
+#endif
+       dp->i_d.di_size = new_isize;
+       xfs_dir2_sf_check(args);
+}
+
+/*
+ * Add the new entry the "hard" way.
+ * The caller has already converted to 8 byte inode numbers if necessary,
+ * in which case we need to leave the i8count at 1.
+ * Find a hole that the new entry will fit into, and copy
+ * the first part of the entries, the new entry, and the last part of
+ * the entries.
+ */
+/* ARGSUSED */
+static void
+xfs_dir2_sf_addname_hard(
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     objchange,      /* changing inode number size */
+       int                     new_isize)      /* new directory size */
+{
+       int                     add_datasize;   /* data size need for new ent */
+       char                    *buf;           /* buffer for old */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     eof;            /* reached end of old dir */
+       int                     nbytes;         /* temp for byte copies */
+       xfs_dir2_data_aoff_t    new_offset;     /* next offset value */
+       xfs_dir2_data_aoff_t    offset;         /* current offset value */
+       int                     old_isize;      /* previous di_size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* entry in original dir */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* original shortform dir */
+       xfs_dir2_sf_entry_t     *sfep;          /* entry in new dir */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new shortform dir */
+       struct xfs_mount        *mp;
+
+       /*
+        * Copy the old directory to a temporary buffer.
+        */
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       old_isize = (int)dp->i_d.di_size;
+       buf = kmem_alloc(old_isize, KM_SLEEP);
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+       memcpy(oldsfp, sfp, old_isize);
+       /*
+        * Loop over the old directory finding the place we're going
+        * to insert the new entry.
+        * If it's going to end up at the end then oldsfep will point there.
+        */
+       for (offset = dp->d_ops->data_first_offset,
+             oldsfep = xfs_dir2_sf_firstentry(oldsfp),
+             add_datasize = dp->d_ops->data_entsize(args->namelen),
+             eof = (char *)oldsfep == &buf[old_isize];
+            !eof;
+            offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen),
+             oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep),
+             eof = (char *)oldsfep == &buf[old_isize]) {
+               new_offset = xfs_dir2_sf_get_offset(oldsfep);
+               if (offset + add_datasize <= new_offset)
+                       break;
+       }
+       /*
+        * Get rid of the old directory, then allocate space for
+        * the new one.  We do this so xfs_idata_realloc won't copy
+        * the data.
+        */
+       xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
+       /*
+        * Reset the pointer since the buffer was reallocated.
+        */
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Copy the first part of the directory, including the header.
+        */
+       nbytes = (int)((char *)oldsfep - (char *)oldsfp);
+       memcpy(sfp, oldsfp, nbytes);
+       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
+       /*
+        * Fill in the new entry, and update the header counts.
+        */
+       sfep->namelen = args->namelen;
+       xfs_dir2_sf_put_offset(sfep, offset);
+       memcpy(sfep->name, args->name, sfep->namelen);
+       dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
+       dp->d_ops->sf_put_ftype(sfep, args->filetype);
+       sfp->count++;
+#if XFS_BIG_INUMS
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
+               sfp->i8count++;
+#endif
+       /*
+        * If there's more left to copy, do that.
+        */
+       if (!eof) {
+               sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+               memcpy(sfep, oldsfep, old_isize - nbytes);
+       }
+       kmem_free(buf);
+       dp->i_d.di_size = new_isize;
+       xfs_dir2_sf_check(args);
+}
+
+/*
+ * Decide if the new entry will fit at all.
+ * If it will fit, pick between adding the new entry to the end (easy)
+ * or somewhere else (hard).
+ * Return 0 (won't fit), 1 (easy), 2 (hard).
+ */
+/*ARGSUSED*/
+static int                                     /* pick result */
+xfs_dir2_sf_addname_pick(
+       xfs_da_args_t           *args,          /* operation arguments */
+       int                     objchange,      /* inode # size changes */
+       xfs_dir2_sf_entry_t     **sfepp,        /* out(1): new entry ptr */
+       xfs_dir2_data_aoff_t    *offsetp)       /* out(1): new offset */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     holefit;        /* found hole it will fit in */
+       int                     i;              /* entry number */
+       xfs_mount_t             *mp;            /* filesystem mount point */
+       xfs_dir2_data_aoff_t    offset;         /* data block offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+       int                     size;           /* entry's data size */
+       int                     used;           /* data bytes used */
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       size = dp->d_ops->data_entsize(args->namelen);
+       offset = dp->d_ops->data_first_offset;
+       sfep = xfs_dir2_sf_firstentry(sfp);
+       holefit = 0;
+       /*
+        * Loop over sf entries.
+        * Keep track of data offset and whether we've seen a place
+        * to insert the new entry.
+        */
+       for (i = 0; i < sfp->count; i++) {
+               if (!holefit)
+                       holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
+               offset = xfs_dir2_sf_get_offset(sfep) +
+                        dp->d_ops->data_entsize(sfep->namelen);
+               sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+       }
+       /*
+        * Calculate data bytes used excluding the new entry, if this
+        * was a data block (block form directory).
+        */
+       used = offset +
+              (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t);
+       /*
+        * If it won't fit in a block form then we can't insert it,
+        * we'll go back, convert to block, then try the insert and convert
+        * to leaf.
+        */
+       if (used + (holefit ? 0 : size) > args->geo->blksize)
+               return 0;
+       /*
+        * If changing the inode number size, do it the hard way.
+        */
+#if XFS_BIG_INUMS
+       if (objchange) {
+               return 2;
+       }
+#else
+       ASSERT(objchange == 0);
+#endif
+       /*
+        * If it won't fit at the end then do it the hard way (use the hole).
+        */
+       if (used + size > args->geo->blksize)
+               return 2;
+       /*
+        * Do it the easy way.
+        */
+       *sfepp = sfep;
+       *offsetp = offset;
+       return 1;
+}
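
The decision above can be read as three cases. The (count + 3) term accounts for the existing shortform entries plus the new name plus "." and "..", which a block-form directory stores as real entries. A small standalone sketch of the same decision order follows; it ignores the objchange case, and every number in it, including the 8-byte sizes assumed for a leaf entry and the block tail, is an assumption for illustration only.

    #include <stdio.h>

    int main(void)
    {
            unsigned int blksize = 4096;    /* assumed directory block size */
            unsigned int offset = 480;      /* assumed end of the data entries */
            unsigned int count = 10;        /* assumed number of shortform entries */
            unsigned int leaf_ent = 8;      /* assumed sizeof(xfs_dir2_leaf_entry_t) */
            unsigned int tail = 8;          /* assumed sizeof(xfs_dir2_block_tail_t) */
            unsigned int newent = 24;       /* assumed data size of the new entry */
            int holefit = 0;                /* assume no interior hole was found */

            /* count + 3 = existing entries + the new one + "." + ".." */
            unsigned int used = offset + (count + 3) * leaf_ent + tail;

            if (used + (holefit ? 0 : newent) > blksize)
                    printf("pick = 0: won't fit as a single block either\n");
            else if (used + newent > blksize)
                    printf("pick = 2: only fits in an interior hole (hard)\n");
            else
                    printf("pick = 1: fits at the end (easy)\n");   /* this case here */
            return 0;
    }
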
+
+#ifdef DEBUG
+/*
+ * Check consistency of shortform directory, assert if bad.
+ */
+static void
+xfs_dir2_sf_check(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry number */
+       int                     i8count;        /* number of big inode#s */
+       xfs_ino_t               ino;            /* entry inode number */
+       int                     offset;         /* data offset */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform dir entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+       struct xfs_mount        *mp;
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       offset = dp->d_ops->data_first_offset;
+       ino = dp->d_ops->sf_get_parent_ino(sfp);
+       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
+            i < sfp->count;
+            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+               ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
+               ino = dp->d_ops->sf_get_ino(sfp, sfep);
+               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+               offset =
+                       xfs_dir2_sf_get_offset(sfep) +
+                       dp->d_ops->data_entsize(sfep->namelen);
+               ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
+       }
+       ASSERT(i8count == sfp->i8count);
+       ASSERT(XFS_BIG_INUMS || i8count == 0);
+       ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
+       ASSERT(offset +
+              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize);
+}
+#endif /* DEBUG */
+
+/*
+ * Create a new (shortform) directory.
+ */
+int                                    /* error, always 0 */
+xfs_dir2_sf_create(
+       xfs_da_args_t   *args,          /* operation arguments */
+       xfs_ino_t       pino)           /* parent inode number */
+{
+       xfs_inode_t     *dp;            /* incore directory inode */
+       int             i8count;        /* parent inode is an 8-byte number */
+       xfs_dir2_sf_hdr_t *sfp;         /* shortform structure */
+       int             size;           /* directory size */
+
+       trace_xfs_dir2_sf_create(args);
+
+       dp = args->dp;
+
+       ASSERT(dp != NULL);
+       ASSERT(dp->i_d.di_size == 0);
+       /*
+        * If it's currently a zero-length extent file,
+        * convert it to local format.
+        */
+       if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+               dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
+               dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+               dp->i_df.if_flags |= XFS_IFINLINE;
+       }
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       ASSERT(dp->i_df.if_bytes == 0);
+       i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
+       size = xfs_dir2_sf_hdr_size(i8count);
+       /*
+        * Make a buffer for the data.
+        */
+       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+       /*
+        * Fill in the header.
+        */
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       sfp->i8count = i8count;
+       /*
+        * Now we can put in the parent inode number, since i8count is set.
+        */
+       dp->d_ops->sf_put_parent_ino(sfp, pino);
+       sfp->count = 0;
+       dp->i_d.di_size = size;
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Lookup an entry in a shortform directory.
+ * Returns EEXIST if found, ENOENT if not found.
+ */
+int                                            /* error */
+xfs_dir2_sf_lookup(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       int                     error;
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+       enum xfs_dacmp          cmp;            /* comparison result */
+       xfs_dir2_sf_entry_t     *ci_sfep;       /* case-insens. entry */
+
+       trace_xfs_dir2_sf_lookup(args);
+
+       xfs_dir2_sf_check(args);
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Bail out if the directory is way too short.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return EIO;
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+       /*
+        * Special case for .
+        */
+       if (args->namelen == 1 && args->name[0] == '.') {
+               args->inumber = dp->i_ino;
+               args->cmpresult = XFS_CMP_EXACT;
+               args->filetype = XFS_DIR3_FT_DIR;
+               return EEXIST;
+       }
+       /*
+        * Special case for ..
+        */
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+               args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
+               args->cmpresult = XFS_CMP_EXACT;
+               args->filetype = XFS_DIR3_FT_DIR;
+               return EEXIST;
+       }
+       /*
+        * Loop over all the entries trying to match ours.
+        */
+       ci_sfep = NULL;
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+               /*
+                * Compare name and if it's an exact match, return the inode
+                * number. If it's the first case-insensitive match, store the
+                * inode number and continue looking for an exact match.
+                */
+               cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
+                                                               sfep->namelen);
+               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                       args->cmpresult = cmp;
+                       args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
+                       args->filetype = dp->d_ops->sf_get_ftype(sfep);
+                       if (cmp == XFS_CMP_EXACT)
+                               return EEXIST;
+                       ci_sfep = sfep;
+               }
+       }
+       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+       /*
+        * Here, we can only be doing a lookup (not a rename or replace).
+        * If a case-insensitive match was not found, return ENOENT.
+        */
+       if (!ci_sfep)
+               return ENOENT;
+       /* otherwise process the CI match as required by the caller */
+       error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
+       return error;
+}
+
+/*
+ * Remove an entry from a shortform directory.
+ */
+int                                            /* error */
+xfs_dir2_sf_removename(
+       xfs_da_args_t           *args)
+{
+       int                     byteoff;        /* offset of removed entry */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     entsize;        /* this entry's size */
+       int                     i;              /* shortform entry index */
+       int                     newsize;        /* new inode size */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+
+       trace_xfs_dir2_sf_removename(args);
+
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       oldsize = (int)dp->i_d.di_size;
+       /*
+        * Bail out if the directory is way too short.
+        */
+       if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return EIO;
+       }
+       ASSERT(dp->i_df.if_bytes == oldsize);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
+       /*
+        * Loop over the old directory entries.
+        * Find the one we're deleting.
+        */
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+               if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+                                                               XFS_CMP_EXACT) {
+                       ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==
+                              args->inumber);
+                       break;
+               }
+       }
+       /*
+        * Didn't find it.
+        */
+       if (i == sfp->count)
+               return ENOENT;
+       /*
+        * Calculate sizes.
+        */
+       byteoff = (int)((char *)sfep - (char *)sfp);
+       entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
+       newsize = oldsize - entsize;
+       /*
+        * Copy the part, if any, after the removed entry, sliding it down.
+        */
+       if (byteoff + entsize < oldsize)
+               memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize,
+                       oldsize - (byteoff + entsize));
+       /*
+        * Fix up the header and file size.
+        */
+       sfp->count--;
+       dp->i_d.di_size = newsize;
+       /*
+        * Reallocate, making it smaller.
+        */
+       xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+#if XFS_BIG_INUMS
+       /*
+        * Are we changing inode number size?
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
+               if (sfp->i8count == 1)
+                       xfs_dir2_sf_toino4(args);
+               else
+                       sfp->i8count--;
+       }
+#endif
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       return 0;
+}
+
+/*
+ * Replace the inode number of an entry in a shortform directory.
+ */
+int                                            /* error */
+xfs_dir2_sf_replace(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+#if XFS_BIG_INUMS || defined(DEBUG)
+       xfs_ino_t               ino=0;          /* entry old inode number */
+#endif
+#if XFS_BIG_INUMS
+       int                     i8elevated;     /* sf_toino8 set i8count=1 */
+#endif
+       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
+
+       trace_xfs_dir2_sf_replace(args);
+
+       dp = args->dp;
+
+       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+       /*
+        * Bail out if the shortform directory is way too small.
+        */
+       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+               return EIO;
+       }
+       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+       ASSERT(dp->i_df.if_u1.if_data != NULL);
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+#if XFS_BIG_INUMS
+       /*
+        * The new inode number is large and we need to convert to 8-byte inodes.
+        */
+       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
+               int     error;                  /* error return value */
+               int     newsize;                /* new inode size */
+
+               newsize =
+                       dp->i_df.if_bytes +
+                       (sfp->count + 1) *
+                       ((uint)sizeof(xfs_dir2_ino8_t) -
+                        (uint)sizeof(xfs_dir2_ino4_t));
+               /*
+                * Won't fit as shortform, convert to block then do replace.
+                */
+               if (newsize > XFS_IFORK_DSIZE(dp)) {
+                       error = xfs_dir2_sf_to_block(args);
+                       if (error) {
+                               return error;
+                       }
+                       return xfs_dir2_block_replace(args);
+               }
+               /*
+                * Still fits, convert to 8-byte now.
+                */
+               xfs_dir2_sf_toino8(args);
+               i8elevated = 1;
+               sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       } else
+               i8elevated = 0;
+#endif
+       ASSERT(args->namelen != 1 || args->name[0] != '.');
+       /*
+        * Replace ..'s entry.
+        */
+       if (args->namelen == 2 &&
+           args->name[0] == '.' && args->name[1] == '.') {
+#if XFS_BIG_INUMS || defined(DEBUG)
+               ino = dp->d_ops->sf_get_parent_ino(sfp);
+               ASSERT(args->inumber != ino);
+#endif
+               dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
+       }
+       /*
+        * Normal entry, look for the name.
+        */
+       else {
+               for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+                    i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+                       if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
+                                                               XFS_CMP_EXACT) {
+#if XFS_BIG_INUMS || defined(DEBUG)
+                               ino = dp->d_ops->sf_get_ino(sfp, sfep);
+                               ASSERT(args->inumber != ino);
+#endif
+                               dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
+                               dp->d_ops->sf_put_ftype(sfep, args->filetype);
+                               break;
+                       }
+               }
+               /*
+                * Didn't find it.
+                */
+               if (i == sfp->count) {
+                       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+#if XFS_BIG_INUMS
+                       if (i8elevated)
+                               xfs_dir2_sf_toino4(args);
+#endif
+                       return ENOENT;
+               }
+       }
+#if XFS_BIG_INUMS
+       /*
+        * See if the old number was large, the new number is small.
+        */
+       if (ino > XFS_DIR2_MAX_SHORT_INUM &&
+           args->inumber <= XFS_DIR2_MAX_SHORT_INUM) {
+               /*
+                * And the old count was one, so we need to convert back to 4-byte inode numbers.
+                */
+               if (sfp->i8count == 1)
+                       xfs_dir2_sf_toino4(args);
+               else
+                       sfp->i8count--;
+       }
+       /*
+        * See if the old number was small, the new number is large.
+        */
+       if (ino <= XFS_DIR2_MAX_SHORT_INUM &&
+           args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
+               /*
+                * add to the i8count unless we just converted to 8-byte
+                * inodes (which does an implied i8count = 1)
+                */
+               ASSERT(sfp->i8count != 0);
+               if (!i8elevated)
+                       sfp->i8count++;
+       }
+#endif
+       xfs_dir2_sf_check(args);
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
+       return 0;
+}
+
+#if XFS_BIG_INUMS
+/*
+ * Convert from 8-byte inode numbers to 4-byte inode numbers.
+ * The last 8-byte inode number is gone, but the count is still 1.
+ */
+static void
+xfs_dir2_sf_toino4(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       char                    *buf;           /* old dir's buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       int                     newsize;        /* new inode size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
+       struct xfs_mount        *mp;
+
+       trace_xfs_dir2_sf_toino4(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       /*
+        * Copy the old directory to the buffer.
+        * Then nuke it from the inode, and add the new buffer to the inode.
+        * Don't want xfs_idata_realloc copying the data here.
+        */
+       oldsize = dp->i_df.if_bytes;
+       buf = kmem_alloc(oldsize, KM_SLEEP);
+       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->i8count == 1);
+       memcpy(buf, oldsfp, oldsize);
+       /*
+        * Compute the new inode size.
+        */
+       newsize =
+               oldsize -
+               (oldsfp->count + 1) *
+               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
+       /*
+        * Reset our pointers, the data has moved.
+        */
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Fill in the new header.
+        */
+       sfp->count = oldsfp->count;
+       sfp->i8count = 0;
+       dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+       /*
+        * Copy the entries field by field.
+        */
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+                   oldsfep = xfs_dir2_sf_firstentry(oldsfp);
+            i < sfp->count;
+            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
+                 oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+               sfep->namelen = oldsfep->namelen;
+               sfep->offset = oldsfep->offset;
+               memcpy(sfep->name, oldsfep->name, sfep->namelen);
+               dp->d_ops->sf_put_ino(sfp, sfep,
+                                     dp->d_ops->sf_get_ino(oldsfp, oldsfep));
+               dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+       }
+       /*
+        * Clean up the inode.
+        */
+       kmem_free(buf);
+       dp->i_d.di_size = newsize;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+
+/*
+ * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
+ * The new entry with an 8-byte inode number is not there yet; we leave with
+ * i8count set to 1, but no corresponding 8-byte entry.
+ */
+static void
+xfs_dir2_sf_toino8(
+       xfs_da_args_t           *args)          /* operation arguments */
+{
+       char                    *buf;           /* old dir's buffer */
+       xfs_inode_t             *dp;            /* incore directory inode */
+       int                     i;              /* entry index */
+       int                     newsize;        /* new inode size */
+       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
+       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
+       int                     oldsize;        /* old inode size */
+       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
+       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
+       struct xfs_mount        *mp;
+
+       trace_xfs_dir2_sf_toino8(args);
+
+       dp = args->dp;
+       mp = dp->i_mount;
+
+       /*
+        * Copy the old directory to the buffer.
+        * Then nuke it from the inode, and add the new buffer to the inode.
+        * Don't want xfs_idata_realloc copying the data here.
+        */
+       oldsize = dp->i_df.if_bytes;
+       buf = kmem_alloc(oldsize, KM_SLEEP);
+       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       ASSERT(oldsfp->i8count == 0);
+       memcpy(buf, oldsfp, oldsize);
+       /*
+        * Compute the new inode size (nb: entry count + 1 for parent)
+        */
+       newsize =
+               oldsize +
+               (oldsfp->count + 1) *
+               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
+       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
+       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
+       /*
+        * Reset our pointers, the data has moved.
+        */
+       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+       /*
+        * Fill in the new header.
+        */
+       sfp->count = oldsfp->count;
+       sfp->i8count = 1;
+       dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+       /*
+        * Copy the entries field by field.
+        */
+       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
+                   oldsfep = xfs_dir2_sf_firstentry(oldsfp);
+            i < sfp->count;
+            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
+                 oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+               sfep->namelen = oldsfep->namelen;
+               sfep->offset = oldsfep->offset;
+               memcpy(sfep->name, oldsfep->name, sfep->namelen);
+               dp->d_ops->sf_put_ino(sfp, sfep,
+                                     dp->d_ops->sf_get_ino(oldsfp, oldsfep));
+               dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+       }
+       /*
+        * Clean up the inode.
+        */
+       kmem_free(buf);
+       dp->i_d.di_size = newsize;
+       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+}
+#endif /* XFS_BIG_INUMS */
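
Both converters adjust di_size by the same delta, (entry count + 1) * 4 bytes, where the +1 covers the parent inode number stored in the header. A tiny standalone sketch of that arithmetic (the entry count of five is an assumption for the example):

    #include <stdio.h>

    int main(void)
    {
            unsigned int count = 5;                 /* assumed number of sf entries */
            unsigned int ino8 = 8, ino4 = 4;        /* 8-byte vs 4-byte inode numbers */

            /* growth applied by xfs_dir2_sf_toino8(), shrink by xfs_dir2_sf_toino4() */
            unsigned int delta = (count + 1) * (ino8 - ino4);

            printf("converting %u entries (plus the parent) changes di_size by %u bytes\n",
                   count, delta);  /* (5 + 1) * 4 = 24 */
            return 0;
    }
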
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
new file mode 100644 (file)
index 0000000..c2ac0c6
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_qm.h"
+#include "xfs_error.h"
+#include "xfs_cksum.h"
+#include "xfs_trace.h"
+
+int
+xfs_calc_dquots_per_chunk(
+       unsigned int            nbblks) /* basic block units */
+{
+       unsigned int    ndquots;
+
+       ASSERT(nbblks > 0);
+       ndquots = BBTOB(nbblks);
+       do_div(ndquots, sizeof(xfs_dqblk_t));
+
+       return ndquots;
+}
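
A quick sanity check of this helper: BBTOB() converts 512-byte basic blocks to bytes, so a chunk of 8 basic blocks holds 4096 bytes. Assuming sizeof(xfs_dqblk_t) is 136 bytes (an assumed figure for illustration, not something stated in this file), the integer division gives 30 dquots per chunk. A minimal userspace sketch:

    #include <stdio.h>

    #define DEMO_BBSIZE     512     /* bytes per basic block; BBTOB() shifts by 9 */

    int main(void)
    {
            unsigned int nbblks = 8;        /* one 4 KiB chunk worth of basic blocks */
            unsigned int dqblk_size = 136;  /* assumed sizeof(xfs_dqblk_t) */

            /* same calculation as xfs_calc_dquots_per_chunk(), without do_div() */
            unsigned int ndquots = (nbblks * DEMO_BBSIZE) / dqblk_size;

            printf("%u dquots per %u-byte chunk\n",
                   ndquots, nbblks * DEMO_BBSIZE); /* 30 per 4096 bytes */
            return 0;
    }
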
+
+/*
+ * Do some primitive error checking on ondisk dquot data structures.
+ */
+int
+xfs_dqcheck(
+       struct xfs_mount *mp,
+       xfs_disk_dquot_t *ddq,
+       xfs_dqid_t       id,
+       uint             type,    /* used only when IO_dorepair is true */
+       uint             flags,
+       char             *str)
+{
+       xfs_dqblk_t      *d = (xfs_dqblk_t *)ddq;
+       int             errs = 0;
+
+       /*
+        * We can encounter an uninitialized dquot buffer for 2 reasons:
+        * 1. If we crash while deleting the quotainode(s), and those blks got
+        *    used for user data. This is because we take the path of regular
+        *    file deletion; however, the size field of quotainodes is never
+        *    updated, so all the tricks that we play in itruncate_finish
+        *    don't quite matter.
+        *
+        * 2. We don't replay the quota buffers when there's a quotaoff logitem.
+        *    But the allocation will be replayed so we'll end up with an
+        *    uninitialized quota block.
+        *
+        * This is all fine; things are still consistent, and we haven't lost
+        * any quota information. Just don't complain about bad dquot blks.
+        */
+       if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
+               if (flags & XFS_QMOPT_DOWARN)
+                       xfs_alert(mp,
+                       "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
+                       str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
+               errs++;
+       }
+       if (ddq->d_version != XFS_DQUOT_VERSION) {
+               if (flags & XFS_QMOPT_DOWARN)
+                       xfs_alert(mp,
+                       "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
+                       str, id, ddq->d_version, XFS_DQUOT_VERSION);
+               errs++;
+       }
+
+       if (ddq->d_flags != XFS_DQ_USER &&
+           ddq->d_flags != XFS_DQ_PROJ &&
+           ddq->d_flags != XFS_DQ_GROUP) {
+               if (flags & XFS_QMOPT_DOWARN)
+                       xfs_alert(mp,
+                       "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
+                       str, id, ddq->d_flags);
+               errs++;
+       }
+
+       if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
+               if (flags & XFS_QMOPT_DOWARN)
+                       xfs_alert(mp,
+                       "%s : ondisk-dquot 0x%p, ID mismatch: "
+                       "0x%x expected, found id 0x%x",
+                       str, ddq, id, be32_to_cpu(ddq->d_id));
+               errs++;
+       }
+
+       if (!errs && ddq->d_id) {
+               if (ddq->d_blk_softlimit &&
+                   be64_to_cpu(ddq->d_bcount) >
+                               be64_to_cpu(ddq->d_blk_softlimit)) {
+                       if (!ddq->d_btimer) {
+                               if (flags & XFS_QMOPT_DOWARN)
+                                       xfs_alert(mp,
+                       "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
+                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
+                               errs++;
+                       }
+               }
+               if (ddq->d_ino_softlimit &&
+                   be64_to_cpu(ddq->d_icount) >
+                               be64_to_cpu(ddq->d_ino_softlimit)) {
+                       if (!ddq->d_itimer) {
+                               if (flags & XFS_QMOPT_DOWARN)
+                                       xfs_alert(mp,
+                       "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
+                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
+                               errs++;
+                       }
+               }
+               if (ddq->d_rtb_softlimit &&
+                   be64_to_cpu(ddq->d_rtbcount) >
+                               be64_to_cpu(ddq->d_rtb_softlimit)) {
+                       if (!ddq->d_rtbtimer) {
+                               if (flags & XFS_QMOPT_DOWARN)
+                                       xfs_alert(mp,
+                       "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
+                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
+                               errs++;
+                       }
+               }
+       }
+
+       if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
+               return errs;
+
+       if (flags & XFS_QMOPT_DOWARN)
+               xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
+
+       /*
+        * Typically, a repair is only requested by quotacheck.
+        */
+       ASSERT(id != -1);
+       ASSERT(flags & XFS_QMOPT_DQREPAIR);
+       memset(d, 0, sizeof(xfs_dqblk_t));
+
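+       /*
+        * Re-stamp the identifying fields (plus the uuid/CRC below on v5
+        * filesystems); counters, limits and timers stay zeroed from the
+        * memset above.
+        */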
+       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+       d->dd_diskdq.d_flags = type;
+       d->dd_diskdq.d_id = cpu_to_be32(id);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+               xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+                                XFS_DQUOT_CRC_OFF);
+       }
+
+       return errs;
+}
+
+STATIC bool
+xfs_dquot_buf_verify_crc(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
+       int                     ndquots;
+       int                     i;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return true;
+
+       /*
+        * if we are in log recovery, the quota subsystem has not been
+        * initialised so we have no quotainfo structure. In that case, we need
+        * to manually calculate the number of dquots in the buffer.
+        */
+       if (mp->m_quotainfo)
+               ndquots = mp->m_quotainfo->qi_dqperchunk;
+       else
+               ndquots = xfs_calc_dquots_per_chunk(
+                                       XFS_BB_TO_FSB(mp, bp->b_length));
+
+       for (i = 0; i < ndquots; i++, d++) {
+               if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
+                                XFS_DQUOT_CRC_OFF))
+                       return false;
+               if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+       }
+       return true;
+}
+
+STATIC bool
+xfs_dquot_buf_verify(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
+       xfs_dqid_t              id = 0;
+       int                     ndquots;
+       int                     i;
+
+       /*
+        * if we are in log recovery, the quota subsystem has not been
+        * initialised so we have no quotainfo structure. In that case, we need
+        * to manually calculate the number of dquots in the buffer.
+        */
+       if (mp->m_quotainfo)
+               ndquots = mp->m_quotainfo->qi_dqperchunk;
+       else
+               ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
+
+       /*
+        * On the first read of the buffer, verify that each dquot is valid.
+        * We don't know what the id of the dquot is supposed to be, just that
+        * the ids should be increasing monotonically within the buffer. If the
+        * first id is corrupt, the check will fail on the second dquot in the
+        * buffer, so corruptions could point to the wrong dquot in this case.
+        */
+       for (i = 0; i < ndquots; i++) {
+               struct xfs_disk_dquot   *ddq;
+               int                     error;
+
+               ddq = &d[i].dd_diskdq;
+
+               if (i == 0)
+                       id = be32_to_cpu(ddq->d_id);
+
+               error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
+                                      "xfs_dquot_buf_verify");
+               if (error)
+                       return false;
+       }
+       return true;
+}
+
+static void
+xfs_dquot_buf_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (!xfs_dquot_buf_verify_crc(mp, bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_dquot_buf_verify(mp, bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+/*
+ * We don't calculate the CRC here as that is done when the dquot is flushed to
+ * the buffer after the update is complete. This ensures that the dquot in the
+ * buffer always has an up-to-date CRC value.
+ */
+static void
+xfs_dquot_buf_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (!xfs_dquot_buf_verify(mp, bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+}
+
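+/*
+ * Verifier table attached to dquot buffers. Reads check both the CRC and the
+ * dquot contents; writes only re-check the contents because the CRC is
+ * stamped when each dquot is flushed into the buffer.
+ */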
+const struct xfs_buf_ops xfs_dquot_buf_ops = {
+       .verify_read = xfs_dquot_buf_read_verify,
+       .verify_write = xfs_dquot_buf_write_verify,
+};
+
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
new file mode 100644 (file)
index 0000000..16fb63a
--- /dev/null
@@ -0,0 +1,2189 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+#include "xfs_icreate_item.h"
+#include "xfs_icache.h"
+#include "xfs_dinode.h"
+#include "xfs_trace.h"
+
+
+/*
+ * Allocation group level functions.
+ */
+static inline int
+xfs_ialloc_cluster_alignment(
+       xfs_alloc_arg_t *args)
+{
+       if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
+           args->mp->m_sb.sb_inoalignmt >=
+            XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size))
+               return args->mp->m_sb.sb_inoalignmt;
+       return 1;
+}
+
+/*
+ * Lookup a record by ino in the btree given by cur.
+ */
+int                                    /* error */
+xfs_inobt_lookup(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_agino_t             ino,    /* starting inode of chunk */
+       xfs_lookup_t            dir,    /* <=, >=, == */
+       int                     *stat)  /* success/failure */
+{
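+       /*
+        * Only ir_startino acts as the search key; the free count and mask
+        * are zeroed so the cursor's record starts from a known state.
+        */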
+       cur->bc_rec.i.ir_startino = ino;
+       cur->bc_rec.i.ir_freecount = 0;
+       cur->bc_rec.i.ir_free = 0;
+       return xfs_btree_lookup(cur, dir, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given.
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int                             /* error */
+xfs_inobt_update(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_inobt_rec_incore_t  *irec)  /* btree record */
+{
+       union xfs_btree_rec     rec;
+
+       rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
+       rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
+       rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
+       return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int                                    /* error */
+xfs_inobt_get_rec(
+       struct xfs_btree_cur    *cur,   /* btree cursor */
+       xfs_inobt_rec_incore_t  *irec,  /* btree record */
+       int                     *stat)  /* output: success/failure */
+{
+       union xfs_btree_rec     *rec;
+       int                     error;
+
+       error = xfs_btree_get_rec(cur, &rec, stat);
+       if (!error && *stat == 1) {
+               irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
+               irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
+               irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+       }
+       return error;
+}
+
+/*
+ * Insert a single inobt record. Cursor must already point to desired location.
+ */
+STATIC int
+xfs_inobt_insert_rec(
+       struct xfs_btree_cur    *cur,
+       __int32_t               freecount,
+       xfs_inofree_t           free,
+       int                     *stat)
+{
+       cur->bc_rec.i.ir_freecount = freecount;
+       cur->bc_rec.i.ir_free = free;
+       return xfs_btree_insert(cur, stat);
+}
+
+/*
+ * Insert records describing a newly allocated inode chunk into the inobt.
+ */
+STATIC int
+xfs_inobt_insert(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agino_t             newino,
+       xfs_agino_t             newlen,
+       xfs_btnum_t             btnum)
+{
+       struct xfs_btree_cur    *cur;
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
+       xfs_agino_t             thisino;
+       int                     i;
+       int                     error;
+
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
+
+       for (thisino = newino;
+            thisino < newino + newlen;
+            thisino += XFS_INODES_PER_CHUNK) {
+               error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
+               if (error) {
+                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               ASSERT(i == 0);
+
+               error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+                                            XFS_INOBT_ALL_FREE, &i);
+               if (error) {
+                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+                       return error;
+               }
+               ASSERT(i == 1);
+       }
+
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+       return 0;
+}
+
+/*
+ * Verify that the number of free inodes in the AGI is correct.
+ */
+#ifdef DEBUG
+STATIC int
+xfs_check_agi_freecount(
+       struct xfs_btree_cur    *cur,
+       struct xfs_agi          *agi)
+{
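+       /*
+        * Only walk the records when the btree is a single level deep, which
+        * keeps this debug-only verification cheap.
+        */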
+       if (cur->bc_nlevels == 1) {
+               xfs_inobt_rec_incore_t rec;
+               int             freecount = 0;
+               int             error;
+               int             i;
+
+               error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+               if (error)
+                       return error;
+
+               do {
+                       error = xfs_inobt_get_rec(cur, &rec, &i);
+                       if (error)
+                               return error;
+
+                       if (i) {
+                               freecount += rec.ir_freecount;
+                               error = xfs_btree_increment(cur, 0, &i);
+                               if (error)
+                                       return error;
+                       }
+               } while (i == 1);
+
+               if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
+                       ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
+       }
+       return 0;
+}
+#else
+#define xfs_check_agi_freecount(cur, agi)      0
+#endif
+
+/*
+ * Initialise a new set of inodes. When called without a transaction context
+ * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
+ * than logging them (which in a transaction context puts them into the AIL
+ * for writeback rather than the xfsbufd queue).
+ */
+int
+xfs_ialloc_inode_init(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct list_head        *buffer_list,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_agblock_t           length,
+       unsigned int            gen)
+{
+       struct xfs_buf          *fbuf;
+       struct xfs_dinode       *free;
+       int                     nbufs, blks_per_cluster, inodes_per_cluster;
+       int                     version;
+       int                     i, j;
+       xfs_daddr_t             d;
+       xfs_ino_t               ino = 0;
+
+       /*
+        * Loop over the new block(s), filling in the inodes.  For small block
+        * sizes, manipulate the inodes in buffers which are multiples of the
+        * block size.
+        */
+       blks_per_cluster = xfs_icluster_size_fsb(mp);
+       inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
+       nbufs = length / blks_per_cluster;
+
+       /*
+        * Figure out what version number to use in the inodes we create.  If
+        * the superblock version has caught up to the one that supports the new
+        * inode format, then use the new inode version.  Otherwise use the old
+        * version so that old kernels will continue to be able to use the file
+        * system.
+        *
+        * For v3 inodes, we also need to write the inode number into the inode,
+        * so calculate the first inode number of the chunk here as
+        * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
+        * across multiple filesystem blocks (such as a cluster) and so cannot
+        * be used in the cluster buffer loop below.
+        *
+        * Further, because we are writing the inode directly into the buffer
+        * and calculating a CRC on the entire inode, we have to log the entire
+        * inode so that the entire range the CRC covers is present in the log.
+        * That means for v3 inodes we log the entire buffer rather than just the
+        * inode cores.
+        */
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               version = 3;
+               ino = XFS_AGINO_TO_INO(mp, agno,
+                                      XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
+
+               /*
+                * log the initialisation that is about to take place as a
+                * logical operation. This means the transaction does not
+                * need to log the physical changes to the inode buffers as log
+                * recovery will know what initialisation is actually needed.
+                * Hence we only need to log the buffers as "ordered" buffers so
+                * they track in the AIL as if they were physically logged.
+                */
+               if (tp)
+                       xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
+                                       mp->m_sb.sb_inodesize, length, gen);
+       } else
+               version = 2;
+
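+       /*
+        * Each pass of this loop grabs one inode cluster buffer, zeroes it
+        * and stamps the inode cores before logging it or queueing it for
+        * write below.
+        */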
+       for (j = 0; j < nbufs; j++) {
+               /*
+                * Get the block.
+                */
+               d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
+               fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+                                        mp->m_bsize * blks_per_cluster,
+                                        XBF_UNMAPPED);
+               if (!fbuf)
+                       return ENOMEM;
+
+               /* Initialize the inode buffers and log them appropriately. */
+               fbuf->b_ops = &xfs_inode_buf_ops;
+               xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
+               for (i = 0; i < inodes_per_cluster; i++) {
+                       int     ioffset = i << mp->m_sb.sb_inodelog;
+                       uint    isize = xfs_dinode_size(version);
+
+                       free = xfs_make_iptr(mp, fbuf, i);
+                       free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+                       free->di_version = version;
+                       free->di_gen = cpu_to_be32(gen);
+                       free->di_next_unlinked = cpu_to_be32(NULLAGINO);
+
+                       if (version == 3) {
+                               free->di_ino = cpu_to_be64(ino);
+                               ino++;
+                               uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
+                               xfs_dinode_calc_crc(mp, free);
+                       } else if (tp) {
+                               /* just log the inode core */
+                               xfs_trans_log_buf(tp, fbuf, ioffset,
+                                                 ioffset + isize - 1);
+                       }
+               }
+
+               if (tp) {
+                       /*
+                        * Mark the buffer as an inode allocation buffer so it
+                        * sticks in the AIL at the point of this allocation
+                        * transaction. This ensures they are on disk before
+                        * the tail of the log can be moved past this
+                        * transaction (i.e. by preventing relogging from moving
+                        * it forward in the log).
+                        */
+                       xfs_trans_inode_alloc_buf(tp, fbuf);
+                       if (version == 3) {
+                               /*
+                                * Mark the buffer as ordered so that it is
+                                * not physically logged in the transaction but
+                                * still tracked in the AIL as part of the
+                                * transaction and pin the log appropriately.
+                                */
+                               xfs_trans_ordered_buf(tp, fbuf);
+                               xfs_trans_log_buf(tp, fbuf, 0,
+                                                 BBTOB(fbuf->b_length) - 1);
+                       }
+               } else {
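+                       /*
+                        * No transaction context (e.g. log recovery): mark
+                        * the buffer done and queue it for delayed write
+                        * instead of logging it.
+                        */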
+                       fbuf->b_flags |= XBF_DONE;
+                       xfs_buf_delwri_queue(fbuf, buffer_list);
+                       xfs_buf_relse(fbuf);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Allocate new inodes in the allocation group specified by agbp.
+ * Return 0 for success, else error code.
+ */
+STATIC int                             /* error code or 0 */
+xfs_ialloc_ag_alloc(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *agbp,          /* alloc group buffer */
+       int             *alloc)
+{
+       xfs_agi_t       *agi;           /* allocation group header */
+       xfs_alloc_arg_t args;           /* allocation argument structure */
+       xfs_agnumber_t  agno;
+       int             error;
+       xfs_agino_t     newino;         /* new first inode's number */
+       xfs_agino_t     newlen;         /* new number of inodes */
+       int             isaligned = 0;  /* inode allocation at stripe unit */
+                                       /* boundary */
+       struct xfs_perag *pag;
+
+       memset(&args, 0, sizeof(args));
+       args.tp = tp;
+       args.mp = tp->t_mountp;
+
+       /*
+        * Locking will ensure that we don't have two callers in here
+        * at one time.
+        */
+       newlen = args.mp->m_ialloc_inos;
+       if (args.mp->m_maxicount &&
+           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+               return ENOSPC;
+       args.minlen = args.maxlen = args.mp->m_ialloc_blks;
+       /*
+        * First try to allocate inodes contiguous with the last-allocated
+        * chunk of inodes.  If the filesystem is striped, this will fill
+        * an entire stripe unit with inodes.
+        */
+       agi = XFS_BUF_TO_AGI(agbp);
+       newino = be32_to_cpu(agi->agi_newino);
+       agno = be32_to_cpu(agi->agi_seqno);
+       args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+                    args.mp->m_ialloc_blks;
+       if (likely(newino != NULLAGINO &&
+                 (args.agbno < be32_to_cpu(agi->agi_length)))) {
+               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+               args.type = XFS_ALLOCTYPE_THIS_BNO;
+               args.prod = 1;
+
+               /*
+                * We need to take into account alignment here to ensure that
+                * we don't modify the free list if we fail to have an exact
+                * block. If we don't have an exact match, and every other
+                * allocation attempt fails, we'll end up cancelling
+                * a dirty transaction and shutting down.
+                *
+                * For an exact allocation, alignment must be 1;
+                * however, we need to take cluster alignment into account when
+                * fixing up the freelist. Use the minalignslop field to
+                * indicate that extra blocks might be required for alignment,
+                * but not to use them in the actual exact allocation.
+                */
+               args.alignment = 1;
+               args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
+
+               /* Allow space for the inode btree to split. */
+               args.minleft = args.mp->m_in_maxlevels - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+
+               /*
+                * This request might have dirtied the transaction if the AG can
+                * satisfy the request, but the exact block was not available.
+                * If the allocation did fail, subsequent requests will relax
+                * the exact agbno requirement and increase the alignment
+                * instead. It is critical that the total size of the request
+                * (len + alignment + slop) does not increase from this point
+                * on, so reset minalignslop to ensure it is not included in
+                * subsequent requests.
+                */
+               args.minalignslop = 0;
+       } else
+               args.fsbno = NULLFSBLOCK;
+
+       if (unlikely(args.fsbno == NULLFSBLOCK)) {
+               /*
+                * Set the alignment for the allocation.
+                * If stripe alignment is turned on then align at stripe unit
+                * boundary.
+                * If the cluster size is smaller than a filesystem block
+                * then we're doing I/O for inodes in filesystem block size
+                * pieces, so don't need alignment anyway.
+                */
+               isaligned = 0;
+               if (args.mp->m_sinoalign) {
+                       ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+                       args.alignment = args.mp->m_dalign;
+                       isaligned = 1;
+               } else
+                       args.alignment = xfs_ialloc_cluster_alignment(&args);
+               /*
+                * Need to figure out where to allocate the inode blocks.
+                * Ideally they should be spaced out through the a.g.
+                * For now, just allocate blocks up front.
+                */
+               args.agbno = be32_to_cpu(agi->agi_root);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+               /*
+                * Allocate a fixed-size extent of inodes.
+                */
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.prod = 1;
+               /*
+                * Allow space for the inode btree to split.
+                */
+               args.minleft = args.mp->m_in_maxlevels - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
+
+       /*
+        * If stripe alignment is turned on, then try again with cluster
+        * alignment.
+        */
+       if (isaligned && args.fsbno == NULLFSBLOCK) {
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.agbno = be32_to_cpu(agi->agi_root);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
+               args.alignment = xfs_ialloc_cluster_alignment(&args);
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
+
+       if (args.fsbno == NULLFSBLOCK) {
+               *alloc = 0;
+               return 0;
+       }
+       ASSERT(args.len == args.minlen);
+
+       /*
+        * Stamp and write the inode buffers.
+        *
+        * Seed the new inode cluster with a random generation number. This
+        * prevents short-term reuse of generation numbers if a chunk is
+        * freed and then immediately reallocated. We use random numbers
+        * rather than a linear progression to prevent the next generation
+        * number from being easily guessable.
+        */
+       error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
+                       args.len, prandom_u32());
+
+       if (error)
+               return error;
+       /*
+        * Convert the results.
+        */
+       newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+       be32_add_cpu(&agi->agi_count, newlen);
+       be32_add_cpu(&agi->agi_freecount, newlen);
+       pag = xfs_perag_get(args.mp, agno);
+       pag->pagi_freecount += newlen;
+       xfs_perag_put(pag);
+       agi->agi_newino = cpu_to_be32(newino);
+
+       /*
+        * Insert records describing the new inode chunk into the btrees.
+        */
+       error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+                                XFS_BTNUM_INO);
+       if (error)
+               return error;
+
+       if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
+               error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
+                                        XFS_BTNUM_FINO);
+               if (error)
+                       return error;
+       }
+       /*
+        * Log allocation group header fields
+        */
+       xfs_ialloc_log_agi(tp, agbp,
+               XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
+       /*
+        * Modify/log superblock values for inode count and inode free count.
+        */
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
+       *alloc = 1;
+       return 0;
+}
+
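+/*
+ * Return the current AG rotor value and advance the rotor, wrapping back to
+ * AG 0 at m_maxagi. Used below to rotate new directory inodes across the
+ * filesystem's allocation groups.
+ */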
+STATIC xfs_agnumber_t
+xfs_ialloc_next_ag(
+       xfs_mount_t     *mp)
+{
+       xfs_agnumber_t  agno;
+
+       spin_lock(&mp->m_agirotor_lock);
+       agno = mp->m_agirotor;
+       if (++mp->m_agirotor >= mp->m_maxagi)
+               mp->m_agirotor = 0;
+       spin_unlock(&mp->m_agirotor_lock);
+
+       return agno;
+}
+
+/*
+ * Select an allocation group to look for a free inode in, based on the parent
+ * inode and the mode.  Return the allocation group buffer.
+ */
+STATIC xfs_agnumber_t
+xfs_ialloc_ag_select(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_ino_t       parent,         /* parent directory inode number */
+       umode_t         mode,           /* bits set to indicate file type */
+       int             okalloc)        /* ok to allocate more space */
+{
+       xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
+       xfs_agnumber_t  agno;           /* current ag number */
+       int             flags;          /* alloc buffer locking flags */
+       xfs_extlen_t    ineed;          /* blocks needed for inode allocation */
+       xfs_extlen_t    longest = 0;    /* longest extent available */
+       xfs_mount_t     *mp;            /* mount point structure */
+       int             needspace;      /* file mode implies space allocated */
+       xfs_perag_t     *pag;           /* per allocation group data */
+       xfs_agnumber_t  pagno;          /* parent (starting) ag number */
+       int             error;
+
+       /*
+        * Files of these types need at least one block if length > 0
+        * (and they won't fit in the inode, but that's hard to figure out).
+        */
+       needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
+       mp = tp->t_mountp;
+       agcount = mp->m_maxagi;
+       if (S_ISDIR(mode))
+               pagno = xfs_ialloc_next_ag(mp);
+       else {
+               pagno = XFS_INO_TO_AGNO(mp, parent);
+               if (pagno >= agcount)
+                       pagno = 0;
+       }
+
+       ASSERT(pagno < agcount);
+
+       /*
+        * Loop through allocation groups, looking for one with a little
+        * free space in it.  Note we don't look for free inodes, exactly.
+        * Instead, we take into account that inodes may need to be
+        * allocated, which means blocks must be available for them
+        * if none are currently free.
+        */
+       agno = pagno;
+       flags = XFS_ALLOC_FLAG_TRYLOCK;
+       for (;;) {
+               pag = xfs_perag_get(mp, agno);
+               if (!pag->pagi_inodeok) {
+                       xfs_ialloc_next_ag(mp);
+                       goto nextag;
+               }
+
+               if (!pag->pagi_init) {
+                       error = xfs_ialloc_pagi_init(mp, tp, agno);
+                       if (error)
+                               goto nextag;
+               }
+
+               if (pag->pagi_freecount) {
+                       xfs_perag_put(pag);
+                       return agno;
+               }
+
+               if (!okalloc)
+                       goto nextag;
+
+               if (!pag->pagf_init) {
+                       error = xfs_alloc_pagf_init(mp, tp, agno, flags);
+                       if (error)
+                               goto nextag;
+               }
+
+               /*
+                * Is there enough free space for the file plus a block of
+                * inodes (if we need to allocate some)?
+                */
+               ineed = mp->m_ialloc_blks;
+               longest = pag->pagf_longest;
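+               /*
+                * A zero pagf_longest does not necessarily mean the AG is
+                * full; blocks may still sit on the AG freelist, so treat a
+                * non-empty AGFL as a single free block.
+                */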
+               if (!longest)
+                       longest = pag->pagf_flcount > 0;
+
+               if (pag->pagf_freeblks >= needspace + ineed &&
+                   longest >= ineed) {
+                       xfs_perag_put(pag);
+                       return agno;
+               }
+nextag:
+               xfs_perag_put(pag);
+               /*
+                * No point in iterating over the rest, if we're shutting
+                * down.
+                */
+               if (XFS_FORCED_SHUTDOWN(mp))
+                       return NULLAGNUMBER;
+               agno++;
+               if (agno >= agcount)
+                       agno = 0;
+               if (agno == pagno) {
+                       if (flags == 0)
+                               return NULLAGNUMBER;
+                       flags = 0;
+               }
+       }
+}
+
+/*
+ * Try to retrieve the next record to the left/right from the current one.
+ */
+STATIC int
+xfs_ialloc_next_rec(
+       struct xfs_btree_cur    *cur,
+       xfs_inobt_rec_incore_t  *rec,
+       int                     *done,
+       int                     left)
+{
+       int                     error;
+       int                     i;
+
+       if (left)
+               error = xfs_btree_decrement(cur, 0, &i);
+       else
+               error = xfs_btree_increment(cur, 0, &i);
+
+       if (error)
+               return error;
+       *done = !i;
+       if (i) {
+               error = xfs_inobt_get_rec(cur, rec, &i);
+               if (error)
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_ialloc_get_rec(
+       struct xfs_btree_cur    *cur,
+       xfs_agino_t             agino,
+       xfs_inobt_rec_incore_t  *rec,
+       int                     *done)
+{
+       int                     error;
+       int                     i;
+
+       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
+       if (error)
+               return error;
+       *done = !i;
+       if (i) {
+               error = xfs_inobt_get_rec(cur, rec, &i);
+               if (error)
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate an inode using the inobt-only algorithm.
+ */
+STATIC int
+xfs_dialloc_ag_inobt(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_ino_t               parent,
+       xfs_ino_t               *inop)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
+       xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
+       xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
+       struct xfs_perag        *pag;
+       struct xfs_btree_cur    *cur, *tcur;
+       struct xfs_inobt_rec_incore rec, trec;
+       xfs_ino_t               ino;
+       int                     error;
+       int                     offset;
+       int                     i, j;
+
+       pag = xfs_perag_get(mp, agno);
+
+       ASSERT(pag->pagi_init);
+       ASSERT(pag->pagi_inodeok);
+       ASSERT(pag->pagi_freecount > 0);
+
+ restart_pagno:
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+       /*
+        * If pagino is 0 (this is the root inode allocation) use newino.
+        * This must work because we've just allocated some.
+        */
+       if (!pagino)
+               pagino = be32_to_cpu(agi->agi_newino);
+
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error0;
+
+       /*
+        * If in the same AG as the parent, try to get near the parent.
+        */
+       if (pagno == agno) {
+               int             doneleft;       /* done, to the left */
+               int             doneright;      /* done, to the right */
+               int             searchdistance = 10;
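+               /*
+                * searchdistance bounds how many records we walk away from
+                * the parent before giving up and falling back to the
+                * newino/whole-AG search below.
+                */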
+
+               error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+               error = xfs_inobt_get_rec(cur, &rec, &j);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
+
+               if (rec.ir_freecount > 0) {
+                       /*
+                        * Found a free inode in the same chunk
+                        * as the parent, done.
+                        */
+                       goto alloc_inode;
+               }
+
+
+               /*
+                * In the same AG as parent, but parent's chunk is full.
+                */
+
+               /* duplicate the cursor, search left & right simultaneously */
+               error = xfs_btree_dup_cursor(cur, &tcur);
+               if (error)
+                       goto error0;
+
+               /*
+                * Skip to last blocks looked up if same parent inode.
+                */
+               if (pagino != NULLAGINO &&
+                   pag->pagl_pagino == pagino &&
+                   pag->pagl_leftrec != NULLAGINO &&
+                   pag->pagl_rightrec != NULLAGINO) {
+                       error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
+                                                  &trec, &doneleft);
+                       if (error)
+                               goto error1;
+
+                       error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
+                                                  &rec, &doneright);
+                       if (error)
+                               goto error1;
+               } else {
+                       /* search left with tcur, back up 1 record */
+                       error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
+                       if (error)
+                               goto error1;
+
+                       /* search right with cur, go forward 1 record. */
+                       error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
+                       if (error)
+                               goto error1;
+               }
+
+               /*
+                * Loop until we find an inode chunk with a free inode.
+                */
+               while (!doneleft || !doneright) {
+                       int     useleft;  /* using left inode chunk this time */
+
+                       if (!--searchdistance) {
+                               /*
+                                * Not in range - save last search
+                                * location and allocate a new inode
+                                */
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+                               pag->pagl_leftrec = trec.ir_startino;
+                               pag->pagl_rightrec = rec.ir_startino;
+                               pag->pagl_pagino = pagino;
+                               goto newino;
+                       }
+
+                       /* figure out the closer block if both are valid. */
+                       if (!doneleft && !doneright) {
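+                               /*
+                                * Compare the distance from the parent to the
+                                * last inode of the left chunk against the
+                                * distance to the first inode of the right
+                                * chunk.
+                                */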
+                               useleft = pagino -
+                                (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
+                                 rec.ir_startino - pagino;
+                       } else {
+                               useleft = !doneleft;
+                       }
+
+                       /* free inodes to the left? */
+                       if (useleft && trec.ir_freecount) {
+                               rec = trec;
+                               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+                               cur = tcur;
+
+                               pag->pagl_leftrec = trec.ir_startino;
+                               pag->pagl_rightrec = rec.ir_startino;
+                               pag->pagl_pagino = pagino;
+                               goto alloc_inode;
+                       }
+
+                       /* free inodes to the right? */
+                       if (!useleft && rec.ir_freecount) {
+                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+
+                               pag->pagl_leftrec = trec.ir_startino;
+                               pag->pagl_rightrec = rec.ir_startino;
+                               pag->pagl_pagino = pagino;
+                               goto alloc_inode;
+                       }
+
+                       /* get next record to check */
+                       if (useleft) {
+                               error = xfs_ialloc_next_rec(tcur, &trec,
+                                                                &doneleft, 1);
+                       } else {
+                               error = xfs_ialloc_next_rec(cur, &rec,
+                                                                &doneright, 0);
+                       }
+                       if (error)
+                               goto error1;
+               }
+
+               /*
+                * We've reached the end of the btree. Because
+                * we are only searching a small chunk of the
+                * btree each search, there are obviously free
+                * inodes closer to the parent inode than we
+                * are now. Restart the search again.
+                */
+               pag->pagl_pagino = NULLAGINO;
+               pag->pagl_leftrec = NULLAGINO;
+               pag->pagl_rightrec = NULLAGINO;
+               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+               goto restart_pagno;
+       }
+
+       /*
+        * In a different AG from the parent.
+        * See if the most recently allocated block has any free.
+        */
+newino:
+       if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+               error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+                                        XFS_LOOKUP_EQ, &i);
+               if (error)
+                       goto error0;
+
+               if (i == 1) {
+                       error = xfs_inobt_get_rec(cur, &rec, &j);
+                       if (error)
+                               goto error0;
+
+                       if (j == 1 && rec.ir_freecount > 0) {
+                               /*
+                                * The last chunk allocated in the group
+                                * still has a free inode.
+                                */
+                               goto alloc_inode;
+                       }
+               }
+       }
+
+       /*
+        * None left in the last group, search the whole AG
+        */
+       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+       if (error)
+               goto error0;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+       for (;;) {
+               error = xfs_inobt_get_rec(cur, &rec, &i);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               if (rec.ir_freecount > 0)
+                       break;
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto error0;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       }
+
+alloc_inode:
+       offset = xfs_lowbit64(rec.ir_free);
+       ASSERT(offset >= 0);
+       ASSERT(offset < XFS_INODES_PER_CHUNK);
+       ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+                                  XFS_INODES_PER_CHUNK) == 0);
+       ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+       rec.ir_free &= ~XFS_INOBT_MASK(offset);
+       rec.ir_freecount--;
+       error = xfs_inobt_update(cur, &rec);
+       if (error)
+               goto error0;
+       be32_add_cpu(&agi->agi_freecount, -1);
+       xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+       pag->pagi_freecount--;
+
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error0;
+
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+       xfs_perag_put(pag);
+       *inop = ino;
+       return 0;
+error1:
+       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+error0:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       xfs_perag_put(pag);
+       return error;
+}
+
+/*
+ * Use the free inode btree to allocate an inode based on distance from the
+ * parent. Note that the provided cursor may be deleted and replaced.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_near(
+       xfs_agino_t                     pagino,
+       struct xfs_btree_cur            **ocur,
+       struct xfs_inobt_rec_incore     *rec)
+{
+       struct xfs_btree_cur            *lcur = *ocur;  /* left search cursor */
+       struct xfs_btree_cur            *rcur;  /* right search cursor */
+       struct xfs_inobt_rec_incore     rrec;
+       int                             error;
+       int                             i, j;
+
+       error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
+       if (error)
+               return error;
+
+       if (i == 1) {
+               error = xfs_inobt_get_rec(lcur, rec, &i);
+               if (error)
+                       return error;
+               XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+               /*
+                * See if we've landed in the parent inode record. The finobt
+                * only tracks chunks with at least one free inode, so record
+                * existence is enough.
+                */
+               if (pagino >= rec->ir_startino &&
+                   pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
+                       return 0;
+       }
+
+       error = xfs_btree_dup_cursor(lcur, &rcur);
+       if (error)
+               return error;
+
+       error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
+       if (error)
+               goto error_rcur;
+       if (j == 1) {
+               error = xfs_inobt_get_rec(rcur, &rrec, &j);
+               if (error)
+                       goto error_rcur;
+               XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
+       }
+
+       XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
+       if (i == 1 && j == 1) {
+               /*
+                * Both the left and right records are valid. Choose the closer
+                * inode chunk to the target.
+                */
+               if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
+                   (rrec.ir_startino - pagino)) {
+                       *rec = rrec;
+                       xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+                       *ocur = rcur;
+               } else {
+                       xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+               }
+       } else if (j == 1) {
+               /* only the right record is valid */
+               *rec = rrec;
+               xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
+               *ocur = rcur;
+       } else if (i == 1) {
+               /* only the left record is valid */
+               xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
+       }
+
+       return 0;
+
+error_rcur:
+       xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Use the free inode btree to find a free inode based on a newino hint. If
+ * the hint is NULL, find the first free inode in the AG.
+ */
+STATIC int
+xfs_dialloc_ag_finobt_newino(
+       struct xfs_agi                  *agi,
+       struct xfs_btree_cur            *cur,
+       struct xfs_inobt_rec_incore     *rec)
+{
+       int error;
+       int i;
+
+       if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
+               error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
+                                        XFS_LOOKUP_EQ, &i);
+               if (error)
+                       return error;
+               if (i == 1) {
+                       error = xfs_inobt_get_rec(cur, rec, &i);
+                       if (error)
+                               return error;
+                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+                       return 0;
+               }
+       }
+
+       /*
+        * Find the first inode available in the AG.
+        */
+       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+       error = xfs_inobt_get_rec(cur, rec, &i);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+       return 0;
+}
+
+/*
+ * Update the inobt based on a modification made to the finobt. Also ensure that
+ * the records from both trees are equivalent post-modification.
+ */
+STATIC int
+xfs_dialloc_ag_update_inobt(
+       struct xfs_btree_cur            *cur,   /* inobt cursor */
+       struct xfs_inobt_rec_incore     *frec,  /* finobt record */
+       int                             offset) /* inode offset */
+{
+       struct xfs_inobt_rec_incore     rec;
+       int                             error;
+       int                             i;
+
+       error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+
+       error = xfs_inobt_get_rec(cur, &rec, &i);
+       if (error)
+               return error;
+       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
+                                  XFS_INODES_PER_CHUNK) == 0);
+
+       rec.ir_free &= ~XFS_INOBT_MASK(offset);
+       rec.ir_freecount--;
+
+       XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
+                                 (rec.ir_freecount == frec->ir_freecount));
+
+       error = xfs_inobt_update(cur, &rec);
+       if (error)
+               return error;
+
+       return 0;
+}
+
+/*
+ * Allocate an inode using the free inode btree, if available. Otherwise, fall
+ * back to the inobt search algorithm.
+ *
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
+ */
+STATIC int
+xfs_dialloc_ag(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_ino_t               parent,
+       xfs_ino_t               *inop)
+{
+       struct xfs_mount                *mp = tp->t_mountp;
+       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
+       xfs_agnumber_t                  pagno = XFS_INO_TO_AGNO(mp, parent);
+       xfs_agino_t                     pagino = XFS_INO_TO_AGINO(mp, parent);
+       struct xfs_perag                *pag;
+       struct xfs_btree_cur            *cur;   /* finobt cursor */
+       struct xfs_btree_cur            *icur;  /* inobt cursor */
+       struct xfs_inobt_rec_incore     rec;
+       xfs_ino_t                       ino;
+       int                             error;
+       int                             offset;
+       int                             i;
+
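+       /* Without a free inode btree, fall back to the inobt-only search. */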
+       if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+               return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
+
+       pag = xfs_perag_get(mp, agno);
+
+       /*
+        * If pagino is 0 (this is the root inode allocation) use newino.
+        * This must work because we've just allocated some.
+        */
+       if (!pagino)
+               pagino = be32_to_cpu(agi->agi_newino);
+
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error_cur;
+
+       /*
+        * The search algorithm depends on whether we're in the same AG as the
+        * parent. If so, find the closest available inode to the parent. If
+        * not, consider the agi hint or find the first free inode in the AG.
+        */
+       if (agno == pagno)
+               error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
+       else
+               error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
+       if (error)
+               goto error_cur;
+
+       offset = xfs_lowbit64(rec.ir_free);
+       ASSERT(offset >= 0);
+       ASSERT(offset < XFS_INODES_PER_CHUNK);
+       ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+                                  XFS_INODES_PER_CHUNK) == 0);
+       ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+
+       /*
+        * Modify or remove the finobt record.
+        */
+       rec.ir_free &= ~XFS_INOBT_MASK(offset);
+       rec.ir_freecount--;
+       if (rec.ir_freecount)
+               error = xfs_inobt_update(cur, &rec);
+       else
+               error = xfs_btree_delete(cur, &i);
+       if (error)
+               goto error_cur;
+
+       /*
+        * The finobt has now been updated appropriately. We haven't updated the
+        * agi and superblock yet, so we can create an inobt cursor and validate
+        * the original freecount. If all is well, make the equivalent update to
+        * the inobt using the finobt record and offset information.
+        */
+       icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+
+       error = xfs_check_agi_freecount(icur, agi);
+       if (error)
+               goto error_icur;
+
+       error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
+       if (error)
+               goto error_icur;
+
+       /*
+        * Both trees have now been updated. We must update the perag and
+        * superblock before we can check the freecount for each btree.
+        */
+       be32_add_cpu(&agi->agi_freecount, -1);
+       xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+       pag->pagi_freecount--;
+
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+
+       error = xfs_check_agi_freecount(icur, agi);
+       if (error)
+               goto error_icur;
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error_icur;
+
+       xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       xfs_perag_put(pag);
+       *inop = ino;
+       return 0;
+
+error_icur:
+       xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
+error_cur:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       xfs_perag_put(pag);
+       return error;
+}
+
+/*
+ * Allocate an inode on disk.
+ *
+ * Mode is used to tell whether the new inode will need space, and whether it
+ * is a directory.
+ *
+ * This function is designed to be called twice if it has to do an allocation
+ * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
+ * If an inode is available without having to perform an allocation, an inode
+ * number is returned.  In this case, *IO_agbp is set to NULL.  If an allocation
+ * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
+ * The caller should then commit the current transaction, allocate a
+ * new transaction, and call xfs_dialloc() again, passing in the previous value
+ * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
+ * buffer is locked across the two calls, the second call is guaranteed to have
+ * a free inode available.
+ *
+ * Once we successfully pick an inode, its number is returned and the on-disk
+ * data structures are updated.  The inode itself is not read in, since doing so
+ * would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+       struct xfs_trans        *tp,
+       xfs_ino_t               parent,
+       umode_t                 mode,
+       int                     okalloc,
+       struct xfs_buf          **IO_agbp,
+       xfs_ino_t               *inop)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_buf          *agbp;
+       xfs_agnumber_t          agno;
+       int                     error;
+       int                     ialloced;
+       int                     noroom = 0;
+       xfs_agnumber_t          start_agno;
+       struct xfs_perag        *pag;
+
+       if (*IO_agbp) {
+               /*
+                * If the caller passes in a pointer to the AGI buffer,
+                * continue where we left off before.  In this case, we
+                * know that the allocation group has free inodes.
+                */
+               agbp = *IO_agbp;
+               goto out_alloc;
+       }
+
+       /*
+        * We do not have an agbp, so select an initial allocation
+        * group for inode allocation.
+        */
+       start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+       if (start_agno == NULLAGNUMBER) {
+               *inop = NULLFSINO;
+               return 0;
+       }
+
+       /*
+        * If we have already hit the ceiling of inode blocks then clear
+        * okalloc so we scan all available agi structures for a free
+        * inode.
+        */
+       if (mp->m_maxicount &&
+           mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+               noroom = 1;
+               okalloc = 0;
+       }
+
+       /*
+        * Loop until we find an allocation group that either has free inodes
+        * or in which we can allocate some inodes.  Iterate through the
+        * allocation groups upward, wrapping at the end.
+        */
+       agno = start_agno;
+       for (;;) {
+               pag = xfs_perag_get(mp, agno);
+               if (!pag->pagi_inodeok) {
+                       xfs_ialloc_next_ag(mp);
+                       goto nextag;
+               }
+
+               if (!pag->pagi_init) {
+                       error = xfs_ialloc_pagi_init(mp, tp, agno);
+                       if (error)
+                               goto out_error;
+               }
+
+               /*
+                * Do a first racy fast path check if this AG is usable.
+                */
+               if (!pag->pagi_freecount && !okalloc)
+                       goto nextag;
+
+               /*
+                * Then read in the AGI buffer and recheck with the AGI buffer
+                * lock held.
+                */
+               error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+               if (error)
+                       goto out_error;
+
+               if (pag->pagi_freecount) {
+                       xfs_perag_put(pag);
+                       goto out_alloc;
+               }
+
+               if (!okalloc)
+                       goto nextag_relse_buffer;
+
+               error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
+               if (error) {
+                       xfs_trans_brelse(tp, agbp);
+
+                       if (error != ENOSPC)
+                               goto out_error;
+
+                       xfs_perag_put(pag);
+                       *inop = NULLFSINO;
+                       return 0;
+               }
+
+               if (ialloced) {
+                       /*
+                        * We successfully allocated some inodes, return
+                        * the current context to the caller so that it
+                        * can commit the current transaction and call
+                        * us again where we left off.
+                        */
+                       ASSERT(pag->pagi_freecount > 0);
+                       xfs_perag_put(pag);
+
+                       *IO_agbp = agbp;
+                       *inop = NULLFSINO;
+                       return 0;
+               }
+
+nextag_relse_buffer:
+               xfs_trans_brelse(tp, agbp);
+nextag:
+               xfs_perag_put(pag);
+               if (++agno == mp->m_sb.sb_agcount)
+                       agno = 0;
+               if (agno == start_agno) {
+                       *inop = NULLFSINO;
+                       return noroom ? ENOSPC : 0;
+               }
+       }
+
+out_alloc:
+       *IO_agbp = NULL;
+       return xfs_dialloc_ag(tp, agbp, parent, inop);
+out_error:
+       xfs_perag_put(pag);
+       return error;
+}
+
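The two-phase protocol described in the comment above is easiest to see from the
caller's side. A minimal sketch of the retry loop, assuming tp, parent, mode and
okalloc come from the surrounding context and that commit_and_renew_trans() is a
hypothetical stand-in for the caller's "commit and allocate a new transaction"
step (error handling elided):

        struct xfs_buf  *agibp = NULL;  /* carried across the two calls */
        xfs_ino_t       ino;
        int             error;

        error = xfs_dialloc(tp, parent, mode, okalloc, &agibp, &ino);
        if (!error && agibp) {
                /*
                 * Free inodes were created but none allocated yet: commit,
                 * start a new transaction and retry with the locked AGI
                 * buffer handed back in *IO_agbp.
                 */
                tp = commit_and_renew_trans(tp);        /* hypothetical helper */
                error = xfs_dialloc(tp, parent, mode, okalloc, &agibp, &ino);
        }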
+STATIC int
+xfs_difree_inobt(
+       struct xfs_mount                *mp,
+       struct xfs_trans                *tp,
+       struct xfs_buf                  *agbp,
+       xfs_agino_t                     agino,
+       struct xfs_bmap_free            *flist,
+       int                             *deleted,
+       xfs_ino_t                       *first_ino,
+       struct xfs_inobt_rec_incore     *orec)
+{
+       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
+       struct xfs_perag                *pag;
+       struct xfs_btree_cur            *cur;
+       struct xfs_inobt_rec_incore     rec;
+       int                             ilen;
+       int                             error;
+       int                             i;
+       int                             off;
+
+       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+       ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
+
+       /*
+        * Initialize the cursor.
+        */
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error0;
+
+       /*
+        * Look for the entry describing this inode.
+        */
+       if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
+               xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
+                       __func__, error);
+               goto error0;
+       }
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       error = xfs_inobt_get_rec(cur, &rec, &i);
+       if (error) {
+               xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
+                       __func__, error);
+               goto error0;
+       }
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       /*
+        * Get the offset in the inode chunk.
+        */
+       off = agino - rec.ir_startino;
+       ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
+       ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
+       /*
+        * Mark the inode free & increment the count.
+        */
+       rec.ir_free |= XFS_INOBT_MASK(off);
+       rec.ir_freecount++;
+
+       /*
+        * When an inode cluster is free, it becomes eligible for removal
+        */
+       if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
+           (rec.ir_freecount == mp->m_ialloc_inos)) {
+
+               *deleted = 1;
+               *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
+
+               /*
+                * Remove the inode cluster from the AGI B+Tree, adjust the
+                * AGI and Superblock inode counts, and mark the disk space
+                * to be freed when the transaction is committed.
+                */
+               ilen = mp->m_ialloc_inos;
+               be32_add_cpu(&agi->agi_count, -ilen);
+               be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
+               xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
+               pag = xfs_perag_get(mp, agno);
+               pag->pagi_freecount -= ilen - 1;
+               xfs_perag_put(pag);
+               xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
+               xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
+
+               if ((error = xfs_btree_delete(cur, &i))) {
+                       xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
+                               __func__, error);
+                       goto error0;
+               }
+
+               xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
+                                 XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
+                                 mp->m_ialloc_blks, flist, mp);
+       } else {
+               *deleted = 0;
+
+               error = xfs_inobt_update(cur, &rec);
+               if (error) {
+                       xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
+                               __func__, error);
+                       goto error0;
+               }
+
+       /*
+                * Change the inode free counts and log the ag/sb changes.
+                */
+               be32_add_cpu(&agi->agi_freecount, 1);
+               xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+               pag = xfs_perag_get(mp, agno);
+               pag->pagi_freecount++;
+               xfs_perag_put(pag);
+               xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
+       }
+
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error0;
+
+       *orec = rec;
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       return 0;
+
+error0:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Free an inode in the free inode btree.
+ */
+STATIC int
+xfs_difree_finobt(
+       struct xfs_mount                *mp,
+       struct xfs_trans                *tp,
+       struct xfs_buf                  *agbp,
+       xfs_agino_t                     agino,
+       struct xfs_inobt_rec_incore     *ibtrec) /* inobt record */
+{
+       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
+       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
+       struct xfs_btree_cur            *cur;
+       struct xfs_inobt_rec_incore     rec;
+       int                             offset = agino - ibtrec->ir_startino;
+       int                             error;
+       int                             i;
+
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
+
+       error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
+       if (error)
+               goto error;
+       if (i == 0) {
+               /*
+                * If the record does not exist in the finobt, we must have just
+                * freed an inode in a previously fully allocated chunk. If not,
+                * something is out of sync.
+                */
+               XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
+
+               error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+                                            ibtrec->ir_free, &i);
+               if (error)
+                       goto error;
+               ASSERT(i == 1);
+
+               goto out;
+       }
+
+       /*
+        * Read and update the existing record. We could just copy the ibtrec
+        * across here, but that would defeat the purpose of having redundant
+        * metadata. By making the modifications independently, we can catch
+        * corruptions that we wouldn't see if we just copied from one record
+        * to another.
+        */
+       error = xfs_inobt_get_rec(cur, &rec, &i);
+       if (error)
+               goto error;
+       XFS_WANT_CORRUPTED_GOTO(i == 1, error);
+
+       rec.ir_free |= XFS_INOBT_MASK(offset);
+       rec.ir_freecount++;
+
+       XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
+                               (rec.ir_freecount == ibtrec->ir_freecount),
+                               error);
+
+       /*
+        * The content of inobt records should always match between the inobt
+        * and finobt. The lifecycle of records in the finobt is different from
+        * the inobt in that the finobt only tracks records with at least one
+        * free inode. Hence, if all of the inodes are free and we aren't
+        * keeping inode chunks permanently on disk, remove the record.
+        * Otherwise, update the record with the new information.
+        */
+       if (rec.ir_freecount == mp->m_ialloc_inos &&
+           !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto error;
+               ASSERT(i == 1);
+       } else {
+               error = xfs_inobt_update(cur, &rec);
+               if (error)
+                       goto error;
+       }
+
+out:
+       error = xfs_check_agi_freecount(cur, agi);
+       if (error)
+               goto error;
+
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       return 0;
+
+error:
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Free disk inode.  Carefully avoids touching the incore inode; all
+ * incore manipulations are the caller's responsibility.
+ * The on-disk inode is not changed by this operation, only the
+ * btree (free inode mask) is changed.
+ */
+int
+xfs_difree(
+       struct xfs_trans        *tp,            /* transaction pointer */
+       xfs_ino_t               inode,          /* inode to be freed */
+       struct xfs_bmap_free    *flist,         /* extents to free */
+       int                     *deleted,/* set if inode cluster was deleted */
+       xfs_ino_t               *first_ino)/* first inode in deleted cluster */
+{
+       /* REFERENCED */
+       xfs_agblock_t           agbno;  /* block number containing inode */
+       struct xfs_buf          *agbp;  /* buffer for allocation group header */
+       xfs_agino_t             agino;  /* allocation group inode number */
+       xfs_agnumber_t          agno;   /* allocation group number */
+       int                     error;  /* error return value */
+       struct xfs_mount        *mp;    /* mount structure for filesystem */
+       struct xfs_inobt_rec_incore rec;/* btree record */
+
+       mp = tp->t_mountp;
+
+       /*
+        * Break up inode number into its components.
+        */
+       agno = XFS_INO_TO_AGNO(mp, inode);
+       if (agno >= mp->m_sb.sb_agcount)  {
+               xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
+                       __func__, agno, mp->m_sb.sb_agcount);
+               ASSERT(0);
+               return EINVAL;
+       }
+       agino = XFS_INO_TO_AGINO(mp, inode);
+       if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
+               xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+                       __func__, (unsigned long long)inode,
+                       (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
+               ASSERT(0);
+               return EINVAL;
+       }
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+       if (agbno >= mp->m_sb.sb_agblocks)  {
+               xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
+                       __func__, agbno, mp->m_sb.sb_agblocks);
+               ASSERT(0);
+               return EINVAL;
+       }
+       /*
+        * Get the allocation group header.
+        */
+       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+       if (error) {
+               xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
+                       __func__, error);
+               return error;
+       }
+
+       /*
+        * Fix up the inode allocation btree.
+        */
+       error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
+                                &rec);
+       if (error)
+               goto error0;
+
+       /*
+        * Fix up the free inode btree.
+        */
+       if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+               error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
+               if (error)
+                       goto error0;
+       }
+
+       return 0;
+
+error0:
+       return error;
+}
+
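A caller of xfs_difree() supplies a free-extent list so that a removed inode
cluster is freed together with the rest of the transaction. A minimal sketch,
assuming tp and ip come from the surrounding context and the usual
xfs_bmap_init()/xfs_bmap_finish() pattern (setup and error handling elided):

        struct xfs_bmap_free    flist;
        xfs_fsblock_t           firstblock;
        xfs_ino_t               first_ino;
        int                     deleted;
        int                     error;

        xfs_bmap_init(&flist, &firstblock);
        error = xfs_difree(tp, ip->i_ino, &flist, &deleted, &first_ino);
        /*
         * If deleted is set, the whole cluster starting at first_ino came
         * free; its blocks sit on flist and are released when the caller
         * runs xfs_bmap_finish() and commits the transaction.
         */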
+STATIC int
+xfs_imap_lookup(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_agnumber_t          agno,
+       xfs_agino_t             agino,
+       xfs_agblock_t           agbno,
+       xfs_agblock_t           *chunk_agbno,
+       xfs_agblock_t           *offset_agbno,
+       int                     flags)
+{
+       struct xfs_inobt_rec_incore rec;
+       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp;
+       int                     error;
+       int                     i;
+
+       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+       if (error) {
+               xfs_alert(mp,
+                       "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
+                       __func__, error, agno);
+               return error;
+       }
+
+       /*
+        * Lookup the inode record for the given agino. If the record cannot be
+        * found, then it's an invalid inode number and we should abort. Once
+        * we have a record, we need to ensure it contains the inode number
+        * we are looking up.
+        */
+       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
+       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
+       if (!error) {
+               if (i)
+                       error = xfs_inobt_get_rec(cur, &rec, &i);
+               if (!error && i == 0)
+                       error = EINVAL;
+       }
+
+       xfs_trans_brelse(tp, agbp);
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       if (error)
+               return error;
+
+       /* check that the returned record contains the required inode */
+       if (rec.ir_startino > agino ||
+           rec.ir_startino + mp->m_ialloc_inos <= agino)
+               return EINVAL;
+
+       /* for untrusted inodes check it is allocated first */
+       if ((flags & XFS_IGET_UNTRUSTED) &&
+           (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
+               return EINVAL;
+
+       *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
+       *offset_agbno = agbno - *chunk_agbno;
+       return 0;
+}
+
+/*
+ * Return the location of the inode in imap, for mapping it into a buffer.
+ */
+int
+xfs_imap(
+       xfs_mount_t      *mp,   /* file system mount structure */
+       xfs_trans_t      *tp,   /* transaction pointer */
+       xfs_ino_t       ino,    /* inode to locate */
+       struct xfs_imap *imap,  /* location map structure */
+       uint            flags)  /* flags for inode btree lookup */
+{
+       xfs_agblock_t   agbno;  /* block number of inode in the alloc group */
+       xfs_agino_t     agino;  /* inode number within alloc group */
+       xfs_agnumber_t  agno;   /* allocation group number */
+       int             blks_per_cluster; /* num blocks per inode cluster */
+       xfs_agblock_t   chunk_agbno;    /* first block in inode chunk */
+       xfs_agblock_t   cluster_agbno;  /* first block in inode cluster */
+       int             error;  /* error code */
+       int             offset; /* index of inode in its buffer */
+       xfs_agblock_t   offset_agbno;   /* blks from chunk start to inode */
+
+       ASSERT(ino != NULLFSINO);
+
+       /*
+        * Split up the inode number into its parts.
+        */
+       agno = XFS_INO_TO_AGNO(mp, ino);
+       agino = XFS_INO_TO_AGINO(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+       if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+           ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+#ifdef DEBUG
+               /*
+                * Don't output diagnostic information for untrusted inodes
+                * as they can be invalid without implying corruption.
+                */
+               if (flags & XFS_IGET_UNTRUSTED)
+                       return EINVAL;
+               if (agno >= mp->m_sb.sb_agcount) {
+                       xfs_alert(mp,
+                               "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
+                               __func__, agno, mp->m_sb.sb_agcount);
+               }
+               if (agbno >= mp->m_sb.sb_agblocks) {
+                       xfs_alert(mp,
+               "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
+                               __func__, (unsigned long long)agbno,
+                               (unsigned long)mp->m_sb.sb_agblocks);
+               }
+               if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+                       xfs_alert(mp,
+               "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+                               __func__, ino,
+                               XFS_AGINO_TO_INO(mp, agno, agino));
+               }
+               xfs_stack_trace();
+#endif /* DEBUG */
+               return EINVAL;
+       }
+
+       blks_per_cluster = xfs_icluster_size_fsb(mp);
+
+       /*
+        * For bulkstat and handle lookups, we have an untrusted inode number
+        * that we have to verify is valid. We cannot do this just by reading
+        * the inode buffer as it may have been unlinked and removed leaving
+        * inodes in stale state on disk. Hence we have to do a btree lookup
+        * in all cases where an untrusted inode number is passed.
+        */
+       if (flags & XFS_IGET_UNTRUSTED) {
+               error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+                                       &chunk_agbno, &offset_agbno, flags);
+               if (error)
+                       return error;
+               goto out_map;
+       }
+
+       /*
+        * If the inode cluster size is the same as the blocksize or
+        * smaller, we get to the buffer by simple arithmetic.
+        */
+       if (blks_per_cluster == 1) {
+               offset = XFS_INO_TO_OFFSET(mp, ino);
+               ASSERT(offset < mp->m_sb.sb_inopblock);
+
+               imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
+               imap->im_len = XFS_FSB_TO_BB(mp, 1);
+               imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
+               return 0;
+       }
+
+       /*
+        * If the inode chunks are aligned then use simple maths to
+        * find the location. Otherwise we have to do a btree
+        * lookup to find the location.
+        */
+       if (mp->m_inoalign_mask) {
+               offset_agbno = agbno & mp->m_inoalign_mask;
+               chunk_agbno = agbno - offset_agbno;
+       } else {
+               error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+                                       &chunk_agbno, &offset_agbno, flags);
+               if (error)
+                       return error;
+       }
+
+out_map:
+       ASSERT(agbno >= chunk_agbno);
+       cluster_agbno = chunk_agbno +
+               ((offset_agbno / blks_per_cluster) * blks_per_cluster);
+       offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+               XFS_INO_TO_OFFSET(mp, ino);
+
+       imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
+       imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+       imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
+
+       /*
+        * If the inode number maps to a block outside the bounds
+        * of the file system then return an error rather than calling
+        * read_buf and panicking when we get an error from the
+        * driver.
+        */
+       if ((imap->im_blkno + imap->im_len) >
+           XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+               xfs_alert(mp,
+       "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
+                       __func__, (unsigned long long) imap->im_blkno,
+                       (unsigned long long) imap->im_len,
+                       XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+               return EINVAL;
+       }
+       return 0;
+}
+
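The untrusted path above is what bulkstat and handle callers rely on: a bogus
inode number comes back as EINVAL instead of mapping to an arbitrary buffer. A
minimal sketch of such a lookup, assuming mp, tp and ino come from the
surrounding context:

        struct xfs_imap imap;
        int             error;

        error = xfs_imap(mp, tp, ino, &imap, XFS_IGET_UNTRUSTED);
        if (error)
                return error;   /* EINVAL for an invalid or unallocated inode */
        /*
         * imap.im_blkno, im_len and im_boffset now locate the inode within
         * its cluster buffer, ready for a mapped read.
         */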
+/*
+ * Compute and fill in value of m_in_maxlevels.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+       xfs_mount_t     *mp)            /* file system mount structure */
+{
+       int             level;
+       uint            maxblocks;
+       uint            maxleafents;
+       int             minleafrecs;
+       int             minnoderecs;
+
+       maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
+               XFS_INODES_PER_CHUNK_LOG;
+       minleafrecs = mp->m_alloc_mnr[0];
+       minnoderecs = mp->m_alloc_mnr[1];
+       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+       for (level = 1; maxblocks > 1; level++)
+               maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+       mp->m_in_maxlevels = level;
+}
+
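As a worked illustration of the loop above, with assumed rather than actual
geometry: if an AG can hold 2^27 inodes, maxleafents = 2^27 >> 6 = 2^21 chunk
records. With minleafrecs = minnoderecs = 256 that is at most 2^21 / 256 = 8192
leaf blocks, 8192 / 256 = 32 nodes above them and a single root above those, so
the loop terminates with level = 3 and m_in_maxlevels is set to 3.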
+/*
+ * Log specified fields for the ag hdr (inode section). The growth of the agi
+ * structure over time requires that we interpret the buffer as two logical
+ * regions delineated by the end of the unlinked list. This is due to the size
+ * of the hash table and its location in the middle of the agi.
+ *
+ * For example, a request to log a field before agi_unlinked and a field after
+ * agi_unlinked could cause us to log the entire hash table and use an excessive
+ * amount of log space. To avoid this behavior, log the region up through
+ * agi_unlinked in one call and the region after agi_unlinked through the end of
+ * the structure in another.
+ */
+void
+xfs_ialloc_log_agi(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_buf_t       *bp,            /* allocation group header buffer */
+       int             fields)         /* bitmask of fields to log */
+{
+       int                     first;          /* first byte number */
+       int                     last;           /* last byte number */
+       static const short      offsets[] = {   /* field starting offsets */
+                                       /* keep in sync with bit definitions */
+               offsetof(xfs_agi_t, agi_magicnum),
+               offsetof(xfs_agi_t, agi_versionnum),
+               offsetof(xfs_agi_t, agi_seqno),
+               offsetof(xfs_agi_t, agi_length),
+               offsetof(xfs_agi_t, agi_count),
+               offsetof(xfs_agi_t, agi_root),
+               offsetof(xfs_agi_t, agi_level),
+               offsetof(xfs_agi_t, agi_freecount),
+               offsetof(xfs_agi_t, agi_newino),
+               offsetof(xfs_agi_t, agi_dirino),
+               offsetof(xfs_agi_t, agi_unlinked),
+               offsetof(xfs_agi_t, agi_free_root),
+               offsetof(xfs_agi_t, agi_free_level),
+               sizeof(xfs_agi_t)
+       };
+#ifdef DEBUG
+       xfs_agi_t               *agi;   /* allocation group header */
+
+       agi = XFS_BUF_TO_AGI(bp);
+       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
+#endif
+
+       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
+
+       /*
+        * Compute byte offsets for the first and last fields in the first
+        * region and log the agi buffer. This only logs up through
+        * agi_unlinked.
+        */
+       if (fields & XFS_AGI_ALL_BITS_R1) {
+               xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
+                                 &first, &last);
+               xfs_trans_log_buf(tp, bp, first, last);
+       }
+
+       /*
+        * Mask off the bits in the first region and calculate the first and
+        * last field offsets for any bits in the second region.
+        */
+       fields &= ~XFS_AGI_ALL_BITS_R1;
+       if (fields) {
+               xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
+                                 &first, &last);
+               xfs_trans_log_buf(tp, bp, first, last);
+       }
+}
+
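For example, a single call that dirties one field on each side of agi_unlinked
logs two byte ranges and never logs the unlinked bucket array between them; a
minimal sketch using flags that correspond to the offsets table above:

        /*
         * agi_freecount sits in region 1 and agi_free_root in region 2, so
         * this logs two separate ranges rather than everything in between.
         */
        xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT | XFS_AGI_FREE_ROOT);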
+#ifdef DEBUG
+STATIC void
+xfs_check_agi_unlinked(
+       struct xfs_agi          *agi)
+{
+       int                     i;
+
+       for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+               ASSERT(agi->agi_unlinked[i]);
+}
+#else
+#define xfs_check_agi_unlinked(agi)
+#endif
+
+static bool
+xfs_agi_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+       /*
+        * Validate the magic number of the agi block.
+        */
+       if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
+               return false;
+       if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
+               return false;
+
+       /*
+        * During growfs operations, the perag is not fully initialised,
+        * so we can't use it for any useful checking. growfs uses uncached
+        * buffers that don't have the perag attached, so we can detect this
+        * case and skip the check.
+        */
+       if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
+               return false;
+
+       xfs_check_agi_unlinked(agi);
+       return true;
+}
+
+static void
+xfs_agi_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb) &&
+           !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
+                               XFS_ERRTAG_IALLOC_READ_AGI,
+                               XFS_RANDOM_IALLOC_READ_AGI))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_agi_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       if (!xfs_agi_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (bip)
+               XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+       xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_agi_buf_ops = {
+       .verify_read = xfs_agi_read_verify,
+       .verify_write = xfs_agi_write_verify,
+};
+
+/*
+ * Read in the allocation group header (inode allocation section)
+ */
+int
+xfs_read_agi(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       struct xfs_buf          **bpp)  /* allocation group hdr buf */
+{
+       int                     error;
+
+       trace_xfs_read_agi(mp, agno);
+
+       ASSERT(agno != NULLAGNUMBER);
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                       XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
+                       XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
+       if (error)
+               return error;
+
+       xfs_buf_set_ref(*bpp, XFS_AGI_REF);
+       return 0;
+}
+
+int
+xfs_ialloc_read_agi(
+       struct xfs_mount        *mp,    /* file system mount structure */
+       struct xfs_trans        *tp,    /* transaction pointer */
+       xfs_agnumber_t          agno,   /* allocation group number */
+       struct xfs_buf          **bpp)  /* allocation group hdr buf */
+{
+       struct xfs_agi          *agi;   /* allocation group header */
+       struct xfs_perag        *pag;   /* per allocation group data */
+       int                     error;
+
+       trace_xfs_ialloc_read_agi(mp, agno);
+
+       error = xfs_read_agi(mp, tp, agno, bpp);
+       if (error)
+               return error;
+
+       agi = XFS_BUF_TO_AGI(*bpp);
+       pag = xfs_perag_get(mp, agno);
+       if (!pag->pagi_init) {
+               pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
+               pag->pagi_count = be32_to_cpu(agi->agi_count);
+               pag->pagi_init = 1;
+       }
+
+       /*
+        * It's possible for these to be out of sync if
+        * we are in the middle of a forced shutdown.
+        */
+       ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
+               XFS_FORCED_SHUTDOWN(mp));
+       xfs_perag_put(pag);
+       return 0;
+}
+
+/*
+ * Read in the agi to initialise the per-ag data in the mount structure
+ */
+int
+xfs_ialloc_pagi_init(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_agnumber_t  agno)           /* allocation group number */
+{
+       xfs_buf_t       *bp = NULL;
+       int             error;
+
+       error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
+       if (error)
+               return error;
+       if (bp)
+               xfs_trans_brelse(tp, bp);
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
new file mode 100644 (file)
index 0000000..726f83a
--- /dev/null
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+
+
+STATIC int
+xfs_inobt_get_minrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       return cur->bc_mp->m_inobt_mnr[level != 0];
+}
+
+STATIC struct xfs_btree_cur *
+xfs_inobt_dup_cursor(
+       struct xfs_btree_cur    *cur)
+{
+       return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
+                       cur->bc_private.a.agbp, cur->bc_private.a.agno,
+                       cur->bc_btnum);
+}
+
+STATIC void
+xfs_inobt_set_root(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *nptr,
+       int                     inc)    /* level change */
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+
+       agi->agi_root = nptr->s;
+       be32_add_cpu(&agi->agi_level, inc);
+       xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
+}
+
+STATIC void
+xfs_finobt_set_root(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *nptr,
+       int                     inc)    /* level change */
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+
+       agi->agi_free_root = nptr->s;
+       be32_add_cpu(&agi->agi_free_level, inc);
+       xfs_ialloc_log_agi(cur->bc_tp, agbp,
+                          XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
+}
+
+STATIC int
+xfs_inobt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       xfs_alloc_arg_t         args;           /* block allocation args */
+       int                     error;          /* error return value */
+       xfs_agblock_t           sbno = be32_to_cpu(start->s);
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       memset(&args, 0, sizeof(args));
+       args.tp = cur->bc_tp;
+       args.mp = cur->bc_mp;
+       args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
+       args.minlen = 1;
+       args.maxlen = 1;
+       args.prod = 1;
+       args.type = XFS_ALLOCTYPE_NEAR_BNO;
+
+       error = xfs_alloc_vextent(&args);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+       if (args.fsbno == NULLFSBLOCK) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+               *stat = 0;
+               return 0;
+       }
+       ASSERT(args.len == 1);
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+
+       new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
+       *stat = 1;
+       return 0;
+}
+
+STATIC int
+xfs_inobt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       xfs_fsblock_t           fsbno;
+       int                     error;
+
+       fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
+       error = xfs_free_extent(cur->bc_tp, fsbno, 1);
+       if (error)
+               return error;
+
+       xfs_trans_binval(cur->bc_tp, bp);
+       return error;
+}
+
+STATIC int
+xfs_inobt_get_maxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       return cur->bc_mp->m_inobt_mxr[level != 0];
+}
+
+STATIC void
+xfs_inobt_init_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       key->inobt.ir_startino = rec->inobt.ir_startino;
+}
+
+STATIC void
+xfs_inobt_init_rec_from_key(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       rec->inobt.ir_startino = key->inobt.ir_startino;
+}
+
+STATIC void
+xfs_inobt_init_rec_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
+       rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+       rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
+}
+
+/*
+ * initial value of ptr for lookup
+ */
+STATIC void
+xfs_inobt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
+
+       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
+
+       ptr->s = agi->agi_root;
+}
+
+STATIC void
+xfs_finobt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
+
+       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
+       ptr->s = agi->agi_free_root;
+}
+
+STATIC __int64_t
+xfs_inobt_key_diff(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+                         cur->bc_rec.i.ir_startino;
+}
+
+static int
+xfs_inobt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_perag        *pag = bp->b_pag;
+       unsigned int            level;
+
+       /*
+        * During growfs operations, we can't verify the exact owner as the
+        * perag is not fully initialised and hence not attached to the buffer.
+        *
+        * Similarly, during log recovery we will have a perag structure
+        * attached, but the agi information will not yet have been initialised
+        * from the on disk AGI. We don't currently use any of this information,
+        * but beware of the landmine (i.e. need to check pag->pagi_init) if we
+        * ever do.
+        */
+       switch (block->bb_magic) {
+       case cpu_to_be32(XFS_IBT_CRC_MAGIC):
+       case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
+               if (!xfs_sb_version_hascrc(&mp->m_sb))
+                       return false;
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+                       return false;
+               if (pag &&
+                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+                       return false;
+               /* fall through */
+       case cpu_to_be32(XFS_IBT_MAGIC):
+       case cpu_to_be32(XFS_FIBT_MAGIC):
+               break;
+       default:
+               return false;
+       }
+
+       /* numrecs and level verification */
+       level = be16_to_cpu(block->bb_level);
+       if (level >= mp->m_in_maxlevels)
+               return false;
+       if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
+               return false;
+
+       /* sibling pointer verification */
+       if (!block->bb_u.s.bb_leftsib ||
+           (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+            block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+               return false;
+       if (!block->bb_u.s.bb_rightsib ||
+           (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+            block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+               return false;
+
+       return true;
+}
+
+static void
+xfs_inobt_read_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_btree_sblock_verify_crc(bp))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_inobt_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_verifier_error(bp);
+       }
+}
+
+static void
+xfs_inobt_write_verify(
+       struct xfs_buf  *bp)
+{
+       if (!xfs_inobt_verify(bp)) {
+               trace_xfs_btree_corrupt(bp, _RET_IP_);
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+       xfs_btree_sblock_calc_crc(bp);
+}
+
+const struct xfs_buf_ops xfs_inobt_buf_ops = {
+       .verify_read = xfs_inobt_read_verify,
+       .verify_write = xfs_inobt_write_verify,
+};
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_inobt_keys_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       return be32_to_cpu(k1->inobt.ir_startino) <
+               be32_to_cpu(k2->inobt.ir_startino);
+}
+
+STATIC int
+xfs_inobt_recs_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *r1,
+       union xfs_btree_rec     *r2)
+{
+       return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
+               be32_to_cpu(r2->inobt.ir_startino);
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_inobt_ops = {
+       .rec_len                = sizeof(xfs_inobt_rec_t),
+       .key_len                = sizeof(xfs_inobt_key_t),
+
+       .dup_cursor             = xfs_inobt_dup_cursor,
+       .set_root               = xfs_inobt_set_root,
+       .alloc_block            = xfs_inobt_alloc_block,
+       .free_block             = xfs_inobt_free_block,
+       .get_minrecs            = xfs_inobt_get_minrecs,
+       .get_maxrecs            = xfs_inobt_get_maxrecs,
+       .init_key_from_rec      = xfs_inobt_init_key_from_rec,
+       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
+       .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
+       .key_diff               = xfs_inobt_key_diff,
+       .buf_ops                = &xfs_inobt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_inobt_keys_inorder,
+       .recs_inorder           = xfs_inobt_recs_inorder,
+#endif
+};
+
+static const struct xfs_btree_ops xfs_finobt_ops = {
+       .rec_len                = sizeof(xfs_inobt_rec_t),
+       .key_len                = sizeof(xfs_inobt_key_t),
+
+       .dup_cursor             = xfs_inobt_dup_cursor,
+       .set_root               = xfs_finobt_set_root,
+       .alloc_block            = xfs_inobt_alloc_block,
+       .free_block             = xfs_inobt_free_block,
+       .get_minrecs            = xfs_inobt_get_minrecs,
+       .get_maxrecs            = xfs_inobt_get_maxrecs,
+       .init_key_from_rec      = xfs_inobt_init_key_from_rec,
+       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
+       .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_finobt_init_ptr_from_cur,
+       .key_diff               = xfs_inobt_key_diff,
+       .buf_ops                = &xfs_inobt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_inobt_keys_inorder,
+       .recs_inorder           = xfs_inobt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new inode btree cursor.
+ */
+struct xfs_btree_cur *                         /* new inode btree cursor */
+xfs_inobt_init_cursor(
+       struct xfs_mount        *mp,            /* file system mount point */
+       struct xfs_trans        *tp,            /* transaction pointer */
+       struct xfs_buf          *agbp,          /* buffer for agi structure */
+       xfs_agnumber_t          agno,           /* allocation group number */
+       xfs_btnum_t             btnum)          /* ialloc or free ino btree */
+{
+       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
+       struct xfs_btree_cur    *cur;
+
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+       cur->bc_tp = tp;
+       cur->bc_mp = mp;
+       cur->bc_btnum = btnum;
+       if (btnum == XFS_BTNUM_INO) {
+               cur->bc_nlevels = be32_to_cpu(agi->agi_level);
+               cur->bc_ops = &xfs_inobt_ops;
+       } else {
+               cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
+               cur->bc_ops = &xfs_finobt_ops;
+       }
+
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+       cur->bc_private.a.agbp = agbp;
+       cur->bc_private.a.agno = agno;
+
+       return cur;
+}
+
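The constructor serves both trees; the btnum selects the ops table and which
AGI root/level fields seed the cursor. A minimal sketch of walking the free
inode btree for the first record at or after start_agino, assuming mp, tp,
agbp, agno and start_agino come from the surrounding context (error and
not-found handling elided):

        struct xfs_inobt_rec_incore     rec;
        struct xfs_btree_cur            *cur;
        int                             stat;
        int                             error;

        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
        error = xfs_inobt_lookup(cur, start_agino, XFS_LOOKUP_GE, &stat);
        if (!error && stat)
                error = xfs_inobt_get_rec(cur, &rec, &stat);
        xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);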
+/*
+ * Calculate number of records in an inobt btree block.
+ */
+int
+xfs_inobt_maxrecs(
+       struct xfs_mount        *mp,
+       int                     blocklen,
+       int                     leaf)
+{
+       blocklen -= XFS_INOBT_BLOCK_LEN(mp);
+
+       if (leaf)
+               return blocklen / sizeof(xfs_inobt_rec_t);
+       return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
+}
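As a rough worked example, assuming a 4096-byte block, the 16-byte short-form
btree block header used when CRCs are disabled, 16-byte inobt records and
4-byte keys and pointers: a leaf holds (4096 - 16) / 16 = 255 records and a
node holds (4096 - 16) / (4 + 4) = 510 key/pointer pairs. CRC-enabled blocks
carry a larger header, so both figures drop accordingly.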
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
new file mode 100644 (file)
index 0000000..1e5366d
--- /dev/null
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_cksum.h"
+#include "xfs_icache.h"
+#include "xfs_trans.h"
+#include "xfs_ialloc.h"
+#include "xfs_dinode.h"
+
+/*
+ * Check that none of the inodes in the buffer have a next
+ * unlinked field of 0.
+ */
+#if defined(DEBUG)
+void
+xfs_inobp_check(
+       xfs_mount_t     *mp,
+       xfs_buf_t       *bp)
+{
+       int             i;
+       int             j;
+       xfs_dinode_t    *dip;
+
+       j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+
+       for (i = 0; i < j; i++) {
+               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+                                       i * mp->m_sb.sb_inodesize);
+               if (!dip->di_next_unlinked)  {
+                       xfs_alert(mp,
+       "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
+                               i, (long long)bp->b_bn);
+               }
+       }
+}
+#endif
+
+/*
+ * If we are doing readahead on an inode buffer, we might be in log recovery
+ * reading an inode allocation buffer that hasn't yet been replayed, and hence
+ * has not had the inode cores stamped into it. For readahead, therefore, the
+ * buffer may be invalid.
+ *
+ * If the readahead buffer is invalid, we don't want to mark it with an error,
+ * but we do want to clear the DONE status of the buffer so that a follow-up
+ * read will re-read it from disk. This ensures that we don't get unnecessary
+ * warnings during log recovery and we don't get unnecessary panics on debug
+ * kernels.
+ */
+static void
+xfs_inode_buf_verify(
+       struct xfs_buf  *bp,
+       bool            readahead)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       int             i;
+       int             ni;
+
+       /*
+        * Validate the magic number and version of every inode in the buffer
+        */
+       ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
+       for (i = 0; i < ni; i++) {
+               int             di_ok;
+               xfs_dinode_t    *dip;
+
+               dip = (struct xfs_dinode *)xfs_buf_offset(bp,
+                                       (i << mp->m_sb.sb_inodelog));
+               di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
+                           XFS_DINODE_GOOD_VERSION(dip->di_version);
+               if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+                                               XFS_ERRTAG_ITOBP_INOTOBP,
+                                               XFS_RANDOM_ITOBP_INOTOBP))) {
+                       if (readahead) {
+                               bp->b_flags &= ~XBF_DONE;
+                               return;
+                       }
+
+                       xfs_buf_ioerror(bp, EFSCORRUPTED);
+                       xfs_verifier_error(bp);
+#ifdef DEBUG
+                       xfs_alert(mp,
+                               "bad inode magic/vsn daddr %lld #%d (magic=%x)",
+                               (unsigned long long)bp->b_bn, i,
+                               be16_to_cpu(dip->di_magic));
+#endif
+               }
+       }
+       xfs_inobp_check(mp, bp);
+}
+
+
+static void
+xfs_inode_buf_read_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inode_buf_verify(bp, false);
+}
+
+static void
+xfs_inode_buf_readahead_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inode_buf_verify(bp, true);
+}
+
+static void
+xfs_inode_buf_write_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_inode_buf_verify(bp, false);
+}
+
+const struct xfs_buf_ops xfs_inode_buf_ops = {
+       .verify_read = xfs_inode_buf_read_verify,
+       .verify_write = xfs_inode_buf_write_verify,
+};
+
+const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+       .verify_read = xfs_inode_buf_readahead_verify,
+       .verify_write = xfs_inode_buf_write_verify,
+};
+
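The only difference between the two ops tables is the read-side behaviour
described above: the readahead variant quietly clears XBF_DONE instead of
flagging an error. A minimal sketch of a readahead caller, assuming mp and a
previously filled struct xfs_imap named imap from the surrounding context:

        /*
         * Fire-and-forget readahead of an inode cluster; a later blocking
         * read with xfs_inode_buf_ops re-verifies and reports any error.
         */
        xfs_buf_readahead(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
                          &xfs_inode_buf_ra_ops);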
+
+/*
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode.  It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
+ */
+int
+xfs_imap_to_bp(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_imap         *imap,
+       struct xfs_dinode       **dipp,
+       struct xfs_buf          **bpp,
+       uint                    buf_flags,
+       uint                    iget_flags)
+{
+       struct xfs_buf          *bp;
+       int                     error;
+
+       buf_flags |= XBF_UNMAPPED;
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+                                  (int)imap->im_len, buf_flags, &bp,
+                                  &xfs_inode_buf_ops);
+       if (error) {
+               if (error == EAGAIN) {
+                       ASSERT(buf_flags & XBF_TRYLOCK);
+                       return error;
+               }
+
+               if (error == EFSCORRUPTED &&
+                   (iget_flags & XFS_IGET_UNTRUSTED))
+                       return EINVAL;
+
+               xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
+                       __func__, error);
+               return error;
+       }
+
+       *bpp = bp;
+       *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
+       return 0;
+}
+
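Taken together with xfs_imap() from the previous file, the usual pattern is a
two-step map-then-read, which xfs_iread() below follows; a minimal sketch,
assuming mp, tp and ino come from the surrounding context (flags zeroed, error
handling elided):

        struct xfs_imap         imap;
        struct xfs_dinode       *dip;
        struct xfs_buf          *bp;
        int                     error;

        error = xfs_imap(mp, tp, ino, &imap, 0);
        if (!error)
                error = xfs_imap_to_bp(mp, tp, &imap, &dip, &bp, 0, 0);
        /* on success, dip points at the on-disk inode inside bp */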
+void
+xfs_dinode_from_disk(
+       xfs_icdinode_t          *to,
+       xfs_dinode_t            *from)
+{
+       to->di_magic = be16_to_cpu(from->di_magic);
+       to->di_mode = be16_to_cpu(from->di_mode);
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_onlink = be16_to_cpu(from->di_onlink);
+       to->di_uid = be32_to_cpu(from->di_uid);
+       to->di_gid = be32_to_cpu(from->di_gid);
+       to->di_nlink = be32_to_cpu(from->di_nlink);
+       to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+       to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+       to->di_flushiter = be16_to_cpu(from->di_flushiter);
+       to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
+       to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
+       to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
+       to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
+       to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
+       to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
+       to->di_size = be64_to_cpu(from->di_size);
+       to->di_nblocks = be64_to_cpu(from->di_nblocks);
+       to->di_extsize = be32_to_cpu(from->di_extsize);
+       to->di_nextents = be32_to_cpu(from->di_nextents);
+       to->di_anextents = be16_to_cpu(from->di_anextents);
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat  = from->di_aformat;
+       to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
+       to->di_dmstate  = be16_to_cpu(from->di_dmstate);
+       to->di_flags    = be16_to_cpu(from->di_flags);
+       to->di_gen      = be32_to_cpu(from->di_gen);
+
+       if (to->di_version == 3) {
+               to->di_changecount = be64_to_cpu(from->di_changecount);
+               to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+               to->di_flags2 = be64_to_cpu(from->di_flags2);
+               to->di_ino = be64_to_cpu(from->di_ino);
+               to->di_lsn = be64_to_cpu(from->di_lsn);
+               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &from->di_uuid);
+       }
+}
+
+void
+xfs_dinode_to_disk(
+       xfs_dinode_t            *to,
+       xfs_icdinode_t          *from)
+{
+       to->di_magic = cpu_to_be16(from->di_magic);
+       to->di_mode = cpu_to_be16(from->di_mode);
+       to->di_version = from->di_version;
+       to->di_format = from->di_format;
+       to->di_onlink = cpu_to_be16(from->di_onlink);
+       to->di_uid = cpu_to_be32(from->di_uid);
+       to->di_gid = cpu_to_be32(from->di_gid);
+       to->di_nlink = cpu_to_be32(from->di_nlink);
+       to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+       to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+       to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
+       to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
+       to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
+       to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
+       to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
+       to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+       to->di_size = cpu_to_be64(from->di_size);
+       to->di_nblocks = cpu_to_be64(from->di_nblocks);
+       to->di_extsize = cpu_to_be32(from->di_extsize);
+       to->di_nextents = cpu_to_be32(from->di_nextents);
+       to->di_anextents = cpu_to_be16(from->di_anextents);
+       to->di_forkoff = from->di_forkoff;
+       to->di_aformat = from->di_aformat;
+       to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+       to->di_dmstate = cpu_to_be16(from->di_dmstate);
+       to->di_flags = cpu_to_be16(from->di_flags);
+       to->di_gen = cpu_to_be32(from->di_gen);
+
+       if (from->di_version == 3) {
+               to->di_changecount = cpu_to_be64(from->di_changecount);
+               to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+               to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+               to->di_flags2 = cpu_to_be64(from->di_flags2);
+               to->di_ino = cpu_to_be64(from->di_ino);
+               to->di_lsn = cpu_to_be64(from->di_lsn);
+               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &from->di_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = cpu_to_be16(from->di_flushiter);
+       }
+}
+
+static bool
+xfs_dinode_verify(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
+       struct xfs_dinode       *dip)
+{
+       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
+               return false;
+
+       /* only version 3 or greater inodes are extensively verified here */
+       if (dip->di_version < 3)
+               return true;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return false;
+       if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                             XFS_DINODE_CRC_OFF))
+               return false;
+       if (be64_to_cpu(dip->di_ino) != ip->i_ino)
+               return false;
+       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       return true;
+}
+
+void
+xfs_dinode_calc_crc(
+       struct xfs_mount        *mp,
+       struct xfs_dinode       *dip)
+{
+       __uint32_t              crc;
+
+       if (dip->di_version < 3)
+               return;
+
+       ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
+       crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
+                             XFS_DINODE_CRC_OFF);
+       dip->di_crc = xfs_end_cksum(crc);
+}
+
+/*
+ * Read the disk inode attributes into the in-core inode structure.
+ *
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
+ */
+int
+xfs_iread(
+       xfs_mount_t     *mp,
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            iget_flags)
+{
+       xfs_buf_t       *bp;
+       xfs_dinode_t    *dip;
+       int             error;
+
+       /*
+        * Fill in the location information in the in-core inode.
+        */
+       error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
+       if (error)
+               return error;
+
+       /* shortcut IO on inode allocation if possible */
+       if ((iget_flags & XFS_IGET_CREATE) &&
+           xfs_sb_version_hascrc(&mp->m_sb) &&
+           !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+               /* initialise the on-disk inode core */
+               memset(&ip->i_d, 0, sizeof(ip->i_d));
+               ip->i_d.di_magic = XFS_DINODE_MAGIC;
+               ip->i_d.di_gen = prandom_u32();
+               if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                       ip->i_d.di_version = 3;
+                       ip->i_d.di_ino = ip->i_ino;
+                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
+               } else
+                       ip->i_d.di_version = 2;
+               return 0;
+       }
+
+       /*
+        * Get pointers to the on-disk inode and the buffer containing it.
+        */
+       error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
+       if (error)
+               return error;
+
+       /* even unallocated inodes are verified */
+       if (!xfs_dinode_verify(mp, ip, dip)) {
+               xfs_alert(mp, "%s: validation failed for inode %lld",
+                               __func__, ip->i_ino);
+
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+               error = EFSCORRUPTED;
+               goto out_brelse;
+       }
+
+       /*
+        * If the on-disk inode is already linked to a directory
+        * entry, copy all of the inode into the in-core inode.
+        * xfs_iformat_fork() handles copying in the inode format
+        * specific information.
+        * Otherwise, just get the truly permanent information.
+        */
+       if (dip->di_mode) {
+               xfs_dinode_from_disk(&ip->i_d, dip);
+               error = xfs_iformat_fork(ip, dip);
+               if (error)  {
+#ifdef DEBUG
+                       xfs_alert(mp, "%s: xfs_iformat_fork() returned error %d",
+                               __func__, error);
+#endif /* DEBUG */
+                       goto out_brelse;
+               }
+       } else {
+               /*
+                * Partial initialisation of the in-core inode. Just the bits
+                * that xfs_ialloc won't overwrite or relies on being correct.
+                */
+               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
+               ip->i_d.di_version = dip->di_version;
+               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+               ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
+
+               if (dip->di_version == 3) {
+                       ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
+                       uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
+               }
+
+               /*
+                * Make sure to pull in the mode here as well in
+                * case the inode is released without being used.
+                * This ensures that xfs_inactive() will see that
+                * the inode is already free and not try to mess
+                * with the uninitialized part of it.
+                */
+               ip->i_d.di_mode = 0;
+       }
+
+       /*
+        * Automatically convert version 1 inode formats in memory to version 2
+        * inode format. If the inode is modified, it will get logged and
+        * rewritten as a version 2 inode. We can do this because we set the
+        * superblock feature bit for v2 inodes unconditionally during mount
+        * and it means the rest of the code can assume the inode version is 2
+        * or higher.
+        */
+       if (ip->i_d.di_version == 1) {
+               ip->i_d.di_version = 2;
+               memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
+               ip->i_d.di_nlink = ip->i_d.di_onlink;
+               ip->i_d.di_onlink = 0;
+               xfs_set_projid(ip, 0);
+       }
+
+       ip->i_delayed_blks = 0;
+
+       /*
+        * Mark the buffer containing the inode as something to keep
+        * around for a while.  This helps to keep recently accessed
+        * meta-data in-core longer.
+        */
+       xfs_buf_set_ref(bp, XFS_INO_REF);
+
+       /*
+        * Use xfs_trans_brelse() to release the buffer containing the on-disk
+        * inode, because it was acquired with xfs_trans_read_buf() in
+        * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
+        * brelse().  If we're within a transaction, then xfs_trans_brelse()
+        * will only release the buffer if it is not dirty within the
+        * transaction.  It will be OK to release the buffer in this case,
+        * because inodes on disk are never destroyed and we will be locking the
+        * new in-core inode before putting it in the cache where other
+        * processes can find it.  Thus we don't have to worry about the inode
+        * being changed just because we released the buffer.
+        */
+ out_brelse:
+       xfs_trans_brelse(tp, bp);
+       return error;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
new file mode 100644 (file)
index 0000000..2a124e9
--- /dev/null
@@ -0,0 +1,1906 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/log2.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+
+kmem_zone_t *xfs_ifork_zone;
+
+STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
+STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
+STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
+
+#ifdef DEBUG
+/*
+ * Make sure that the extents in the given memory buffer
+ * are valid.
+ */
+void
+xfs_validate_extents(
+       xfs_ifork_t             *ifp,
+       int                     nrecs,
+       xfs_exntfmt_t           fmt)
+{
+       xfs_bmbt_irec_t         irec;
+       xfs_bmbt_rec_host_t     rec;
+       int                     i;
+
+       for (i = 0; i < nrecs; i++) {
+               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+               rec.l0 = get_unaligned(&ep->l0);
+               rec.l1 = get_unaligned(&ep->l1);
+               xfs_bmbt_get_all(&rec, &irec);
+               if (fmt == XFS_EXTFMT_NOSTATE)
+                       ASSERT(irec.br_state == XFS_EXT_NORM);
+       }
+}
+#else /* DEBUG */
+#define xfs_validate_extents(ifp, nrecs, fmt)
+#endif /* DEBUG */
+
+
+/*
+ * Move inode type and inode format specific information from the
+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
+ * this means setting if_rdev to the proper value.  For files, directories,
+ * and symlinks this means bringing in the in-line data or extent
+ * pointers.  For a file in B-tree format, only the root is immediately
+ * brought in-core.  The rest will be in-lined in if_extents when it
+ * is first referenced (see xfs_iread_extents()).
+ */
+int
+xfs_iformat_fork(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip)
+{
+       xfs_attr_shortform_t    *atp;
+       int                     size;
+       int                     error = 0;
+       xfs_fsize_t             di_size;
+
+       if (unlikely(be32_to_cpu(dip->di_nextents) +
+                    be16_to_cpu(dip->di_anextents) >
+                    be64_to_cpu(dip->di_nblocks))) {
+               xfs_warn(ip->i_mount,
+                       "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
+                       (unsigned long long)ip->i_ino,
+                       (int)(be32_to_cpu(dip->di_nextents) +
+                             be16_to_cpu(dip->di_anextents)),
+                       (unsigned long long)
+                               be64_to_cpu(dip->di_nblocks));
+               XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
+                                    ip->i_mount, dip);
+               return EFSCORRUPTED;
+       }
+
+       if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
+               xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
+                       (unsigned long long)ip->i_ino,
+                       dip->di_forkoff);
+               XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
+                                    ip->i_mount, dip);
+               return EFSCORRUPTED;
+       }
+
+       if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
+                    !ip->i_mount->m_rtdev_targp)) {
+               xfs_warn(ip->i_mount,
+                       "corrupt dinode %Lu, has realtime flag set.",
+                       ip->i_ino);
+               XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
+                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+               return EFSCORRUPTED;
+       }
+
+       switch (ip->i_d.di_mode & S_IFMT) {
+       case S_IFIFO:
+       case S_IFCHR:
+       case S_IFBLK:
+       case S_IFSOCK:
+               if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
+                       XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
+                                             ip->i_mount, dip);
+                       return EFSCORRUPTED;
+               }
+               ip->i_d.di_size = 0;
+               ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
+               break;
+
+       case S_IFREG:
+       case S_IFLNK:
+       case S_IFDIR:
+               switch (dip->di_format) {
+               case XFS_DINODE_FMT_LOCAL:
+                       /*
+                        * no local regular files yet
+                        */
+                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
+                               xfs_warn(ip->i_mount,
+                       "corrupt inode %Lu (local format for regular file).",
+                                       (unsigned long long) ip->i_ino);
+                               XFS_CORRUPTION_ERROR("xfs_iformat(4)",
+                                                    XFS_ERRLEVEL_LOW,
+                                                    ip->i_mount, dip);
+                               return EFSCORRUPTED;
+                       }
+
+                       di_size = be64_to_cpu(dip->di_size);
+                       if (unlikely(di_size < 0 ||
+                                    di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
+                               xfs_warn(ip->i_mount,
+                       "corrupt inode %Lu (bad size %Ld for local inode).",
+                                       (unsigned long long) ip->i_ino,
+                                       (long long) di_size);
+                               XFS_CORRUPTION_ERROR("xfs_iformat(5)",
+                                                    XFS_ERRLEVEL_LOW,
+                                                    ip->i_mount, dip);
+                               return EFSCORRUPTED;
+                       }
+
+                       size = (int)di_size;
+                       error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
+                       break;
+               case XFS_DINODE_FMT_EXTENTS:
+                       error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+                       break;
+               case XFS_DINODE_FMT_BTREE:
+                       error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+                       break;
+               default:
+                       XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
+                                        ip->i_mount);
+                       return EFSCORRUPTED;
+               }
+               break;
+
+       default:
+               XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
+               return EFSCORRUPTED;
+       }
+       if (error) {
+               return error;
+       }
+       if (!XFS_DFORK_Q(dip))
+               return 0;
+
+       ASSERT(ip->i_afp == NULL);
+       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
+
+       switch (dip->di_aformat) {
+       case XFS_DINODE_FMT_LOCAL:
+               atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
+               size = be16_to_cpu(atp->hdr.totsize);
+
+               if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
+                       xfs_warn(ip->i_mount,
+                               "corrupt inode %Lu (bad attr fork size %Ld).",
+                               (unsigned long long) ip->i_ino,
+                               (long long) size);
+                       XFS_CORRUPTION_ERROR("xfs_iformat(8)",
+                                            XFS_ERRLEVEL_LOW,
+                                            ip->i_mount, dip);
+                       return EFSCORRUPTED;
+               }
+
+               error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
+               break;
+       case XFS_DINODE_FMT_EXTENTS:
+               error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+               break;
+       case XFS_DINODE_FMT_BTREE:
+               error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+               break;
+       default:
+               error = EFSCORRUPTED;
+               break;
+       }
+       if (error) {
+               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+               ip->i_afp = NULL;
+               xfs_idestroy_fork(ip, XFS_DATA_FORK);
+       }
+       return error;
+}
+
+/*
+ * The file is in-lined in the on-disk inode.
+ * If it fits into if_inline_data, then copy
+ * it there, otherwise allocate a buffer for it
+ * and copy the data there.  Either way, set
+ * if_data to point at the data.
+ * If we allocate a buffer for the data, make
+ * sure that its size is a multiple of 4 and
+ * record the real size in i_real_bytes.
+ */
+STATIC int
+xfs_iformat_local(
+       xfs_inode_t     *ip,
+       xfs_dinode_t    *dip,
+       int             whichfork,
+       int             size)
+{
+       xfs_ifork_t     *ifp;
+       int             real_size;
+
+       /*
+        * If the size is unreasonable, then something
+        * is wrong and we just bail out rather than crash in
+        * kmem_alloc() or memcpy() below.
+        */
+       if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+               xfs_warn(ip->i_mount,
+       "corrupt inode %Lu (bad size %d for local fork, size = %d).",
+                       (unsigned long long) ip->i_ino, size,
+                       XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
+               XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
+                                    ip->i_mount, dip);
+               return EFSCORRUPTED;
+       }
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       real_size = 0;
+       if (size == 0)
+               ifp->if_u1.if_data = NULL;
+       else if (size <= sizeof(ifp->if_u2.if_inline_data))
+               ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+       else {
+               real_size = roundup(size, 4);
+               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
+       }
+       ifp->if_bytes = size;
+       ifp->if_real_bytes = real_size;
+       if (size)
+               memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
+       ifp->if_flags &= ~XFS_IFEXTENTS;
+       ifp->if_flags |= XFS_IFINLINE;
+       return 0;
+}
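
The local-fork path above either points if_data at the small inline buffer
inside the fork or allocates a heap buffer rounded up to a multiple of 4 so
the logged region stays word aligned.  The following is a sketch of that
pattern in plain userspace C, assuming made-up names and an arbitrary inline
capacity; it is illustrative only, not the patch's own code.

/*
 * Illustrative sketch only: inline-vs-heap storage with roundup-to-4
 * sizing, mirroring xfs_iformat_local()/xfs_idata_realloc() in spirit.
 * DEMO_INLINE_BYTES and struct demo_fork are hypothetical.
 */
#include <stdlib.h>
#include <string.h>

#define DEMO_INLINE_BYTES 32

struct demo_fork {
        char    inline_buf[DEMO_INLINE_BYTES];
        char    *data;          /* points at inline_buf or heap memory */
        int     bytes;          /* valid payload size */
        int     real_bytes;     /* heap allocation size, 0 if inline */
};

static int demo_fork_set(struct demo_fork *fp, const void *src, int size)
{
        if (size <= DEMO_INLINE_BYTES) {
                fp->data = fp->inline_buf;
                fp->real_bytes = 0;
        } else {
                int real = (size + 3) & ~3;     /* roundup(size, 4) */

                fp->data = malloc(real);
                if (!fp->data)
                        return -1;
                fp->real_bytes = real;
        }
        fp->bytes = size;
        memcpy(fp->data, src, size);
        return 0;
}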
+
+/*
+ * The file consists of a set of extents all
+ * of which fit into the on-disk inode.
+ * If there are few enough extents to fit into
+ * the if_inline_ext, then copy them there.
+ * Otherwise allocate a buffer for them and copy
+ * them into it.  Either way, set if_extents
+ * to point at the extents.
+ */
+STATIC int
+xfs_iformat_extents(
+       xfs_inode_t     *ip,
+       xfs_dinode_t    *dip,
+       int             whichfork)
+{
+       xfs_bmbt_rec_t  *dp;
+       xfs_ifork_t     *ifp;
+       int             nex;
+       int             size;
+       int             i;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       nex = XFS_DFORK_NEXTENTS(dip, whichfork);
+       size = nex * (uint)sizeof(xfs_bmbt_rec_t);
+
+       /*
+        * If the number of extents is unreasonable, then something
+        * is wrong and we just bail out rather than crash in
+        * kmem_alloc() or memcpy() below.
+        */
+       if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+               xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
+                       (unsigned long long) ip->i_ino, nex);
+               XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
+                                    ip->i_mount, dip);
+               return EFSCORRUPTED;
+       }
+
+       ifp->if_real_bytes = 0;
+       if (nex == 0)
+               ifp->if_u1.if_extents = NULL;
+       else if (nex <= XFS_INLINE_EXTS)
+               ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+       else
+               xfs_iext_add(ifp, 0, nex);
+
+       ifp->if_bytes = size;
+       if (size) {
+               dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
+               xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
+               for (i = 0; i < nex; i++, dp++) {
+                       xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+                       ep->l0 = get_unaligned_be64(&dp->l0);
+                       ep->l1 = get_unaligned_be64(&dp->l1);
+               }
+               XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
+               if (whichfork != XFS_DATA_FORK ||
+                       XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
+                               if (unlikely(xfs_check_nostate_extents(
+                                   ifp, 0, nex))) {
+                                       XFS_ERROR_REPORT("xfs_iformat_extents(2)",
+                                                        XFS_ERRLEVEL_LOW,
+                                                        ip->i_mount);
+                                       return EFSCORRUPTED;
+                               }
+       }
+       ifp->if_flags |= XFS_IFEXTENTS;
+       return 0;
+}
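
The extent copy-in above pulls each on-disk record out of the inode as two
big-endian, possibly unaligned 64-bit words via get_unaligned_be64().  A
portable sketch of that decode, with no kernel helpers and a made-up record
type, might look like the following.

/*
 * Illustrative sketch only: read a big-endian, possibly unaligned 64-bit
 * word into host order, then decode a 16-byte record as two such words.
 * struct demo_irec is hypothetical, not the real bmbt layout.
 */
#include <stdint.h>

static uint64_t demo_get_unaligned_be64(const void *p)
{
        const uint8_t *b = p;
        uint64_t v = 0;
        int i;

        for (i = 0; i < 8; i++)         /* most significant byte first */
                v = (v << 8) | b[i];
        return v;
}

struct demo_irec { uint64_t l0, l1; };

static void demo_decode_rec(struct demo_irec *out, const uint8_t disk[16])
{
        out->l0 = demo_get_unaligned_be64(disk);
        out->l1 = demo_get_unaligned_be64(disk + 8);
}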
+
+/*
+ * The file has too many extents to fit into
+ * the inode, so they are in B-tree format.
+ * Allocate a buffer for the root of the B-tree
+ * and copy the root into it.  The i_extents
+ * field will remain NULL until all of the
+ * extents are read in (when they are needed).
+ */
+STATIC int
+xfs_iformat_btree(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip,
+       int                     whichfork)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_bmdr_block_t        *dfp;
+       xfs_ifork_t             *ifp;
+       /* REFERENCED */
+       int                     nrecs;
+       int                     size;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+       size = XFS_BMAP_BROOT_SPACE(mp, dfp);
+       nrecs = be16_to_cpu(dfp->bb_numrecs);
+
+       /*
+        * blow out if -- the fork has fewer extents than can fit in the
+        * fork (the fork shouldn't be in btree format), the root btree
+        * block has more records than can fit into the fork,
+        * or the number of extents is greater than the number of
+        * blocks.
+        */
+       if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
+                                       XFS_IFORK_MAXEXT(ip, whichfork) ||
+                    XFS_BMDR_SPACE_CALC(nrecs) >
+                                       XFS_DFORK_SIZE(dip, mp, whichfork) ||
+                    XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+               xfs_warn(mp, "corrupt inode %Lu (btree).",
+                                       (unsigned long long) ip->i_ino);
+               XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+                                        mp, dip);
+               return EFSCORRUPTED;
+       }
+
+       ifp->if_broot_bytes = size;
+       ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
+       ASSERT(ifp->if_broot != NULL);
+       /*
+        * Copy and convert from the on-disk structure
+        * to the in-memory structure.
+        */
+       xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
+                        ifp->if_broot, size);
+       ifp->if_flags &= ~XFS_IFEXTENTS;
+       ifp->if_flags |= XFS_IFBROOT;
+
+       return 0;
+}
+
+/*
+ * Read in extents from a btree-format inode.
+ * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
+ */
+int
+xfs_iread_extents(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       int             whichfork)
+{
+       int             error;
+       xfs_ifork_t     *ifp;
+       xfs_extnum_t    nextents;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+               XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
+                                ip->i_mount);
+               return EFSCORRUPTED;
+       }
+       nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+
+       /*
+        * We know that the size is valid (it's checked in iformat_btree)
+        */
+       ifp->if_bytes = ifp->if_real_bytes = 0;
+       ifp->if_flags |= XFS_IFEXTENTS;
+       xfs_iext_add(ifp, 0, nextents);
+       error = xfs_bmap_read_extents(tp, ip, whichfork);
+       if (error) {
+               xfs_iext_destroy(ifp);
+               ifp->if_flags &= ~XFS_IFEXTENTS;
+               return error;
+       }
+       xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+       return 0;
+}
+/*
+ * Reallocate the space for if_broot based on the number of records
+ * being added or deleted as indicated in rec_diff.  Move the records
+ * and pointers in if_broot to fit the new size.  When shrinking this
+ * will eliminate holes between the records and pointers created by
+ * the caller.  When growing this will create holes to be filled in
+ * by the caller.
+ *
+ * The caller must not request to add more records than would fit in
+ * the on-disk inode root.  If the if_broot is currently NULL, then
+ * if we are adding records, one will be allocated.  The caller must also
+ * not request that the number of records go below zero, although
+ * it can go to zero.
+ *
+ * ip -- the inode whose if_broot area is changing
+ * rec_diff -- the change in the number of records, positive or negative,
+ *      requested for the if_broot array.
+ */
+void
+xfs_iroot_realloc(
+       xfs_inode_t             *ip,
+       int                     rec_diff,
+       int                     whichfork)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     cur_max;
+       xfs_ifork_t             *ifp;
+       struct xfs_btree_block  *new_broot;
+       int                     new_max;
+       size_t                  new_size;
+       char                    *np;
+       char                    *op;
+
+       /*
+        * Handle the degenerate case quietly.
+        */
+       if (rec_diff == 0) {
+               return;
+       }
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (rec_diff > 0) {
+               /*
+                * If there wasn't any memory allocated before, just
+                * allocate it now and get out.
+                */
+               if (ifp->if_broot_bytes == 0) {
+                       new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
+                       ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+                       ifp->if_broot_bytes = (int)new_size;
+                       return;
+               }
+
+               /*
+                * If there is already an existing if_broot, then we need
+                * to realloc() it and shift the pointers to their new
+                * location.  The records don't change location because
+                * they are kept butted up against the btree block header.
+                */
+               cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+               new_max = cur_max + rec_diff;
+               new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+               ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
+                               XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
+                               KM_SLEEP | KM_NOFS);
+               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+                                                    ifp->if_broot_bytes);
+               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+                                                    (int)new_size);
+               ifp->if_broot_bytes = (int)new_size;
+               ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+                       XFS_IFORK_SIZE(ip, whichfork));
+               memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
+               return;
+       }
+
+       /*
+        * rec_diff is less than 0.  In this case, we are shrinking the
+        * if_broot buffer.  It must already exist.  If we go to zero
+        * records, just get rid of the root and clear the status bit.
+        */
+       ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
+       cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+       new_max = cur_max + rec_diff;
+       ASSERT(new_max >= 0);
+       if (new_max > 0)
+               new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+       else
+               new_size = 0;
+       if (new_size > 0) {
+               new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+               /*
+                * First copy over the btree block header.
+                */
+               memcpy(new_broot, ifp->if_broot,
+                       XFS_BMBT_BLOCK_LEN(ip->i_mount));
+       } else {
+               new_broot = NULL;
+               ifp->if_flags &= ~XFS_IFBROOT;
+       }
+
+       /*
+        * Only copy the records and pointers if there are any.
+        */
+       if (new_max > 0) {
+               /*
+                * First copy the records.
+                */
+               op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+               np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
+               memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
+
+               /*
+                * Then copy the pointers.
+                */
+               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+                                                    ifp->if_broot_bytes);
+               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
+                                                    (int)new_size);
+               memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
+       }
+       kmem_free(ifp->if_broot);
+       ifp->if_broot = new_broot;
+       ifp->if_broot_bytes = (int)new_size;
+       if (ifp->if_broot)
+               ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+                       XFS_IFORK_SIZE(ip, whichfork));
+       return;
+}
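
The comment in the grow path explains why only the block pointers move on a
realloc: the records stay butted up against the block header while the
pointer array sits further out and has to be shifted to its new position.
Here is a rough userspace sketch of that layout and the shift, assuming a
fake header size and fixed record/pointer sizes; all names are invented.

/*
 * Illustrative sketch only: a root block laid out as
 * [header][records][pointers].  Growing it reallocates the buffer,
 * leaves the records in place, and memmoves the existing pointers to
 * the new pointer area.
 */
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define DEMO_HDR        16                      /* fake block header size */
#define DEMO_RECSZ      sizeof(uint64_t)        /* fake record size */
#define DEMO_PTRSZ      sizeof(uint64_t)        /* fake pointer size */

static size_t demo_root_size(int maxrecs)
{
        return DEMO_HDR + maxrecs * (DEMO_RECSZ + DEMO_PTRSZ);
}

static char *demo_root_grow(char *root, int cur_max, int new_max)
{
        size_t  old_size = demo_root_size(cur_max);
        size_t  new_size = demo_root_size(new_max);
        char    *nroot = realloc(root, new_size);

        if (!nroot)
                return NULL;
        /* records stay put; shift the existing pointers to the new tail */
        memmove(nroot + new_size - new_max * DEMO_PTRSZ,
                nroot + old_size - cur_max * DEMO_PTRSZ,
                cur_max * DEMO_PTRSZ);
        return nroot;
}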
+
+
+/*
+ * This is called when the amount of space needed for if_data
+ * is increased or decreased.  The change in size is indicated by
+ * the number of bytes that need to be added or deleted in the
+ * byte_diff parameter.
+ *
+ * If the amount of space needed has decreased below the size of the
+ * inline buffer, then switch to using the inline buffer.  Otherwise,
+ * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * to what is needed.
+ *
+ * ip -- the inode whose if_data area is changing
+ * byte_diff -- the change in the number of bytes, positive or negative,
+ *      requested for the if_data array.
+ */
+void
+xfs_idata_realloc(
+       xfs_inode_t     *ip,
+       int             byte_diff,
+       int             whichfork)
+{
+       xfs_ifork_t     *ifp;
+       int             new_size;
+       int             real_size;
+
+       if (byte_diff == 0) {
+               return;
+       }
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       new_size = (int)ifp->if_bytes + byte_diff;
+       ASSERT(new_size >= 0);
+
+       if (new_size == 0) {
+               if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       kmem_free(ifp->if_u1.if_data);
+               }
+               ifp->if_u1.if_data = NULL;
+               real_size = 0;
+       } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
+               /*
+                * If the valid extents/data can fit in if_inline_ext/data,
+                * copy them from the malloc'd vector and free it.
+                */
+               if (ifp->if_u1.if_data == NULL) {
+                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       ASSERT(ifp->if_real_bytes != 0);
+                       memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
+                             new_size);
+                       kmem_free(ifp->if_u1.if_data);
+                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+               }
+               real_size = 0;
+       } else {
+               /*
+                * Stuck with malloc/realloc.
+                * For inline data, the underlying buffer must be
+                * a multiple of 4 bytes in size so that it can be
+                * logged and stay on word boundaries.  We enforce
+                * that here.
+                */
+               real_size = roundup(new_size, 4);
+               if (ifp->if_u1.if_data == NULL) {
+                       ASSERT(ifp->if_real_bytes == 0);
+                       ifp->if_u1.if_data = kmem_alloc(real_size,
+                                                       KM_SLEEP | KM_NOFS);
+               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+                       /*
+                        * Only do the realloc if the underlying size
+                        * is really changing.
+                        */
+                       if (ifp->if_real_bytes != real_size) {
+                               ifp->if_u1.if_data =
+                                       kmem_realloc(ifp->if_u1.if_data,
+                                                       real_size,
+                                                       ifp->if_real_bytes,
+                                                       KM_SLEEP | KM_NOFS);
+                       }
+               } else {
+                       ASSERT(ifp->if_real_bytes == 0);
+                       ifp->if_u1.if_data = kmem_alloc(real_size,
+                                                       KM_SLEEP | KM_NOFS);
+                       memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+                               ifp->if_bytes);
+               }
+       }
+       ifp->if_real_bytes = real_size;
+       ifp->if_bytes = new_size;
+       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+}
+
+void
+xfs_idestroy_fork(
+       xfs_inode_t     *ip,
+       int             whichfork)
+{
+       xfs_ifork_t     *ifp;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (ifp->if_broot != NULL) {
+               kmem_free(ifp->if_broot);
+               ifp->if_broot = NULL;
+       }
+
+       /*
+        * If the format is local, then we can't have an extents
+        * array so just look for an inline data array.  If we're
+        * not local then we may or may not have an extents list,
+        * so check and free it up if we do.
+        */
+       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+               if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
+                   (ifp->if_u1.if_data != NULL)) {
+                       ASSERT(ifp->if_real_bytes != 0);
+                       kmem_free(ifp->if_u1.if_data);
+                       ifp->if_u1.if_data = NULL;
+                       ifp->if_real_bytes = 0;
+               }
+       } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
+                  ((ifp->if_flags & XFS_IFEXTIREC) ||
+                   ((ifp->if_u1.if_extents != NULL) &&
+                    (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
+               ASSERT(ifp->if_real_bytes != 0);
+               xfs_iext_destroy(ifp);
+       }
+       ASSERT(ifp->if_u1.if_extents == NULL ||
+              ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
+       ASSERT(ifp->if_real_bytes == 0);
+       if (whichfork == XFS_ATTR_FORK) {
+               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+               ip->i_afp = NULL;
+       }
+}
+
+/*
+ * Convert in-core extents to on-disk form
+ *
+ * For either the data or attr fork in extent format, we need to endian convert
+ * the in-core extent as we place them into the on-disk inode.
+ *
+ * In the case of the data fork, the in-core and on-disk fork sizes can be
+ * different due to delayed allocation extents. We only copy on-disk extents
+ * here, so callers must always use the physical fork size to determine the
+ * size of the buffer passed to this routine.  We will return the size actually
+ * used.
+ */
+int
+xfs_iextents_copy(
+       xfs_inode_t             *ip,
+       xfs_bmbt_rec_t          *dp,
+       int                     whichfork)
+{
+       int                     copied;
+       int                     i;
+       xfs_ifork_t             *ifp;
+       int                     nrecs;
+       xfs_fsblock_t           start_block;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(ifp->if_bytes > 0);
+
+       nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
+       ASSERT(nrecs > 0);
+
+       /*
+        * There are some delayed allocation extents in the
+        * inode, so copy the extents one at a time and skip
+        * the delayed ones.  There must be at least one
+        * non-delayed extent.
+        */
+       copied = 0;
+       for (i = 0; i < nrecs; i++) {
+               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+               start_block = xfs_bmbt_get_startblock(ep);
+               if (isnullstartblock(start_block)) {
+                       /*
+                        * It's a delayed allocation extent, so skip it.
+                        */
+                       continue;
+               }
+
+               /* Translate to on disk format */
+               put_unaligned_be64(ep->l0, &dp->l0);
+               put_unaligned_be64(ep->l1, &dp->l1);
+               dp++;
+               copied++;
+       }
+       ASSERT(copied != 0);
+       xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
+
+       return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+}
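
The copy-out above is a filter copy: delayed allocation extents have no real
start block yet, exist only in memory, and are skipped when writing the fork
back to disk.  A small sketch of that pattern follows, using an invented
record layout and a sentinel value instead of isnullstartblock(); it is
illustrative only.

/*
 * Illustrative sketch only: copy an extent list while skipping entries
 * that are not yet allocated on disk.  struct demo_ext and the
 * UINT64_MAX sentinel are stand-ins, not the real bmbt encoding.
 */
#include <stdint.h>
#include <stddef.h>

struct demo_ext {
        uint64_t        startblock;     /* UINT64_MAX == delayed extent */
        uint64_t        blockcount;
};

static size_t demo_copy_real_extents(const struct demo_ext *in, size_t n,
                                     struct demo_ext *out)
{
        size_t  i, copied = 0;

        for (i = 0; i < n; i++) {
                if (in[i].startblock == UINT64_MAX)
                        continue;       /* skip delayed allocation */
                out[copied++] = in[i];  /* endian conversion would go here */
        }
        return copied;                  /* records actually written */
}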
+
+/*
+ * Each of the following cases stores data into the same region
+ * of the on-disk inode, so only one of them can be valid at
+ * any given time. While it is possible to have conflicting formats
+ * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
+ * in EXTENTS format, this can only happen when the fork has
+ * changed formats after being modified but before being flushed.
+ * In these cases, the format always takes precedence, because the
+ * format indicates the current state of the fork.
+ */
+void
+xfs_iflush_fork(
+       xfs_inode_t             *ip,
+       xfs_dinode_t            *dip,
+       xfs_inode_log_item_t    *iip,
+       int                     whichfork)
+{
+       char                    *cp;
+       xfs_ifork_t             *ifp;
+       xfs_mount_t             *mp;
+       static const short      brootflag[2] =
+               { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+       static const short      dataflag[2] =
+               { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
+       static const short      extflag[2] =
+               { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+
+       if (!iip)
+               return;
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       /*
+        * This can happen if we gave up in iformat in an error path,
+        * for the attribute fork.
+        */
+       if (!ifp) {
+               ASSERT(whichfork == XFS_ATTR_FORK);
+               return;
+       }
+       cp = XFS_DFORK_PTR(dip, whichfork);
+       mp = ip->i_mount;
+       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+       case XFS_DINODE_FMT_LOCAL:
+               if ((iip->ili_fields & dataflag[whichfork]) &&
+                   (ifp->if_bytes > 0)) {
+                       ASSERT(ifp->if_u1.if_data != NULL);
+                       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+                       memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
+               }
+               break;
+
+       case XFS_DINODE_FMT_EXTENTS:
+               ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
+                      !(iip->ili_fields & extflag[whichfork]));
+               if ((iip->ili_fields & extflag[whichfork]) &&
+                   (ifp->if_bytes > 0)) {
+                       ASSERT(xfs_iext_get_ext(ifp, 0));
+                       ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
+                       (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
+                               whichfork);
+               }
+               break;
+
+       case XFS_DINODE_FMT_BTREE:
+               if ((iip->ili_fields & brootflag[whichfork]) &&
+                   (ifp->if_broot_bytes > 0)) {
+                       ASSERT(ifp->if_broot != NULL);
+                       ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+                               XFS_IFORK_SIZE(ip, whichfork));
+                       xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
+                               (xfs_bmdr_block_t *)cp,
+                               XFS_DFORK_SIZE(dip, mp, whichfork));
+               }
+               break;
+
+       case XFS_DINODE_FMT_DEV:
+               if (iip->ili_fields & XFS_ILOG_DEV) {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
+               }
+               break;
+
+       case XFS_DINODE_FMT_UUID:
+               if (iip->ili_fields & XFS_ILOG_UUID) {
+                       ASSERT(whichfork == XFS_DATA_FORK);
+                       memcpy(XFS_DFORK_DPTR(dip),
+                              &ip->i_df.if_u2.if_uuid,
+                              sizeof(uuid_t));
+               }
+               break;
+
+       default:
+               ASSERT(0);
+               break;
+       }
+}
+
+/*
+ * Return a pointer to the extent record at file index idx.
+ */
+xfs_bmbt_rec_host_t *
+xfs_iext_get_ext(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    idx)            /* index of target extent */
+{
+       ASSERT(idx >= 0);
+       ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+
+       if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
+               return ifp->if_u1.if_ext_irec->er_extbuf;
+       } else if (ifp->if_flags & XFS_IFEXTIREC) {
+               xfs_ext_irec_t  *erp;           /* irec pointer */
+               int             erp_idx = 0;    /* irec index */
+               xfs_extnum_t    page_idx = idx; /* ext index in target list */
+
+               erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
+               return &erp->er_extbuf[page_idx];
+       } else if (ifp->if_bytes) {
+               return &ifp->if_u1.if_extents[idx];
+       } else {
+               return NULL;
+       }
+}
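
In the XFS_IFEXTIREC case above, a file-wide extent index has to be resolved
to a particular page of the indirection array plus an offset within that
page's extent buffer.  The sketch below shows that two-level lookup over a
flat array of pages with per-page occupancy counts; the types and names are
made up, and the real code does this through xfs_iext_idx_to_irec().

/*
 * Illustrative sketch only: map a global extent index to (page, offset)
 * by walking per-page extent counts.  struct demo_irec_page is
 * hypothetical.
 */
#include <stddef.h>

struct demo_irec_page {
        void    *extbuf;        /* extent buffer for this page */
        int     extcount;       /* slots in use in extbuf */
};

/* Returns the page and, via *page_idx, the index inside it; NULL if off the end. */
static struct demo_irec_page *
demo_idx_to_page(struct demo_irec_page *pages, int npages,
                 size_t idx, size_t *page_idx)
{
        size_t i;

        for (i = 0; i < (size_t)npages; i++) {
                if (idx < (size_t)pages[i].extcount) {
                        *page_idx = idx;
                        return &pages[i];
                }
                idx -= pages[i].extcount;
        }
        return NULL;
}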
+
+/*
+ * Insert new item(s) into the extent records for incore inode
+ * fork 'ifp'.  'count' new items are inserted at index 'idx'.
+ */
+void
+xfs_iext_insert(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    idx,            /* starting index of new items */
+       xfs_extnum_t    count,          /* number of inserted items */
+       xfs_bmbt_irec_t *new,           /* items to insert */
+       int             state)          /* type of extent conversion */
+{
+       xfs_ifork_t     *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+       xfs_extnum_t    i;              /* extent record index */
+
+       trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
+
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       xfs_iext_add(ifp, idx, count);
+       for (i = idx; i < idx + count; i++, new++)
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be increased. The ext_diff parameter stores the
+ * number of new extents being added and the idx parameter contains
+ * the extent index where the new extents will be added. If the new
+ * extents are being appended, then we just need to (re)allocate and
+ * initialize the space. Otherwise, if the new extents are being
+ * inserted into the middle of the existing entries, a bit more work
+ * is required to make room for the new extents to be inserted. The
+ * caller is responsible for filling in the new extent entries upon
+ * return.
+ */
+void
+xfs_iext_add(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    idx,            /* index to begin adding exts */
+       int             ext_diff)       /* number of extents to add */
+{
+       int             byte_diff;      /* new bytes being added */
+       int             new_size;       /* size of extents after adding */
+       xfs_extnum_t    nextents;       /* number of extents in file */
+
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT((idx >= 0) && (idx <= nextents));
+       byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
+       new_size = ifp->if_bytes + byte_diff;
+       /*
+        * If the new number of extents (nextents + ext_diff)
+        * fits inside the inode, then continue to use the inline
+        * extent buffer.
+        */
+       if (nextents + ext_diff <= XFS_INLINE_EXTS) {
+               if (idx < nextents) {
+                       memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
+                               &ifp->if_u2.if_inline_ext[idx],
+                               (nextents - idx) * sizeof(xfs_bmbt_rec_t));
+                       memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
+               }
+               ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+               ifp->if_real_bytes = 0;
+       }
+       /*
+        * Otherwise use a linear (direct) extent list.
+        * If the extents are currently inside the inode,
+        * xfs_iext_realloc_direct will switch us from
+        * inline to direct extent allocation mode.
+        */
+       else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
+               xfs_iext_realloc_direct(ifp, new_size);
+               if (idx < nextents) {
+                       memmove(&ifp->if_u1.if_extents[idx + ext_diff],
+                               &ifp->if_u1.if_extents[idx],
+                               (nextents - idx) * sizeof(xfs_bmbt_rec_t));
+                       memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
+               }
+       }
+       /* Indirection array */
+       else {
+               xfs_ext_irec_t  *erp;
+               int             erp_idx = 0;
+               int             page_idx = idx;
+
+               ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
+               if (ifp->if_flags & XFS_IFEXTIREC) {
+                       erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
+               } else {
+                       xfs_iext_irec_init(ifp);
+                       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+                       erp = ifp->if_u1.if_ext_irec;
+               }
+               /* Extents fit in target extent page */
+               if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
+                       if (page_idx < erp->er_extcount) {
+                               memmove(&erp->er_extbuf[page_idx + ext_diff],
+                                       &erp->er_extbuf[page_idx],
+                                       (erp->er_extcount - page_idx) *
+                                       sizeof(xfs_bmbt_rec_t));
+                               memset(&erp->er_extbuf[page_idx], 0, byte_diff);
+                       }
+                       erp->er_extcount += ext_diff;
+                       xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+               }
+               /* Insert a new extent page */
+               else if (erp) {
+                       xfs_iext_add_indirect_multi(ifp,
+                               erp_idx, page_idx, ext_diff);
+               }
+               /*
+                * If extent(s) are being appended to the last page in
+                * the indirection array and the new extent(s) don't fit
+                * in the page, then erp is NULL and erp_idx is set to
+                * the next index needed in the indirection array.
+                */
+               else {
+                       uint    count = ext_diff;
+
+                       while (count) {
+                               erp = xfs_iext_irec_new(ifp, erp_idx);
+                               erp->er_extcount = min(count, XFS_LINEAR_EXTS);
+                               count -= erp->er_extcount;
+                               if (count)
+                                       erp_idx++;
+                       }
+               }
+       }
+       ifp->if_bytes = new_size;
+}
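
The function above chooses between three storage tiers for the incore extent
list: the small inline array embedded in the fork, a single contiguous
(direct) heap buffer, and the indirection array of page-sized buffers.  A
plain decision function capturing that tiering is sketched below; the
constants and enum are invented for the example and only mirror
XFS_INLINE_EXTS/XFS_LINEAR_EXTS in spirit.

/*
 * Illustrative sketch only: pick a storage tier from the extent count.
 * DEMO_INLINE_EXTS, DEMO_LINEAR_EXTS and enum demo_ext_tier are made up.
 */
enum demo_ext_tier {
        DEMO_TIER_INLINE,       /* small: array embedded in the fork */
        DEMO_TIER_DIRECT,       /* medium: one contiguous heap buffer */
        DEMO_TIER_INDIRECT,     /* large: indirection array of page buffers */
};

#define DEMO_INLINE_EXTS        2
#define DEMO_LINEAR_EXTS        256

static enum demo_ext_tier demo_pick_tier(unsigned int nextents)
{
        if (nextents <= DEMO_INLINE_EXTS)
                return DEMO_TIER_INLINE;
        if (nextents <= DEMO_LINEAR_EXTS)
                return DEMO_TIER_DIRECT;
        return DEMO_TIER_INDIRECT;
}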
+
+/*
+ * This is called when incore extents are being added to the indirection
+ * array and the new extents do not fit in the target extent list. The
+ * erp_idx parameter contains the irec index for the target extent list
+ * in the indirection array, and the idx parameter contains the extent
+ * index within the list. The number of extents being added is stored
+ * in the count parameter.
+ *
+ *    |-------|   |-------|
+ *    |       |   |       |    idx - number of extents before idx
+ *    |  idx  |   | count |
+ *    |       |   |       |    count - number of extents being inserted at idx
+ *    |-------|   |-------|
+ *    | count |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_add_indirect_multi(
+       xfs_ifork_t     *ifp,                   /* inode fork pointer */
+       int             erp_idx,                /* target extent irec index */
+       xfs_extnum_t    idx,                    /* index within target list */
+       int             count)                  /* new extents being added */
+{
+       int             byte_diff;              /* new bytes being added */
+       xfs_ext_irec_t  *erp;                   /* pointer to irec entry */
+       xfs_extnum_t    ext_diff;               /* number of extents to add */
+       xfs_extnum_t    ext_cnt;                /* new extents still needed */
+       xfs_extnum_t    nex2;                   /* extents after idx + count */
+       xfs_bmbt_rec_t  *nex2_ep = NULL;        /* temp list for nex2 extents */
+       int             nlists;                 /* number of irec's (lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       erp = &ifp->if_u1.if_ext_irec[erp_idx];
+       nex2 = erp->er_extcount - idx;
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+       /*
+        * Save second part of target extent list
+        * (all extents past idx). */
+       if (nex2) {
+               byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+               nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
+               memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
+               erp->er_extcount -= nex2;
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
+               memset(&erp->er_extbuf[idx], 0, byte_diff);
+       }
+
+       /*
+        * Add the new extents to the end of the target
+        * list, then allocate new irec record(s) and
+        * extent buffer(s) as needed to store the rest
+        * of the new extents.
+        */
+       ext_cnt = count;
+       ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
+       if (ext_diff) {
+               erp->er_extcount += ext_diff;
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+               ext_cnt -= ext_diff;
+       }
+       while (ext_cnt) {
+               erp_idx++;
+               erp = xfs_iext_irec_new(ifp, erp_idx);
+               ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
+               erp->er_extcount = ext_diff;
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+               ext_cnt -= ext_diff;
+       }
+
+       /* Add nex2 extents back to indirection array */
+       if (nex2) {
+               xfs_extnum_t    ext_avail;
+               int             i;
+
+               byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+               ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
+               i = 0;
+               /*
+                * If nex2 extents fit in the current page, append
+                * nex2_ep after the new extents.
+                */
+               if (nex2 <= ext_avail) {
+                       i = erp->er_extcount;
+               }
+               /*
+                * Otherwise, check if space is available in the
+                * next page.
+                */
+               else if ((erp_idx < nlists - 1) &&
+                        (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
+                         ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
+                       erp_idx++;
+                       erp++;
+                       /* Create a hole for nex2 extents */
+                       memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
+                               erp->er_extcount * sizeof(xfs_bmbt_rec_t));
+               }
+               /*
+                * Final choice, create a new extent page for
+                * nex2 extents.
+                */
+               else {
+                       erp_idx++;
+                       erp = xfs_iext_irec_new(ifp, erp_idx);
+               }
+               memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
+               kmem_free(nex2_ep);
+               erp->er_extcount += nex2;
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
+       }
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be decreased. The ext_diff parameter stores the
+ * number of extents to be removed and the idx parameter contains
+ * the extent index where the extents will be removed from.
+ *
+ * If the amount of space needed has decreased below the linear
+ * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
+ * extent array.  Otherwise, use kmem_realloc() to adjust the
+ * size to what is needed.
+ */
+void
+xfs_iext_remove(
+       xfs_inode_t     *ip,            /* incore inode pointer */
+       xfs_extnum_t    idx,            /* index to begin removing exts */
+       int             ext_diff,       /* number of extents to remove */
+       int             state)          /* type of extent conversion */
+{
+       xfs_ifork_t     *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       int             new_size;       /* size of extents after removal */
+
+       trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
+
+       ASSERT(ext_diff > 0);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
+
+       if (new_size == 0) {
+               xfs_iext_destroy(ifp);
+       } else if (ifp->if_flags & XFS_IFEXTIREC) {
+               xfs_iext_remove_indirect(ifp, idx, ext_diff);
+       } else if (ifp->if_real_bytes) {
+               xfs_iext_remove_direct(ifp, idx, ext_diff);
+       } else {
+               xfs_iext_remove_inline(ifp, idx, ext_diff);
+       }
+       ifp->if_bytes = new_size;
+}
+
+/*
+ * This removes ext_diff extents from the inline buffer, beginning
+ * at extent index idx.
+ */
+void
+xfs_iext_remove_inline(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    idx,            /* index to begin removing exts */
+       int             ext_diff)       /* number of extents to remove */
+{
+       int             nextents;       /* number of extents in file */
+
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+       ASSERT(idx < XFS_INLINE_EXTS);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT(((nextents - ext_diff) > 0) &&
+               (nextents - ext_diff) < XFS_INLINE_EXTS);
+
+       if (idx + ext_diff < nextents) {
+               memmove(&ifp->if_u2.if_inline_ext[idx],
+                       &ifp->if_u2.if_inline_ext[idx + ext_diff],
+                       (nextents - (idx + ext_diff)) *
+                        sizeof(xfs_bmbt_rec_t));
+               memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
+                       0, ext_diff * sizeof(xfs_bmbt_rec_t));
+       } else {
+               memset(&ifp->if_u2.if_inline_ext[idx], 0,
+                       ext_diff * sizeof(xfs_bmbt_rec_t));
+       }
+}
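
Removal from the inline array is the usual fixed-array pattern: shift the
tail of the array down over the removed range with memmove(), then zero the
vacated slots at the end.  A compact sketch of that pattern, with invented
names and a generic element type, is shown below.

/*
 * Illustrative sketch only: remove 'count' entries starting at 'idx'
 * from a fixed array holding 'nused' entries, then clear the tail.
 */
#include <string.h>
#include <stdint.h>

static void demo_array_remove(uint64_t *arr, int nused, int idx, int count)
{
        int tail = nused - (idx + count);       /* entries after the hole */

        if (tail > 0)
                memmove(&arr[idx], &arr[idx + count],
                        tail * sizeof(arr[0]));
        memset(&arr[nused - count], 0, count * sizeof(arr[0]));
}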
+
+/*
+ * This removes ext_diff extents from a linear (direct) extent list,
+ * beginning at extent index idx. If the extents are being removed
+ * from the end of the list (ie. truncate) then we just need to re-
+ * allocate the list to remove the extra space. Otherwise, if the
+ * extents are being removed from the middle of the existing extent
+ * entries, then we first need to move the extent records beginning
+ * at idx + ext_diff up in the list to overwrite the records being
+ * removed, then remove the extra space via kmem_realloc.
+ */
+void
+xfs_iext_remove_direct(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    idx,            /* index to begin removing exts */
+       int             ext_diff)       /* number of extents to remove */
+{
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       int             new_size;       /* size of extents after removal */
+
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+       new_size = ifp->if_bytes -
+               (ext_diff * sizeof(xfs_bmbt_rec_t));
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+       if (new_size == 0) {
+               xfs_iext_destroy(ifp);
+               return;
+       }
+       /* Move extents up in the list (if needed) */
+       if (idx + ext_diff < nextents) {
+               memmove(&ifp->if_u1.if_extents[idx],
+                       &ifp->if_u1.if_extents[idx + ext_diff],
+                       (nextents - (idx + ext_diff)) *
+                        sizeof(xfs_bmbt_rec_t));
+       }
+       memset(&ifp->if_u1.if_extents[nextents - ext_diff],
+               0, ext_diff * sizeof(xfs_bmbt_rec_t));
+       /*
+        * Reallocate the direct extent list. If the extents
+        * will fit inside the inode then xfs_iext_realloc_direct
+        * will switch from direct to inline extent allocation
+        * mode for us.
+        */
+       xfs_iext_realloc_direct(ifp, new_size);
+       ifp->if_bytes = new_size;
+}
+
+/*
+ * This is called when incore extents are being removed from the
+ * indirection array and the extents being removed span multiple extent
+ * buffers. The idx parameter contains the file extent index where we
+ * want to begin removing extents, and the count parameter contains
+ * how many extents need to be removed.
+ *
+ *    |-------|   |-------|
+ *    | nex1  |   |       |    nex1 - number of extents before idx
+ *    |-------|   | count |
+ *    |       |   |       |    count - number of extents being removed at idx
+ *    | count |   |-------|
+ *    |       |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_remove_indirect(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    idx,            /* index to begin removing extents */
+       int             count)          /* number of extents to remove */
+{
+       xfs_ext_irec_t  *erp;           /* indirection array pointer */
+       int             erp_idx = 0;    /* indirection array index */
+       xfs_extnum_t    ext_cnt;        /* extents left to remove */
+       xfs_extnum_t    ext_diff;       /* extents to remove in current list */
+       xfs_extnum_t    nex1;           /* number of extents before idx */
+       xfs_extnum_t    nex2;           /* extents after idx + count */
+       int             page_idx = idx; /* index in target extent list */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
+       ASSERT(erp != NULL);
+       nex1 = page_idx;
+       ext_cnt = count;
+       while (ext_cnt) {
+               nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
+               ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
+               /*
+                * Check for deletion of entire list;
+                * xfs_iext_irec_remove() updates extent offsets.
+                */
+               if (ext_diff == erp->er_extcount) {
+                       xfs_iext_irec_remove(ifp, erp_idx);
+                       ext_cnt -= ext_diff;
+                       nex1 = 0;
+                       if (ext_cnt) {
+                               ASSERT(erp_idx < ifp->if_real_bytes /
+                                       XFS_IEXT_BUFSZ);
+                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
+                               nex1 = 0;
+                               continue;
+                       } else {
+                               break;
+                       }
+               }
+               /* Move extents up (if needed) */
+               if (nex2) {
+                       memmove(&erp->er_extbuf[nex1],
+                               &erp->er_extbuf[nex1 + ext_diff],
+                               nex2 * sizeof(xfs_bmbt_rec_t));
+               }
+               /* Zero out rest of page */
+               memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
+                       ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
+               /* Update remaining counters */
+               erp->er_extcount -= ext_diff;
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
+               ext_cnt -= ext_diff;
+               nex1 = 0;
+               erp_idx++;
+               erp++;
+       }
+       ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
+       xfs_iext_irec_compact(ifp);
+}
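
The diagram above shows how a removal that spans extent pages is split into per-page chunks: nex1 records kept in front of the removal, ext_diff removed from the current page, nex2 kept after it. As a rough illustration only (not part of this patch), here is a minimal userspace C sketch of that split arithmetic; the names, page sizes, and record counts are invented for the example.

#include <stdio.h>

/*
 * Illustrative only: show how removing 'count' records starting at record
 * 'idx' of the first affected page is split into per-page (nex1, ext_diff,
 * nex2) chunks. extcount[] holds the number of records in each page.
 */
static void split_removal(const int extcount[], int npages, int idx, int count)
{
	int nex1 = idx;		/* records kept in front of the removal */
	int ext_cnt = count;	/* records still to remove */
	int page = 0;

	while (ext_cnt > 0 && page < npages) {
		int avail = extcount[page] - nex1;
		int ext_diff = ext_cnt < avail ? ext_cnt : avail;
		int nex2 = extcount[page] - (nex1 + ext_diff);

		printf("page %d: keep %d, remove %d, keep %d after\n",
		       page, nex1, ext_diff, nex2);
		ext_cnt -= ext_diff;
		nex1 = 0;	/* later pages are trimmed from record 0 */
		page++;
	}
}

int main(void)
{
	int extcount[] = { 4, 4, 3 };	/* made-up per-page record counts */

	/* Remove 6 records starting at record 2 of the first page. */
	split_removal(extcount, 3, 2, 6);
	return 0;
}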
+
+/*
+ * Create, destroy, or resize a linear (direct) block of extents.
+ */
+void
+xfs_iext_realloc_direct(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             new_size)       /* new size of extents after adding */
+{
+       int             rnew_size;      /* real new size of extents */
+
+       rnew_size = new_size;
+
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
+               ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
+                (new_size != ifp->if_real_bytes)));
+
+       /* Free extent records */
+       if (new_size == 0) {
+               xfs_iext_destroy(ifp);
+       }
+       /* Resize direct extent list and zero any new bytes */
+       else if (ifp->if_real_bytes) {
+               /* Check if extents will fit inside the inode */
+               if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
+                       xfs_iext_direct_to_inline(ifp, new_size /
+                               (uint)sizeof(xfs_bmbt_rec_t));
+                       ifp->if_bytes = new_size;
+                       return;
+               }
+               if (!is_power_of_2(new_size)){
+                       rnew_size = roundup_pow_of_two(new_size);
+               }
+               if (rnew_size != ifp->if_real_bytes) {
+                       ifp->if_u1.if_extents =
+                               kmem_realloc(ifp->if_u1.if_extents,
+                                               rnew_size,
+                                               ifp->if_real_bytes, KM_NOFS);
+               }
+               if (rnew_size > ifp->if_real_bytes) {
+                       memset(&ifp->if_u1.if_extents[ifp->if_bytes /
+                               (uint)sizeof(xfs_bmbt_rec_t)], 0,
+                               rnew_size - ifp->if_real_bytes);
+               }
+       }
+       /* Switch from the inline extent buffer to a direct extent list */
+       else {
+               if (!is_power_of_2(new_size)) {
+                       rnew_size = roundup_pow_of_two(new_size);
+               }
+               xfs_iext_inline_to_direct(ifp, rnew_size);
+       }
+       ifp->if_real_bytes = rnew_size;
+       ifp->if_bytes = new_size;
+}
+
+/*
+ * Switch from linear (direct) extent records to inline buffer.
+ */
+void
+xfs_iext_direct_to_inline(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    nextents)       /* number of extents in file */
+{
+       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+       ASSERT(nextents <= XFS_INLINE_EXTS);
+       /*
+        * The inline buffer was zeroed when we switched
+        * from inline to direct extent allocation mode,
+        * so we don't need to clear it here.
+        */
+       memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+               nextents * sizeof(xfs_bmbt_rec_t));
+       kmem_free(ifp->if_u1.if_extents);
+       ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+       ifp->if_real_bytes = 0;
+}
+
+/*
+ * Switch from inline buffer to linear (direct) extent records.
+ * new_size should already be rounded up to the next power of 2
+ * by the caller (when appropriate), so use new_size as it is.
+ * However, since new_size may be rounded up, we can't update
+ * if_bytes here. It is the caller's responsibility to update
+ * if_bytes upon return.
+ */
+void
+xfs_iext_inline_to_direct(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             new_size)       /* number of extents in file */
+{
+       ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
+       memset(ifp->if_u1.if_extents, 0, new_size);
+       if (ifp->if_bytes) {
+               memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
+                       ifp->if_bytes);
+               memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+                       sizeof(xfs_bmbt_rec_t));
+       }
+       ifp->if_real_bytes = new_size;
+}
+
+/*
+ * Resize an extent indirection array to new_size bytes.
+ */
+STATIC void
+xfs_iext_realloc_indirect(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             new_size)       /* new indirection array size */
+{
+       int             nlists;         /* number of irec's (ex lists) */
+       int             size;           /* current indirection array size */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       size = nlists * sizeof(xfs_ext_irec_t);
+       ASSERT(ifp->if_real_bytes);
+       ASSERT((new_size >= 0) && (new_size != size));
+       if (new_size == 0) {
+               xfs_iext_destroy(ifp);
+       } else {
+               ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
+                       kmem_realloc(ifp->if_u1.if_ext_irec,
+                               new_size, size, KM_NOFS);
+       }
+}
+
+/*
+ * Switch from indirection array to linear (direct) extent allocations.
+ */
+STATIC void
+xfs_iext_indirect_to_direct(
+        xfs_ifork_t    *ifp)           /* inode fork pointer */
+{
+       xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       int             size;           /* size of file extents */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT(nextents <= XFS_LINEAR_EXTS);
+       size = nextents * sizeof(xfs_bmbt_rec_t);
+
+       xfs_iext_irec_compact_pages(ifp);
+       ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
+
+       ep = ifp->if_u1.if_ext_irec->er_extbuf;
+       kmem_free(ifp->if_u1.if_ext_irec);
+       ifp->if_flags &= ~XFS_IFEXTIREC;
+       ifp->if_u1.if_extents = ep;
+       ifp->if_bytes = size;
+       if (nextents < XFS_LINEAR_EXTS) {
+               xfs_iext_realloc_direct(ifp, size);
+       }
+}
+
+/*
+ * Free incore file extents.
+ */
+void
+xfs_iext_destroy(
+       xfs_ifork_t     *ifp)           /* inode fork pointer */
+{
+       if (ifp->if_flags & XFS_IFEXTIREC) {
+               int     erp_idx;
+               int     nlists;
+
+               nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+               for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
+                       xfs_iext_irec_remove(ifp, erp_idx);
+               }
+               ifp->if_flags &= ~XFS_IFEXTIREC;
+       } else if (ifp->if_real_bytes) {
+               kmem_free(ifp->if_u1.if_extents);
+       } else if (ifp->if_bytes) {
+               memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+                       sizeof(xfs_bmbt_rec_t));
+       }
+       ifp->if_u1.if_extents = NULL;
+       ifp->if_real_bytes = 0;
+       ifp->if_bytes = 0;
+}
+
+/*
+ * Return a pointer to the extent record for file system block bno.
+ */
+xfs_bmbt_rec_host_t *                  /* pointer to found extent record */
+xfs_iext_bno_to_ext(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_fileoff_t   bno,            /* block number to search for */
+       xfs_extnum_t    *idxp)          /* index of target extent */
+{
+       xfs_bmbt_rec_host_t *base;      /* pointer to first extent */
+       xfs_filblks_t   blockcount = 0; /* number of blocks in extent */
+       xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
+       xfs_ext_irec_t  *erp = NULL;    /* indirection array pointer */
+       int             high;           /* upper boundary in search */
+       xfs_extnum_t    idx = 0;        /* index of target extent */
+       int             low;            /* lower boundary in search */
+       xfs_extnum_t    nextents;       /* number of file extents */
+       xfs_fileoff_t   startoff = 0;   /* start offset of extent */
+
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       if (nextents == 0) {
+               *idxp = 0;
+               return NULL;
+       }
+       low = 0;
+       if (ifp->if_flags & XFS_IFEXTIREC) {
+               /* Find target extent list */
+               int     erp_idx = 0;
+               erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
+               base = erp->er_extbuf;
+               high = erp->er_extcount - 1;
+       } else {
+               base = ifp->if_u1.if_extents;
+               high = nextents - 1;
+       }
+       /* Binary search extent records */
+       while (low <= high) {
+               idx = (low + high) >> 1;
+               ep = base + idx;
+               startoff = xfs_bmbt_get_startoff(ep);
+               blockcount = xfs_bmbt_get_blockcount(ep);
+               if (bno < startoff) {
+                       high = idx - 1;
+               } else if (bno >= startoff + blockcount) {
+                       low = idx + 1;
+               } else {
+                       /* Convert back to file-based extent index */
+                       if (ifp->if_flags & XFS_IFEXTIREC) {
+                               idx += erp->er_extoff;
+                       }
+                       *idxp = idx;
+                       return ep;
+               }
+       }
+       /* Convert back to file-based extent index */
+       if (ifp->if_flags & XFS_IFEXTIREC) {
+               idx += erp->er_extoff;
+       }
+       if (bno >= startoff + blockcount) {
+               if (++idx == nextents) {
+                       ep = NULL;
+               } else {
+                       ep = xfs_iext_get_ext(ifp, idx);
+               }
+       }
+       *idxp = idx;
+       return ep;
+}
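
The lookup above is a plain binary search over extent records keyed by start offset, with the hit converted back to a file-wide index when the indirection array is in use. A minimal userspace sketch of just the search step, assuming an illustrative record layout rather than the real xfs_bmbt_rec_host_t encoding:

#include <stdio.h>

/* Illustrative extent record; not the real xfs_bmbt_rec_host_t encoding. */
struct ext_rec {
	unsigned long long startoff;	/* first file block mapped */
	unsigned long long blockcount;	/* number of blocks mapped */
};

/* Return the index of the extent containing bno, or -1 if bno is in a hole. */
static int ext_lookup(const struct ext_rec *recs, int nrecs,
		      unsigned long long bno)
{
	int low = 0, high = nrecs - 1;

	while (low <= high) {
		int mid = (low + high) >> 1;

		if (bno < recs[mid].startoff)
			high = mid - 1;
		else if (bno >= recs[mid].startoff + recs[mid].blockcount)
			low = mid + 1;
		else
			return mid;	/* bno falls inside this extent */
	}
	return -1;
}

int main(void)
{
	struct ext_rec recs[] = { { 0, 8 }, { 16, 4 }, { 32, 2 } };

	printf("%d %d\n", ext_lookup(recs, 3, 17), ext_lookup(recs, 3, 9));
	return 0;
}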
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record for filesystem block bno. Store the index of the
+ * target irec in *erp_idxp.
+ */
+xfs_ext_irec_t *                       /* pointer to found extent record */
+xfs_iext_bno_to_irec(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_fileoff_t   bno,            /* block number to search for */
+       int             *erp_idxp)      /* irec index of target ext list */
+{
+       xfs_ext_irec_t  *erp = NULL;    /* indirection array pointer */
+       xfs_ext_irec_t  *erp_next;      /* next indirection array entry */
+       int             erp_idx;        /* indirection array index */
+       int             nlists;         /* number of extent irec's (lists) */
+       int             high;           /* binary search upper limit */
+       int             low;            /* binary search lower limit */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       erp_idx = 0;
+       low = 0;
+       high = nlists - 1;
+       while (low <= high) {
+               erp_idx = (low + high) >> 1;
+               erp = &ifp->if_u1.if_ext_irec[erp_idx];
+               erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
+               if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
+                       high = erp_idx - 1;
+               } else if (erp_next && bno >=
+                          xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
+                       low = erp_idx + 1;
+               } else {
+                       break;
+               }
+       }
+       *erp_idxp = erp_idx;
+       return erp;
+}
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record at file extent index *idxp. Store the index of the
+ * target irec in *erp_idxp and store the page index of the target
+ * extent record in *idxp.
+ */
+xfs_ext_irec_t *
+xfs_iext_idx_to_irec(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       xfs_extnum_t    *idxp,          /* extent index (file -> page) */
+       int             *erp_idxp,      /* pointer to target irec */
+       int             realloc)        /* new bytes were just added */
+{
+       xfs_ext_irec_t  *prev;          /* pointer to previous irec */
+       xfs_ext_irec_t  *erp = NULL;    /* pointer to current irec */
+       int             erp_idx;        /* indirection array index */
+       int             nlists;         /* number of irec's (ex lists) */
+       int             high;           /* binary search upper limit */
+       int             low;            /* binary search lower limit */
+       xfs_extnum_t    page_idx = *idxp; /* extent index in target list */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       ASSERT(page_idx >= 0);
+       ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+       ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       erp_idx = 0;
+       low = 0;
+       high = nlists - 1;
+
+       /* Binary search extent irec's */
+       while (low <= high) {
+               erp_idx = (low + high) >> 1;
+               erp = &ifp->if_u1.if_ext_irec[erp_idx];
+               prev = erp_idx > 0 ? erp - 1 : NULL;
+               if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
+                    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
+                       high = erp_idx - 1;
+               } else if (page_idx > erp->er_extoff + erp->er_extcount ||
+                          (page_idx == erp->er_extoff + erp->er_extcount &&
+                           !realloc)) {
+                       low = erp_idx + 1;
+               } else if (page_idx == erp->er_extoff + erp->er_extcount &&
+                          erp->er_extcount == XFS_LINEAR_EXTS) {
+                       ASSERT(realloc);
+                       page_idx = 0;
+                       erp_idx++;
+                       erp = erp_idx < nlists ? erp + 1 : NULL;
+                       break;
+               } else {
+                       page_idx -= erp->er_extoff;
+                       break;
+               }
+       }
+       *idxp = page_idx;
+       *erp_idxp = erp_idx;
+       return erp;
+}
+
+/*
+ * Allocate and initialize an indirection array once the space needed
+ * for incore extents increases above XFS_IEXT_BUFSZ.
+ */
+void
+xfs_iext_irec_init(
+       xfs_ifork_t     *ifp)           /* inode fork pointer */
+{
+       xfs_ext_irec_t  *erp;           /* indirection array pointer */
+       xfs_extnum_t    nextents;       /* number of extents in file */
+
+       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       ASSERT(nextents <= XFS_LINEAR_EXTS);
+
+       erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
+
+       if (nextents == 0) {
+               ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
+       } else if (!ifp->if_real_bytes) {
+               xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
+       } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
+               xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
+       }
+       erp->er_extbuf = ifp->if_u1.if_extents;
+       erp->er_extcount = nextents;
+       erp->er_extoff = 0;
+
+       ifp->if_flags |= XFS_IFEXTIREC;
+       ifp->if_real_bytes = XFS_IEXT_BUFSZ;
+       ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
+       ifp->if_u1.if_ext_irec = erp;
+
+       return;
+}
+
+/*
+ * Allocate and initialize a new entry in the indirection array.
+ */
+xfs_ext_irec_t *
+xfs_iext_irec_new(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             erp_idx)        /* index for new irec */
+{
+       xfs_ext_irec_t  *erp;           /* indirection array pointer */
+       int             i;              /* loop counter */
+       int             nlists;         /* number of irec's (ex lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+       /* Resize indirection array */
+       xfs_iext_realloc_indirect(ifp, ++nlists *
+                                 sizeof(xfs_ext_irec_t));
+       /*
+        * Move records down in the array so the
+        * new page can use erp_idx.
+        */
+       erp = ifp->if_u1.if_ext_irec;
+       for (i = nlists - 1; i > erp_idx; i--) {
+               memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
+       }
+       ASSERT(i == erp_idx);
+
+       /* Initialize new extent record */
+       erp = ifp->if_u1.if_ext_irec;
+       erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
+       ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+       memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
+       erp[erp_idx].er_extcount = 0;
+       erp[erp_idx].er_extoff = erp_idx > 0 ?
+               erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
+       return (&erp[erp_idx]);
+}
+
+/*
+ * Remove a record from the indirection array.
+ */
+void
+xfs_iext_irec_remove(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             erp_idx)        /* irec index to remove */
+{
+       xfs_ext_irec_t  *erp;           /* indirection array pointer */
+       int             i;              /* loop counter */
+       int             nlists;         /* number of irec's (ex lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       erp = &ifp->if_u1.if_ext_irec[erp_idx];
+       if (erp->er_extbuf) {
+               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
+                       -erp->er_extcount);
+               kmem_free(erp->er_extbuf);
+       }
+       /* Compact extent records */
+       erp = ifp->if_u1.if_ext_irec;
+       for (i = erp_idx; i < nlists - 1; i++) {
+               memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
+       }
+       /*
+        * Manually free the last extent record from the indirection
+        * array.  A call to xfs_iext_realloc_indirect() with a size
+        * of zero would result in a call to xfs_iext_destroy() which
+        * would in turn call this function again, creating a nasty
+        * infinite loop.
+        */
+       if (--nlists) {
+               xfs_iext_realloc_indirect(ifp,
+                       nlists * sizeof(xfs_ext_irec_t));
+       } else {
+               kmem_free(ifp->if_u1.if_ext_irec);
+       }
+       ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+}
+
+/*
+ * This is called to clean up large amounts of unused memory allocated
+ * by the indirection array.  Before compacting anything though, verify
+ * that the indirection array is still needed and switch back to the
+ * linear extent list (or even the inline buffer) if possible.  The
+ * compaction policy is as follows:
+ *
+ *    Full Compaction: Extents fit into a single page (or inline buffer)
+ * Partial Compaction: Extents occupy less than 50% of allocated space
+ *      No Compaction: Extents occupy at least 50% of allocated space
+ */
+void
+xfs_iext_irec_compact(
+       xfs_ifork_t     *ifp)           /* inode fork pointer */
+{
+       xfs_extnum_t    nextents;       /* number of extents in file */
+       int             nlists;         /* number of irec's (ex lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+       if (nextents == 0) {
+               xfs_iext_destroy(ifp);
+       } else if (nextents <= XFS_INLINE_EXTS) {
+               xfs_iext_indirect_to_direct(ifp);
+               xfs_iext_direct_to_inline(ifp, nextents);
+       } else if (nextents <= XFS_LINEAR_EXTS) {
+               xfs_iext_indirect_to_direct(ifp);
+       } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
+               xfs_iext_irec_compact_pages(ifp);
+       }
+}
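
To make the compaction policy above concrete, here is a small, hedged userspace sketch of the decision only (destroy, full, partial, or none); the INLINE_EXTS and LINEAR_EXTS values are stand-ins for the real XFS constants, not their actual values.

#include <stdio.h>

#define INLINE_EXTS	2	/* stand-in: extents that fit in the inline buffer */
#define LINEAR_EXTS	4	/* stand-in: extents that fit in one extent page */

/* Illustrative only: pick a compaction action from the policy above. */
static const char *compaction_choice(int nextents, int npages)
{
	if (nextents == 0)
		return "destroy the extent list";
	if (nextents <= INLINE_EXTS)
		return "full compaction, back to the inline buffer";
	if (nextents <= LINEAR_EXTS)
		return "full compaction, back to a direct list";
	if (nextents < (npages * LINEAR_EXTS) / 2)
		return "partial compaction, merge neighbouring pages";
	return "no compaction, at least 50% of the space is in use";
}

int main(void)
{
	printf("%s\n", compaction_choice(5, 4));	/* partial */
	printf("%s\n", compaction_choice(7, 2));	/* none */
	return 0;
}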
+
+/*
+ * Combine extents from neighboring extent pages.
+ */
+void
+xfs_iext_irec_compact_pages(
+       xfs_ifork_t     *ifp)           /* inode fork pointer */
+{
+       xfs_ext_irec_t  *erp, *erp_next;/* pointers to irec entries */
+       int             erp_idx = 0;    /* indirection array index */
+       int             nlists;         /* number of irec's (ex lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       while (erp_idx < nlists - 1) {
+               erp = &ifp->if_u1.if_ext_irec[erp_idx];
+               erp_next = erp + 1;
+               if (erp_next->er_extcount <=
+                   (XFS_LINEAR_EXTS - erp->er_extcount)) {
+                       memcpy(&erp->er_extbuf[erp->er_extcount],
+                               erp_next->er_extbuf, erp_next->er_extcount *
+                               sizeof(xfs_bmbt_rec_t));
+                       erp->er_extcount += erp_next->er_extcount;
+                       /*
+                        * Free page before removing extent record
+                        * so er_extoffs don't get modified in
+                        * xfs_iext_irec_remove.
+                        */
+                       kmem_free(erp_next->er_extbuf);
+                       erp_next->er_extbuf = NULL;
+                       xfs_iext_irec_remove(ifp, erp_idx + 1);
+                       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+               } else {
+                       erp_idx++;
+               }
+       }
+}
+
+/*
+ * This is called to update the er_extoff field in the indirection
+ * array when extents have been added or removed from one of the
+ * extent lists. erp_idx contains the irec index to begin updating
+ * at and ext_diff contains the number of extents that were added
+ * or removed.
+ */
+void
+xfs_iext_irec_update_extoffs(
+       xfs_ifork_t     *ifp,           /* inode fork pointer */
+       int             erp_idx,        /* irec index to update */
+       int             ext_diff)       /* number of new extents */
+{
+       int             i;              /* loop counter */
+       int             nlists;         /* number of irec's (ex lists) */
+
+       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+       for (i = erp_idx; i < nlists; i++) {
+               ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
+       }
+}
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
new file mode 100644 (file)
index 0000000..ee7e0e8
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2013 Jie Liu.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_trans_space.h"
+#include "xfs_inode.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_bmap_btree.h"
+
+/*
+ * Calculate the maximum length in bytes that would be required for a local
+ * attribute value, as large out-of-line attributes are not logged.
+ */
+STATIC int
+xfs_log_calc_max_attrsetm_res(
+       struct xfs_mount        *mp)
+{
+       int                     size;
+       int                     nblks;
+
+       size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -
+              MAXNAMELEN - 1;
+       nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+       nblks += XFS_B_TO_FSB(mp, size);
+       nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
+
+       return  M_RES(mp)->tr_attrsetm.tr_logres +
+               M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
+}
+
+/*
+ * Iterate over the log space reservation table and return the largest
+ * reservation, based on the values pre-calculated at mount time.
+ */
+STATIC void
+xfs_log_get_max_trans_res(
+       struct xfs_mount        *mp,
+       struct xfs_trans_res    *max_resp)
+{
+       struct xfs_trans_res    *resp;
+       struct xfs_trans_res    *end_resp;
+       int                     log_space = 0;
+       int                     attr_space;
+
+       attr_space = xfs_log_calc_max_attrsetm_res(mp);
+
+       resp = (struct xfs_trans_res *)M_RES(mp);
+       end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
+       for (; resp < end_resp; resp++) {
+               int             tmp = resp->tr_logcount > 1 ?
+                                     resp->tr_logres * resp->tr_logcount :
+                                     resp->tr_logres;
+               if (log_space < tmp) {
+                       log_space = tmp;
+                       *max_resp = *resp;              /* struct copy */
+               }
+       }
+
+       if (attr_space > log_space) {
+               *max_resp = M_RES(mp)->tr_attrsetm;     /* struct copy */
+               max_resp->tr_logres = attr_space;
+       }
+}
+
+/*
+ * Calculate the minimum valid log size for the given superblock configuration.
+ * Used to calculate the minimum log size at mkfs time, and to determine if
+ * the log is large enough or not at mount time. Returns the minimum size in
+ * filesystem block size units.
+ */
+int
+xfs_log_calc_minimum_size(
+       struct xfs_mount        *mp)
+{
+       struct xfs_trans_res    tres = {0};
+       int                     max_logres;
+       int                     min_logblks = 0;
+       int                     lsunit = 0;
+
+       xfs_log_get_max_trans_res(mp, &tres);
+
+       max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
+       if (tres.tr_logcount > 1)
+               max_logres *= tres.tr_logcount;
+
+       if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
+               lsunit = BTOBB(mp->m_sb.sb_logsunit);
+
+       /*
+        * Two factors should be taken into account for calculating the minimum
+        * log space.
+        * 1) The fundamental limitation is that no single transaction can be
+        *    larger than half the size of the log.
+        *
+        *    In mkfs.xfs this is accounted for by the XFS_MIN_LOG_FACTOR
+        *    define, which is set to 3. That means we can definitely fit
+        *    2 maximally sized transactions in the log. We'll use the same
+        *    value here.
+        *
+        * 2) If the lsunit option is specified, a transaction requires 2 LSU
+        *    for the reservation because there are two log writes that can
+        *    require padding - the transaction data and the commit record which
+        *    are written separately and both can require padding to the LSU.
+        *    Consider that we can have an active CIL reservation holding 2*LSU,
+        *    but the CIL is not over a push threshold. In this case, if we
+        *    don't have enough log space for at least one new transaction,
+        *    which includes another 2*LSU in the reservation, we will loop
+        *    forever in the log space grant procedure, i.e.
+        *    xlog_grant_head_wait().
+        *
+        *    Hence the log size needs to be able to contain two maximally sized
+        *    and padded transactions, which is (2 * (2 * LSU + maxlres)).
+        *
+        * Also, the log size should be a multiple of the log stripe unit; round
+        * it up to the lsunit boundary if lsunit is specified.
+        */
+       if (lsunit) {
+               min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
+                             2 * lsunit;
+       } else
+               min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
+       min_logblks *= XFS_MIN_LOG_FACTOR;
+
+       return XFS_BB_TO_FSB(mp, min_logblks);
+}
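
As a rough worked example of the sizing logic above (not part of this patch), the sketch below mirrors the branch structure of xfs_log_calc_minimum_size() with made-up reservation and stripe-unit values; all quantities are treated as basic-block counts purely for illustration.

#include <stdio.h>

#define BBSIZE			512	/* bytes per basic block */
#define XFS_MIN_LOG_FACTOR	3	/* fit two padded max-size transactions */

static long long roundup64(long long x, long long y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	long long max_logres = 4096;	/* made-up largest reservation, in BBs */
	long long lsunit = 64;		/* made-up log stripe unit, in BBs (0 if unset) */
	long long min_logblks;

	if (lsunit)
		min_logblks = roundup64(max_logres, lsunit) + 2 * lsunit;
	else
		min_logblks = max_logres + 2 * BBSIZE;
	min_logblks *= XFS_MIN_LOG_FACTOR;

	printf("minimum log size: %lld basic blocks\n", min_logblks);
	return 0;
}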
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
new file mode 100644 (file)
index 0000000..f4dd697
--- /dev/null
@@ -0,0 +1,973 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_buf.h"
+#include "xfs_icache.h"
+#include "xfs_dinode.h"
+#include "xfs_rtalloc.h"
+
+
+/*
+ * Realtime allocator bitmap functions shared with userspace.
+ */
+
+/*
+ * Get a buffer for the bitmap or summary file block specified.
+ * The buffer is returned read and locked.
+ */
+int
+xfs_rtbuf_get(
+       xfs_mount_t     *mp,            /* file system mount structure */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   block,          /* block number in bitmap or summary */
+       int             issum,          /* is summary not bitmap */
+       xfs_buf_t       **bpp)          /* output: buffer for the block */
+{
+       xfs_buf_t       *bp;            /* block buffer, result */
+       xfs_inode_t     *ip;            /* bitmap or summary inode */
+       xfs_bmbt_irec_t map;
+       int             nmap = 1;
+       int             error;          /* error value */
+
+       ip = issum ? mp->m_rsumip : mp->m_rbmip;
+
+       error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
+       if (error)
+               return error;
+
+       ASSERT(map.br_startblock != NULLFSBLOCK);
+       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                  XFS_FSB_TO_DADDR(mp, map.br_startblock),
+                                  mp->m_bsize, 0, &bp, NULL);
+       if (error)
+               return error;
+       *bpp = bp;
+       return 0;
+}
+
+/*
+ * Searching backward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ */
+int
+xfs_rtfind_back(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to look at */
+       xfs_rtblock_t   limit,          /* last block to look at */
+       xfs_rtblock_t   *rtblock)       /* out: start block found */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t       *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtblock_t   firstbit;       /* first useful bit in the word */
+       xfs_rtblock_t   i;              /* current bit number rel. to start */
+       xfs_rtblock_t   len;            /* length of inspected area */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       xfs_rtword_t    want;           /* mask for "good" values */
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute and read in starting bitmap block for starting block.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = bp->b_addr;
+       /*
+        * Get the first word's index & point to it.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       len = start - limit + 1;
+       /*
+        * Compute match value, based on the bit at start: if 1 (free)
+        * then all-ones, else all-zeroes.
+        */
+       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+       /*
+        * If the starting position is not word-aligned, deal with the
+        * partial word.
+        */
+       if (bit < XFS_NBWORD - 1) {
+               /*
+                * Calculate first (leftmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
+               mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
+                       firstbit;
+               /*
+                * Calculate the difference between the value there
+                * and what we're looking for.
+                */
+               if ((wdiff = (*b ^ want) & mask)) {
+                       /*
+                        * Different.  Mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i = bit - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               }
+               i = bit - firstbit + 1;
+               /*
+                * Go on to previous block if that's where the previous word is
+                * and we need the previous word.
+                */
+               if (--word == -1 && i < len) {
+                       /*
+                        * If done with this block, get the previous one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       bufp = bp->b_addr;
+                       word = XFS_BLOCKWMASK(mp);
+                       b = &bufp[word];
+               } else {
+                       /*
+                        * Go on to the previous word in the buffer.
+                        */
+                       b--;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the previous one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = *b ^ want)) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               }
+               i += XFS_NBWORD;
+               /*
+                * Go on to previous block if that's where the previous word is
+                * and we need the previous word.
+                */
+               if (--word == -1 && i < len) {
+                       /*
+                        * If done with this block, get the previous one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       bufp = bp->b_addr;
+                       word = XFS_BLOCKWMASK(mp);
+                       b = &bufp[word];
+               } else {
+                       /*
+                        * Go on to the previous word in the buffer.
+                        */
+                       b--;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if (len - i) {
+               /*
+                * Calculate first (leftmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               firstbit = XFS_NBWORD - (len - i);
+               mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = (*b ^ want) & mask)) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
+                       *rtblock = start - i + 1;
+                       return 0;
+               } else
+                       i = len;
+       }
+       /*
+        * No match, return that we scanned the whole area.
+        */
+       xfs_trans_brelse(tp, bp);
+       *rtblock = start - i + 1;
+       return 0;
+}
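
The scanners above rely on a want/mask/wdiff trick: build an all-ones or all-zeroes pattern from the state of the starting bit, XOR it with the bitmap word, mask off the bits outside the range of interest, and the lowest (or highest) set bit of the result marks the first state change. A minimal userspace sketch of the forward-scan variant, using a GCC/Clang builtin in place of XFS_RTLOBIT and an illustrative word width:

#include <stdio.h>
#include <stdint.h>

#define NBWORD 32	/* stand-in for XFS_NBWORD: bits per bitmap word */

/*
 * Scan forward from 'bit' within 'word' for the first bit whose free (1) or
 * allocated (0) state differs from the state at 'bit'. Returns the offset
 * from 'bit', or -1 if the rest of the word matches.
 */
static int first_state_change(uint32_t word, int bit)
{
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u; /* all ones or zeroes */
	uint32_t mask = ~0u << bit;		/* bits from 'bit' upwards */
	uint32_t wdiff = (word ^ want) & mask;	/* bits that differ from want */

	if (!wdiff)
		return -1;
	/* __builtin_ctz (GCC/Clang) plays the role of XFS_RTLOBIT here. */
	return __builtin_ctz(wdiff) - bit;
}

int main(void)
{
	uint32_t word = 0x38;	/* bits 3..5 free, everything else allocated */

	printf("%d\n", first_state_change(word, 3));	/* 3: state changes at bit 6 */
	printf("%d\n", first_state_change(word, 0));	/* 3: state changes at bit 3 */
	return 0;
}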
+
+/*
+ * Searching forward from start to limit, find the first block whose
+ * allocated/free state is different from start's.
+ */
+int
+xfs_rtfind_forw(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to look at */
+       xfs_rtblock_t   limit,          /* last block to look at */
+       xfs_rtblock_t   *rtblock)       /* out: start block found */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t       *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtblock_t   i;              /* current bit number rel. to start */
+       xfs_rtblock_t   lastbit;        /* last useful bit in the word */
+       xfs_rtblock_t   len;            /* length of inspected area */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       xfs_rtword_t    want;           /* mask for "good" values */
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute and read in starting bitmap block for starting block.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = bp->b_addr;
+       /*
+        * Get the first word's index & point to it.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       len = limit - start + 1;
+       /*
+        * Compute match value, based on the bit at start: if 1 (free)
+        * then all-ones, else all-zeroes.
+        */
+       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+       /*
+        * If the starting position is not word-aligned, deal with the
+        * partial word.
+        */
+       if (bit) {
+               /*
+                * Calculate last (rightmost) bit number to look at,
+                * and mask for all the relevant bits in this word.
+                */
+               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+               /*
+                * Calculate the difference between the value there
+                * and what we're looking for.
+                */
+               if ((wdiff = (*b ^ want) & mask)) {
+                       /*
+                        * Different.  Mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i = XFS_RTLOBIT(wdiff) - bit;
+                       *rtblock = start + i - 1;
+                       return 0;
+               }
+               i = lastbit - bit;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the next one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = *b ^ want)) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *rtblock = start + i - 1;
+                       return 0;
+               }
+               i += XFS_NBWORD;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if ((lastbit = len - i)) {
+               /*
+                * Calculate mask for all the relevant bits in this word.
+                */
+               mask = ((xfs_rtword_t)1 << lastbit) - 1;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = (*b ^ want) & mask)) {
+                       /*
+                        * Different, mark where we are and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *rtblock = start + i - 1;
+                       return 0;
+               } else
+                       i = len;
+       }
+       /*
+        * No match, return that we scanned the whole area.
+        */
+       xfs_trans_brelse(tp, bp);
+       *rtblock = start + i - 1;
+       return 0;
+}
+
+/*
+ * Read and modify the summary information for a given extent size and
+ * bitmap block combination.
+ * Keeps track of a current summary block, so we don't keep reading
+ * it from the buffer cache.
+ */
+int
+xfs_rtmodify_summary(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       int             log,            /* log2 of extent size */
+       xfs_rtblock_t   bbno,           /* bitmap block number */
+       int             delta,          /* change to make to summary info */
+       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+{
+       xfs_buf_t       *bp;            /* buffer for the summary block */
+       int             error;          /* error value */
+       xfs_fsblock_t   sb;             /* summary fsblock */
+       int             so;             /* index into the summary file */
+       xfs_suminfo_t   *sp;            /* pointer to returned data */
+
+       /*
+        * Compute entry number in the summary file.
+        */
+       so = XFS_SUMOFFS(mp, log, bbno);
+       /*
+        * Compute the block number in the summary file.
+        */
+       sb = XFS_SUMOFFSTOBLOCK(mp, so);
+       /*
+        * If we have an old buffer, and the block number matches, use that.
+        */
+       if (rbpp && *rbpp && *rsb == sb)
+               bp = *rbpp;
+       /*
+        * Otherwise we have to get the buffer.
+        */
+       else {
+               /*
+                * If there was an old one, get rid of it first.
+                */
+               if (rbpp && *rbpp)
+                       xfs_trans_brelse(tp, *rbpp);
+               error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
+               if (error) {
+                       return error;
+               }
+               /*
+                * Remember this buffer and block for the next call.
+                */
+               if (rbpp) {
+                       *rbpp = bp;
+                       *rsb = sb;
+               }
+       }
+       /*
+        * Point to the summary information, modify and log it.
+        */
+       sp = XFS_SUMPTR(mp, bp, so);
+       *sp += delta;
+       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+               (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
+       return 0;
+}
+
+/*
+ * Set the given range of bitmap bits to the given value.
+ * Do whatever I/O and logging is required.
+ */
+int
+xfs_rtmodify_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to modify */
+       xfs_extlen_t    len,            /* length of extent to modify */
+       int             val)            /* 1 for free, 0 for allocated */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t       *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtword_t    *first;         /* first used word in the buffer */
+       int             i;              /* current bit number rel. to start */
+       int             lastbit;        /* last useful bit in word */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute starting bitmap block number.
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       /*
+        * Read the bitmap block, and point to its data.
+        */
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = bp->b_addr;
+       /*
+        * Compute the starting word's address, and starting bit.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       first = b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       /*
+        * 0 (allocated) => all zeroes; 1 (free) => all ones.
+        */
+       val = -val;
+       /*
+        * If not starting on a word boundary, deal with the first
+        * (partial) word.
+        */
+       if (bit) {
+               /*
+                * Compute first bit not changed and mask of relevant bits.
+                */
+               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+               /*
+                * Set/clear the active bits.
+                */
+               if (val)
+                       *b |= mask;
+               else
+                       *b &= ~mask;
+               i = lastbit - bit;
+               /*
+                * Go on to the next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * Log the changed part of this block.
+                        * Get the next one.
+                        */
+                       xfs_trans_log_buf(tp, bp,
+                               (uint)((char *)first - (char *)bufp),
+                               (uint)((char *)b - (char *)bufp));
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       first = b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the next one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Set the word value correctly.
+                */
+               *b = val;
+               i += XFS_NBWORD;
+               /*
+                * Go on to the next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * Log the changed part of this block.
+                        * Get the next one.
+                        */
+                       xfs_trans_log_buf(tp, bp,
+                               (uint)((char *)first - (char *)bufp),
+                               (uint)((char *)b - (char *)bufp));
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       first = b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if ((lastbit = len - i)) {
+               /*
+                * Compute a mask of relevant bits.
+                */
+               bit = 0;
+               mask = ((xfs_rtword_t)1 << lastbit) - 1;
+               /*
+                * Set/clear the active bits.
+                */
+               if (val)
+                       *b |= mask;
+               else
+                       *b &= ~mask;
+               b++;
+       }
+       /*
+        * Log any remaining changed bytes.
+        */
+       if (b > first)
+               xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
+                       (uint)((char *)b - (char *)bufp - 1));
+       return 0;
+}
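
xfs_rtmodify_range() handles a leading partial word, then whole words, then a trailing partial word. A minimal userspace sketch of just that masking arithmetic, with an illustrative flat word array standing in for the buffered bitmap blocks:

#include <stdio.h>
#include <stdint.h>

#define NBWORD 32	/* stand-in: bits per bitmap word */

/*
 * Set (val = 1) or clear (val = 0) 'len' bits starting at bit 'start' in a
 * flat array of words: a leading partial word, then whole words, then a
 * trailing partial word, mirroring the shape of xfs_rtmodify_range().
 */
static void modify_range(uint32_t *words, int start, int len, int val)
{
	int word = start / NBWORD;
	int bit = start % NBWORD;
	int i = 0;
	uint32_t mask;

	if (bit) {				/* leading partial word */
		int lastbit = (bit + len < NBWORD) ? bit + len : NBWORD;

		mask = ((1u << (lastbit - bit)) - 1) << bit;
		if (val)
			words[word] |= mask;
		else
			words[word] &= ~mask;
		i = lastbit - bit;
		word++;
	}
	while (len - i >= NBWORD) {		/* whole words */
		words[word++] = val ? ~0u : 0u;
		i += NBWORD;
	}
	if (len - i) {				/* trailing partial word */
		mask = (1u << (len - i)) - 1;
		if (val)
			words[word] |= mask;
		else
			words[word] &= ~mask;
	}
}

int main(void)
{
	uint32_t bmp[2] = { 0, 0 };

	modify_range(bmp, 28, 8, 1);		/* mark blocks 28..35 free */
	printf("%08x %08x\n", bmp[0], bmp[1]);	/* f0000000 0000000f */
	return 0;
}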
+
+/*
+ * Mark an extent specified by start and len freed.
+ * Updates all the summary information as well as the bitmap.
+ */
+int
+xfs_rtfree_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block to free */
+       xfs_extlen_t    len,            /* length to free */
+       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
+{
+       xfs_rtblock_t   end;            /* end of the freed extent */
+       int             error;          /* error value */
+       xfs_rtblock_t   postblock;      /* first block freed > end */
+       xfs_rtblock_t   preblock;       /* first block freed < start */
+
+       end = start + len - 1;
+       /*
+        * Modify the bitmap to mark this extent freed.
+        */
+       error = xfs_rtmodify_range(mp, tp, start, len, 1);
+       if (error) {
+               return error;
+       }
+       /*
+        * Assume we're freeing out of the middle of an allocated extent.
+        * We need to find the beginning and end of the extent so we can
+        * properly update the summary.
+        */
+       error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+       if (error) {
+               return error;
+       }
+       /*
+        * Find the next allocated block (end of allocated extent).
+        */
+       error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
+               &postblock);
+       if (error)
+               return error;
+       /*
+        * If there are blocks not being freed at the front of the
+        * old extent, add summary data for them to be allocated.
+        */
+       if (preblock < start) {
+               error = xfs_rtmodify_summary(mp, tp,
+                       XFS_RTBLOCKLOG(start - preblock),
+                       XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+               if (error) {
+                       return error;
+               }
+       }
+       /*
+        * If there are blocks not being freed at the end of the
+        * old extent, add summary data for them to be allocated.
+        */
+       if (postblock > end) {
+               error = xfs_rtmodify_summary(mp, tp,
+                       XFS_RTBLOCKLOG(postblock - end),
+                       XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
+               if (error) {
+                       return error;
+               }
+       }
+       /*
+        * Increment the summary information corresponding to the entire
+        * (new) free extent.
+        */
+       error = xfs_rtmodify_summary(mp, tp,
+               XFS_RTBLOCKLOG(postblock + 1 - preblock),
+               XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
+       return error;
+}
+
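As a worked illustration of the three summary updates above, a sketch with made-up block numbers, assuming XFS_RTBLOCKLOG() behaves like floor(log2(len)) and showing only the log-size index (the bitmap-block argument to xfs_rtmodify_summary() is omitted):

#include <stdio.h>

/* Assumed behaviour of XFS_RTBLOCKLOG(): floor(log2(len)). */
static int rtblocklog(unsigned long long len)
{
	int log = -1;

	while (len) {
		len >>= 1;
		log++;
	}
	return log;
}

int main(void)
{
	/* Free blocks 100..115; blocks 96..99 and 116..127 were already free. */
	unsigned long long preblock = 96, start = 100, end = 115, postblock = 127;

	if (preblock < start)		/* front remainder 96..99, length 4   */
		printf("dec summary level %d\n", rtblocklog(start - preblock));
	if (postblock > end)		/* back remainder 116..127, length 12 */
		printf("dec summary level %d\n", rtblocklog(postblock - end));
	/* merged free extent 96..127, length 32 */
	printf("inc summary level %d\n", rtblocklog(postblock + 1 - preblock));
	return 0;
}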
+/*
+ * Check that the given range is either all allocated (val = 0) or
+ * all free (val = 1).
+ */
+int
+xfs_rtcheck_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   start,          /* starting block number of extent */
+       xfs_extlen_t    len,            /* length of extent */
+       int             val,            /* 1 for free, 0 for allocated */
+       xfs_rtblock_t   *new,           /* out: first block not matching */
+       int             *stat)          /* out: 1 for matches, 0 for not */
+{
+       xfs_rtword_t    *b;             /* current word in buffer */
+       int             bit;            /* bit number in the word */
+       xfs_rtblock_t   block;          /* bitmap block number */
+       xfs_buf_t       *bp;            /* buf for the block */
+       xfs_rtword_t    *bufp;          /* starting word in buffer */
+       int             error;          /* error value */
+       xfs_rtblock_t   i;              /* current bit number rel. to start */
+       xfs_rtblock_t   lastbit;        /* last useful bit in word */
+       xfs_rtword_t    mask;           /* mask of relevant bits for value */
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
+
+       /*
+        * Compute starting bitmap block number
+        */
+       block = XFS_BITTOBLOCK(mp, start);
+       /*
+        * Read the bitmap block.
+        */
+       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
+       if (error) {
+               return error;
+       }
+       bufp = bp->b_addr;
+       /*
+        * Compute the starting word's address, and starting bit.
+        */
+       word = XFS_BITTOWORD(mp, start);
+       b = &bufp[word];
+       bit = (int)(start & (XFS_NBWORD - 1));
+       /*
+        * 0 (allocated) => all zeros; 1 (free) => all ones.
+        */
+       val = -val;
+       /*
+        * If not starting on a word boundary, deal with the first
+        * (partial) word.
+        */
+       if (bit) {
+               /*
+                * Compute first bit not examined.
+                */
+               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
+               /*
+                * Mask of relevant bits.
+                */
+               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = (*b ^ val) & mask)) {
+                       /*
+                        * Different, compute first wrong bit and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i = XFS_RTLOBIT(wdiff) - bit;
+                       *new = start + i;
+                       *stat = 0;
+                       return 0;
+               }
+               i = lastbit - bit;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       } else {
+               /*
+                * Starting on a word boundary, no partial word.
+                */
+               i = 0;
+       }
+       /*
+        * Loop over whole words in buffers.  When we use up one buffer
+        * we move on to the next one.
+        */
+       while (len - i >= XFS_NBWORD) {
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = *b ^ val)) {
+                       /*
+                        * Different, compute first wrong bit and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *new = start + i;
+                       *stat = 0;
+                       return 0;
+               }
+               i += XFS_NBWORD;
+               /*
+                * Go on to next block if that's where the next word is
+                * and we need the next word.
+                */
+               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+                       /*
+                        * If done with this block, get the next one.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
+                       if (error) {
+                               return error;
+                       }
+                       b = bufp = bp->b_addr;
+                       word = 0;
+               } else {
+                       /*
+                        * Go on to the next word in the buffer.
+                        */
+                       b++;
+               }
+       }
+       /*
+        * If not ending on a word boundary, deal with the last
+        * (partial) word.
+        */
+       if ((lastbit = len - i)) {
+               /*
+                * Mask of relevant bits.
+                */
+               mask = ((xfs_rtword_t)1 << lastbit) - 1;
+               /*
+                * Compute difference between actual and desired value.
+                */
+               if ((wdiff = (*b ^ val) & mask)) {
+                       /*
+                        * Different, compute first wrong bit and return.
+                        */
+                       xfs_trans_brelse(tp, bp);
+                       i += XFS_RTLOBIT(wdiff);
+                       *new = start + i;
+                       *stat = 0;
+                       return 0;
+               } else
+                       i = len;
+       }
+       /*
+        * Successful, return.
+        */
+       xfs_trans_brelse(tp, bp);
+       *new = start + i;
+       *stat = 1;
+       return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that the given extent (block range) is allocated already.
+ */
+STATIC int                             /* error */
+xfs_rtcheck_alloc_range(
+       xfs_mount_t     *mp,            /* file system mount point */
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   bno,            /* starting block number of extent */
+       xfs_extlen_t    len)            /* length of extent */
+{
+       xfs_rtblock_t   new;            /* dummy for xfs_rtcheck_range */
+       int             stat;
+       int             error;
+
+       error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat);
+       if (error)
+               return error;
+       ASSERT(stat);
+       return 0;
+}
+#else
+#define xfs_rtcheck_alloc_range(m,t,b,l)       (0)
+#endif
+
+/*
+ * Free an extent in the realtime subvolume.  Length is expressed in
+ * realtime extents, as is the block number.
+ */
+int                                    /* error */
+xfs_rtfree_extent(
+       xfs_trans_t     *tp,            /* transaction pointer */
+       xfs_rtblock_t   bno,            /* starting block number to free */
+       xfs_extlen_t    len)            /* length of extent freed */
+{
+       int             error;          /* error value */
+       xfs_mount_t     *mp;            /* file system mount structure */
+       xfs_fsblock_t   sb;             /* summary file block number */
+       xfs_buf_t       *sumbp = NULL;  /* summary file block buffer */
+
+       mp = tp->t_mountp;
+
+       ASSERT(mp->m_rbmip->i_itemp != NULL);
+       ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+
+       error = xfs_rtcheck_alloc_range(mp, tp, bno, len);
+       if (error)
+               return error;
+
+       /*
+        * Free the range of realtime blocks.
+        */
+       error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
+       if (error) {
+               return error;
+       }
+       /*
+        * Mark more blocks free in the superblock.
+        */
+       xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+       /*
+        * If we've now freed all the blocks, reset the file sequence
+        * number to 0.
+        */
+       if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+           mp->m_sb.sb_rextents) {
+               if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
+                       mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
+               *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+               xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
+       }
+       return 0;
+}
+
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
new file mode 100644 (file)
index 0000000..23c2f25
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * Copyright (c) 2012-2013 Red Hat, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_symlink.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
+
+
+/*
+ * Each contiguous block has a header, so it is not just a simple pathlen
+ * to FSB conversion.
+ */
+int
+xfs_symlink_blocks(
+       struct xfs_mount *mp,
+       int             pathlen)
+{
+       int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+
+       return (pathlen + buflen - 1) / buflen;
+}
+
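For scale, a sketch of the same computation with assumed numbers: a 4096-byte block and a 56-byte CRC-format header (both assumptions for illustration; the kernel derives the usable space via XFS_SYMLINK_BUF_SPACE()):

#include <stdio.h>

int main(void)
{
	int blocksize = 4096;	/* assumed sb_blocksize                    */
	int hdrsize   = 56;	/* assumed sizeof(struct xfs_dsymlink_hdr) */
	int buflen    = blocksize - hdrsize;
	int pathlen   = 1024;	/* a MAXPATHLEN-sized target               */

	/* Same rounding-up division as xfs_symlink_blocks(). */
	printf("blocks = %d\n", (pathlen + buflen - 1) / buflen);
	return 0;
}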
+int
+xfs_symlink_hdr_set(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino,
+       uint32_t                offset,
+       uint32_t                size,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return 0;
+
+       dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
+       dsl->sl_offset = cpu_to_be32(offset);
+       dsl->sl_bytes = cpu_to_be32(size);
+       uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
+       dsl->sl_owner = cpu_to_be64(ino);
+       dsl->sl_blkno = cpu_to_be64(bp->b_bn);
+       bp->b_ops = &xfs_symlink_buf_ops;
+
+       return sizeof(struct xfs_dsymlink_hdr);
+}
+
+/*
+ * Checking of the symlink header is split into two parts. the verifier does
+ * CRC, location and bounds checking, the unpacking function checks the path
+ * parameters and owner.
+ */
+bool
+xfs_symlink_hdr_ok(
+       xfs_ino_t               ino,
+       uint32_t                offset,
+       uint32_t                size,
+       struct xfs_buf          *bp)
+{
+       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+
+       if (offset != be32_to_cpu(dsl->sl_offset))
+               return false;
+       if (size != be32_to_cpu(dsl->sl_bytes))
+               return false;
+       if (ino != be64_to_cpu(dsl->sl_owner))
+               return false;
+
+       /* ok */
+       return true;
+}
+
+static bool
+xfs_symlink_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return false;
+       if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
+               return false;
+       if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
+               return false;
+       if (be32_to_cpu(dsl->sl_offset) +
+                               be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+               return false;
+       if (dsl->sl_owner == 0)
+               return false;
+
+       return true;
+}
+
+static void
+xfs_symlink_read_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+
+       /* no verification of non-crc buffers */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
+               xfs_buf_ioerror(bp, EFSBADCRC);
+       else if (!xfs_symlink_verify(bp))
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+       if (bp->b_error)
+               xfs_verifier_error(bp);
+}
+
+static void
+xfs_symlink_write_verify(
+       struct xfs_buf  *bp)
+{
+       struct xfs_mount *mp = bp->b_target->bt_mount;
+       struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+       /* no verification of non-crc buffers */
+       if (!xfs_sb_version_hascrc(&mp->m_sb))
+               return;
+
+       if (!xfs_symlink_verify(bp)) {
+               xfs_buf_ioerror(bp, EFSCORRUPTED);
+               xfs_verifier_error(bp);
+               return;
+       }
+
+       if (bip) {
+               struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+               dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+       }
+       xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_symlink_buf_ops = {
+       .verify_read = xfs_symlink_read_verify,
+       .verify_write = xfs_symlink_write_verify,
+};
+
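The read/write verifier pairing above follows the usual buffer-ops shape: a structure check shared by both directions, CRC verification on read, and LSN stamping plus CRC recalculation on write. A stripped-down sketch of that shape with hypothetical names and no kernel dependencies:

#include <stdio.h>

struct buf;				/* stand-in for struct xfs_buf */

struct buf_ops {
	void (*verify_read)(struct buf *bp);	/* run after read I/O completes */
	void (*verify_write)(struct buf *bp);	/* run before write I/O issues  */
};

static void demo_read_verify(struct buf *bp)
{
	(void)bp;
	printf("check CRC, then check structure\n");
}

static void demo_write_verify(struct buf *bp)
{
	(void)bp;
	printf("check structure, stamp LSN, recompute CRC\n");
}

static const struct buf_ops demo_buf_ops = {
	.verify_read  = demo_read_verify,
	.verify_write = demo_write_verify,
};

int main(void)
{
	demo_buf_ops.verify_read(NULL);
	demo_buf_ops.verify_write(NULL);
	return 0;
}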
+void
+xfs_symlink_local_to_remote(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *bp,
+       struct xfs_inode        *ip,
+       struct xfs_ifork        *ifp)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       char                    *buf;
+
+       if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+               bp->b_ops = NULL;
+               memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+               return;
+       }
+
+       /*
+        * As this symlink fits in an inode literal area, it must also fit in
+        * the smallest buffer the filesystem supports.
+        */
+       ASSERT(BBTOB(bp->b_length) >=
+                       ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
+
+       bp->b_ops = &xfs_symlink_buf_ops;
+
+       buf = bp->b_addr;
+       buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
+       memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
+}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
new file mode 100644 (file)
index 0000000..f2bda7c
--- /dev/null
@@ -0,0 +1,894 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_qm.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+
+/*
+ * A buffer has a format structure overhead in the log in addition
+ * to the data, so we need to take this into account when reserving
+ * space in a transaction for a buffer.  Round the space required up
+ * to a multiple of 128 bytes so that we don't change the historical
+ * reservation that has been used for this overhead.
+ */
+STATIC uint
+xfs_buf_log_overhead(void)
+{
+       return round_up(sizeof(struct xlog_op_header) +
+                       sizeof(struct xfs_buf_log_format), 128);
+}
+
+/*
+ * Calculate the transaction log reservation per item in bytes.
+ *
+ * The nbufs argument indicates the number of items that will be
+ * changed in a transaction; size is the number of bytes to reserve
+ * per item.
+ */
+STATIC uint
+xfs_calc_buf_res(
+       uint            nbufs,
+       uint            size)
+{
+       return nbufs * (size + xfs_buf_log_overhead());
+}
+
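To make the arithmetic concrete, a standalone sketch assuming a combined op-header plus buf-log-format size of 140 bytes and a 4096-byte block (both numbers are assumptions for illustration only):

#include <stdio.h>

/* round_up() as used above: next multiple of 'align', align a power of two. */
static unsigned int round_up_to(unsigned int x, unsigned int align)
{
	return (x + align - 1) & ~(align - 1);
}

static unsigned int buf_log_overhead(unsigned int hdrs)
{
	return round_up_to(hdrs, 128);
}

static unsigned int calc_buf_res(unsigned int nbufs, unsigned int size,
				 unsigned int hdrs)
{
	return nbufs * (size + buf_log_overhead(hdrs));
}

int main(void)
{
	/* 140 bytes of headers rounds up to 256; three block-sized buffers. */
	printf("overhead = %u\n", buf_log_overhead(140));
	printf("res      = %u\n", calc_buf_res(3, 4096, 140));
	return 0;
}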
+/*
+ * Logging inodes is really tricksy. They are logged in memory format,
+ * which means that what we write into the log doesn't directly translate into
+ * the amount of space they use on disk.
+ *
+ * Case in point - btree format forks in memory format use more space than the
+ * on-disk format. In memory, the buffer contains a normal btree block header so
+ * the btree code can treat it as though it is just another generic buffer.
+ * However, when we write it to the inode fork, we don't write all of this
+ * header as it isn't needed. e.g. the root is only ever in the inode, so
+ * there's no need for sibling pointers which would waste 16 bytes of space.
+ *
+ * Hence when we have an inode with a maximally sized btree format fork, the
+ * amount of information we actually log is greater than the size of the inode
+ * on disk. Hence we need an inode reservation function that calculates all this
+ * correctly. So, we log:
+ *
+ * - 4 log op headers for object
+ *     - for the ilf, the inode core and 2 forks
+ * - inode log format object
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *     - the btree data contained by both forks will fit into the inode size,
+ *       hence when combined with the inode core above, we have a total of the
+ *       actual inode size.
+ *     - the BMBT headers need to be accounted separately, as they are
+ *       additional to the records and pointers that fit inside the inode
+ *       forks.
+ */
+STATIC uint
+xfs_calc_inode_res(
+       struct xfs_mount        *mp,
+       uint                    ninodes)
+{
+       return ninodes *
+               (4 * sizeof(struct xlog_op_header) +
+                sizeof(struct xfs_inode_log_format) +
+                mp->m_sb.sb_inodesize +
+                2 * XFS_BMBT_BLOCK_LEN(mp));
+}
+
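Spelled out with placeholder sizes (all assumptions, since the real structure sizes depend on the kernel build), the point of the comment above is that the logged amount exceeds the on-disk inode size once the op headers, the log format item and the two BMBT block headers are added:

#include <stdio.h>

static unsigned int calc_inode_res(unsigned int ninodes,
				   unsigned int ophdr,      /* op header size     */
				   unsigned int ilf,        /* inode log format   */
				   unsigned int inodesize,  /* sb_inodesize       */
				   unsigned int bmbt_hdr)   /* BMBT block header  */
{
	return ninodes * (4 * ophdr + ilf + inodesize + 2 * bmbt_hdr);
}

int main(void)
{
	/* Made-up sizes for one 512-byte inode. */
	printf("res = %u (> 512)\n", calc_inode_res(1, 12, 56, 512, 72));
	return 0;
}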
+/*
+ * The free inode btree is a conditional feature and the log reservation
+ * requirements differ slightly from that of the traditional inode allocation
+ * btree. The finobt tracks records for inode chunks with at least one free
+ * inode. A record can be removed from the tree for an inode allocation
+ * or free and thus the finobt reservation is unconditional across:
+ *
+ *     - inode allocation
+ *     - inode free
+ *     - inode chunk allocation
+ *
+ * The 'modify' param indicates whether to include the record modification
+ * scenario. The 'alloc' param indicates whether to include the reservation for
+ * free space btree modifications on behalf of finobt modifications. This is
+ * required only for transactions that do not already account for free space
+ * btree modifications.
+ *
+ * the free inode btree: max depth * block size
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the free inode btree entry: block size
+ */
+STATIC uint
+xfs_calc_finobt_res(
+       struct xfs_mount        *mp,
+       int                     alloc,
+       int                     modify)
+{
+       uint res;
+
+       if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+               return 0;
+
+       res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
+       if (alloc)
+               res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 
+                                       XFS_FSB_TO_B(mp, 1));
+       if (modify)
+               res += (uint)XFS_FSB_TO_B(mp, 1);
+
+       return res;
+}
+
+/*
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate.  Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note:  Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time.  This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction.  See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents.  This gives:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_write_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
+                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+                                     XFS_FSB_TO_B(mp, 1)) +
+                    xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                                     XFS_FSB_TO_B(mp, 1))),
+                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                                     XFS_FSB_TO_B(mp, 1))));
+}
+
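The write reservation, like several of the reservations below, covers the worse of two legs: the allocation path and the later bmap_finish free path. A toy sketch of that MAX structure with invented numbers:

#include <stdio.h>

#define MAX(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	unsigned int alloc_leg = 90000;	/* made-up: inode + bmbt + agfs + btrees */
	unsigned int free_leg  = 70000;	/* made-up: agfs + agfls + sb + btrees   */

	/* The transaction must fit whichever leg turns out larger. */
	printf("write logres >= %u\n", MAX(alloc_leg, free_leg));
	return 0;
}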
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_itruncate_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
+                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+                                     XFS_FSB_TO_B(mp, 1))),
+                   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+                                     XFS_FSB_TO_B(mp, 1)) +
+                   xfs_calc_buf_res(5, 0) +
+                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                    XFS_FSB_TO_B(mp, 1)) +
+                   xfs_calc_buf_res(2 + mp->m_ialloc_blks +
+                                    mp->m_in_maxlevels, 0)));
+}
+
+/*
+ * In renaming files we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *     of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_rename_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((xfs_calc_inode_res(mp, 4) +
+                    xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+                                     XFS_FSB_TO_B(mp, 1))),
+                   (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+                                     XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For removing an inode from the unlinked list we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+              max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
+ * For creating a link to an inode:
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_link_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_iunlink_remove_reservation(mp) +
+               MAX((xfs_calc_inode_res(mp, 2) +
+                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+                                     XFS_FSB_TO_B(mp, 1))),
+                   (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                     XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For adding an inode to the unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_inode_res(mp, 1);
+}
+
+/*
+ * For removing a directory entry we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_remove_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_iunlink_add_reservation(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
+                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+                                     XFS_FSB_TO_B(mp, 1))),
+                   (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                                     XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For create, break it into the two cases that the transaction
+ * covers: the modify case - allocation done by modification of the
+ * state of existing inodes - and the allocation case.
+ */
+
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the finobt (record modification and allocation btrees)
+ */
+STATIC uint
+xfs_calc_create_resv_modify(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_inode_res(mp, 2) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               (uint)XFS_FSB_TO_B(mp, 1) +
+               xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_finobt_res(mp, 1, 1);
+}
+
+/*
+ * For create we can allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: mp->m_ialloc_blks * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_create_resv_alloc(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               mp->m_sb.sb_sectsize +
+               xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+STATIC uint
+__xfs_calc_create_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX(xfs_calc_create_resv_alloc(mp),
+                   xfs_calc_create_resv_modify(mp));
+}
+
+/*
+ * For icreate we can allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion)
+ */
+STATIC uint
+xfs_calc_icreate_resv_alloc(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               mp->m_sb.sb_sectsize +
+               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_finobt_res(mp, 0, 0);
+}
+
+STATIC uint
+xfs_calc_icreate_reservation(xfs_mount_t *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX(xfs_calc_icreate_resv_alloc(mp),
+                   xfs_calc_create_resv_modify(mp));
+}
+
+STATIC uint
+xfs_calc_create_reservation(
+       struct xfs_mount        *mp)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return xfs_calc_icreate_reservation(mp);
+       return __xfs_calc_create_reservation(mp);
+}
+
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+       struct xfs_mount        *mp)
+{
+       uint    res = XFS_DQUOT_LOGRES(mp);
+
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               res += xfs_calc_icreate_resv_alloc(mp);
+       else
+               res += xfs_calc_create_resv_alloc(mp);
+
+       return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
+/*
+ * Making a new directory is the same as creating a new file.
+ */
+STATIC uint
+xfs_calc_mkdir_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_create_reservation(mp);
+}
+
+
+/*
+ * Making a new symlink is the same as creating a new file, but
+ * with the added blocks for remote symlink data, which can be up to 1kB in
+ * length (MAXPATHLEN).
+ */
+STATIC uint
+xfs_calc_symlink_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_create_reservation(mp) +
+              xfs_calc_buf_res(1, MAXPATHLEN);
+}
+
+/*
+ * In freeing an inode we can modify:
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion, removal or modification)
+ */
+STATIC uint
+xfs_calc_ifree_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_inode_res(mp, 1) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_iunlink_remove_reservation(mp) +
+               xfs_calc_buf_res(1, 0) +
+               xfs_calc_buf_res(2 + mp->m_ialloc_blks +
+                                mp->m_in_maxlevels, 0) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_finobt_res(mp, 0, 1);
+}
+
+/*
+ * When only changing the inode we log the inode and possibly the superblock.
+ * We also add a bit of slop for the transaction stuff.
+ */
+STATIC uint
+xfs_calc_ichange_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_inode_res(mp, 1) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Growing the data section of the filesystem.
+ *     superblock
+ *     agi and agf
+ *     allocation btrees
+ */
+STATIC uint
+xfs_calc_growdata_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *     superblock: sector size
+ *     agf of the ag from which the extent is allocated: sector size
+ *     bmap btree for bitmap/summary inode: max depth * blocksize
+ *     bitmap/summary inode: inode size
+ *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
+STATIC uint
+xfs_calc_growrtalloc_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+                                XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_inode_res(mp, 1) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *     one bitmap/summary block: blocksize
+ */
+STATIC uint
+xfs_calc_growrtzero_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ *     superblock: sector size
+ *     bitmap inode: inode size
+ *     summary inode: inode size
+ *     one bitmap block: blocksize
+ *     summary blocks: new summary size
+ */
+STATIC uint
+xfs_calc_growrtfree_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_inode_res(mp, 2) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
+               xfs_calc_buf_res(1, mp->m_rsumsize);
+}
+
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *     inode
+ */
+STATIC uint
+xfs_calc_swrite_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_inode_res(mp, 1);
+}
+
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *     inode
+ */
+STATIC uint
+xfs_calc_writeid_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_inode_res(mp, 1);
+}
+
+/*
+ * Converting the inode from non-attributed to attributed.
+ *     the inode being converted: inode size
+ *     agf block and superblock (for block allocation)
+ *     the new block (directory sized)
+ *     bmap blocks for the new directory block
+ *     allocation btrees
+ */
+STATIC uint
+xfs_calc_addafork_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_inode_res(mp, 1) +
+               xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
+               xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
+                                XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_attrinval_reservation(
+       struct xfs_mount        *mp)
+{
+       return MAX((xfs_calc_inode_res(mp, 1) +
+                   xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+                                    XFS_FSB_TO_B(mp, 1))),
+                  (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+                                    XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * Setting an attribute at mount time.
+ *     the inode getting the attribute
+ *     the superblock for allocations
+ *     the agfs extents are allocated from
+ *     the attribute btree * max depth
+ *     the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime (see
+ * below).
+ */
+STATIC uint
+xfs_calc_attrsetm_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               xfs_calc_inode_res(mp, 1) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Setting an attribute at runtime, transaction space unit per block.
+ *     the superblock for allocations: sector size
+ *     the inode bmap btree could join or split: max depth * block size
+ * Since the runtime attribute transaction space is dependent on the total
+ * blocks needed for the 1st bmap, here we calculate the space unit for
+ * one block so that the caller can figure out the total space according
+ * to the attribute extent length in blocks by:
+ *     ext * M_RES(mp)->tr_attrsetrt.tr_logres
+ */
+STATIC uint
+xfs_calc_attrsetrt_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_attrrm_reservation(
+       struct xfs_mount        *mp)
+{
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((xfs_calc_inode_res(mp, 1) +
+                    xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
+                                     XFS_FSB_TO_B(mp, 1)) +
+                    (uint)XFS_FSB_TO_B(mp,
+                                       XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
+                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+                                     XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
+STATIC uint
+xfs_calc_clear_agi_bucket_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Clearing the quotaflags in the superblock.
+ *     the super block for changing quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_sbchange_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Adjusting quota limits.
+ *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
+ */
+STATIC uint
+xfs_calc_qm_setqlim_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
+}
+
+/*
+ * Allocating quota on disk if needed.
+ *     the write transaction log space for quota file extent allocation
+ *     the unit of quota allocation: one system block size
+ */
+STATIC uint
+xfs_calc_qm_dqalloc_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_write_reservation(mp) +
+               xfs_calc_buf_res(1,
+                       XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
+}
+
+/*
+ * Turning off quotas.
+ *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ *    the superblock for the quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_reservation(
+       struct xfs_mount        *mp)
+{
+       return sizeof(struct xfs_qoff_logitem) * 2 +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * End of turning off quotas.
+ *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_end_reservation(
+       struct xfs_mount        *mp)
+{
+       return sizeof(struct xfs_qoff_logitem) * 2;
+}
+
+/*
+ * Syncing the incore super block changes to disk.
+ *     the super block to reflect the changes: sector size
+ */
+STATIC uint
+xfs_calc_sb_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+void
+xfs_trans_resv_calc(
+       struct xfs_mount        *mp,
+       struct xfs_trans_resv   *resp)
+{
+       /*
+        * The following transactions are logged in physical format and
+        * require a permanent reservation on space.
+        */
+       resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
+       resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
+       resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
+       resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
+       resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
+       resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
+       resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
+       resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
+       resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
+       resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
+       resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
+       resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
+       resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+       resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
+       resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_create_tmpfile.tr_logres =
+                       xfs_calc_create_tmpfile_reservation(mp);
+       resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+       resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
+       resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
+       resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
+       resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
+       resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
+       resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
+       resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
+       resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
+       resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
+       resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+       resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
+       resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
+       resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
+       resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
+       resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
+       resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
+       resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+       /*
+        * The following transactions are logged in logical format with
+        * a default log count.
+        */
+       resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp);
+       resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+       resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
+       resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+       resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
+       resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+       resp->tr_qm_equotaoff.tr_logres =
+               xfs_calc_qm_quotaoff_end_reservation(mp);
+       resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+       resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
+       resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+       /* The following transactions are logged in logical format */
+       resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
+       resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
+       resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
+       resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
+       resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
+       resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
+       resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
+       resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
+}
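Every permanent-reservation triple filled in above follows one shape: a byte reservation, a log count, and a permanent-reservation flag. A hedged sketch of that shape with invented names and a made-up flag value, just to show how the three fields relate:

#include <stdio.h>

struct trans_res {			/* stand-in for the reservation triple */
	unsigned int logres;		/* bytes reserved per transaction      */
	int          logcount;		/* how many times it may roll          */
	int          logflags;		/* e.g. a "permanent" flag             */
};

#define PERM_LOG_RES	0x1		/* assumed flag value, illustration only */

int main(void)
{
	struct trans_res tr_write = { 0 };

	tr_write.logres    = 131072;	/* made-up reservation size            */
	tr_write.logcount  = 2;		/* may roll once before re-reserving   */
	tr_write.logflags |= PERM_LOG_RES;

	printf("%u bytes x %d, flags %#x\n",
	       tr_write.logres, tr_write.logcount, tr_write.logflags);
	return 0;
}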
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
deleted file mode 100644 (file)
index d438132..0000000
+++ /dev/null
@@ -1,2630 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_extent_busy.h"
-#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-#include "xfs_log.h"
-
-struct workqueue_struct *xfs_alloc_wq;
-
-#define XFS_ABSDIFF(a,b)       (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
-
-#define        XFSA_FIXUP_BNO_OK       1
-#define        XFSA_FIXUP_CNT_OK       2
-
-STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
-               xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-STATIC int                             /* error */
-xfs_alloc_lookup_eq(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           bno,    /* starting block of extent */
-       xfs_extlen_t            len,    /* length of extent */
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.a.ar_startblock = bno;
-       cur->bc_rec.a.ar_blockcount = len;
-       return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-int                            /* error */
-xfs_alloc_lookup_ge(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           bno,    /* starting block of extent */
-       xfs_extlen_t            len,    /* length of extent */
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.a.ar_startblock = bno;
-       cur->bc_rec.a.ar_blockcount = len;
-       return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
-}
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-int                                    /* error */
-xfs_alloc_lookup_le(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           bno,    /* starting block of extent */
-       xfs_extlen_t            len,    /* length of extent */
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.a.ar_startblock = bno;
-       cur->bc_rec.a.ar_blockcount = len;
-       return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
-}
-
-/*
- * Update the record referred to by cur to the value given
- * by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-STATIC int                             /* error */
-xfs_alloc_update(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           bno,    /* starting block of extent */
-       xfs_extlen_t            len)    /* length of extent */
-{
-       union xfs_btree_rec     rec;
-
-       rec.alloc.ar_startblock = cpu_to_be32(bno);
-       rec.alloc.ar_blockcount = cpu_to_be32(len);
-       return xfs_btree_update(cur, &rec);
-}
-
-/*
- * Get the data from the pointed-to record.
- */
-int                                    /* error */
-xfs_alloc_get_rec(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           *bno,   /* output: starting block of extent */
-       xfs_extlen_t            *len,   /* output: length of extent */
-       int                     *stat)  /* output: success/failure */
-{
-       union xfs_btree_rec     *rec;
-       int                     error;
-
-       error = xfs_btree_get_rec(cur, &rec, stat);
-       if (!error && *stat == 1) {
-               *bno = be32_to_cpu(rec->alloc.ar_startblock);
-               *len = be32_to_cpu(rec->alloc.ar_blockcount);
-       }
-       return error;
-}
-
-/*
- * Compute aligned version of the found extent.
- * Takes alignment and min length into account.
- */
-STATIC void
-xfs_alloc_compute_aligned(
-       xfs_alloc_arg_t *args,          /* allocation argument structure */
-       xfs_agblock_t   foundbno,       /* starting block in found extent */
-       xfs_extlen_t    foundlen,       /* length in found extent */
-       xfs_agblock_t   *resbno,        /* result block number */
-       xfs_extlen_t    *reslen)        /* result length */
-{
-       xfs_agblock_t   bno;
-       xfs_extlen_t    len;
-
-       /* Trim busy sections out of found extent */
-       xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
-
-       if (args->alignment > 1 && len >= args->minlen) {
-               xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
-               xfs_extlen_t    diff = aligned_bno - bno;
-
-               *resbno = aligned_bno;
-               *reslen = diff >= len ? 0 : len - diff;
-       } else {
-               *resbno = bno;
-               *reslen = len;
-       }
-}
-
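As a rough standalone illustration of the alignment arithmetic described above (busy-extent trimming omitted; plain unsigned ints stand in for xfs_agblock_t/xfs_extlen_t, and the numbers are made up):

#include <stdio.h>

/* round v up to the next multiple of align (align > 0) */
static unsigned int roundup_u(unsigned int v, unsigned int align)
{
        return ((v + align - 1) / align) * align;
}

int main(void)
{
        unsigned int bno = 13, len = 10;        /* found extent [13, 23) */
        unsigned int alignment = 4, minlen = 2;
        unsigned int resbno, reslen;

        if (alignment > 1 && len >= minlen) {
                unsigned int aligned_bno = roundup_u(bno, alignment);   /* 16 */
                unsigned int diff = aligned_bno - bno;                  /* 3  */

                resbno = aligned_bno;
                reslen = diff >= len ? 0 : len - diff;                  /* 7  */
        } else {
                resbno = bno;
                reslen = len;
        }
        printf("aligned candidate: bno=%u len=%u\n", resbno, reslen);
        return 0;
}
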
-/*
- * Compute best start block and diff for "near" allocations.
- * freelen >= wantlen already checked by caller.
- */
-STATIC xfs_extlen_t                    /* difference value (absolute) */
-xfs_alloc_compute_diff(
-       xfs_agblock_t   wantbno,        /* target starting block */
-       xfs_extlen_t    wantlen,        /* target length */
-       xfs_extlen_t    alignment,      /* target alignment */
-       char            userdata,       /* are we allocating data? */
-       xfs_agblock_t   freebno,        /* freespace's starting block */
-       xfs_extlen_t    freelen,        /* freespace's length */
-       xfs_agblock_t   *newbnop)       /* result: best start block from free */
-{
-       xfs_agblock_t   freeend;        /* end of freespace extent */
-       xfs_agblock_t   newbno1;        /* return block number */
-       xfs_agblock_t   newbno2;        /* other new block number */
-       xfs_extlen_t    newlen1=0;      /* length with newbno1 */
-       xfs_extlen_t    newlen2=0;      /* length with newbno2 */
-       xfs_agblock_t   wantend;        /* end of target extent */
-
-       ASSERT(freelen >= wantlen);
-       freeend = freebno + freelen;
-       wantend = wantbno + wantlen;
-       /*
-        * We want to allocate from the start of a free extent if it is past
-        * the desired block or if we are allocating user data and the free
-        * extent is before the desired block. The second case is there to allow
-        * for contiguous allocation from the remaining free space if the file
-        * grows in the short term.
-        */
-       if (freebno >= wantbno || (userdata && freeend < wantend)) {
-               if ((newbno1 = roundup(freebno, alignment)) >= freeend)
-                       newbno1 = NULLAGBLOCK;
-       } else if (freeend >= wantend && alignment > 1) {
-               newbno1 = roundup(wantbno, alignment);
-               newbno2 = newbno1 - alignment;
-               if (newbno1 >= freeend)
-                       newbno1 = NULLAGBLOCK;
-               else
-                       newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
-               if (newbno2 < freebno)
-                       newbno2 = NULLAGBLOCK;
-               else
-                       newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
-               if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
-                       if (newlen1 < newlen2 ||
-                           (newlen1 == newlen2 &&
-                            XFS_ABSDIFF(newbno1, wantbno) >
-                            XFS_ABSDIFF(newbno2, wantbno)))
-                               newbno1 = newbno2;
-               } else if (newbno2 != NULLAGBLOCK)
-                       newbno1 = newbno2;
-       } else if (freeend >= wantend) {
-               newbno1 = wantbno;
-       } else if (alignment > 1) {
-               newbno1 = roundup(freeend - wantlen, alignment);
-               if (newbno1 > freeend - wantlen &&
-                   newbno1 - alignment >= freebno)
-                       newbno1 -= alignment;
-               else if (newbno1 >= freeend)
-                       newbno1 = NULLAGBLOCK;
-       } else
-               newbno1 = freeend - wantlen;
-       *newbnop = newbno1;
-       return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
-}
-
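For the unaligned (alignment == 1), non-userdata case, the two most common branches above reduce to the following hand-worked sketch with made-up numbers; this is an illustration of the diff computation, not the kernel routine itself:

#include <stdio.h>

static unsigned int absdiff(unsigned int a, unsigned int b)
{
        return a > b ? a - b : b - a;
}

int main(void)
{
        unsigned int wantbno = 100;     /* want the extent to start at block 100 */
        unsigned int newbno;

        /* Free extent [120, 136) starts past the target: take its first block. */
        newbno = 120;
        printf("case 1: newbno=%u diff=%u\n", newbno, absdiff(newbno, wantbno));

        /* Free extent [90, 120) wholly contains the target [100, 108): take wantbno. */
        newbno = wantbno;
        printf("case 2: newbno=%u diff=%u\n", newbno, absdiff(newbno, wantbno));
        return 0;
}
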
-/*
- * Fix up the length, based on mod and prod.
- * len should be k * prod + mod for some k.
- * If len is too small it is returned unchanged.
- * If len hits maxlen it is left alone.
- */
-STATIC void
-xfs_alloc_fix_len(
-       xfs_alloc_arg_t *args)          /* allocation argument structure */
-{
-       xfs_extlen_t    k;
-       xfs_extlen_t    rlen;
-
-       ASSERT(args->mod < args->prod);
-       rlen = args->len;
-       ASSERT(rlen >= args->minlen);
-       ASSERT(rlen <= args->maxlen);
-       if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
-           (args->mod == 0 && rlen < args->prod))
-               return;
-       k = rlen % args->prod;
-       if (k == args->mod)
-               return;
-       if (k > args->mod)
-               rlen = rlen - (k - args->mod);
-       else
-               rlen = rlen - args->prod + (args->mod - k);
-       if ((int)rlen < (int)args->minlen)
-               return;
-       ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
-       ASSERT(rlen % args->prod == args->mod);
-       args->len = rlen;
-}
-
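The k * prod + mod trimming above can be checked by hand; here is a minimal standalone version of just that arithmetic (without the minlen/maxlen guards), with a couple of worked values:

#include <stdio.h>

static unsigned int trim_to_mod(unsigned int rlen, unsigned int prod,
                                unsigned int mod)
{
        unsigned int k = rlen % prod;

        if (k == mod)
                return rlen;
        if (k > mod)
                return rlen - (k - mod);        /* step down within this prod unit */
        return rlen - prod + (mod - k);         /* drop back a whole prod unit     */
}

int main(void)
{
        printf("%u\n", trim_to_mod(10, 4, 1));  /* 9  = 2 * 4 + 1 */
        printf("%u\n", trim_to_mod(9, 4, 3));   /* 7  = 1 * 4 + 3 */
        printf("%u\n", trim_to_mod(13, 4, 1));  /* 13 = 3 * 4 + 1 (unchanged) */
        return 0;
}
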
-/*
- * Fix up length if there is too little space left in the a.g.
- * Return 1 if ok, 0 if too little, should give up.
- */
-STATIC int
-xfs_alloc_fix_minleft(
-       xfs_alloc_arg_t *args)          /* allocation argument structure */
-{
-       xfs_agf_t       *agf;           /* a.g. freelist header */
-       int             diff;           /* free space difference */
-
-       if (args->minleft == 0)
-               return 1;
-       agf = XFS_BUF_TO_AGF(args->agbp);
-       diff = be32_to_cpu(agf->agf_freeblks)
-               - args->len - args->minleft;
-       if (diff >= 0)
-               return 1;
-       args->len += diff;              /* shrink the allocated space */
-       if (args->len >= args->minlen)
-               return 1;
-       args->agbno = NULLAGBLOCK;
-       return 0;
-}
-
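Numerically, the minleft shrink above works like this; an illustrative sketch with invented values, not the real AGF accounting:

#include <stdio.h>

int main(void)
{
        int freeblks = 100;     /* free blocks currently in the AG       */
        int len = 30;           /* length of the proposed allocation     */
        int minleft = 80;       /* blocks that must stay free afterwards */
        int minlen = 10;
        int diff = freeblks - len - minleft;    /* -10: over budget */

        if (diff < 0)
                len += diff;                    /* shrink to 20 */
        if (len >= minlen)
                printf("ok, shrunk to len=%d\n", len);
        else
                printf("give up\n");
        return 0;
}
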
-/*
- * Update the two btrees, logically removing from freespace the extent
- * starting at rbno, rlen blocks.  The extent is contained within the
- * actual (current) free extent fbno for flen blocks.
- * Flags are passed in indicating whether the cursors are set to the
- * relevant records.
- */
-STATIC int                             /* error code */
-xfs_alloc_fixup_trees(
-       xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
-       xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
-       xfs_agblock_t   fbno,           /* starting block of free extent */
-       xfs_extlen_t    flen,           /* length of free extent */
-       xfs_agblock_t   rbno,           /* starting block of returned extent */
-       xfs_extlen_t    rlen,           /* length of returned extent */
-       int             flags)          /* flags, XFSA_FIXUP_... */
-{
-       int             error;          /* error code */
-       int             i;              /* operation results */
-       xfs_agblock_t   nfbno1;         /* first new free startblock */
-       xfs_agblock_t   nfbno2;         /* second new free startblock */
-       xfs_extlen_t    nflen1=0;       /* first new free length */
-       xfs_extlen_t    nflen2=0;       /* second new free length */
-
-       /*
-        * Look up the record in the by-size tree if necessary.
-        */
-       if (flags & XFSA_FIXUP_CNT_OK) {
-#ifdef DEBUG
-               if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(
-                       i == 1 && nfbno1 == fbno && nflen1 == flen);
-#endif
-       } else {
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-       /*
-        * Look up the record in the by-block tree if necessary.
-        */
-       if (flags & XFSA_FIXUP_BNO_OK) {
-#ifdef DEBUG
-               if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(
-                       i == 1 && nfbno1 == fbno && nflen1 == flen);
-#endif
-       } else {
-               if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-
-#ifdef DEBUG
-       if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
-               struct xfs_btree_block  *bnoblock;
-               struct xfs_btree_block  *cntblock;
-
-               bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
-               cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
-
-               XFS_WANT_CORRUPTED_RETURN(
-                       bnoblock->bb_numrecs == cntblock->bb_numrecs);
-       }
-#endif
-
-       /*
-        * Deal with all four cases: the allocated record is contained
-        * within the freespace record, so we can have new freespace
-        * at either (or both) end, or no freespace remaining.
-        */
-       if (rbno == fbno && rlen == flen)
-               nfbno1 = nfbno2 = NULLAGBLOCK;
-       else if (rbno == fbno) {
-               nfbno1 = rbno + rlen;
-               nflen1 = flen - rlen;
-               nfbno2 = NULLAGBLOCK;
-       } else if (rbno + rlen == fbno + flen) {
-               nfbno1 = fbno;
-               nflen1 = flen - rlen;
-               nfbno2 = NULLAGBLOCK;
-       } else {
-               nfbno1 = fbno;
-               nflen1 = rbno - fbno;
-               nfbno2 = rbno + rlen;
-               nflen2 = (fbno + flen) - nfbno2;
-       }
-       /*
-        * Delete the entry from the by-size btree.
-        */
-       if ((error = xfs_btree_delete(cnt_cur, &i)))
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
-       /*
-        * Add new by-size btree entry(s).
-        */
-       if (nfbno1 != NULLAGBLOCK) {
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
-               if ((error = xfs_btree_insert(cnt_cur, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-       if (nfbno2 != NULLAGBLOCK) {
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
-               if ((error = xfs_btree_insert(cnt_cur, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-       /*
-        * Fix up the by-block btree entry(s).
-        */
-       if (nfbno1 == NULLAGBLOCK) {
-               /*
-                * No remaining freespace, just delete the by-block tree entry.
-                */
-               if ((error = xfs_btree_delete(bno_cur, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       } else {
-               /*
-                * Update the by-block entry to start later or be shorter.
-                */
-               if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
-                       return error;
-       }
-       if (nfbno2 != NULLAGBLOCK) {
-               /*
-                * 2 resulting free entries, need to add one.
-                */
-               if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
-               if ((error = xfs_btree_insert(bno_cur, &i)))
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-       return 0;
-}
-
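The four-way split described in the middle of the routine above, run on concrete numbers (remainder arithmetic only; the btree updates are left out and NULLAGBLOCK is mocked up):

#include <stdio.h>

#define NULLB 0xffffffffu       /* stand-in for NULLAGBLOCK */

int main(void)
{
        unsigned int fbno = 100, flen = 50;     /* free extent      [100, 150) */
        unsigned int rbno = 110, rlen = 20;     /* allocated extent [110, 130) */
        unsigned int nfbno1, nflen1 = 0, nfbno2, nflen2 = 0;

        if (rbno == fbno && rlen == flen) {                     /* exact match  */
                nfbno1 = nfbno2 = NULLB;
        } else if (rbno == fbno) {                              /* head of free */
                nfbno1 = rbno + rlen;
                nflen1 = flen - rlen;
                nfbno2 = NULLB;
        } else if (rbno + rlen == fbno + flen) {                /* tail of free */
                nfbno1 = fbno;
                nflen1 = flen - rlen;
                nfbno2 = NULLB;
        } else {                                /* middle: two remainders */
                nfbno1 = fbno;
                nflen1 = rbno - fbno;                   /* [100, 110) */
                nfbno2 = rbno + rlen;
                nflen2 = (fbno + flen) - nfbno2;        /* [130, 150) */
        }
        printf("left [%u, %u]  right [%u, %u]\n", nfbno1, nflen1, nfbno2, nflen2);
        return 0;
}
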
-static bool
-xfs_agfl_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
-       int             i;
-
-       if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
-               return false;
-       if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
-               return false;
-       /*
-        * During growfs operations the perag is not fully initialised, so we
-        * can't use it for any useful checking. growfs ensures we can't use it
-        * by using uncached buffers that don't have the perag attached, so we
-        * can detect and avoid this problem.
-        */
-       if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
-               return false;
-
-       for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
-               if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
-                   be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
-                       return false;
-       }
-       return true;
-}
-
-static void
-xfs_agfl_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-
-       /*
-        * There is no verification of non-crc AGFLs because mkfs does not
-        * initialise the AGFL to zero or NULL. Hence the only valid part of the
-        * AGFL is what the AGF says is active. We can't get to the AGF, so we
-        * can't verify just those entries are valid.
-        */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_agfl_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_agfl_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       /* no verification of non-crc AGFLs */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (!xfs_agfl_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (bip)
-               XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_agfl_buf_ops = {
-       .verify_read = xfs_agfl_read_verify,
-       .verify_write = xfs_agfl_write_verify,
-};
-
-/*
- * Read in the allocation group free block array.
- */
-STATIC int                             /* error */
-xfs_alloc_read_agfl(
-       xfs_mount_t     *mp,            /* mount point structure */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_buf_t       **bpp)          /* buffer for the ag free block array */
-{
-       xfs_buf_t       *bp;            /* return value */
-       int             error;
-
-       ASSERT(agno != NULLAGNUMBER);
-       error = xfs_trans_read_buf(
-                       mp, tp, mp->m_ddev_targp,
-                       XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
-       if (error)
-               return error;
-       xfs_buf_set_ref(bp, XFS_AGFL_REF);
-       *bpp = bp;
-       return 0;
-}
-
-STATIC int
-xfs_alloc_update_counters(
-       struct xfs_trans        *tp,
-       struct xfs_perag        *pag,
-       struct xfs_buf          *agbp,
-       long                    len)
-{
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
-
-       pag->pagf_freeblks += len;
-       be32_add_cpu(&agf->agf_freeblks, len);
-
-       xfs_trans_agblocks_delta(tp, len);
-       if (unlikely(be32_to_cpu(agf->agf_freeblks) >
-                    be32_to_cpu(agf->agf_length)))
-               return EFSCORRUPTED;
-
-       xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
-       return 0;
-}
-
-/*
- * Allocation group level functions.
- */
-
-/*
- * Allocate a variable extent in the allocation group agno.
- * Type and bno are used to determine where in the allocation group the
- * extent will start.
- * Extent's length (returned in *len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent(
-       xfs_alloc_arg_t *args)  /* argument structure for allocation */
-{
-       int             error=0;
-
-       ASSERT(args->minlen > 0);
-       ASSERT(args->maxlen > 0);
-       ASSERT(args->minlen <= args->maxlen);
-       ASSERT(args->mod < args->prod);
-       ASSERT(args->alignment > 0);
-       /*
-        * Branch to correct routine based on the type.
-        */
-       args->wasfromfl = 0;
-       switch (args->type) {
-       case XFS_ALLOCTYPE_THIS_AG:
-               error = xfs_alloc_ag_vextent_size(args);
-               break;
-       case XFS_ALLOCTYPE_NEAR_BNO:
-               error = xfs_alloc_ag_vextent_near(args);
-               break;
-       case XFS_ALLOCTYPE_THIS_BNO:
-               error = xfs_alloc_ag_vextent_exact(args);
-               break;
-       default:
-               ASSERT(0);
-               /* NOTREACHED */
-       }
-
-       if (error || args->agbno == NULLAGBLOCK)
-               return error;
-
-       ASSERT(args->len >= args->minlen);
-       ASSERT(args->len <= args->maxlen);
-       ASSERT(!args->wasfromfl || !args->isfl);
-       ASSERT(args->agbno % args->alignment == 0);
-
-       if (!args->wasfromfl) {
-               error = xfs_alloc_update_counters(args->tp, args->pag,
-                                                 args->agbp,
-                                                 -((long)(args->len)));
-               if (error)
-                       return error;
-
-               ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
-                                             args->agbno, args->len));
-       }
-
-       if (!args->isfl) {
-               xfs_trans_mod_sb(args->tp, args->wasdel ?
-                                XFS_TRANS_SB_RES_FDBLOCKS :
-                                XFS_TRANS_SB_FDBLOCKS,
-                                -((long)(args->len)));
-       }
-
-       XFS_STATS_INC(xs_allocx);
-       XFS_STATS_ADD(xs_allocb, args->len);
-       return error;
-}
-
-/*
- * Allocate a variable extent at exactly agno/bno.
- * Extent's length (returned in *len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent_exact(
-       xfs_alloc_arg_t *args)  /* allocation argument structure */
-{
-       xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
-       xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
-       int             error;
-       xfs_agblock_t   fbno;   /* start block of found extent */
-       xfs_extlen_t    flen;   /* length of found extent */
-       xfs_agblock_t   tbno;   /* start block of trimmed extent */
-       xfs_extlen_t    tlen;   /* length of trimmed extent */
-       xfs_agblock_t   tend;   /* end block of trimmed extent */
-       int             i;      /* success/failure of operation */
-
-       ASSERT(args->alignment == 1);
-
-       /*
-        * Allocate/initialize a cursor for the by-number freespace btree.
-        */
-       bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-                                         args->agno, XFS_BTNUM_BNO);
-
-       /*
-        * Look up bno and minlen in the btree (minlen is irrelevant, really).
-        * Look for the closest free block <= bno; it must contain bno
-        * if any free block does.
-        */
-       error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
-       if (error)
-               goto error0;
-       if (!i)
-               goto not_found;
-
-       /*
-        * Grab the freespace record.
-        */
-       error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
-       if (error)
-               goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       ASSERT(fbno <= args->agbno);
-
-       /*
-        * Check for overlapping busy extents.
-        */
-       xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
-
-       /*
-        * Give up if the start of the extent is busy, or the freespace isn't
-        * long enough for the minimum request.
-        */
-       if (tbno > args->agbno)
-               goto not_found;
-       if (tlen < args->minlen)
-               goto not_found;
-       tend = tbno + tlen;
-       if (tend < args->agbno + args->minlen)
-               goto not_found;
-
-       /*
-        * End of extent will be smaller of the freespace end and the
-        * maximal requested end.
-        *
-        * Fix the length according to mod and prod if given.
-        */
-       args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
-                                               - args->agbno;
-       xfs_alloc_fix_len(args);
-       if (!xfs_alloc_fix_minleft(args))
-               goto not_found;
-
-       ASSERT(args->agbno + args->len <= tend);
-
-       /*
-        * We are allocating agbno for args->len
-        * Allocate/initialize a cursor for the by-size btree.
-        */
-       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-               args->agno, XFS_BTNUM_CNT);
-       ASSERT(args->agbno + args->len <=
-               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
-       error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
-                                     args->len, XFSA_FIXUP_BNO_OK);
-       if (error) {
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
-               goto error0;
-       }
-
-       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
-       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-
-       args->wasfromfl = 0;
-       trace_xfs_alloc_exact_done(args);
-       return 0;
-
-not_found:
-       /* Didn't find it, return null. */
-       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
-       args->agbno = NULLAGBLOCK;
-       trace_xfs_alloc_exact_notfound(args);
-       return 0;
-
-error0:
-       xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
-       trace_xfs_alloc_exact_error(args);
-       return error;
-}
-
-/*
- * Search the btree in a given direction via the search cursor and compare
- * the records found against the good extent we've already found.
- */
-STATIC int
-xfs_alloc_find_best_extent(
-       struct xfs_alloc_arg    *args,  /* allocation argument structure */
-       struct xfs_btree_cur    **gcur, /* good cursor */
-       struct xfs_btree_cur    **scur, /* searching cursor */
-       xfs_agblock_t           gdiff,  /* difference for search comparison */
-       xfs_agblock_t           *sbno,  /* extent found by search */
-       xfs_extlen_t            *slen,  /* extent length */
-       xfs_agblock_t           *sbnoa, /* aligned extent found by search */
-       xfs_extlen_t            *slena, /* aligned extent length */
-       int                     dir)    /* 0 = search right, 1 = search left */
-{
-       xfs_agblock_t           new;
-       xfs_agblock_t           sdiff;
-       int                     error;
-       int                     i;
-
-       /* The good extent is perfect, no need to search. */
-       if (!gdiff)
-               goto out_use_good;
-
-       /*
-        * Look until we find a better one, run out of space or run off the end.
-        */
-       do {
-               error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
-
-               /*
-                * The good extent is closer than this one.
-                */
-               if (!dir) {
-                       if (*sbnoa >= args->agbno + gdiff)
-                               goto out_use_good;
-               } else {
-                       if (*sbnoa <= args->agbno - gdiff)
-                               goto out_use_good;
-               }
-
-               /*
-                * Same distance, compare length and pick the best.
-                */
-               if (*slena >= args->minlen) {
-                       args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
-                       xfs_alloc_fix_len(args);
-
-                       sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                                                      args->alignment,
-                                                      args->userdata, *sbnoa,
-                                                      *slena, &new);
-
-                       /*
-                        * Choose closer size and invalidate other cursor.
-                        */
-                       if (sdiff < gdiff)
-                               goto out_use_search;
-                       goto out_use_good;
-               }
-
-               if (!dir)
-                       error = xfs_btree_increment(*scur, 0, &i);
-               else
-                       error = xfs_btree_decrement(*scur, 0, &i);
-               if (error)
-                       goto error0;
-       } while (i);
-
-out_use_good:
-       xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
-       *scur = NULL;
-       return 0;
-
-out_use_search:
-       xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
-       *gcur = NULL;
-       return 0;
-
-error0:
-       /* caller invalidates cursors */
-       return error;
-}
-
-/*
- * Allocate a variable extent near bno in the allocation group agno.
- * Extent's length (returned in len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
- */
-STATIC int                             /* error */
-xfs_alloc_ag_vextent_near(
-       xfs_alloc_arg_t *args)          /* allocation argument structure */
-{
-       xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
-       xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
-       xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
-       xfs_agblock_t   gtbno;          /* start bno of right side entry */
-       xfs_agblock_t   gtbnoa;         /* aligned ... */
-       xfs_extlen_t    gtdiff;         /* difference to right side entry */
-       xfs_extlen_t    gtlen;          /* length of right side entry */
-       xfs_extlen_t    gtlena;         /* aligned ... */
-       xfs_agblock_t   gtnew;          /* useful start bno of right side */
-       int             error;          /* error code */
-       int             i;              /* result code, temporary */
-       int             j;              /* result code, temporary */
-       xfs_agblock_t   ltbno;          /* start bno of left side entry */
-       xfs_agblock_t   ltbnoa;         /* aligned ... */
-       xfs_extlen_t    ltdiff;         /* difference to left side entry */
-       xfs_extlen_t    ltlen;          /* length of left side entry */
-       xfs_extlen_t    ltlena;         /* aligned ... */
-       xfs_agblock_t   ltnew;          /* useful start bno of left side */
-       xfs_extlen_t    rlen;           /* length of returned extent */
-       int             forced = 0;
-#ifdef DEBUG
-       /*
-        * Randomly don't execute the first algorithm.
-        */
-       int             dofirst;        /* set to do first algorithm */
-
-       dofirst = prandom_u32() & 1;
-#endif
-
-restart:
-       bno_cur_lt = NULL;
-       bno_cur_gt = NULL;
-       ltlen = 0;
-       gtlena = 0;
-       ltlena = 0;
-
-       /*
-        * Get a cursor for the by-size btree.
-        */
-       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-               args->agno, XFS_BTNUM_CNT);
-
-       /*
-        * See if there are any free extents as big as maxlen.
-        */
-       if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
-               goto error0;
-       /*
-        * If none, then pick up the last entry in the tree unless the
-        * tree is empty.
-        */
-       if (!i) {
-               if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
-                               &ltlen, &i)))
-                       goto error0;
-               if (i == 0 || ltlen == 0) {
-                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-                       trace_xfs_alloc_near_noentry(args);
-                       return 0;
-               }
-               ASSERT(i == 1);
-       }
-       args->wasfromfl = 0;
-
-       /*
-        * First algorithm.
-        * If the requested extent is large relative to the freespaces available
-        * in this a.g., then the cursor will be pointing to a btree entry
-        * near the right edge of the tree.  If it's in the last btree leaf
-        * block, then we just examine all the entries in that block
-        * that are big enough, and pick the best one.
-        * This is written as a while loop so we can break out of it,
-        * but we never loop back to the top.
-        */
-       while (xfs_btree_islastblock(cnt_cur, 0)) {
-               xfs_extlen_t    bdiff;
-               int             besti=0;
-               xfs_extlen_t    blen=0;
-               xfs_agblock_t   bnew=0;
-
-#ifdef DEBUG
-               if (dofirst)
-                       break;
-#endif
-               /*
-                * Start from the entry that lookup found, sequence through
-                * all larger free blocks.  If we're actually pointing at a
-                * record smaller than maxlen, go to the start of this block,
-                * and skip all those smaller than minlen.
-                */
-               if (ltlen || args->alignment > 1) {
-                       cnt_cur->bc_ptrs[0] = 1;
-                       do {
-                               if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
-                                               &ltlen, &i)))
-                                       goto error0;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                               if (ltlen >= args->minlen)
-                                       break;
-                               if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
-                                       goto error0;
-                       } while (i);
-                       ASSERT(ltlen >= args->minlen);
-                       if (!i)
-                               break;
-               }
-               i = cnt_cur->bc_ptrs[0];
-               for (j = 1, blen = 0, bdiff = 0;
-                    !error && j && (blen < args->maxlen || bdiff > 0);
-                    error = xfs_btree_increment(cnt_cur, 0, &j)) {
-                       /*
-                        * For each entry, decide if it's better than
-                        * the previous best entry.
-                        */
-                       if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
-                                                 &ltbnoa, &ltlena);
-                       if (ltlena < args->minlen)
-                               continue;
-                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
-                       xfs_alloc_fix_len(args);
-                       ASSERT(args->len >= args->minlen);
-                       if (args->len < blen)
-                               continue;
-                       ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, args->userdata, ltbnoa,
-                               ltlena, &ltnew);
-                       if (ltnew != NULLAGBLOCK &&
-                           (args->len > blen || ltdiff < bdiff)) {
-                               bdiff = ltdiff;
-                               bnew = ltnew;
-                               blen = args->len;
-                               besti = cnt_cur->bc_ptrs[0];
-                       }
-               }
-               /*
-                * It didn't work.  We COULD be in a case where
-                * there's a good record somewhere, so try again.
-                */
-               if (blen == 0)
-                       break;
-               /*
-                * Point at the best entry, and retrieve it again.
-                */
-               cnt_cur->bc_ptrs[0] = besti;
-               if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
-               args->len = blen;
-               if (!xfs_alloc_fix_minleft(args)) {
-                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-                       trace_xfs_alloc_near_nominleft(args);
-                       return 0;
-               }
-               blen = args->len;
-               /*
-                * We are allocating starting at bnew for blen blocks.
-                */
-               args->agbno = bnew;
-               ASSERT(bnew >= ltbno);
-               ASSERT(bnew + blen <= ltbno + ltlen);
-               /*
-                * Set up a cursor for the by-bno tree.
-                */
-               bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
-                       args->agbp, args->agno, XFS_BTNUM_BNO);
-               /*
-                * Fix up the btree entries.
-                */
-               if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
-                               ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
-                       goto error0;
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-
-               trace_xfs_alloc_near_first(args);
-               return 0;
-       }
-       /*
-        * Second algorithm.
-        * Search in the by-bno tree to the left and to the right
-        * simultaneously, until in each case we find a space big enough,
-        * or run into the edge of the tree.  When we run into the edge,
-        * we deallocate that cursor.
-        * If both searches succeed, we compare the two spaces and pick
-        * the better one.
-        * With alignment, it's possible for both to fail; the upper
-        * level algorithm that picks allocation groups for allocations
-        * is not supposed to do this.
-        */
-       /*
-        * Allocate and initialize the cursor for the leftward search.
-        */
-       bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-               args->agno, XFS_BTNUM_BNO);
-       /*
-        * Lookup <= bno to find the leftward search's starting point.
-        */
-       if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
-               goto error0;
-       if (!i) {
-               /*
-                * Didn't find anything; use this cursor for the rightward
-                * search.
-                */
-               bno_cur_gt = bno_cur_lt;
-               bno_cur_lt = NULL;
-       }
-       /*
-        * Found something.  Duplicate the cursor for the rightward search.
-        */
-       else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
-               goto error0;
-       /*
-        * Increment the cursor, so we will point at the entry just right
-        * of the leftward entry if any, or to the leftmost entry.
-        */
-       if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
-               goto error0;
-       if (!i) {
-               /*
-                * It failed, there are no rightward entries.
-                */
-               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
-               bno_cur_gt = NULL;
-       }
-       /*
-        * Loop going left with the leftward cursor, right with the
-        * rightward cursor, until either both directions give up or
-        * we find an entry at least as big as minlen.
-        */
-       do {
-               if (bno_cur_lt) {
-                       if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(args, ltbno, ltlen,
-                                                 &ltbnoa, &ltlena);
-                       if (ltlena >= args->minlen)
-                               break;
-                       if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
-                               goto error0;
-                       if (!i) {
-                               xfs_btree_del_cursor(bno_cur_lt,
-                                                    XFS_BTREE_NOERROR);
-                               bno_cur_lt = NULL;
-                       }
-               }
-               if (bno_cur_gt) {
-                       if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       xfs_alloc_compute_aligned(args, gtbno, gtlen,
-                                                 &gtbnoa, &gtlena);
-                       if (gtlena >= args->minlen)
-                               break;
-                       if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
-                               goto error0;
-                       if (!i) {
-                               xfs_btree_del_cursor(bno_cur_gt,
-                                                    XFS_BTREE_NOERROR);
-                               bno_cur_gt = NULL;
-                       }
-               }
-       } while (bno_cur_lt || bno_cur_gt);
-
-       /*
-        * Got both cursors still active, need to find better entry.
-        */
-       if (bno_cur_lt && bno_cur_gt) {
-               if (ltlena >= args->minlen) {
-                       /*
-                        * Left side is good, look for a right side entry.
-                        */
-                       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
-                       xfs_alloc_fix_len(args);
-                       ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, args->userdata, ltbnoa,
-                               ltlena, &ltnew);
-
-                       error = xfs_alloc_find_best_extent(args,
-                                               &bno_cur_lt, &bno_cur_gt,
-                                               ltdiff, &gtbno, &gtlen,
-                                               &gtbnoa, &gtlena,
-                                               0 /* search right */);
-               } else {
-                       ASSERT(gtlena >= args->minlen);
-
-                       /*
-                        * Right side is good, look for a left side entry.
-                        */
-                       args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
-                       xfs_alloc_fix_len(args);
-                       gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-                               args->alignment, args->userdata, gtbnoa,
-                               gtlena, &gtnew);
-
-                       error = xfs_alloc_find_best_extent(args,
-                                               &bno_cur_gt, &bno_cur_lt,
-                                               gtdiff, &ltbno, &ltlen,
-                                               &ltbnoa, &ltlena,
-                                               1 /* search left */);
-               }
-
-               if (error)
-                       goto error0;
-       }
-
-       /*
-        * If we couldn't get anything, give up.
-        */
-       if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-
-               if (!forced++) {
-                       trace_xfs_alloc_near_busy(args);
-                       xfs_log_force(args->mp, XFS_LOG_SYNC);
-                       goto restart;
-               }
-               trace_xfs_alloc_size_neither(args);
-               args->agbno = NULLAGBLOCK;
-               return 0;
-       }
-
-       /*
-        * At this point we have selected a freespace entry, either to the
-        * left or to the right.  If it's on the right, copy all the
-        * useful variables to the "left" set so we only have one
-        * copy of this code.
-        */
-       if (bno_cur_gt) {
-               bno_cur_lt = bno_cur_gt;
-               bno_cur_gt = NULL;
-               ltbno = gtbno;
-               ltbnoa = gtbnoa;
-               ltlen = gtlen;
-               ltlena = gtlena;
-               j = 1;
-       } else
-               j = 0;
-
-       /*
-        * Fix up the length and compute the useful address.
-        */
-       args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
-       xfs_alloc_fix_len(args);
-       if (!xfs_alloc_fix_minleft(args)) {
-               trace_xfs_alloc_near_nominleft(args);
-               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-               return 0;
-       }
-       rlen = args->len;
-       (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
-                                    args->userdata, ltbnoa, ltlena, &ltnew);
-       ASSERT(ltnew >= ltbno);
-       ASSERT(ltnew + rlen <= ltbnoa + ltlena);
-       ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
-       args->agbno = ltnew;
-
-       if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
-                       ltnew, rlen, XFSA_FIXUP_BNO_OK)))
-               goto error0;
-
-       if (j)
-               trace_xfs_alloc_near_greater(args);
-       else
-               trace_xfs_alloc_near_lesser(args);
-
-       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-       xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-       return 0;
-
- error0:
-       trace_xfs_alloc_near_error(args);
-       if (cnt_cur != NULL)
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
-       if (bno_cur_lt != NULL)
-               xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
-       if (bno_cur_gt != NULL)
-               xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
-       return error;
-}
-
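A much-simplified model of the left/right walk described in the "second algorithm" comment above: a sorted array stands in for the by-bno btree, two indices stand in for the two cursors, and the closeness test ignores alignment and length trimming. Purely illustrative, not the kernel algorithm.

#include <stdio.h>

struct ext { unsigned int bno, len; };

int main(void)
{
        struct ext fr[] = { {10, 2}, {40, 8}, {90, 3}, {130, 16} };
        int n = 4;
        unsigned int target = 100, minlen = 4;
        int lt = 2;             /* last extent starting at or before target */
        int gt = 3;             /* first extent starting after target       */
        int best = -1;

        while (lt >= 0 && fr[lt].len < minlen)
                lt--;                           /* walk left  */
        while (gt < n && fr[gt].len < minlen)
                gt++;                           /* walk right */

        if (lt >= 0 && gt < n)                  /* both found: take the closer */
                best = (target - fr[lt].bno <= fr[gt].bno - target) ? lt : gt;
        else if (lt >= 0)
                best = lt;
        else if (gt < n)
                best = gt;

        if (best >= 0)
                printf("use [%u, %u]\n", fr[best].bno, fr[best].len);
        else
                printf("nothing big enough\n");
        return 0;
}
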
-/*
- * Allocate a variable extent anywhere in the allocation group agno.
- * Extent's length (returned in len) will be between minlen and maxlen,
- * and of the form k * prod + mod unless there's nothing that large.
- * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
- */
-STATIC int                             /* error */
-xfs_alloc_ag_vextent_size(
-       xfs_alloc_arg_t *args)          /* allocation argument structure */
-{
-       xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
-       xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
-       int             error;          /* error result */
-       xfs_agblock_t   fbno;           /* start of found freespace */
-       xfs_extlen_t    flen;           /* length of found freespace */
-       int             i;              /* temp status variable */
-       xfs_agblock_t   rbno;           /* returned block number */
-       xfs_extlen_t    rlen;           /* length of returned extent */
-       int             forced = 0;
-
-restart:
-       /*
-        * Allocate and initialize a cursor for the by-size btree.
-        */
-       cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-               args->agno, XFS_BTNUM_CNT);
-       bno_cur = NULL;
-
-       /*
-        * Look for an entry >= maxlen+alignment-1 blocks.
-        */
-       if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
-                       args->maxlen + args->alignment - 1, &i)))
-               goto error0;
-
-       /*
-        * If none or we have busy extents that we cannot allocate from, then
-        * we have to settle for a smaller extent. In the case that there are
-        * no large extents, this will return the last entry in the tree unless
-        * the tree is empty. In the case that there are only busy large
-        * extents, this will return the largest small extent unless there
-        * are no smaller extents available.
-        */
-       if (!i || forced > 1) {
-               error = xfs_alloc_ag_vextent_small(args, cnt_cur,
-                                                  &fbno, &flen, &i);
-               if (error)
-                       goto error0;
-               if (i == 0 || flen == 0) {
-                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-                       trace_xfs_alloc_size_noentry(args);
-                       return 0;
-               }
-               ASSERT(i == 1);
-               xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
-       } else {
-               /*
-                * Search for a non-busy extent that is large enough.
-                * If we are at low space, don't check; if we fall off
-                * the end of the btree, turn off the busy check and
-                * restart.
-                */
-               for (;;) {
-                       error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
-                       if (error)
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-                       xfs_alloc_compute_aligned(args, fbno, flen,
-                                                 &rbno, &rlen);
-
-                       if (rlen >= args->maxlen)
-                               break;
-
-                       error = xfs_btree_increment(cnt_cur, 0, &i);
-                       if (error)
-                               goto error0;
-                       if (i == 0) {
-                               /*
-                                * Our only valid extents must have been busy.
-                                * Make it unbusy by forcing the log out and
-                                * retrying. If we've been here before, forcing
-                                * the log isn't making the extents available,
-                                * which means they have probably been freed in
-                                * this transaction.  In that case, we have to
-                                * give up on them and we'll attempt a minlen
-                                * allocation the next time around.
-                                */
-                               xfs_btree_del_cursor(cnt_cur,
-                                                    XFS_BTREE_NOERROR);
-                               trace_xfs_alloc_size_busy(args);
-                               if (!forced++)
-                                       xfs_log_force(args->mp, XFS_LOG_SYNC);
-                               goto restart;
-                       }
-               }
-       }
-
-       /*
-        * In the first case above, we got the last entry in the
-        * by-size btree.  Now we check to see if the space hits maxlen
-        * once aligned; if not, we search left for something better.
-        * This can't happen in the second case above.
-        */
-       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
-                       (rlen <= flen && rbno + rlen <= fbno + flen), error0);
-       if (rlen < args->maxlen) {
-               xfs_agblock_t   bestfbno;
-               xfs_extlen_t    bestflen;
-               xfs_agblock_t   bestrbno;
-               xfs_extlen_t    bestrlen;
-
-               bestrlen = rlen;
-               bestrbno = rbno;
-               bestflen = flen;
-               bestfbno = fbno;
-               for (;;) {
-                       if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
-                               goto error0;
-                       if (i == 0)
-                               break;
-                       if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
-                                       &i)))
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-                       if (flen < bestrlen)
-                               break;
-                       xfs_alloc_compute_aligned(args, fbno, flen,
-                                                 &rbno, &rlen);
-                       rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-                       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
-                               (rlen <= flen && rbno + rlen <= fbno + flen),
-                               error0);
-                       if (rlen > bestrlen) {
-                               bestrlen = rlen;
-                               bestrbno = rbno;
-                               bestflen = flen;
-                               bestfbno = fbno;
-                               if (rlen == args->maxlen)
-                                       break;
-                       }
-               }
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
-                               &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               rlen = bestrlen;
-               rbno = bestrbno;
-               flen = bestflen;
-               fbno = bestfbno;
-       }
-       args->wasfromfl = 0;
-       /*
-        * Fix up the length.
-        */
-       args->len = rlen;
-       if (rlen < args->minlen) {
-               if (!forced++) {
-                       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-                       trace_xfs_alloc_size_busy(args);
-                       xfs_log_force(args->mp, XFS_LOG_SYNC);
-                       goto restart;
-               }
-               goto out_nominleft;
-       }
-       xfs_alloc_fix_len(args);
-
-       if (!xfs_alloc_fix_minleft(args))
-               goto out_nominleft;
-       rlen = args->len;
-       XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
-       /*
-        * Allocate and initialize a cursor for the by-block tree.
-        */
-       bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
-               args->agno, XFS_BTNUM_BNO);
-       if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
-                       rbno, rlen, XFSA_FIXUP_CNT_OK)))
-               goto error0;
-       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
-       cnt_cur = bno_cur = NULL;
-       args->len = rlen;
-       args->agbno = rbno;
-       XFS_WANT_CORRUPTED_GOTO(
-               args->agbno + args->len <=
-                       be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
-               error0);
-       trace_xfs_alloc_size_done(args);
-       return 0;
-
-error0:
-       trace_xfs_alloc_size_error(args);
-       if (cnt_cur)
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
-       if (bno_cur)
-               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
-       return error;
-
-out_nominleft:
-       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-       trace_xfs_alloc_size_nominleft(args);
-       args->agbno = NULLAGBLOCK;
-       return 0;
-}
-
-/*
- * Deal with the case where only small freespaces remain.
- * Either return the contents of the last freespace record,
- * or allocate space from the freelist if there is nothing in the tree.
- */
-STATIC int                     /* error */
-xfs_alloc_ag_vextent_small(
-       xfs_alloc_arg_t *args,  /* allocation argument structure */
-       xfs_btree_cur_t *ccur,  /* by-size cursor */
-       xfs_agblock_t   *fbnop, /* result block number */
-       xfs_extlen_t    *flenp, /* result length */
-       int             *stat)  /* status: 0-freelist, 1-normal/none */
-{
-       int             error;
-       xfs_agblock_t   fbno;
-       xfs_extlen_t    flen;
-       int             i;
-
-       if ((error = xfs_btree_decrement(ccur, 0, &i)))
-               goto error0;
-       if (i) {
-               if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       }
-       /*
-        * Nothing in the btree, try the freelist.  Make sure
-        * to respect minleft even when pulling from the
-        * freelist.
-        */
-       else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
-                (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
-                 > args->minleft)) {
-               error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
-               if (error)
-                       goto error0;
-               if (fbno != NULLAGBLOCK) {
-                       xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
-                                            args->userdata);
-
-                       if (args->userdata) {
-                               xfs_buf_t       *bp;
-
-                               bp = xfs_btree_get_bufs(args->mp, args->tp,
-                                       args->agno, fbno, 0);
-                               xfs_trans_binval(args->tp, bp);
-                       }
-                       args->len = 1;
-                       args->agbno = fbno;
-                       XFS_WANT_CORRUPTED_GOTO(
-                               args->agbno + args->len <=
-                               be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
-                               error0);
-                       args->wasfromfl = 1;
-                       trace_xfs_alloc_small_freelist(args);
-                       *stat = 0;
-                       return 0;
-               }
-               /*
-                * Nothing in the freelist.
-                */
-               else
-                       flen = 0;
-       }
-       /*
-        * Can't allocate from the freelist for some reason.
-        */
-       else {
-               fbno = NULLAGBLOCK;
-               flen = 0;
-       }
-       /*
-        * Can't do the allocation, give up.
-        */
-       if (flen < args->minlen) {
-               args->agbno = NULLAGBLOCK;
-               trace_xfs_alloc_small_notenough(args);
-               flen = 0;
-       }
-       *fbnop = fbno;
-       *flenp = flen;
-       *stat = 1;
-       trace_xfs_alloc_small_done(args);
-       return 0;
-
-error0:
-       trace_xfs_alloc_small_error(args);
-       return error;
-}
-
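The decision order in the routine above, reduced to a tiny pure function; illustrative only, with the minleft/isfl handling, busy-extent reuse and the actual freelist I/O left out:

#include <stdio.h>

/* Returns the length granted: the last by-size record if one exists and is
 * long enough, a single block off the freelist when nothing is in the tree
 * and a 1-block unaligned allocation is acceptable, otherwise 0 (give up). */
static unsigned int small_alloc(int have_record, unsigned int rec_len,
                                unsigned int minlen, unsigned int alignment,
                                unsigned int flcount)
{
        unsigned int flen = 0;

        if (have_record)
                flen = rec_len;
        else if (minlen == 1 && alignment == 1 && flcount > 0)
                return 1;

        return flen >= minlen ? flen : 0;
}

int main(void)
{
        printf("%u\n", small_alloc(1, 6, 4, 1, 3));     /* 6: last record fits   */
        printf("%u\n", small_alloc(0, 0, 1, 1, 3));     /* 1: freelist block     */
        printf("%u\n", small_alloc(1, 2, 4, 1, 3));     /* 0: too small, give up */
        return 0;
}
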
-/*
- * Free the extent starting at agno/bno for length.
- */
-STATIC int                     /* error */
-xfs_free_ag_extent(
-       xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
-       xfs_agnumber_t  agno,   /* allocation group number */
-       xfs_agblock_t   bno,    /* starting block number */
-       xfs_extlen_t    len,    /* length of extent */
-       int             isfl)   /* set if is freelist blocks - no sb acctg */
-{
-       xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
-       xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
-       int             error;          /* error return value */
-       xfs_agblock_t   gtbno;          /* start of right neighbor block */
-       xfs_extlen_t    gtlen;          /* length of right neighbor block */
-       int             haveleft;       /* have a left neighbor block */
-       int             haveright;      /* have a right neighbor block */
-       int             i;              /* temp, result code */
-       xfs_agblock_t   ltbno;          /* start of left neighbor block */
-       xfs_extlen_t    ltlen;          /* length of left neighbor block */
-       xfs_mount_t     *mp;            /* mount point struct for filesystem */
-       xfs_agblock_t   nbno;           /* new starting block of freespace */
-       xfs_extlen_t    nlen;           /* new length of freespace */
-       xfs_perag_t     *pag;           /* per allocation group data */
-
-       mp = tp->t_mountp;
-       /*
-        * Allocate and initialize a cursor for the by-block btree.
-        */
-       bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
-       cnt_cur = NULL;
-       /*
-        * Look for a neighboring block on the left (lower block numbers)
-        * that is contiguous with this space.
-        */
-       if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
-               goto error0;
-       if (haveleft) {
-               /*
-                * There is a block to our left.
-                */
-               if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * It's not contiguous, though.
-                */
-               if (ltbno + ltlen < bno)
-                       haveleft = 0;
-               else {
-                       /*
-                        * If this failure happens the request to free this
-                        * space was invalid, it's (partly) already free.
-                        * Very bad.
-                        */
-                       XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
-               }
-       }
-       /*
-        * Look for a neighboring block on the right (higher block numbers)
-        * that is contiguous with this space.
-        */
-       if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
-               goto error0;
-       if (haveright) {
-               /*
-                * There is a block to our right.
-                */
-               if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * It's not contiguous, though.
-                */
-               if (bno + len < gtbno)
-                       haveright = 0;
-               else {
-                       /*
-                        * If this failure happens the request to free this
-                        * space was invalid, it's (partly) already free.
-                        * Very bad.
-                        */
-                       XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
-               }
-       }
-       /*
-        * Now allocate and initialize a cursor for the by-size tree.
-        */
-       cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
-       /*
-        * Have both left and right contiguous neighbors.
-        * Merge all three into a single free block.
-        */
-       if (haveleft && haveright) {
-               /*
-                * Delete the old by-size entry on the left.
-                */
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               if ((error = xfs_btree_delete(cnt_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * Delete the old by-size entry on the right.
-                */
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               if ((error = xfs_btree_delete(cnt_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * Delete the old by-block entry for the right block.
-                */
-               if ((error = xfs_btree_delete(bno_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * Move the by-block cursor back to the left neighbor.
-                */
-               if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-#ifdef DEBUG
-               /*
-                * Check that this is the right record: delete didn't
-                * mangle the cursor.
-                */
-               {
-                       xfs_agblock_t   xxbno;
-                       xfs_extlen_t    xxlen;
-
-                       if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
-                                       &i)))
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(
-                               i == 1 && xxbno == ltbno && xxlen == ltlen,
-                               error0);
-               }
-#endif
-               /*
-                * Update remaining by-block entry to the new, joined block.
-                */
-               nbno = ltbno;
-               nlen = len + ltlen + gtlen;
-               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
-                       goto error0;
-       }
-       /*
-        * Have only a left contiguous neighbor.
-        * Merge it together with the new freespace.
-        */
-       else if (haveleft) {
-               /*
-                * Delete the old by-size entry on the left.
-                */
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               if ((error = xfs_btree_delete(cnt_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * Back up the by-block cursor to the left neighbor, and
-                * update its length.
-                */
-               if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               nbno = ltbno;
-               nlen = len + ltlen;
-               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
-                       goto error0;
-       }
-       /*
-        * Have only a right contiguous neighbor.
-        * Merge it together with the new freespace.
-        */
-       else if (haveright) {
-               /*
-                * Delete the old by-size entry on the right.
-                */
-               if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               if ((error = xfs_btree_delete(cnt_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               /*
-                * Update the starting block and length of the right
-                * neighbor in the by-block tree.
-                */
-               nbno = bno;
-               nlen = len + gtlen;
-               if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
-                       goto error0;
-       }
-       /*
-        * No contiguous neighbors.
-        * Insert the new freespace into the by-block tree.
-        */
-       else {
-               nbno = bno;
-               nlen = len;
-               if ((error = xfs_btree_insert(bno_cur, &i)))
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       }
-       xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
-       bno_cur = NULL;
-       /*
-        * In all cases we need to insert the new freespace in the by-size tree.
-        */
-       if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
-               goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
-       if ((error = xfs_btree_insert(cnt_cur, &i)))
-               goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-       cnt_cur = NULL;
-
-       /*
-        * Update the freespace totals in the ag and superblock.
-        */
-       pag = xfs_perag_get(mp, agno);
-       error = xfs_alloc_update_counters(tp, pag, agbp, len);
-       xfs_perag_put(pag);
-       if (error)
-               goto error0;
-
-       if (!isfl)
-               xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
-       XFS_STATS_INC(xs_freex);
-       XFS_STATS_ADD(xs_freeb, len);
-
-       trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
-
-       return 0;
-
- error0:
-       trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
-       if (bno_cur)
-               xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
-       if (cnt_cur)
-               xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Visible (exported) allocation/free functions.
- * Some of these are used just by xfs_alloc_btree.c and this file.
- */
-
-/*
- * Compute and fill in value of m_ag_maxlevels.
- */
-void
-xfs_alloc_compute_maxlevels(
-       xfs_mount_t     *mp)    /* file system mount structure */
-{
-       int             level;
-       uint            maxblocks;
-       uint            maxleafents;
-       int             minleafrecs;
-       int             minnoderecs;
-
-       maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
-       minleafrecs = mp->m_alloc_mnr[0];
-       minnoderecs = mp->m_alloc_mnr[1];
-       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
-       for (level = 1; maxblocks > 1; level++)
-               maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
-       mp->m_ag_maxlevels = level;
-}
-
-/*
- * Find the length of the longest extent in an AG.
- */
-xfs_extlen_t
-xfs_alloc_longest_free_extent(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag)
-{
-       xfs_extlen_t            need, delta = 0;
-
-       need = XFS_MIN_FREELIST_PAG(pag, mp);
-       if (need > pag->pagf_flcount)
-               delta = need - pag->pagf_flcount;
-
-       if (pag->pagf_longest > delta)
-               return pag->pagf_longest - delta;
-       return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
-}
-
-/*
- * Decide whether to use this allocation group for this allocation.
- * If so, fix up the btree freelist's size.
- */
-STATIC int                     /* error */
-xfs_alloc_fix_freelist(
-       xfs_alloc_arg_t *args,  /* allocation argument structure */
-       int             flags)  /* XFS_ALLOC_FLAG_... */
-{
-       xfs_buf_t       *agbp;  /* agf buffer pointer */
-       xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
-       xfs_buf_t       *agflbp;/* agfl buffer pointer */
-       xfs_agblock_t   bno;    /* freelist block */
-       xfs_extlen_t    delta;  /* new blocks needed in freelist */
-       int             error;  /* error result code */
-       xfs_extlen_t    longest;/* longest extent in allocation group */
-       xfs_mount_t     *mp;    /* file system mount point structure */
-       xfs_extlen_t    need;   /* total blocks needed in freelist */
-       xfs_perag_t     *pag;   /* per-ag information structure */
-       xfs_alloc_arg_t targs;  /* local allocation arguments */
-       xfs_trans_t     *tp;    /* transaction pointer */
-
-       mp = args->mp;
-
-       pag = args->pag;
-       tp = args->tp;
-       if (!pag->pagf_init) {
-               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
-                               &agbp)))
-                       return error;
-               if (!pag->pagf_init) {
-                       ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
-                       ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-                       args->agbp = NULL;
-                       return 0;
-               }
-       } else
-               agbp = NULL;
-
-       /*
-        * If this is a metadata preferred pag and we are user data,
-        * then try somewhere else if we are not being asked to
-        * try harder at this point.
-        */
-       if (pag->pagf_metadata && args->userdata &&
-           (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
-               ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-               args->agbp = NULL;
-               return 0;
-       }
-
-       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-               /*
-                * If it looks like there isn't a long enough extent, or enough
-                * total blocks, reject it.
-                */
-               need = XFS_MIN_FREELIST_PAG(pag, mp);
-               longest = xfs_alloc_longest_free_extent(mp, pag);
-               if ((args->minlen + args->alignment + args->minalignslop - 1) >
-                               longest ||
-                   ((int)(pag->pagf_freeblks + pag->pagf_flcount -
-                          need - args->total) < (int)args->minleft)) {
-                       if (agbp)
-                               xfs_trans_brelse(tp, agbp);
-                       args->agbp = NULL;
-                       return 0;
-               }
-       }
-
-       /*
-        * Get the a.g. freespace buffer.
-        * Can fail if we're not blocking on locks, and it's held.
-        */
-       if (agbp == NULL) {
-               if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
-                               &agbp)))
-                       return error;
-               if (agbp == NULL) {
-                       ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
-                       ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
-                       args->agbp = NULL;
-                       return 0;
-               }
-       }
-       /*
-        * Figure out how many blocks we should have in the freelist.
-        */
-       agf = XFS_BUF_TO_AGF(agbp);
-       need = XFS_MIN_FREELIST(agf, mp);
-       /*
-        * If there isn't enough total or single-extent, reject it.
-        */
-       if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-               delta = need > be32_to_cpu(agf->agf_flcount) ?
-                       (need - be32_to_cpu(agf->agf_flcount)) : 0;
-               longest = be32_to_cpu(agf->agf_longest);
-               longest = (longest > delta) ? (longest - delta) :
-                       (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
-               if ((args->minlen + args->alignment + args->minalignslop - 1) >
-                               longest ||
-                   ((int)(be32_to_cpu(agf->agf_freeblks) +
-                    be32_to_cpu(agf->agf_flcount) - need - args->total) <
-                               (int)args->minleft)) {
-                       xfs_trans_brelse(tp, agbp);
-                       args->agbp = NULL;
-                       return 0;
-               }
-       }
-       /*
-        * Make the freelist shorter if it's too long.
-        */
-       while (be32_to_cpu(agf->agf_flcount) > need) {
-               xfs_buf_t       *bp;
-
-               error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
-               if (error)
-                       return error;
-               if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
-                       return error;
-               bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
-               xfs_trans_binval(tp, bp);
-       }
-       /*
-        * Initialize the args structure.
-        */
-       memset(&targs, 0, sizeof(targs));
-       targs.tp = tp;
-       targs.mp = mp;
-       targs.agbp = agbp;
-       targs.agno = args->agno;
-       targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
-       targs.type = XFS_ALLOCTYPE_THIS_AG;
-       targs.pag = pag;
-       if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
-               return error;
-       /*
-        * Make the freelist longer if it's too short.
-        */
-       while (be32_to_cpu(agf->agf_flcount) < need) {
-               targs.agbno = 0;
-               targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
-               /*
-                * Allocate as many blocks as possible at once.
-                */
-               if ((error = xfs_alloc_ag_vextent(&targs))) {
-                       xfs_trans_brelse(tp, agflbp);
-                       return error;
-               }
-               /*
-                * Stop if we run out.  Won't happen if callers are obeying
-                * the restrictions correctly.  Can happen for free calls
-                * on a completely full ag.
-                */
-               if (targs.agbno == NULLAGBLOCK) {
-                       if (flags & XFS_ALLOC_FLAG_FREEING)
-                               break;
-                       xfs_trans_brelse(tp, agflbp);
-                       args->agbp = NULL;
-                       return 0;
-               }
-               /*
-                * Put each allocated block on the list.
-                */
-               for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
-                       error = xfs_alloc_put_freelist(tp, agbp,
-                                                       agflbp, bno, 0);
-                       if (error)
-                               return error;
-               }
-       }
-       xfs_trans_brelse(tp, agflbp);
-       args->agbp = agbp;
-       return 0;
-}
-
-/*
- * Get a block from the freelist.
- * Returns with the buffer for the block gotten.
- */
-int                            /* error */
-xfs_alloc_get_freelist(
-       xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *agbp,  /* buffer containing the agf structure */
-       xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
-       int             btreeblk) /* destination is an AGF btree */
-{
-       xfs_agf_t       *agf;   /* a.g. freespace structure */
-       xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
-       xfs_agblock_t   bno;    /* block number returned */
-       __be32          *agfl_bno;
-       int             error;
-       int             logflags;
-       xfs_mount_t     *mp = tp->t_mountp;
-       xfs_perag_t     *pag;   /* per allocation group data */
-
-       /*
-        * Freelist is empty, give up.
-        */
-       agf = XFS_BUF_TO_AGF(agbp);
-       if (!agf->agf_flcount) {
-               *bnop = NULLAGBLOCK;
-               return 0;
-       }
-       /*
-        * Read the array of free blocks.
-        */
-       error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
-                                   &agflbp);
-       if (error)
-               return error;
-
-       /*
-        * Get the block number and update the data structures.
-        */
-       agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
-       bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
-       be32_add_cpu(&agf->agf_flfirst, 1);
-       xfs_trans_brelse(tp, agflbp);
-       if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
-               agf->agf_flfirst = 0;
-
-       pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
-       be32_add_cpu(&agf->agf_flcount, -1);
-       xfs_trans_agflist_delta(tp, -1);
-       pag->pagf_flcount--;
-       xfs_perag_put(pag);
-
-       logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
-       if (btreeblk) {
-               be32_add_cpu(&agf->agf_btreeblks, 1);
-               pag->pagf_btreeblks++;
-               logflags |= XFS_AGF_BTREEBLKS;
-       }
-
-       xfs_alloc_log_agf(tp, agbp, logflags);
-       *bnop = bno;
-
-       return 0;
-}
-
-/*
- * Log the given fields from the agf structure.
- */
-void
-xfs_alloc_log_agf(
-       xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
-       int             fields) /* mask of fields to be logged (XFS_AGF_...) */
-{
-       int     first;          /* first byte offset */
-       int     last;           /* last byte offset */
-       static const short      offsets[] = {
-               offsetof(xfs_agf_t, agf_magicnum),
-               offsetof(xfs_agf_t, agf_versionnum),
-               offsetof(xfs_agf_t, agf_seqno),
-               offsetof(xfs_agf_t, agf_length),
-               offsetof(xfs_agf_t, agf_roots[0]),
-               offsetof(xfs_agf_t, agf_levels[0]),
-               offsetof(xfs_agf_t, agf_flfirst),
-               offsetof(xfs_agf_t, agf_fllast),
-               offsetof(xfs_agf_t, agf_flcount),
-               offsetof(xfs_agf_t, agf_freeblks),
-               offsetof(xfs_agf_t, agf_longest),
-               offsetof(xfs_agf_t, agf_btreeblks),
-               offsetof(xfs_agf_t, agf_uuid),
-               sizeof(xfs_agf_t)
-       };
-
-       trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
-
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
-
-       xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
-       xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
-}
-
-/*
- * Interface for inode allocation to force the pag data to be initialized.
- */
-int                                    /* error */
-xfs_alloc_pagf_init(
-       xfs_mount_t             *mp,    /* file system mount structure */
-       xfs_trans_t             *tp,    /* transaction pointer */
-       xfs_agnumber_t          agno,   /* allocation group number */
-       int                     flags)  /* XFS_ALLOC_FLAGS_... */
-{
-       xfs_buf_t               *bp;
-       int                     error;
-
-       if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
-               return error;
-       if (bp)
-               xfs_trans_brelse(tp, bp);
-       return 0;
-}
-
-/*
- * Put the block on the freelist for the allocation group.
- */
-int                                    /* error */
-xfs_alloc_put_freelist(
-       xfs_trans_t             *tp,    /* transaction pointer */
-       xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
-       xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
-       xfs_agblock_t           bno,    /* block being freed */
-       int                     btreeblk) /* block came from an AGF btree */
-{
-       xfs_agf_t               *agf;   /* a.g. freespace structure */
-       __be32                  *blockp;/* pointer to array entry */
-       int                     error;
-       int                     logflags;
-       xfs_mount_t             *mp;    /* mount structure */
-       xfs_perag_t             *pag;   /* per allocation group data */
-       __be32                  *agfl_bno;
-       int                     startoff;
-
-       agf = XFS_BUF_TO_AGF(agbp);
-       mp = tp->t_mountp;
-
-       if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
-                       be32_to_cpu(agf->agf_seqno), &agflbp)))
-               return error;
-       be32_add_cpu(&agf->agf_fllast, 1);
-       if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
-               agf->agf_fllast = 0;
-
-       pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
-       be32_add_cpu(&agf->agf_flcount, 1);
-       xfs_trans_agflist_delta(tp, 1);
-       pag->pagf_flcount++;
-
-       logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
-       if (btreeblk) {
-               be32_add_cpu(&agf->agf_btreeblks, -1);
-               pag->pagf_btreeblks--;
-               logflags |= XFS_AGF_BTREEBLKS;
-       }
-       xfs_perag_put(pag);
-
-       xfs_alloc_log_agf(tp, agbp, logflags);
-
-       ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
-
-       agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
-       blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
-       *blockp = cpu_to_be32(bno);
-       startoff = (char *)blockp - (char *)agflbp->b_addr;
-
-       xfs_alloc_log_agf(tp, agbp, logflags);
-
-       xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
-       xfs_trans_log_buf(tp, agflbp, startoff,
-                         startoff + sizeof(xfs_agblock_t) - 1);
-       return 0;
-}
-
-static bool
-xfs_agf_verify(
-       struct xfs_mount *mp,
-       struct xfs_buf  *bp)
-{
-       struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-
-       if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
-             XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
-             be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
-             be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
-             be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
-             be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
-               return false;
-
-       /*
-        * During growfs operations the perag is not fully initialised, so we
-        * can't use it for any useful checking. growfs avoids the problem by
-        * using uncached buffers that don't have a perag attached, which lets
-        * us detect this case and skip the check.
-        */
-       if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
-               return false;
-
-       if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
-           be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
-               return false;
-
-       return true;
-}
-
-static void
-xfs_agf_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
-                               XFS_ERRTAG_ALLOC_READ_AGF,
-                               XFS_RANDOM_ALLOC_READ_AGF))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_agf_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       if (!xfs_agf_verify(mp, bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_agf_buf_ops = {
-       .verify_read = xfs_agf_read_verify,
-       .verify_write = xfs_agf_write_verify,
-};
-
-/*
- * Read in the allocation group header (free/alloc section).
- */
-int                                    /* error */
-xfs_read_agf(
-       struct xfs_mount        *mp,    /* mount point structure */
-       struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_agnumber_t          agno,   /* allocation group number */
-       int                     flags,  /* XFS_BUF_ */
-       struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
-{
-       int             error;
-
-       trace_xfs_read_agf(mp, agno);
-
-       ASSERT(agno != NULLAGNUMBER);
-       error = xfs_trans_read_buf(
-                       mp, tp, mp->m_ddev_targp,
-                       XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
-       if (error)
-               return error;
-       if (!*bpp)
-               return 0;
-
-       ASSERT(!(*bpp)->b_error);
-       xfs_buf_set_ref(*bpp, XFS_AGF_REF);
-       return 0;
-}
-
-/*
- * Read in the allocation group header (free/alloc section).
- */
-int                                    /* error */
-xfs_alloc_read_agf(
-       struct xfs_mount        *mp,    /* mount point structure */
-       struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_agnumber_t          agno,   /* allocation group number */
-       int                     flags,  /* XFS_ALLOC_FLAG_... */
-       struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
-{
-       struct xfs_agf          *agf;           /* ag freelist header */
-       struct xfs_perag        *pag;           /* per allocation group data */
-       int                     error;
-
-       trace_xfs_alloc_read_agf(mp, agno);
-
-       ASSERT(agno != NULLAGNUMBER);
-       error = xfs_read_agf(mp, tp, agno,
-                       (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
-                       bpp);
-       if (error)
-               return error;
-       if (!*bpp)
-               return 0;
-       ASSERT(!(*bpp)->b_error);
-
-       agf = XFS_BUF_TO_AGF(*bpp);
-       pag = xfs_perag_get(mp, agno);
-       if (!pag->pagf_init) {
-               pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
-               pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
-               pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
-               pag->pagf_longest = be32_to_cpu(agf->agf_longest);
-               pag->pagf_levels[XFS_BTNUM_BNOi] =
-                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
-               pag->pagf_levels[XFS_BTNUM_CNTi] =
-                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
-               spin_lock_init(&pag->pagb_lock);
-               pag->pagb_count = 0;
-               pag->pagb_tree = RB_ROOT;
-               pag->pagf_init = 1;
-       }
-#ifdef DEBUG
-       else if (!XFS_FORCED_SHUTDOWN(mp)) {
-               ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
-               ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
-               ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
-               ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
-               ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
-                      be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
-               ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
-                      be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
-       }
-#endif
-       xfs_perag_put(pag);
-       return 0;
-}
-
-/*
- * Allocate an extent (variable-size).
- * Depending on the allocation type, we either look in a single allocation
- * group or loop over the allocation groups to find the result.
- */
-int                            /* error */
-xfs_alloc_vextent(
-       xfs_alloc_arg_t *args)  /* allocation argument structure */
-{
-       xfs_agblock_t   agsize; /* allocation group size */
-       int             error;
-       int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
-       xfs_extlen_t    minleft;/* minimum left value, temp copy */
-       xfs_mount_t     *mp;    /* mount structure pointer */
-       xfs_agnumber_t  sagno;  /* starting allocation group number */
-       xfs_alloctype_t type;   /* input allocation type */
-       int             bump_rotor = 0;
-       int             no_min = 0;
-       xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
-
-       mp = args->mp;
-       type = args->otype = args->type;
-       args->agbno = NULLAGBLOCK;
-       /*
-        * Just fix this up, for the case where the last a.g. is shorter
-        * (or there's only one a.g.) and the caller couldn't easily figure
-        * that out (xfs_bmap_alloc).
-        */
-       agsize = mp->m_sb.sb_agblocks;
-       if (args->maxlen > agsize)
-               args->maxlen = agsize;
-       if (args->alignment == 0)
-               args->alignment = 1;
-       ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
-       ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
-       ASSERT(args->minlen <= args->maxlen);
-       ASSERT(args->minlen <= agsize);
-       ASSERT(args->mod < args->prod);
-       if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
-           XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
-           args->minlen > args->maxlen || args->minlen > agsize ||
-           args->mod >= args->prod) {
-               args->fsbno = NULLFSBLOCK;
-               trace_xfs_alloc_vextent_badargs(args);
-               return 0;
-       }
-       minleft = args->minleft;
-
-       switch (type) {
-       case XFS_ALLOCTYPE_THIS_AG:
-       case XFS_ALLOCTYPE_NEAR_BNO:
-       case XFS_ALLOCTYPE_THIS_BNO:
-               /*
-                * These three force us into a single a.g.
-                */
-               args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-               args->pag = xfs_perag_get(mp, args->agno);
-               args->minleft = 0;
-               error = xfs_alloc_fix_freelist(args, 0);
-               args->minleft = minleft;
-               if (error) {
-                       trace_xfs_alloc_vextent_nofix(args);
-                       goto error0;
-               }
-               if (!args->agbp) {
-                       trace_xfs_alloc_vextent_noagbp(args);
-                       break;
-               }
-               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-               if ((error = xfs_alloc_ag_vextent(args)))
-                       goto error0;
-               break;
-       case XFS_ALLOCTYPE_START_BNO:
-               /*
-                * Try near allocation first, then anywhere-in-ag after
-                * the first a.g. fails.
-                */
-               if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
-                   (mp->m_flags & XFS_MOUNT_32BITINODES)) {
-                       args->fsbno = XFS_AGB_TO_FSB(mp,
-                                       ((mp->m_agfrotor / rotorstep) %
-                                       mp->m_sb.sb_agcount), 0);
-                       bump_rotor = 1;
-               }
-               args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-               args->type = XFS_ALLOCTYPE_NEAR_BNO;
-               /* FALLTHROUGH */
-       case XFS_ALLOCTYPE_ANY_AG:
-       case XFS_ALLOCTYPE_START_AG:
-       case XFS_ALLOCTYPE_FIRST_AG:
-               /*
-                * Rotate through the allocation groups looking for a winner.
-                */
-               if (type == XFS_ALLOCTYPE_ANY_AG) {
-                       /*
-                        * Start with the last place we left off.
-                        */
-                       args->agno = sagno = (mp->m_agfrotor / rotorstep) %
-                                       mp->m_sb.sb_agcount;
-                       args->type = XFS_ALLOCTYPE_THIS_AG;
-                       flags = XFS_ALLOC_FLAG_TRYLOCK;
-               } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
-                       /*
-                        * Start with allocation group given by bno.
-                        */
-                       args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-                       args->type = XFS_ALLOCTYPE_THIS_AG;
-                       sagno = 0;
-                       flags = 0;
-               } else {
-                       if (type == XFS_ALLOCTYPE_START_AG)
-                               args->type = XFS_ALLOCTYPE_THIS_AG;
-                       /*
-                        * Start with the given allocation group.
-                        */
-                       args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-                       flags = XFS_ALLOC_FLAG_TRYLOCK;
-               }
-               /*
-                * Loop over allocation groups twice; first time with
-                * trylock set, second time without.
-                */
-               for (;;) {
-                       args->pag = xfs_perag_get(mp, args->agno);
-                       if (no_min) args->minleft = 0;
-                       error = xfs_alloc_fix_freelist(args, flags);
-                       args->minleft = minleft;
-                       if (error) {
-                               trace_xfs_alloc_vextent_nofix(args);
-                               goto error0;
-                       }
-                       /*
-                        * If we get a buffer back then the allocation will fly.
-                        */
-                       if (args->agbp) {
-                               if ((error = xfs_alloc_ag_vextent(args)))
-                                       goto error0;
-                               break;
-                       }
-
-                       trace_xfs_alloc_vextent_loopfailed(args);
-
-                       /*
-                        * Didn't work, figure out the next iteration.
-                        */
-                       if (args->agno == sagno &&
-                           type == XFS_ALLOCTYPE_START_BNO)
-                               args->type = XFS_ALLOCTYPE_THIS_AG;
-                       /*
-                        * For the first allocation, we can try any AG to get
-                        * space.  However, if we already have allocated a
-                        * block, we don't want to try AGs whose number is below
-                        * sagno. Otherwise, we may end up with out-of-order
-                        * locking of AGF, which might cause deadlock.
-                        */
-                       if (++(args->agno) == mp->m_sb.sb_agcount) {
-                               if (args->firstblock != NULLFSBLOCK)
-                                       args->agno = sagno;
-                               else
-                                       args->agno = 0;
-                       }
-                       /*
-                        * Reached the starting a.g., must either be done
-                        * or switch to non-trylock mode.
-                        */
-                       if (args->agno == sagno) {
-                               if (no_min == 1) {
-                                       args->agbno = NULLAGBLOCK;
-                                       trace_xfs_alloc_vextent_allfailed(args);
-                                       break;
-                               }
-                               if (flags == 0) {
-                                       no_min = 1;
-                               } else {
-                                       flags = 0;
-                                       if (type == XFS_ALLOCTYPE_START_BNO) {
-                                               args->agbno = XFS_FSB_TO_AGBNO(mp,
-                                                       args->fsbno);
-                                               args->type = XFS_ALLOCTYPE_NEAR_BNO;
-                                       }
-                               }
-                       }
-                       xfs_perag_put(args->pag);
-               }
-               if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
-                       if (args->agno == sagno)
-                               mp->m_agfrotor = (mp->m_agfrotor + 1) %
-                                       (mp->m_sb.sb_agcount * rotorstep);
-                       else
-                               mp->m_agfrotor = (args->agno * rotorstep + 1) %
-                                       (mp->m_sb.sb_agcount * rotorstep);
-               }
-               break;
-       default:
-               ASSERT(0);
-               /* NOTREACHED */
-       }
-       if (args->agbno == NULLAGBLOCK)
-               args->fsbno = NULLFSBLOCK;
-       else {
-               args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
-#ifdef DEBUG
-               ASSERT(args->len >= args->minlen);
-               ASSERT(args->len <= args->maxlen);
-               ASSERT(args->agbno % args->alignment == 0);
-               XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
-                       args->len);
-#endif
-       }
-       xfs_perag_put(args->pag);
-       return 0;
-error0:
-       xfs_perag_put(args->pag);
-       return error;
-}
-
-/*
- * Free an extent.
- * Just break up the extent address and hand off to xfs_free_ag_extent
- * after fixing up the freelist.
- */
-int                            /* error */
-xfs_free_extent(
-       xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_fsblock_t   bno,    /* starting block number of extent */
-       xfs_extlen_t    len)    /* length of extent */
-{
-       xfs_alloc_arg_t args;
-       int             error;
-
-       ASSERT(len != 0);
-       memset(&args, 0, sizeof(xfs_alloc_arg_t));
-       args.tp = tp;
-       args.mp = tp->t_mountp;
-
-       /*
-        * validate that the block number is legal - this enables us to detect
-        * and handle a silent filesystem corruption rather than crashing.
-        */
-       args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
-       if (args.agno >= args.mp->m_sb.sb_agcount)
-               return EFSCORRUPTED;
-
-       args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
-       if (args.agbno >= args.mp->m_sb.sb_agblocks)
-               return EFSCORRUPTED;
-
-       args.pag = xfs_perag_get(args.mp, args.agno);
-       ASSERT(args.pag);
-
-       error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
-       if (error)
-               goto error0;
-
-       /* validate the extent size is legal now we have the agf locked */
-       if (args.agbno + len >
-                       be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
-               error = EFSCORRUPTED;
-               goto error0;
-       }
-
-       error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
-       if (!error)
-               xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
-error0:
-       xfs_perag_put(args.pag);
-       return error;
-}
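
The xfs_alloc_compute_maxlevels() helper in the file above sizes the worst case for the per-AG free space btrees: at most every other AG block can be a separate free extent, and each btree level must hold the ceiling of the level below divided by the minimum records per block. A minimal standalone sketch of that same calculation follows; the block and record counts in main() are illustrative only, not taken from any real superblock geometry.

    #include <stdio.h>

    /* Worst-case btree height when every block holds only the minimum record count. */
    static int compute_maxlevels(unsigned int agblocks,
                                 unsigned int minleafrecs,
                                 unsigned int minnoderecs)
    {
            /* Free and allocated blocks can alternate, so ~half the blocks are extents. */
            unsigned int maxleafents = (agblocks + 1) / 2;
            unsigned int maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
            int level;

            for (level = 1; maxblocks > 1; level++)
                    maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
            return level;
    }

    int main(void)
    {
            /* Hypothetical geometry: 1M-block AG, minimum of 16 records per block. */
            printf("maxlevels = %d\n", compute_maxlevels(1048576, 16, 16));
            return 0;
    }
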
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
deleted file mode 100644 (file)
index 8358f1d..0000000
+++ /dev/null
@@ -1,504 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_extent_busy.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_trans.h"
-
-
-STATIC struct xfs_btree_cur *
-xfs_allocbt_dup_cursor(
-       struct xfs_btree_cur    *cur)
-{
-       return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
-                       cur->bc_private.a.agbp, cur->bc_private.a.agno,
-                       cur->bc_btnum);
-}
-
-STATIC void
-xfs_allocbt_set_root(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       int                     inc)
-{
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
-       xfs_agnumber_t          seqno = be32_to_cpu(agf->agf_seqno);
-       int                     btnum = cur->bc_btnum;
-       struct xfs_perag        *pag = xfs_perag_get(cur->bc_mp, seqno);
-
-       ASSERT(ptr->s != 0);
-
-       agf->agf_roots[btnum] = ptr->s;
-       be32_add_cpu(&agf->agf_levels[btnum], inc);
-       pag->pagf_levels[btnum] += inc;
-       xfs_perag_put(pag);
-
-       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
-}
-
-STATIC int
-xfs_allocbt_alloc_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *start,
-       union xfs_btree_ptr     *new,
-       int                     *stat)
-{
-       int                     error;
-       xfs_agblock_t           bno;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
-       /* Allocate the new block from the freelist. If we can't, give up.  */
-       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
-                                      &bno, 1);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-               return error;
-       }
-
-       if (bno == NULLAGBLOCK) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-
-       xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
-
-       xfs_trans_agbtree_delta(cur->bc_tp, 1);
-       new->s = cpu_to_be32(bno);
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-}
-
-STATIC int
-xfs_allocbt_free_block(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
-{
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
-       xfs_agblock_t           bno;
-       int                     error;
-
-       bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
-       error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
-       if (error)
-               return error;
-
-       xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
-                             XFS_EXTENT_BUSY_SKIP_DISCARD);
-       xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
-       xfs_trans_binval(cur->bc_tp, bp);
-       return 0;
-}
-
-/*
- * Update the longest extent in the AGF
- */
-STATIC void
-xfs_allocbt_update_lastrec(
-       struct xfs_btree_cur    *cur,
-       struct xfs_btree_block  *block,
-       union xfs_btree_rec     *rec,
-       int                     ptr,
-       int                     reason)
-{
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-       xfs_agnumber_t          seqno = be32_to_cpu(agf->agf_seqno);
-       struct xfs_perag        *pag;
-       __be32                  len;
-       int                     numrecs;
-
-       ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
-
-       switch (reason) {
-       case LASTREC_UPDATE:
-               /*
-                * If this is the last leaf block and it's the last record,
-                * then update the size of the longest extent in the AG.
-                */
-               if (ptr != xfs_btree_get_numrecs(block))
-                       return;
-               len = rec->alloc.ar_blockcount;
-               break;
-       case LASTREC_INSREC:
-               if (be32_to_cpu(rec->alloc.ar_blockcount) <=
-                   be32_to_cpu(agf->agf_longest))
-                       return;
-               len = rec->alloc.ar_blockcount;
-               break;
-       case LASTREC_DELREC:
-               numrecs = xfs_btree_get_numrecs(block);
-               if (ptr <= numrecs)
-                       return;
-               ASSERT(ptr == numrecs + 1);
-
-               if (numrecs) {
-                       xfs_alloc_rec_t *rrp;
-
-                       rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
-                       len = rrp->ar_blockcount;
-               } else {
-                       len = 0;
-               }
-
-               break;
-       default:
-               ASSERT(0);
-               return;
-       }
-
-       agf->agf_longest = len;
-       pag = xfs_perag_get(cur->bc_mp, seqno);
-       pag->pagf_longest = be32_to_cpu(len);
-       xfs_perag_put(pag);
-       xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
-}
-
-STATIC int
-xfs_allocbt_get_minrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       return cur->bc_mp->m_alloc_mnr[level != 0];
-}
-
-STATIC int
-xfs_allocbt_get_maxrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       return cur->bc_mp->m_alloc_mxr[level != 0];
-}
-
-STATIC void
-xfs_allocbt_init_key_from_rec(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(rec->alloc.ar_startblock != 0);
-
-       key->alloc.ar_startblock = rec->alloc.ar_startblock;
-       key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
-}
-
-STATIC void
-xfs_allocbt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(key->alloc.ar_startblock != 0);
-
-       rec->alloc.ar_startblock = key->alloc.ar_startblock;
-       rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
-}
-
-STATIC void
-xfs_allocbt_init_rec_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(cur->bc_rec.a.ar_startblock != 0);
-
-       rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
-       rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
-}
-
-STATIC void
-xfs_allocbt_init_ptr_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-
-       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
-       ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
-
-       ptr->s = agf->agf_roots[cur->bc_btnum];
-}
-
-STATIC __int64_t
-xfs_allocbt_key_diff(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *key)
-{
-       xfs_alloc_rec_incore_t  *rec = &cur->bc_rec.a;
-       xfs_alloc_key_t         *kp = &key->alloc;
-       __int64_t               diff;
-
-       if (cur->bc_btnum == XFS_BTNUM_BNO) {
-               return (__int64_t)be32_to_cpu(kp->ar_startblock) -
-                               rec->ar_startblock;
-       }
-
-       diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
-       if (diff)
-               return diff;
-
-       return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
-}
-
-static bool
-xfs_allocbt_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_perag        *pag = bp->b_pag;
-       unsigned int            level;
-
-       /*
-        * magic number and level verification
-        *
-        * During growfs operations, we can't verify the exact level or owner as
-        * the perag is not fully initialised and hence not attached to the
-        * buffer.  In this case, check against the maximum tree depth.
-        *
-        * Similarly, during log recovery we will have a perag structure
-        * attached, but the agf information will not yet have been initialised
-        * from the on disk AGF. Again, we can only check against maximum limits
-        * in this case.
-        */
-       level = be16_to_cpu(block->bb_level);
-       switch (block->bb_magic) {
-       case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
-               if (!xfs_sb_version_hascrc(&mp->m_sb))
-                       return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
-                       return false;
-               if (pag &&
-                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
-                       return false;
-               /* fall through */
-       case cpu_to_be32(XFS_ABTB_MAGIC):
-               if (pag && pag->pagf_init) {
-                       if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
-                               return false;
-               } else if (level >= mp->m_ag_maxlevels)
-                       return false;
-               break;
-       case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
-               if (!xfs_sb_version_hascrc(&mp->m_sb))
-                       return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
-                       return false;
-               if (pag &&
-                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
-                       return false;
-               /* fall through */
-       case cpu_to_be32(XFS_ABTC_MAGIC):
-               if (pag && pag->pagf_init) {
-                       if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
-                               return false;
-               } else if (level >= mp->m_ag_maxlevels)
-                       return false;
-               break;
-       default:
-               return false;
-       }
-
-       /* numrecs verification */
-       if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
-               return false;
-
-       /* sibling pointer verification */
-       if (!block->bb_u.s.bb_leftsib ||
-           (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
-       if (!block->bb_u.s.bb_rightsib ||
-           (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
-
-       return true;
-}
-
-static void
-xfs_allocbt_read_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_allocbt_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
-}
-
-static void
-xfs_allocbt_write_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_allocbt_verify(bp)) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-       xfs_btree_sblock_calc_crc(bp);
-
-}
-
-const struct xfs_buf_ops xfs_allocbt_buf_ops = {
-       .verify_read = xfs_allocbt_read_verify,
-       .verify_write = xfs_allocbt_write_verify,
-};
-
-
-#if defined(DEBUG) || defined(XFS_WARN)
-STATIC int
-xfs_allocbt_keys_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *k1,
-       union xfs_btree_key     *k2)
-{
-       if (cur->bc_btnum == XFS_BTNUM_BNO) {
-               return be32_to_cpu(k1->alloc.ar_startblock) <
-                      be32_to_cpu(k2->alloc.ar_startblock);
-       } else {
-               return be32_to_cpu(k1->alloc.ar_blockcount) <
-                       be32_to_cpu(k2->alloc.ar_blockcount) ||
-                       (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
-                        be32_to_cpu(k1->alloc.ar_startblock) <
-                        be32_to_cpu(k2->alloc.ar_startblock));
-       }
-}
-
-STATIC int
-xfs_allocbt_recs_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *r1,
-       union xfs_btree_rec     *r2)
-{
-       if (cur->bc_btnum == XFS_BTNUM_BNO) {
-               return be32_to_cpu(r1->alloc.ar_startblock) +
-                       be32_to_cpu(r1->alloc.ar_blockcount) <=
-                       be32_to_cpu(r2->alloc.ar_startblock);
-       } else {
-               return be32_to_cpu(r1->alloc.ar_blockcount) <
-                       be32_to_cpu(r2->alloc.ar_blockcount) ||
-                       (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
-                        be32_to_cpu(r1->alloc.ar_startblock) <
-                        be32_to_cpu(r2->alloc.ar_startblock));
-       }
-}
-#endif /* DEBUG */
-
-static const struct xfs_btree_ops xfs_allocbt_ops = {
-       .rec_len                = sizeof(xfs_alloc_rec_t),
-       .key_len                = sizeof(xfs_alloc_key_t),
-
-       .dup_cursor             = xfs_allocbt_dup_cursor,
-       .set_root               = xfs_allocbt_set_root,
-       .alloc_block            = xfs_allocbt_alloc_block,
-       .free_block             = xfs_allocbt_free_block,
-       .update_lastrec         = xfs_allocbt_update_lastrec,
-       .get_minrecs            = xfs_allocbt_get_minrecs,
-       .get_maxrecs            = xfs_allocbt_get_maxrecs,
-       .init_key_from_rec      = xfs_allocbt_init_key_from_rec,
-       .init_rec_from_key      = xfs_allocbt_init_rec_from_key,
-       .init_rec_from_cur      = xfs_allocbt_init_rec_from_cur,
-       .init_ptr_from_cur      = xfs_allocbt_init_ptr_from_cur,
-       .key_diff               = xfs_allocbt_key_diff,
-       .buf_ops                = &xfs_allocbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
-       .keys_inorder           = xfs_allocbt_keys_inorder,
-       .recs_inorder           = xfs_allocbt_recs_inorder,
-#endif
-};
-
-/*
- * Allocate a new allocation btree cursor.
- */
-struct xfs_btree_cur *                 /* new alloc btree cursor */
-xfs_allocbt_init_cursor(
-       struct xfs_mount        *mp,            /* file system mount point */
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_buf          *agbp,          /* buffer for agf structure */
-       xfs_agnumber_t          agno,           /* allocation group number */
-       xfs_btnum_t             btnum)          /* btree identifier */
-{
-       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
-       struct xfs_btree_cur    *cur;
-
-       ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-
-       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-
-       cur->bc_tp = tp;
-       cur->bc_mp = mp;
-       cur->bc_btnum = btnum;
-       cur->bc_blocklog = mp->m_sb.sb_blocklog;
-       cur->bc_ops = &xfs_allocbt_ops;
-
-       if (btnum == XFS_BTNUM_CNT) {
-               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
-               cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
-       } else {
-               cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
-       }
-
-       cur->bc_private.a.agbp = agbp;
-       cur->bc_private.a.agno = agno;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
-       return cur;
-}
-
-/*
- * Calculate number of records in an alloc btree block.
- */
-int
-xfs_allocbt_maxrecs(
-       struct xfs_mount        *mp,
-       int                     blocklen,
-       int                     leaf)
-{
-       blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
-
-       if (leaf)
-               return blocklen / sizeof(xfs_alloc_rec_t);
-       return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
-}
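
For reference, a worked example of the maxrecs arithmetic above, written as a small standalone C program. The record, key and pointer sizes (8, 8 and 4 bytes) and the short-form btree block header lengths (16 bytes without CRCs, 56 bytes with) are assumptions about the on-disk format; they are not part of this hunk.

#include <stdio.h>

/* Assumed on-disk sizes for the allocation btree (not taken from this diff). */
#define ALLOC_REC_SIZE          8   /* xfs_alloc_rec_t: startblock + blockcount */
#define ALLOC_KEY_SIZE          8   /* xfs_alloc_key_t: the same two fields */
#define ALLOC_PTR_SIZE          4   /* xfs_alloc_ptr_t: one AG block number */
#define SBLOCK_HDR_LEN          16  /* short btree block header, no CRC */
#define SBLOCK_CRC_HDR_LEN      56  /* short btree block header, CRC enabled */

/* Mirror of the xfs_allocbt_maxrecs() logic for a given header length. */
static int allocbt_maxrecs(int blocklen, int hdrlen, int leaf)
{
        blocklen -= hdrlen;
        if (leaf)
                return blocklen / ALLOC_REC_SIZE;
        return blocklen / (ALLOC_KEY_SIZE + ALLOC_PTR_SIZE);
}

int main(void)
{
        /* 4096-byte filesystem blocks, both header variants. */
        printf("no-CRC leaf: %d records\n", allocbt_maxrecs(4096, SBLOCK_HDR_LEN, 1));
        printf("no-CRC node: %d entries\n", allocbt_maxrecs(4096, SBLOCK_HDR_LEN, 0));
        printf("CRC leaf:    %d records\n", allocbt_maxrecs(4096, SBLOCK_CRC_HDR_LEN, 1));
        printf("CRC node:    %d entries\n", allocbt_maxrecs(4096, SBLOCK_CRC_HDR_LEN, 0));
        return 0;
}

Under those assumed sizes, a 4096-byte block holds 510 leaf records and 340 key/pointer pairs without CRCs, and 505 and 336 respectively with CRCs enabled.
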
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
deleted file mode 100644 (file)
index 7d95b16..0000000
+++ /dev/null
@@ -1,1459 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_dinode.h"
-
-/*
- * xfs_attr.c
- *
- * Provide the external interfaces to manage attribute lists.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Internal routines when attribute list fits inside the inode.
- */
-STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
-
-/*
- * Internal routines when attribute list is one block.
- */
-STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
-STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
-STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
-
-/*
- * Internal routines when attribute list is more than one block.
- */
-STATIC int xfs_attr_node_get(xfs_da_args_t *args);
-STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
-STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
-STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
-STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
-
-
-STATIC int
-xfs_attr_args_init(
-       struct xfs_da_args      *args,
-       struct xfs_inode        *dp,
-       const unsigned char     *name,
-       int                     flags)
-{
-       if (!name)
-               return EINVAL;
-
-       memset(args, 0, sizeof(*args));
-       args->geo = dp->i_mount->m_attr_geo;
-       args->whichfork = XFS_ATTR_FORK;
-       args->dp = dp;
-       args->flags = flags;
-       args->name = name;
-       args->namelen = strlen((const char *)name);
-       if (args->namelen >= MAXNAMELEN)
-               return EFAULT;          /* match IRIX behaviour */
-
-       args->hashval = xfs_da_hashname(args->name, args->namelen);
-       return 0;
-}
-
-int
-xfs_inode_hasattr(
-       struct xfs_inode        *ip)
-{
-       if (!XFS_IFORK_Q(ip) ||
-           (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-            ip->i_d.di_anextents == 0))
-               return 0;
-       return 1;
-}
-
-/*========================================================================
- * Overall external interface routines.
- *========================================================================*/
-
-int
-xfs_attr_get(
-       struct xfs_inode        *ip,
-       const unsigned char     *name,
-       unsigned char           *value,
-       int                     *valuelenp,
-       int                     flags)
-{
-       struct xfs_da_args      args;
-       uint                    lock_mode;
-       int                     error;
-
-       XFS_STATS_INC(xs_attr_get);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return EIO;
-
-       if (!xfs_inode_hasattr(ip))
-               return ENOATTR;
-
-       error = xfs_attr_args_init(&args, ip, name, flags);
-       if (error)
-               return error;
-
-       args.value = value;
-       args.valuelen = *valuelenp;
-
-       lock_mode = xfs_ilock_attr_map_shared(ip);
-       if (!xfs_inode_hasattr(ip))
-               error = ENOATTR;
-       else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
-               error = xfs_attr_shortform_getvalue(&args);
-       else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
-               error = xfs_attr_leaf_get(&args);
-       else
-               error = xfs_attr_node_get(&args);
-       xfs_iunlock(ip, lock_mode);
-
-       *valuelenp = args.valuelen;
-       return error == EEXIST ? 0 : error;
-}
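
As a usage note for xfs_attr_get(): *valuelenp carries the caller's buffer size in and the value length out, and the internal EEXIST "name found" result has already been mapped to zero by the time the function returns. A minimal, hypothetical caller sketch follows; the attribute name, buffer size and ATTR_ROOT namespace are illustrative only, not taken from this diff.

/*
 * Hypothetical caller sketch (not from this diff): read one attribute
 * value into a fixed-size buffer.
 */
STATIC int
example_read_attr(
        struct xfs_inode        *ip)
{
        unsigned char           buf[64];        /* assumed to be large enough */
        int                     valuelen = sizeof(buf);

        /* on success, valuelen now holds the actual value length */
        return xfs_attr_get(ip, (const unsigned char *)"state", buf,
                            &valuelen, ATTR_ROOT);
}
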
-
-/*
- * Calculate how many blocks we need for the new attribute,
- */
-STATIC int
-xfs_attr_calc_size(
-       struct xfs_da_args      *args,
-       int                     *local)
-{
-       struct xfs_mount        *mp = args->dp->i_mount;
-       int                     size;
-       int                     nblks;
-
-       /*
-        * Determine space new attribute will use, and if it would be
-        * "local" or "remote" (note: local != inline).
-        */
-       size = xfs_attr_leaf_newentsize(args, local);
-       nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
-       if (*local) {
-               if (size > (args->geo->blksize / 2)) {
-                       /* Double split possible */
-                       nblks *= 2;
-               }
-       } else {
-               /*
-                * Out of line attribute, cannot double split, but
-                * make room for the attribute value itself.
-                */
-               uint    dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
-               nblks += dblocks;
-               nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
-       }
-
-       return nblks;
-}
-
-int
-xfs_attr_set(
-       struct xfs_inode        *dp,
-       const unsigned char     *name,
-       unsigned char           *value,
-       int                     valuelen,
-       int                     flags)
-{
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_da_args      args;
-       struct xfs_bmap_free    flist;
-       struct xfs_trans_res    tres;
-       xfs_fsblock_t           firstblock;
-       int                     rsvd = (flags & ATTR_ROOT) != 0;
-       int                     error, err2, committed, local;
-
-       XFS_STATS_INC(xs_attr_set);
-
-       if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-               return EIO;
-
-       error = xfs_attr_args_init(&args, dp, name, flags);
-       if (error)
-               return error;
-
-       args.value = value;
-       args.valuelen = valuelen;
-       args.firstblock = &firstblock;
-       args.flist = &flist;
-       args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-       args.total = xfs_attr_calc_size(&args, &local);
-
-       error = xfs_qm_dqattach(dp, 0);
-       if (error)
-               return error;
-
-       /*
-        * If the inode doesn't have an attribute fork, add one.
-        * (inode must not be locked when we call this routine)
-        */
-       if (XFS_IFORK_Q(dp) == 0) {
-               int sf_size = sizeof(xfs_attr_sf_hdr_t) +
-                       XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
-
-               error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
-               if (error)
-                       return error;
-       }
-
-       /*
-        * Start our first transaction of the day.
-        *
-        * All future transactions during this code must be "chained" off
-        * this one via the trans_dup() call.  All transactions will contain
-        * the inode, and the inode will always be marked with trans_ihold().
-        * Since the inode will be locked in all transactions, we must log
-        * the inode in every transaction to let it float upward through
-        * the log.
-        */
-       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
-
-       /*
-        * Root fork attributes can use reserved data blocks for this
-        * operation if necessary
-        */
-
-       if (rsvd)
-               args.trans->t_flags |= XFS_TRANS_RESERVE;
-
-       tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
-                        M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
-       tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
-       tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
-       error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
-       if (error) {
-               xfs_trans_cancel(args.trans, 0);
-               return error;
-       }
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
-
-       error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
-                               rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
-                                      XFS_QMOPT_RES_REGBLKS);
-       if (error) {
-               xfs_iunlock(dp, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
-               return error;
-       }
-
-       xfs_trans_ijoin(args.trans, dp, 0);
-
-       /*
-        * If the attribute list is non-existent or a shortform list,
-        * upgrade it to a single-leaf-block attribute list.
-        */
-       if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
-           (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-            dp->i_d.di_anextents == 0)) {
-
-               /*
-                * Build initial attribute list (if required).
-                */
-               if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
-                       xfs_attr_shortform_create(&args);
-
-               /*
-                * Try to add the attr to the attribute list in
-                * the inode.
-                */
-               error = xfs_attr_shortform_addname(&args);
-               if (error != ENOSPC) {
-                       /*
-                        * Commit the shortform mods, and we're done.
-                        * NOTE: this is also the error path (EEXIST, etc).
-                        */
-                       ASSERT(args.trans != NULL);
-
-                       /*
-                        * If this is a synchronous mount, make sure that
-                        * the transaction goes to disk before returning
-                        * to the user.
-                        */
-                       if (mp->m_flags & XFS_MOUNT_WSYNC)
-                               xfs_trans_set_sync(args.trans);
-
-                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                               xfs_trans_ichgtime(args.trans, dp,
-                                                       XFS_ICHGTIME_CHG);
-                       }
-                       err2 = xfs_trans_commit(args.trans,
-                                                XFS_TRANS_RELEASE_LOG_RES);
-                       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-                       return error ? error : err2;
-               }
-
-               /*
-                * It won't fit in the shortform, transform to a leaf block.
-                * GROT: another possible req'mt for a double-split btree op.
-                */
-               xfs_bmap_init(args.flist, args.firstblock);
-               error = xfs_attr_shortform_to_leaf(&args);
-               if (!error) {
-                       error = xfs_bmap_finish(&args.trans, args.flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args.trans = NULL;
-                       xfs_bmap_cancel(&flist);
-                       goto out;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args.trans, dp, 0);
-
-               /*
-                * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf.
-                */
-
-               error = xfs_trans_roll(&args.trans, dp);
-               if (error)
-                       goto out;
-
-       }
-
-       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
-               error = xfs_attr_leaf_addname(&args);
-       else
-               error = xfs_attr_node_addname(&args);
-       if (error)
-               goto out;
-
-       /*
-        * If this is a synchronous mount, make sure that the
-        * transaction goes to disk before returning to the user.
-        */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(args.trans);
-
-       if ((flags & ATTR_KERNOTIME) == 0)
-               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
-
-       /*
-        * Commit the last in the sequence of transactions.
-        */
-       xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
-       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-       return error;
-
-out:
-       if (args.trans) {
-               xfs_trans_cancel(args.trans,
-                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       }
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-       return error;
-}
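
The chained-transaction rule in the comment above recurs throughout this file as the same four-step shape: wrap a bmap operation in xfs_bmap_init()/xfs_bmap_finish(), cancel the free list on error, re-join the inode if the chain was committed, then xfs_trans_roll() into the next transaction. The helper below is a condensed, hypothetical sketch of that recurring pattern, built only from calls that appear in this file.

/*
 * Condensed sketch of the roll pattern used repeatedly in this file
 * (not a real XFS helper).  "do_bmap_op" stands in for
 * xfs_attr_shortform_to_leaf(), xfs_attr3_leaf_to_node(), xfs_da3_split()
 * and friends.
 */
STATIC int
example_roll_step(
        struct xfs_da_args      *args,
        struct xfs_inode        *dp,
        int                     (*do_bmap_op)(struct xfs_da_args *))
{
        int                     committed;
        int                     error;

        xfs_bmap_init(args->flist, args->firstblock);
        error = do_bmap_op(args);
        if (!error)
                error = xfs_bmap_finish(&args->trans, args->flist, &committed);
        if (error) {
                /* mirror the error handling used throughout this file */
                args->trans = NULL;
                xfs_bmap_cancel(args->flist);
                return error;
        }

        /*
         * bmap_finish() may have committed the last transaction and started
         * a new one, so the inode must be re-joined before it is logged again.
         */
        if (committed)
                xfs_trans_ijoin(args->trans, dp, 0);

        /* Commit this step and start the next transaction in the chain. */
        return xfs_trans_roll(&args->trans, dp);
}
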
-
-/*
- * Generic handler routine to remove a name from an attribute list.
- * Transitions attribute list from Btree to shortform as necessary.
- */
-int
-xfs_attr_remove(
-       struct xfs_inode        *dp,
-       const unsigned char     *name,
-       int                     flags)
-{
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_da_args      args;
-       struct xfs_bmap_free    flist;
-       xfs_fsblock_t           firstblock;
-       int                     error;
-
-       XFS_STATS_INC(xs_attr_remove);
-
-       if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-               return EIO;
-
-       if (!xfs_inode_hasattr(dp))
-               return ENOATTR;
-
-       error = xfs_attr_args_init(&args, dp, name, flags);
-       if (error)
-               return error;
-
-       args.firstblock = &firstblock;
-       args.flist = &flist;
-
-       /*
-        * we have no control over the attribute names that userspace passes us
-        * to remove, so we have to allow the name lookup prior to attribute
-        * removal to fail.
-        */
-       args.op_flags = XFS_DA_OP_OKNOENT;
-
-       error = xfs_qm_dqattach(dp, 0);
-       if (error)
-               return error;
-
-       /*
-        * Start our first transaction of the day.
-        *
-        * All future transactions during this code must be "chained" off
-        * this one via the trans_dup() call.  All transactions will contain
-        * the inode, and the inode will always be marked with trans_ihold().
-        * Since the inode will be locked in all transactions, we must log
-        * the inode in every transaction to let it float upward through
-        * the log.
-        */
-       args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
-
-       /*
-        * Root fork attributes can use reserved data blocks for this
-        * operation if necessary
-        */
-
-       if (flags & ATTR_ROOT)
-               args.trans->t_flags |= XFS_TRANS_RESERVE;
-
-       error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
-                                 XFS_ATTRRM_SPACE_RES(mp), 0);
-       if (error) {
-               xfs_trans_cancel(args.trans, 0);
-               return error;
-       }
-
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
-       /*
-        * No need to make quota reservations here. We expect to release
-        * blocks, not allocate them, in the common case.
-        */
-       xfs_trans_ijoin(args.trans, dp, 0);
-
-       if (!xfs_inode_hasattr(dp)) {
-               error = ENOATTR;
-       } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
-               error = xfs_attr_shortform_remove(&args);
-       } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-               error = xfs_attr_leaf_removename(&args);
-       } else {
-               error = xfs_attr_node_removename(&args);
-       }
-
-       if (error)
-               goto out;
-
-       /*
-        * If this is a synchronous mount, make sure that the
-        * transaction goes to disk before returning to the user.
-        */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(args.trans);
-
-       if ((flags & ATTR_KERNOTIME) == 0)
-               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
-
-       /*
-        * Commit the last in the sequence of transactions.
-        */
-       xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
-       error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-       return error;
-
-out:
-       if (args.trans) {
-               xfs_trans_cancel(args.trans,
-                       XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       }
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-       return error;
-}
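
A minimal, hypothetical caller sketch for the set/remove pair above; the attribute name, value and ATTR_ROOT namespace are illustrative only and not taken from this diff.

/*
 * Hypothetical caller sketch (not from this diff): create or replace an
 * attribute, then delete it again.
 */
STATIC int
example_set_and_remove(
        struct xfs_inode        *dp)
{
        unsigned char           value[] = "cached";
        int                     error;

        error = xfs_attr_set(dp, (const unsigned char *)"state", value,
                             sizeof(value) - 1, ATTR_ROOT);
        if (error)
                return error;

        return xfs_attr_remove(dp, (const unsigned char *)"state", ATTR_ROOT);
}
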
-
-/*========================================================================
- * External routines when attribute list is inside the inode
- *========================================================================*/
-
-/*
- * Add a name to the shortform attribute list structure
- * This is the external routine.
- */
-STATIC int
-xfs_attr_shortform_addname(xfs_da_args_t *args)
-{
-       int newsize, forkoff, retval;
-
-       trace_xfs_attr_sf_addname(args);
-
-       retval = xfs_attr_shortform_lookup(args);
-       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
-               return retval;
-       } else if (retval == EEXIST) {
-               if (args->flags & ATTR_CREATE)
-                       return retval;
-               retval = xfs_attr_shortform_remove(args);
-               ASSERT(retval == 0);
-       }
-
-       if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
-           args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
-               return ENOSPC;
-
-       newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
-       newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
-
-       forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
-       if (!forkoff)
-               return ENOSPC;
-
-       xfs_attr_shortform_add(args, forkoff);
-       return 0;
-}
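
The ENOSPC checks above follow from the shortform layout: the name and value lengths are stored in single-byte fields, so XFS_ATTR_SF_ENTSIZE_MAX works out to 255, and each entry costs roughly a 3-byte header plus the name and value bytes. Those layout details are assumptions rather than something visible in this hunk; a small standalone sketch of the arithmetic:

#include <stdio.h>

/* Assumed shortform layout details (not taken from this diff). */
#define SF_ENTSIZE_MAX  255     /* namelen and valuelen are single bytes */
#define SF_ENTRY_HDR    3       /* namelen + valuelen + flags */

/* Rough equivalent of XFS_ATTR_SF_ENTSIZE_BYNAME(). */
static int sf_entsize(int namelen, int valuelen)
{
        return SF_ENTRY_HDR + namelen + valuelen;
}

/*
 * Mirror of the length checks in xfs_attr_shortform_addname().  Even when
 * both lengths fit, the later bytesfit() check on the fork offset can still
 * force the attribute out of the inode, hence "maybe".
 */
static int sf_length_ok(int namelen, int valuelen)
{
        return namelen < SF_ENTSIZE_MAX && valuelen < SF_ENTSIZE_MAX;
}

int main(void)
{
        printf("12-byte name, 100-byte value: %d bytes, fits? %s\n",
               sf_entsize(12, 100), sf_length_ok(12, 100) ? "maybe" : "no");
        printf("12-byte name, 300-byte value: %d bytes, fits? %s\n",
               sf_entsize(12, 300), sf_length_ok(12, 300) ? "maybe" : "no");
        return 0;
}
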
-
-
-/*========================================================================
- * External routines when attribute list is one block
- *========================================================================*/
-
-/*
- * Add a name to the leaf attribute list structure
- *
- * This leaf block cannot have a "remote" value, we only call this routine
- * if bmap_one_block() says there is only one block (ie: no remote blks).
- */
-STATIC int
-xfs_attr_leaf_addname(xfs_da_args_t *args)
-{
-       xfs_inode_t *dp;
-       struct xfs_buf *bp;
-       int retval, error, committed, forkoff;
-
-       trace_xfs_attr_leaf_addname(args);
-
-       /*
-        * Read the (only) block in the attribute list in.
-        */
-       dp = args->dp;
-       args->blkno = 0;
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
-       if (error)
-               return error;
-
-       /*
-        * Look up the given attribute in the leaf block.  Figure out if
-        * the given flags produce an error or call for an atomic rename.
-        */
-       retval = xfs_attr3_leaf_lookup_int(bp, args);
-       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
-               xfs_trans_brelse(args->trans, bp);
-               return retval;
-       } else if (retval == EEXIST) {
-               if (args->flags & ATTR_CREATE) {        /* pure create op */
-                       xfs_trans_brelse(args->trans, bp);
-                       return retval;
-               }
-
-               trace_xfs_attr_leaf_replace(args);
-
-               /* save the attribute state for later removal*/
-               args->op_flags |= XFS_DA_OP_RENAME;     /* an atomic rename */
-               args->blkno2 = args->blkno;             /* set 2nd entry info*/
-               args->index2 = args->index;
-               args->rmtblkno2 = args->rmtblkno;
-               args->rmtblkcnt2 = args->rmtblkcnt;
-               args->rmtvaluelen2 = args->rmtvaluelen;
-
-               /*
-                * clear the remote attr state now that it is saved so that the
-                * values reflect the state of the attribute we are about to
-                * add, not the attribute we just found and will remove later.
-                */
-               args->rmtblkno = 0;
-               args->rmtblkcnt = 0;
-               args->rmtvaluelen = 0;
-       }
-
-       /*
-        * Add the attribute to the leaf block, transitioning to a Btree
-        * if required.
-        */
-       retval = xfs_attr3_leaf_add(bp, args);
-       if (retval == ENOSPC) {
-               /*
-                * Promote the attribute list to the Btree format, then
-                * Commit that transaction so that the node_addname() call
-                * can manage its own transactions.
-                */
-               xfs_bmap_init(args->flist, args->firstblock);
-               error = xfs_attr3_leaf_to_node(args);
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       return error;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, dp, 0);
-
-               /*
-                * Commit the current trans (including the inode) and start
-                * a new one.
-                */
-               error = xfs_trans_roll(&args->trans, dp);
-               if (error)
-                       return error;
-
-               /*
-                * Fob the whole rest of the problem off on the Btree code.
-                */
-               error = xfs_attr_node_addname(args);
-               return error;
-       }
-
-       /*
-        * Commit the transaction that added the attr name so that
-        * later routines can manage their own transactions.
-        */
-       error = xfs_trans_roll(&args->trans, dp);
-       if (error)
-               return error;
-
-       /*
-        * If there was an out-of-line value, allocate the blocks we
-        * identified for its storage and copy the value.  This is done
-        * after we create the attribute so that we don't overflow the
-        * maximum size of a transaction and/or hit a deadlock.
-        */
-       if (args->rmtblkno > 0) {
-               error = xfs_attr_rmtval_set(args);
-               if (error)
-                       return error;
-       }
-
-       /*
-        * If this is an atomic rename operation, we must "flip" the
-        * incomplete flags on the "new" and "old" attribute/value pairs
-        * so that one disappears and one appears atomically.  Then we
-        * must remove the "old" attribute/value pair.
-        */
-       if (args->op_flags & XFS_DA_OP_RENAME) {
-               /*
-                * In a separate transaction, set the incomplete flag on the
-                * "old" attr and clear the incomplete flag on the "new" attr.
-                */
-               error = xfs_attr3_leaf_flipflags(args);
-               if (error)
-                       return error;
-
-               /*
-                * Dismantle the "old" attribute/value pair by removing
-                * a "remote" value (if it exists).
-                */
-               args->index = args->index2;
-               args->blkno = args->blkno2;
-               args->rmtblkno = args->rmtblkno2;
-               args->rmtblkcnt = args->rmtblkcnt2;
-               args->rmtvaluelen = args->rmtvaluelen2;
-               if (args->rmtblkno) {
-                       error = xfs_attr_rmtval_remove(args);
-                       if (error)
-                               return error;
-               }
-
-               /*
-                * Read in the block containing the "old" attr, then
-                * remove the "old" attr from that block (neat, huh!)
-                */
-               error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
-                                          -1, &bp);
-               if (error)
-                       return error;
-
-               xfs_attr3_leaf_remove(bp, args);
-
-               /*
-                * If the result is small enough, shrink it all into the inode.
-                */
-               if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-                       xfs_bmap_init(args->flist, args->firstblock);
-                       error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
-                       /* bp is gone due to xfs_da_shrink_inode */
-                       if (!error) {
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist,
-                                                       &committed);
-                       }
-                       if (error) {
-                               ASSERT(committed);
-                               args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
-                               return error;
-                       }
-
-                       /*
-                        * bmap_finish() may have committed the last trans
-                        * and started a new one.  We need the inode to be
-                        * in all transactions.
-                        */
-                       if (committed)
-                               xfs_trans_ijoin(args->trans, dp, 0);
-               }
-
-               /*
-                * Commit the remove and start the next trans in series.
-                */
-               error = xfs_trans_roll(&args->trans, dp);
-
-       } else if (args->rmtblkno > 0) {
-               /*
-                * Added a "remote" value, just clear the incomplete flag.
-                */
-               error = xfs_attr3_leaf_clearflag(args);
-       }
-       return error;
-}
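
The atomic-rename tail above compresses to four steps: flip the incomplete flags, redirect the args at the old entry, free any old remote value, then remove the old leaf entry. The helper below is a condensed, illustrative sketch of that sequence; it is not a real function in this file.

/*
 * Condensed sketch (illustrative only) of the rename tail above: the new
 * entry was added first with the INCOMPLETE flag, so the old entry can be
 * torn down only after the flags have been flipped in one transaction.
 */
STATIC int
example_leaf_replace_tail(
        struct xfs_da_args      *args)
{
        struct xfs_buf          *bp;
        int                     error;

        /* 1. make the new entry visible and the old one incomplete */
        error = xfs_attr3_leaf_flipflags(args);
        if (error)
                return error;

        /* 2. switch the args over to describe the old entry */
        args->index = args->index2;
        args->blkno = args->blkno2;
        args->rmtblkno = args->rmtblkno2;
        args->rmtblkcnt = args->rmtblkcnt2;
        args->rmtvaluelen = args->rmtvaluelen2;

        /* 3. free the old remote value, if there was one */
        if (args->rmtblkno) {
                error = xfs_attr_rmtval_remove(args);
                if (error)
                        return error;
        }

        /* 4. finally remove the old leaf entry itself */
        error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
        if (error)
                return error;
        xfs_attr3_leaf_remove(bp, args);

        return xfs_trans_roll(&args->trans, args->dp);
}
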
-
-/*
- * Remove a name from the leaf attribute list structure
- *
- * This leaf block cannot have a "remote" value, we only call this routine
- * if bmap_one_block() says there is only one block (ie: no remote blks).
- */
-STATIC int
-xfs_attr_leaf_removename(xfs_da_args_t *args)
-{
-       xfs_inode_t *dp;
-       struct xfs_buf *bp;
-       int error, committed, forkoff;
-
-       trace_xfs_attr_leaf_removename(args);
-
-       /*
-        * Remove the attribute.
-        */
-       dp = args->dp;
-       args->blkno = 0;
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
-       if (error)
-               return error;
-
-       error = xfs_attr3_leaf_lookup_int(bp, args);
-       if (error == ENOATTR) {
-               xfs_trans_brelse(args->trans, bp);
-               return error;
-       }
-
-       xfs_attr3_leaf_remove(bp, args);
-
-       /*
-        * If the result is small enough, shrink it all into the inode.
-        */
-       if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-               xfs_bmap_init(args->flist, args->firstblock);
-               error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
-               /* bp is gone due to xfs_da_shrink_inode */
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       return error;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, dp, 0);
-       }
-       return 0;
-}
-
-/*
- * Look up a name in a leaf attribute list structure.
- *
- * This leaf block cannot have a "remote" value, we only call this routine
- * if bmap_one_block() says there is only one block (ie: no remote blks).
- */
-STATIC int
-xfs_attr_leaf_get(xfs_da_args_t *args)
-{
-       struct xfs_buf *bp;
-       int error;
-
-       trace_xfs_attr_leaf_get(args);
-
-       args->blkno = 0;
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
-       if (error)
-               return error;
-
-       error = xfs_attr3_leaf_lookup_int(bp, args);
-       if (error != EEXIST)  {
-               xfs_trans_brelse(args->trans, bp);
-               return error;
-       }
-       error = xfs_attr3_leaf_getvalue(bp, args);
-       xfs_trans_brelse(args->trans, bp);
-       if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
-               error = xfs_attr_rmtval_get(args);
-       }
-       return error;
-}
-
-/*========================================================================
- * External routines when attribute list size > geo->blksize
- *========================================================================*/
-
-/*
- * Add a name to a Btree-format attribute list.
- *
- * This will involve walking down the Btree, and may involve splitting
- * leaf nodes and even splitting intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
- *
- * "Remote" attribute values confuse the issue and atomic rename operations
- * add a whole extra layer of confusion on top of that.
- */
-STATIC int
-xfs_attr_node_addname(xfs_da_args_t *args)
-{
-       xfs_da_state_t *state;
-       xfs_da_state_blk_t *blk;
-       xfs_inode_t *dp;
-       xfs_mount_t *mp;
-       int committed, retval, error;
-
-       trace_xfs_attr_node_addname(args);
-
-       /*
-        * Fill in bucket of arguments/results/context to carry around.
-        */
-       dp = args->dp;
-       mp = dp->i_mount;
-restart:
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = mp;
-
-       /*
-        * Search to see if name already exists, and get back a pointer
-        * to where it should go.
-        */
-       error = xfs_da3_node_lookup_int(state, &retval);
-       if (error)
-               goto out;
-       blk = &state->path.blk[ state->path.active-1 ];
-       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-       if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
-               goto out;
-       } else if (retval == EEXIST) {
-               if (args->flags & ATTR_CREATE)
-                       goto out;
-
-               trace_xfs_attr_node_replace(args);
-
-               /* save the attribute state for later removal*/
-               args->op_flags |= XFS_DA_OP_RENAME;     /* atomic rename op */
-               args->blkno2 = args->blkno;             /* set 2nd entry info*/
-               args->index2 = args->index;
-               args->rmtblkno2 = args->rmtblkno;
-               args->rmtblkcnt2 = args->rmtblkcnt;
-               args->rmtvaluelen2 = args->rmtvaluelen;
-
-               /*
-                * clear the remote attr state now that it is saved so that the
-                * values reflect the state of the attribute we are about to
-                * add, not the attribute we just found and will remove later.
-                */
-               args->rmtblkno = 0;
-               args->rmtblkcnt = 0;
-               args->rmtvaluelen = 0;
-       }
-
-       retval = xfs_attr3_leaf_add(blk->bp, state->args);
-       if (retval == ENOSPC) {
-               if (state->path.active == 1) {
-                       /*
-                        * It's really a single leaf node, but it had
-                        * out-of-line values, so it looked like it *might*
-                        * have been a b-tree.
-                        */
-                       xfs_da_state_free(state);
-                       state = NULL;
-                       xfs_bmap_init(args->flist, args->firstblock);
-                       error = xfs_attr3_leaf_to_node(args);
-                       if (!error) {
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist,
-                                                       &committed);
-                       }
-                       if (error) {
-                               ASSERT(committed);
-                               args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
-                               goto out;
-                       }
-
-                       /*
-                        * bmap_finish() may have committed the last trans
-                        * and started a new one.  We need the inode to be
-                        * in all transactions.
-                        */
-                       if (committed)
-                               xfs_trans_ijoin(args->trans, dp, 0);
-
-                       /*
-                        * Commit the node conversion and start the next
-                        * trans in the chain.
-                        */
-                       error = xfs_trans_roll(&args->trans, dp);
-                       if (error)
-                               goto out;
-
-                       goto restart;
-               }
-
-               /*
-                * Split as many Btree elements as required.
-                * This code tracks the new and old attr's location
-                * in the index/blkno/rmtblkno/rmtblkcnt fields and
-                * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
-                */
-               xfs_bmap_init(args->flist, args->firstblock);
-               error = xfs_da3_split(state);
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       goto out;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, dp, 0);
-       } else {
-               /*
-                * Addition succeeded, update Btree hashvals.
-                */
-               xfs_da3_fixhashpath(state, &state->path);
-       }
-
-       /*
-        * Kill the state structure, we're done with it and need to
-        * allow the buffers to come back later.
-        */
-       xfs_da_state_free(state);
-       state = NULL;
-
-       /*
-        * Commit the leaf addition or btree split and start the next
-        * trans in the chain.
-        */
-       error = xfs_trans_roll(&args->trans, dp);
-       if (error)
-               goto out;
-
-       /*
-        * If there was an out-of-line value, allocate the blocks we
-        * identified for its storage and copy the value.  This is done
-        * after we create the attribute so that we don't overflow the
-        * maximum size of a transaction and/or hit a deadlock.
-        */
-       if (args->rmtblkno > 0) {
-               error = xfs_attr_rmtval_set(args);
-               if (error)
-                       return error;
-       }
-
-       /*
-        * If this is an atomic rename operation, we must "flip" the
-        * incomplete flags on the "new" and "old" attribute/value pairs
-        * so that one disappears and one appears atomically.  Then we
-        * must remove the "old" attribute/value pair.
-        */
-       if (args->op_flags & XFS_DA_OP_RENAME) {
-               /*
-                * In a separate transaction, set the incomplete flag on the
-                * "old" attr and clear the incomplete flag on the "new" attr.
-                */
-               error = xfs_attr3_leaf_flipflags(args);
-               if (error)
-                       goto out;
-
-               /*
-                * Dismantle the "old" attribute/value pair by removing
-                * a "remote" value (if it exists).
-                */
-               args->index = args->index2;
-               args->blkno = args->blkno2;
-               args->rmtblkno = args->rmtblkno2;
-               args->rmtblkcnt = args->rmtblkcnt2;
-               args->rmtvaluelen = args->rmtvaluelen2;
-               if (args->rmtblkno) {
-                       error = xfs_attr_rmtval_remove(args);
-                       if (error)
-                               return error;
-               }
-
-               /*
-                * Re-find the "old" attribute entry after any split ops.
-                * The INCOMPLETE flag means that we will find the "old"
-                * attr, not the "new" one.
-                */
-               args->flags |= XFS_ATTR_INCOMPLETE;
-               state = xfs_da_state_alloc();
-               state->args = args;
-               state->mp = mp;
-               state->inleaf = 0;
-               error = xfs_da3_node_lookup_int(state, &retval);
-               if (error)
-                       goto out;
-
-               /*
-                * Remove the name and update the hashvals in the tree.
-                */
-               blk = &state->path.blk[ state->path.active-1 ];
-               ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-               error = xfs_attr3_leaf_remove(blk->bp, args);
-               xfs_da3_fixhashpath(state, &state->path);
-
-               /*
-                * Check to see if the tree needs to be collapsed.
-                */
-               if (retval && (state->path.active > 1)) {
-                       xfs_bmap_init(args->flist, args->firstblock);
-                       error = xfs_da3_join(state);
-                       if (!error) {
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist,
-                                                       &committed);
-                       }
-                       if (error) {
-                               ASSERT(committed);
-                               args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
-                               goto out;
-                       }
-
-                       /*
-                        * bmap_finish() may have committed the last trans
-                        * and started a new one.  We need the inode to be
-                        * in all transactions.
-                        */
-                       if (committed)
-                               xfs_trans_ijoin(args->trans, dp, 0);
-               }
-
-               /*
-                * Commit and start the next trans in the chain.
-                */
-               error = xfs_trans_roll(&args->trans, dp);
-               if (error)
-                       goto out;
-
-       } else if (args->rmtblkno > 0) {
-               /*
-                * Added a "remote" value, just clear the incomplete flag.
-                */
-               error = xfs_attr3_leaf_clearflag(args);
-               if (error)
-                       goto out;
-       }
-       retval = error = 0;
-
-out:
-       if (state)
-               xfs_da_state_free(state);
-       if (error)
-               return error;
-       return retval;
-}
-
-/*
- * Remove a name from a B-tree attribute list.
- *
- * This will involve walking down the Btree, and may involve joining
- * leaf nodes and even joining intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
- */
-STATIC int
-xfs_attr_node_removename(xfs_da_args_t *args)
-{
-       xfs_da_state_t *state;
-       xfs_da_state_blk_t *blk;
-       xfs_inode_t *dp;
-       struct xfs_buf *bp;
-       int retval, error, committed, forkoff;
-
-       trace_xfs_attr_node_removename(args);
-
-       /*
-        * Tie a string around our finger to remind us where we are.
-        */
-       dp = args->dp;
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = dp->i_mount;
-
-       /*
-        * Search to see if name exists, and get back a pointer to it.
-        */
-       error = xfs_da3_node_lookup_int(state, &retval);
-       if (error || (retval != EEXIST)) {
-               if (error == 0)
-                       error = retval;
-               goto out;
-       }
-
-       /*
-        * If there is an out-of-line value, de-allocate the blocks.
-        * This is done before we remove the attribute so that we don't
-        * overflow the maximum size of a transaction and/or hit a deadlock.
-        */
-       blk = &state->path.blk[ state->path.active-1 ];
-       ASSERT(blk->bp != NULL);
-       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-       if (args->rmtblkno > 0) {
-               /*
-                * Fill in disk block numbers in the state structure
-                * so that we can get the buffers back after we commit
-                * several transactions in the following calls.
-                */
-               error = xfs_attr_fillstate(state);
-               if (error)
-                       goto out;
-
-               /*
-                * Mark the attribute as INCOMPLETE, then bunmapi() the
-                * remote value.
-                */
-               error = xfs_attr3_leaf_setflag(args);
-               if (error)
-                       goto out;
-               error = xfs_attr_rmtval_remove(args);
-               if (error)
-                       goto out;
-
-               /*
-                * Refill the state structure with buffers, the prior calls
-                * released our buffers.
-                */
-               error = xfs_attr_refillstate(state);
-               if (error)
-                       goto out;
-       }
-
-       /*
-        * Remove the name and update the hashvals in the tree.
-        */
-       blk = &state->path.blk[ state->path.active-1 ];
-       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-       retval = xfs_attr3_leaf_remove(blk->bp, args);
-       xfs_da3_fixhashpath(state, &state->path);
-
-       /*
-        * Check to see if the tree needs to be collapsed.
-        */
-       if (retval && (state->path.active > 1)) {
-               xfs_bmap_init(args->flist, args->firstblock);
-               error = xfs_da3_join(state);
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       goto out;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, dp, 0);
-
-               /*
-                * Commit the Btree join operation and start a new trans.
-                */
-               error = xfs_trans_roll(&args->trans, dp);
-               if (error)
-                       goto out;
-       }
-
-       /*
-        * If the result is small enough, push it all into the inode.
-        */
-       if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-               /*
-                * Have to get rid of the copy of this dabuf in the state.
-                */
-               ASSERT(state->path.active == 1);
-               ASSERT(state->path.blk[0].bp);
-               state->path.blk[0].bp = NULL;
-
-               error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
-               if (error)
-                       goto out;
-
-               if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
-                       xfs_bmap_init(args->flist, args->firstblock);
-                       error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
-                       /* bp is gone due to xfs_da_shrink_inode */
-                       if (!error) {
-                               error = xfs_bmap_finish(&args->trans,
-                                                       args->flist,
-                                                       &committed);
-                       }
-                       if (error) {
-                               ASSERT(committed);
-                               args->trans = NULL;
-                               xfs_bmap_cancel(args->flist);
-                               goto out;
-                       }
-
-                       /*
-                        * bmap_finish() may have committed the last trans
-                        * and started a new one.  We need the inode to be
-                        * in all transactions.
-                        */
-                       if (committed)
-                               xfs_trans_ijoin(args->trans, dp, 0);
-               } else
-                       xfs_trans_brelse(args->trans, bp);
-       }
-       error = 0;
-
-out:
-       xfs_da_state_free(state);
-       return error;
-}
-
-/*
- * Fill in the disk block numbers in the state structure for the buffers
- * that are attached to the state structure.
- * This is done so that we can quickly reattach ourselves to those buffers
- * after some set of transaction commits have released these buffers.
- */
-STATIC int
-xfs_attr_fillstate(xfs_da_state_t *state)
-{
-       xfs_da_state_path_t *path;
-       xfs_da_state_blk_t *blk;
-       int level;
-
-       trace_xfs_attr_fillstate(state->args);
-
-       /*
-        * Roll down the "path" in the state structure, storing the on-disk
-        * block number for those buffers in the "path".
-        */
-       path = &state->path;
-       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
-       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
-               if (blk->bp) {
-                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
-                       blk->bp = NULL;
-               } else {
-                       blk->disk_blkno = 0;
-               }
-       }
-
-       /*
-        * Roll down the "altpath" in the state structure, storing the on-disk
-        * block number for those buffers in the "altpath".
-        */
-       path = &state->altpath;
-       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
-       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
-               if (blk->bp) {
-                       blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
-                       blk->bp = NULL;
-               } else {
-                       blk->disk_blkno = 0;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * Reattach the buffers to the state structure based on the disk block
- * numbers stored in the state structure.
- * This is done after some set of transaction commits have released those
- * buffers from our grip.
- */
-STATIC int
-xfs_attr_refillstate(xfs_da_state_t *state)
-{
-       xfs_da_state_path_t *path;
-       xfs_da_state_blk_t *blk;
-       int level, error;
-
-       trace_xfs_attr_refillstate(state->args);
-
-       /*
-        * Roll down the "path" in the state structure, storing the on-disk
-        * block number for those buffers in the "path".
-        */
-       path = &state->path;
-       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
-       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
-               if (blk->disk_blkno) {
-                       error = xfs_da3_node_read(state->args->trans,
-                                               state->args->dp,
-                                               blk->blkno, blk->disk_blkno,
-                                               &blk->bp, XFS_ATTR_FORK);
-                       if (error)
-                               return error;
-               } else {
-                       blk->bp = NULL;
-               }
-       }
-
-       /*
-        * Roll down the "altpath" in the state structure, storing the on-disk
-        * block number for those buffers in the "altpath".
-        */
-       path = &state->altpath;
-       ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
-       for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
-               if (blk->disk_blkno) {
-                       error = xfs_da3_node_read(state->args->trans,
-                                               state->args->dp,
-                                               blk->blkno, blk->disk_blkno,
-                                               &blk->bp, XFS_ATTR_FORK);
-                       if (error)
-                               return error;
-               } else {
-                       blk->bp = NULL;
-               }
-       }
-
-       return 0;
-}
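
In xfs_attr_node_removename() these two helpers always bracket the remote-value teardown, because that teardown commits transactions and therefore releases the path buffers. The wrapper below is a condensed, hypothetical sketch of the pairing, using only calls that appear in this file.

/*
 * Condensed sketch (illustrative only) of how fillstate/refillstate are
 * paired around the remote value removal in xfs_attr_node_removename().
 */
STATIC int
example_remove_remote_value(
        xfs_da_state_t          *state,
        xfs_da_args_t           *args)
{
        int                     error;

        error = xfs_attr_fillstate(state);      /* bp -> disk_blkno */
        if (error)
                return error;

        error = xfs_attr3_leaf_setflag(args);   /* mark the attr INCOMPLETE */
        if (error)
                return error;

        error = xfs_attr_rmtval_remove(args);   /* commits transactions */
        if (error)
                return error;

        return xfs_attr_refillstate(state);     /* disk_blkno -> bp */
}
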
-
-/*
- * Look up a filename in a node attribute list.
- *
- * This routine gets called for any attribute fork that has more than one
- * block, ie: both true Btree attr lists and for single-leaf-blocks with
- * "remote" values taking up more blocks.
- */
-STATIC int
-xfs_attr_node_get(xfs_da_args_t *args)
-{
-       xfs_da_state_t *state;
-       xfs_da_state_blk_t *blk;
-       int error, retval;
-       int i;
-
-       trace_xfs_attr_node_get(args);
-
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = args->dp->i_mount;
-
-       /*
-        * Search to see if name exists, and get back a pointer to it.
-        */
-       error = xfs_da3_node_lookup_int(state, &retval);
-       if (error) {
-               retval = error;
-       } else if (retval == EEXIST) {
-               blk = &state->path.blk[ state->path.active-1 ];
-               ASSERT(blk->bp != NULL);
-               ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-
-               /*
-                * Get the value, local or "remote"
-                */
-               retval = xfs_attr3_leaf_getvalue(blk->bp, args);
-               if (!retval && (args->rmtblkno > 0)
-                   && !(args->flags & ATTR_KERNOVAL)) {
-                       retval = xfs_attr_rmtval_get(args);
-               }
-       }
-
-       /*
-        * If not in a transaction, we have to release all the buffers.
-        */
-       for (i = 0; i < state->path.active; i++) {
-               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
-               state->path.blk[i].bp = NULL;
-       }
-
-       xfs_da_state_free(state);
-       return retval;
-}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
deleted file mode 100644 (file)
index 127d96a..0000000
+++ /dev/null
@@ -1,2697 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-#include "xfs_dinode.h"
-#include "xfs_dir2.h"
-
-
-/*
- * xfs_attr_leaf.c
- *
- * Routines to implement leaf blocks of attributes as Btrees of hashed names.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Routines used for growing the Btree.
- */
-STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
-                                xfs_dablk_t which_block, struct xfs_buf **bpp);
-STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
-                                  struct xfs_attr3_icleaf_hdr *ichdr,
-                                  struct xfs_da_args *args, int freemap_index);
-STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
-                                  struct xfs_attr3_icleaf_hdr *ichdr,
-                                  struct xfs_buf *leaf_buffer);
-STATIC void xfs_attr3_leaf_rebalance(xfs_da_state_t *state,
-                                                  xfs_da_state_blk_t *blk1,
-                                                  xfs_da_state_blk_t *blk2);
-STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,
-                       xfs_da_state_blk_t *leaf_blk_1,
-                       struct xfs_attr3_icleaf_hdr *ichdr1,
-                       xfs_da_state_blk_t *leaf_blk_2,
-                       struct xfs_attr3_icleaf_hdr *ichdr2,
-                       int *number_entries_in_blk1,
-                       int *number_usedbytes_in_blk1);
-
-/*
- * Utility routines.
- */
-STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
-                       struct xfs_attr_leafblock *src_leaf,
-                       struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,
-                       struct xfs_attr_leafblock *dst_leaf,
-                       struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start,
-                       int move_count);
-STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
-
-void
-xfs_attr3_leaf_hdr_from_disk(
-       struct xfs_attr3_icleaf_hdr     *to,
-       struct xfs_attr_leafblock       *from)
-{
-       int     i;
-
-       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
-              from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
-
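-       /*
-        * CRC-enabled (v3) leaves carry the larger xfs_attr3_leaf_hdr, so
-        * decode that layout separately; otherwise fall through to the
-        * original on-disk header layout below.
-        */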
-       if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
-               struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)from;
-
-               to->forw = be32_to_cpu(hdr3->info.hdr.forw);
-               to->back = be32_to_cpu(hdr3->info.hdr.back);
-               to->magic = be16_to_cpu(hdr3->info.hdr.magic);
-               to->count = be16_to_cpu(hdr3->count);
-               to->usedbytes = be16_to_cpu(hdr3->usedbytes);
-               to->firstused = be16_to_cpu(hdr3->firstused);
-               to->holes = hdr3->holes;
-
-               for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-                       to->freemap[i].base = be16_to_cpu(hdr3->freemap[i].base);
-                       to->freemap[i].size = be16_to_cpu(hdr3->freemap[i].size);
-               }
-               return;
-       }
-       to->forw = be32_to_cpu(from->hdr.info.forw);
-       to->back = be32_to_cpu(from->hdr.info.back);
-       to->magic = be16_to_cpu(from->hdr.info.magic);
-       to->count = be16_to_cpu(from->hdr.count);
-       to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
-       to->firstused = be16_to_cpu(from->hdr.firstused);
-       to->holes = from->hdr.holes;
-
-       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-               to->freemap[i].base = be16_to_cpu(from->hdr.freemap[i].base);
-               to->freemap[i].size = be16_to_cpu(from->hdr.freemap[i].size);
-       }
-}
-
-void
-xfs_attr3_leaf_hdr_to_disk(
-       struct xfs_attr_leafblock       *to,
-       struct xfs_attr3_icleaf_hdr     *from)
-{
-       int     i;
-
-       ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
-              from->magic == XFS_ATTR3_LEAF_MAGIC);
-
-       if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
-               struct xfs_attr3_leaf_hdr *hdr3 = (struct xfs_attr3_leaf_hdr *)to;
-
-               hdr3->info.hdr.forw = cpu_to_be32(from->forw);
-               hdr3->info.hdr.back = cpu_to_be32(from->back);
-               hdr3->info.hdr.magic = cpu_to_be16(from->magic);
-               hdr3->count = cpu_to_be16(from->count);
-               hdr3->usedbytes = cpu_to_be16(from->usedbytes);
-               hdr3->firstused = cpu_to_be16(from->firstused);
-               hdr3->holes = from->holes;
-               hdr3->pad1 = 0;
-
-               for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-                       hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
-                       hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
-               }
-               return;
-       }
-       to->hdr.info.forw = cpu_to_be32(from->forw);
-       to->hdr.info.back = cpu_to_be32(from->back);
-       to->hdr.info.magic = cpu_to_be16(from->magic);
-       to->hdr.count = cpu_to_be16(from->count);
-       to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
-       to->hdr.firstused = cpu_to_be16(from->firstused);
-       to->hdr.holes = from->holes;
-       to->hdr.pad1 = 0;
-
-       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-               to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
-               to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
-       }
-}
-
-static bool
-xfs_attr3_leaf_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_attr_leafblock *leaf = bp->b_addr;
-       struct xfs_attr3_icleaf_hdr ichdr;
-
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-
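-       /*
-        * On CRC-enabled filesystems the block header also carries identity
-        * information, so check the v3 magic, the filesystem UUID and the
-        * block number; pre-CRC blocks are identified by magic alone.
-        */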
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
-
-               if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
-                       return false;
-
-               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
-                       return false;
-       }
-       if (ichdr.count == 0)
-               return false;
-
-       /* XXX: need to range check rest of attr header values */
-       /* XXX: hash order check? */
-
-       return true;
-}
-
-static void
-xfs_attr3_leaf_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_attr3_leaf_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
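-       /*
-        * Stamp the LSN from the buffer log item, if the buffer is logged,
-        * before recalculating the CRC over the block.
-        */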
-       if (bip)
-               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
-}
-
-/*
- * leaf/node format detection on trees is sketchy, so a node read can be done on
- * leaf level blocks when detection identifies the tree as a node format tree
- * incorrectly. In this case, we need to swap the verifier to match the correct
- * format of the block being read.
- */
-static void
-xfs_attr3_leaf_read_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_attr3_leaf_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
-       .verify_read = xfs_attr3_leaf_read_verify,
-       .verify_write = xfs_attr3_leaf_write_verify,
-};
-
-int
-xfs_attr3_leaf_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
-                               XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
-       if (!err && tp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
-       return err;
-}
-
-/*========================================================================
- * Namespace helper routines
- *========================================================================*/
-
-/*
- * If namespace bits don't match return 0.
- * If all match then return 1.
- */
-STATIC int
-xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
-{
-       return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
-}
-
-
-/*========================================================================
- * External routines when attribute fork size < XFS_LITINO(mp).
- *========================================================================*/
-
-/*
- * Query whether the requested number of additional bytes of extended
- * attribute space will be able to fit inline.
- *
- * Returns zero if not, else the di_forkoff fork offset to be used in the
- * literal area for attribute data once the new bytes have been added.
- *
- * di_forkoff must be 8 byte aligned, hence is stored as a >>3 value;
- * special case for dev/uuid inodes, they have fixed size data forks.
- */
-int
-xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
-{
-       int offset;
-       int minforkoff; /* lower limit on valid forkoff locations */
-       int maxforkoff; /* upper limit on valid forkoff locations */
-       int dsize;
-       xfs_mount_t *mp = dp->i_mount;
-
-       /* rounded down */
-       offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
-
-       switch (dp->i_d.di_format) {
-       case XFS_DINODE_FMT_DEV:
-               minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
-               return (offset >= minforkoff) ? minforkoff : 0;
-       case XFS_DINODE_FMT_UUID:
-               minforkoff = roundup(sizeof(uuid_t), 8) >> 3;
-               return (offset >= minforkoff) ? minforkoff : 0;
-       }
-
-       /*
-        * If the requested number of bytes is smaller than or equal to the
-        * current attribute fork size we can always proceed.
-        *
-        * Note that if_bytes in the data fork might actually be larger than
-        * the current data fork size due to delalloc extents. In that
-        * case either the extent count will go down when they are converted
-        * to real extents, or the delalloc conversion will take care of the
-        * literal area rebalancing.
-        */
-       if (bytes <= XFS_IFORK_ASIZE(dp))
-               return dp->i_d.di_forkoff;
-
-       /*
-        * For attr2 we can try to move the forkoff if there is space in the
-        * literal area, but for the old format we are done if there is no
-        * space in the fixed attribute fork.
-        */
-       if (!(mp->m_flags & XFS_MOUNT_ATTR2))
-               return 0;
-
-       dsize = dp->i_df.if_bytes;
-
-       switch (dp->i_d.di_format) {
-       case XFS_DINODE_FMT_EXTENTS:
-               /*
-                * If there is no attr fork and the data fork is in extents
-                * format, determine if creating the default attr fork will
-                * result in the extents format migrating to btree. If so, the
-                * minimum offset only needs to be the space required for
-                * the btree root.
-                */
-               if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
-                   xfs_default_attroffset(dp))
-                       dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
-               break;
-       case XFS_DINODE_FMT_BTREE:
-               /*
-                * If we have a data btree then keep the forkoff we already
-                * have. Otherwise we are adding a new attr, so set minforkoff
-                * to where the btree root ends so that we have plenty of
-                * room for attrs.
-                */
-               if (dp->i_d.di_forkoff) {
-                       if (offset < dp->i_d.di_forkoff)
-                               return 0;
-                       return dp->i_d.di_forkoff;
-               }
-               dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);
-               break;
-       }
-
-       /*
-        * A data fork btree root must have space for at least
-        * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
-        */
-       minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
-       minforkoff = roundup(minforkoff, 8) >> 3;
-
-       /* attr fork btree root can have at least this many key/ptr pairs */
-       maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) -
-                       XFS_BMDR_SPACE_CALC(MINABTPTRS);
-       maxforkoff = maxforkoff >> 3;   /* rounded down */
-
-       if (offset >= maxforkoff)
-               return maxforkoff;
-       if (offset >= minforkoff)
-               return offset;
-       return 0;
-}
-
-/*
- * Switch on the ATTR2 superblock bit (implies also FEATURES2)
- */
-STATIC void
-xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
-{
-       if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
-           !(xfs_sb_version_hasattr2(&mp->m_sb))) {
-               spin_lock(&mp->m_sb_lock);
-               if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
-                       xfs_sb_version_addattr2(&mp->m_sb);
-                       spin_unlock(&mp->m_sb_lock);
-                       xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
-               } else
-                       spin_unlock(&mp->m_sb_lock);
-       }
-}
-
-/*
- * Create the initial contents of a shortform attribute list.
- */
-void
-xfs_attr_shortform_create(xfs_da_args_t *args)
-{
-       xfs_attr_sf_hdr_t *hdr;
-       xfs_inode_t *dp;
-       xfs_ifork_t *ifp;
-
-       trace_xfs_attr_sf_create(args);
-
-       dp = args->dp;
-       ASSERT(dp != NULL);
-       ifp = dp->i_afp;
-       ASSERT(ifp != NULL);
-       ASSERT(ifp->if_bytes == 0);
-       if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) {
-               ifp->if_flags &= ~XFS_IFEXTENTS;        /* just in case */
-               dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL;
-               ifp->if_flags |= XFS_IFINLINE;
-       } else {
-               ASSERT(ifp->if_flags & XFS_IFINLINE);
-       }
-       xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
-       hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
-       hdr->count = 0;
-       hdr->totsize = cpu_to_be16(sizeof(*hdr));
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
-}
-
-/*
- * Add a name/value pair to the shortform attribute list.
- * Overflow from the inode has already been checked for.
- */
-void
-xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
-{
-       xfs_attr_shortform_t *sf;
-       xfs_attr_sf_entry_t *sfe;
-       int i, offset, size;
-       xfs_mount_t *mp;
-       xfs_inode_t *dp;
-       xfs_ifork_t *ifp;
-
-       trace_xfs_attr_sf_add(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       dp->i_d.di_forkoff = forkoff;
-
-       ifp = dp->i_afp;
-       ASSERT(ifp->if_flags & XFS_IFINLINE);
-       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
-       sfe = &sf->list[0];
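-       /*
-        * Walk to the end of the existing entries to find the insertion
-        * point; in DEBUG builds also assert the name is not already present.
-        */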
-       for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
-#ifdef DEBUG
-               if (sfe->namelen != args->namelen)
-                       continue;
-               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
-                       continue;
-               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
-                       continue;
-               ASSERT(0);
-#endif
-       }
-
-       offset = (char *)sfe - (char *)sf;
-       size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
-       xfs_idata_realloc(dp, size, XFS_ATTR_FORK);
-       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
-       sfe = (xfs_attr_sf_entry_t *)((char *)sf + offset);
-
-       sfe->namelen = args->namelen;
-       sfe->valuelen = args->valuelen;
-       sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
-       memcpy(sfe->nameval, args->name, args->namelen);
-       memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
-       sf->hdr.count++;
-       be16_add_cpu(&sf->hdr.totsize, size);
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
-
-       xfs_sbversion_add_attr2(mp, args->trans);
-}
-
-/*
- * After the last attribute is removed, revert to the original inode format,
- * making the whole literal area available to the data fork once more.
- */
-STATIC void
-xfs_attr_fork_reset(
-       struct xfs_inode        *ip,
-       struct xfs_trans        *tp)
-{
-       xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-       ip->i_d.di_forkoff = 0;
-       ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-
-       ASSERT(ip->i_d.di_anextents == 0);
-       ASSERT(ip->i_afp == NULL);
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-}
-
-/*
- * Remove an attribute from the shortform attribute list structure.
- */
-int
-xfs_attr_shortform_remove(xfs_da_args_t *args)
-{
-       xfs_attr_shortform_t *sf;
-       xfs_attr_sf_entry_t *sfe;
-       int base, size=0, end, totsize, i;
-       xfs_mount_t *mp;
-       xfs_inode_t *dp;
-
-       trace_xfs_attr_sf_remove(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       base = sizeof(xfs_attr_sf_hdr_t);
-       sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
-       sfe = &sf->list[0];
-       end = sf->hdr.count;
-       for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe),
-                                       base += size, i++) {
-               size = XFS_ATTR_SF_ENTSIZE(sfe);
-               if (sfe->namelen != args->namelen)
-                       continue;
-               if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
-                       continue;
-               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
-                       continue;
-               break;
-       }
-       if (i == end)
-               return ENOATTR;
-
-       /*
-        * Fix up the attribute fork data, covering the hole
-        */
-       end = base + size;
-       totsize = be16_to_cpu(sf->hdr.totsize);
-       if (end != totsize)
-               memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end);
-       sf->hdr.count--;
-       be16_add_cpu(&sf->hdr.totsize, -size);
-
-       /*
-        * Fix up the start offset of the attribute fork
-        */
-       totsize -= size;
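-       /*
-        * If that was the last attribute and the checks below permit it,
-        * remove the now-empty attribute fork entirely; otherwise shrink
-        * the inline data and recompute di_forkoff for the smaller fork.
-        */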
-       if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
-           (mp->m_flags & XFS_MOUNT_ATTR2) &&
-           (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
-           !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-               xfs_attr_fork_reset(dp, args->trans);
-       } else {
-               xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
-               dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
-               ASSERT(dp->i_d.di_forkoff);
-               ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
-                               (args->op_flags & XFS_DA_OP_ADDNAME) ||
-                               !(mp->m_flags & XFS_MOUNT_ATTR2) ||
-                               dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
-               xfs_trans_log_inode(args->trans, dp,
-                                       XFS_ILOG_CORE | XFS_ILOG_ADATA);
-       }
-
-       xfs_sbversion_add_attr2(mp, args->trans);
-
-       return 0;
-}
-
-/*
- * Look up a name in a shortform attribute list structure.
- */
-/*ARGSUSED*/
-int
-xfs_attr_shortform_lookup(xfs_da_args_t *args)
-{
-       xfs_attr_shortform_t *sf;
-       xfs_attr_sf_entry_t *sfe;
-       int i;
-       xfs_ifork_t *ifp;
-
-       trace_xfs_attr_sf_lookup(args);
-
-       ifp = args->dp->i_afp;
-       ASSERT(ifp->if_flags & XFS_IFINLINE);
-       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
-       sfe = &sf->list[0];
-       for (i = 0; i < sf->hdr.count;
-                               sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
-               if (sfe->namelen != args->namelen)
-                       continue;
-               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
-                       continue;
-               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
-                       continue;
-               return EEXIST;
-       }
-       return ENOATTR;
-}
-
-/*
- * Look up a name in a shortform attribute list structure and return its
- * value.
- */
-/*ARGSUSED*/
-int
-xfs_attr_shortform_getvalue(xfs_da_args_t *args)
-{
-       xfs_attr_shortform_t *sf;
-       xfs_attr_sf_entry_t *sfe;
-       int i;
-
-       ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
-       sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
-       sfe = &sf->list[0];
-       for (i = 0; i < sf->hdr.count;
-                               sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) {
-               if (sfe->namelen != args->namelen)
-                       continue;
-               if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
-                       continue;
-               if (!xfs_attr_namesp_match(args->flags, sfe->flags))
-                       continue;
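-               /*
-                * Found it.  ATTR_KERNOVAL callers only want the length; an
-                * undersized buffer gets ERANGE with the required length
-                * returned in args->valuelen.
-                */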
-               if (args->flags & ATTR_KERNOVAL) {
-                       args->valuelen = sfe->valuelen;
-                       return EEXIST;
-               }
-               if (args->valuelen < sfe->valuelen) {
-                       args->valuelen = sfe->valuelen;
-                       return ERANGE;
-               }
-               args->valuelen = sfe->valuelen;
-               memcpy(args->value, &sfe->nameval[args->namelen],
-                                                   args->valuelen);
-               return EEXIST;
-       }
-       return ENOATTR;
-}
-
-/*
- * Convert from using the shortform to the leaf.
- */
-int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
-{
-       xfs_inode_t *dp;
-       xfs_attr_shortform_t *sf;
-       xfs_attr_sf_entry_t *sfe;
-       xfs_da_args_t nargs;
-       char *tmpbuffer;
-       int error, i, size;
-       xfs_dablk_t blkno;
-       struct xfs_buf *bp;
-       xfs_ifork_t *ifp;
-
-       trace_xfs_attr_sf_to_leaf(args);
-
-       dp = args->dp;
-       ifp = dp->i_afp;
-       sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
-       size = be16_to_cpu(sf->hdr.totsize);
-       tmpbuffer = kmem_alloc(size, KM_SLEEP);
-       ASSERT(tmpbuffer != NULL);
-       memcpy(tmpbuffer, ifp->if_u1.if_data, size);
-       sf = (xfs_attr_shortform_t *)tmpbuffer;
-
-       xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
-       xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
-
-       bp = NULL;
-       error = xfs_da_grow_inode(args, &blkno);
-       if (error) {
-               /*
-                * If we hit an IO error in the middle of the transaction
-                * inside grow_inode(), we may have inconsistent data. Bail out.
-                */
-               if (error == EIO)
-                       goto out;
-               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
-               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
-               goto out;
-       }
-
-       ASSERT(blkno == 0);
-       error = xfs_attr3_leaf_create(args, blkno, &bp);
-       if (error) {
-               error = xfs_da_shrink_inode(args, 0, bp);
-               bp = NULL;
-               if (error)
-                       goto out;
-               xfs_idata_realloc(dp, size, XFS_ATTR_FORK);     /* try to put */
-               memcpy(ifp->if_u1.if_data, tmpbuffer, size);    /* it back */
-               goto out;
-       }
-
-       memset((char *)&nargs, 0, sizeof(nargs));
-       nargs.dp = dp;
-       nargs.geo = args->geo;
-       nargs.firstblock = args->firstblock;
-       nargs.flist = args->flist;
-       nargs.total = args->total;
-       nargs.whichfork = XFS_ATTR_FORK;
-       nargs.trans = args->trans;
-       nargs.op_flags = XFS_DA_OP_OKNOENT;
-
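-       /*
-        * Replay each entry from the shortform copy into the new leaf block;
-        * the lookup sets the insertion index for each add.
-        */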
-       sfe = &sf->list[0];
-       for (i = 0; i < sf->hdr.count; i++) {
-               nargs.name = sfe->nameval;
-               nargs.namelen = sfe->namelen;
-               nargs.value = &sfe->nameval[nargs.namelen];
-               nargs.valuelen = sfe->valuelen;
-               nargs.hashval = xfs_da_hashname(sfe->nameval,
-                                               sfe->namelen);
-               nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
-               error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
-               ASSERT(error == ENOATTR);
-               error = xfs_attr3_leaf_add(bp, &nargs);
-               ASSERT(error != ENOSPC);
-               if (error)
-                       goto out;
-               sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-       }
-       error = 0;
-
-out:
-       kmem_free(tmpbuffer);
-       return error;
-}
-
-/*
- * Check a leaf attribute block to see if all the entries would fit into
- * a shortform attribute list.
- */
-int
-xfs_attr_shortform_allfit(
-       struct xfs_buf          *bp,
-       struct xfs_inode        *dp)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr_leaf_entry *entry;
-       xfs_attr_leaf_name_local_t *name_loc;
-       struct xfs_attr3_icleaf_hdr leafhdr;
-       int                     bytes;
-       int                     i;
-
-       leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
-       entry = xfs_attr3_leaf_entryp(leaf);
-
-       bytes = sizeof(struct xfs_attr_sf_hdr);
-       for (i = 0; i < leafhdr.count; entry++, i++) {
-               if (entry->flags & XFS_ATTR_INCOMPLETE)
-                       continue;               /* don't copy partial entries */
-               if (!(entry->flags & XFS_ATTR_LOCAL))
-                       return 0;
-               name_loc = xfs_attr3_leaf_name_local(leaf, i);
-               if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
-                       return 0;
-               if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
-                       return 0;
-               bytes += sizeof(struct xfs_attr_sf_entry) - 1
-                               + name_loc->namelen
-                               + be16_to_cpu(name_loc->valuelen);
-       }
-       if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
-           (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
-           (bytes == sizeof(struct xfs_attr_sf_hdr)))
-               return -1;
-       return xfs_attr_shortform_bytesfit(dp, bytes);
-}
-
-/*
- * Convert a leaf attribute list to a shortform attribute list.
- */
-int
-xfs_attr3_leaf_to_shortform(
-       struct xfs_buf          *bp,
-       struct xfs_da_args      *args,
-       int                     forkoff)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_name_local *name_loc;
-       struct xfs_da_args      nargs;
-       struct xfs_inode        *dp = args->dp;
-       char                    *tmpbuffer;
-       int                     error;
-       int                     i;
-
-       trace_xfs_attr_leaf_to_sf(args);
-
-       tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
-       if (!tmpbuffer)
-               return ENOMEM;
-
-       memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
-
-       leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       entry = xfs_attr3_leaf_entryp(leaf);
-
-       /* XXX (dgc): buffer is about to be marked stale - why zero it? */
-       memset(bp->b_addr, 0, args->geo->blksize);
-
-       /*
-        * Clean out the prior contents of the attribute list.
-        */
-       error = xfs_da_shrink_inode(args, 0, bp);
-       if (error)
-               goto out;
-
-       if (forkoff == -1) {
-               ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
-               ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-               xfs_attr_fork_reset(dp, args->trans);
-               goto out;
-       }
-
-       xfs_attr_shortform_create(args);
-
-       /*
-        * Copy the attributes
-        */
-       memset((char *)&nargs, 0, sizeof(nargs));
-       nargs.geo = args->geo;
-       nargs.dp = dp;
-       nargs.firstblock = args->firstblock;
-       nargs.flist = args->flist;
-       nargs.total = args->total;
-       nargs.whichfork = XFS_ATTR_FORK;
-       nargs.trans = args->trans;
-       nargs.op_flags = XFS_DA_OP_OKNOENT;
-
-       for (i = 0; i < ichdr.count; entry++, i++) {
-               if (entry->flags & XFS_ATTR_INCOMPLETE)
-                       continue;       /* don't copy partial entries */
-               if (!entry->nameidx)
-                       continue;
-               ASSERT(entry->flags & XFS_ATTR_LOCAL);
-               name_loc = xfs_attr3_leaf_name_local(leaf, i);
-               nargs.name = name_loc->nameval;
-               nargs.namelen = name_loc->namelen;
-               nargs.value = &name_loc->nameval[nargs.namelen];
-               nargs.valuelen = be16_to_cpu(name_loc->valuelen);
-               nargs.hashval = be32_to_cpu(entry->hashval);
-               nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
-               xfs_attr_shortform_add(&nargs, forkoff);
-       }
-       error = 0;
-
-out:
-       kmem_free(tmpbuffer);
-       return error;
-}
-
-/*
- * Convert from using a single leaf to a root node and a leaf.
- */
-int
-xfs_attr3_leaf_to_node(
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr icleafhdr;
-       struct xfs_attr_leaf_entry *entries;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr icnodehdr;
-       struct xfs_da_intnode   *node;
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_buf          *bp1 = NULL;
-       struct xfs_buf          *bp2 = NULL;
-       xfs_dablk_t             blkno;
-       int                     error;
-
-       trace_xfs_attr_leaf_to_node(args);
-
-       error = xfs_da_grow_inode(args, &blkno);
-       if (error)
-               goto out;
-       error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1);
-       if (error)
-               goto out;
-
-       error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK);
-       if (error)
-               goto out;
-
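-       /*
-        * Block 0 of the attribute fork must remain the root of the tree, so
-        * copy the existing leaf out to the newly allocated block and reuse
-        * block 0 for the new node root below.
-        */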
-       /* copy leaf to new buffer, update identifiers */
-       xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);
-       bp2->b_ops = bp1->b_ops;
-       memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;
-               hdr3->blkno = cpu_to_be64(bp2->b_bn);
-       }
-       xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);
-
-       /*
-        * Set up the new root node.
-        */
-       error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
-       if (error)
-               goto out;
-       node = bp1->b_addr;
-       dp->d_ops->node_hdr_from_disk(&icnodehdr, node);
-       btree = dp->d_ops->node_tree_p(node);
-
-       leaf = bp2->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
-       entries = xfs_attr3_leaf_entryp(leaf);
-
-       /* both on-disk, don't endian-flip twice */
-       btree[0].hashval = entries[icleafhdr.count - 1].hashval;
-       btree[0].before = cpu_to_be32(blkno);
-       icnodehdr.count = 1;
-       dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
-       xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);
-       error = 0;
-out:
-       return error;
-}
-
-/*========================================================================
- * Routines used for growing the Btree.
- *========================================================================*/
-
-/*
- * Create the initial contents of a leaf attribute list
- * or a leaf in a node attribute list.
- */
-STATIC int
-xfs_attr3_leaf_create(
-       struct xfs_da_args      *args,
-       xfs_dablk_t             blkno,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_buf          *bp;
-       int                     error;
-
-       trace_xfs_attr_leaf_create(args);
-
-       error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
-                                           XFS_ATTR_FORK);
-       if (error)
-               return error;
-       bp->b_ops = &xfs_attr3_leaf_buf_ops;
-       xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);
-       leaf = bp->b_addr;
-       memset(leaf, 0, args->geo->blksize);
-
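-       /*
-        * Name/value data in a leaf block is packed downwards from the end
-        * of the block, so a freshly created leaf has firstused set to the
-        * block size and a single freemap entry covering everything after
-        * the header.
-        */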
-       memset(&ichdr, 0, sizeof(ichdr));
-       ichdr.firstused = args->geo->blksize;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
-
-               ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
-
-               hdr3->blkno = cpu_to_be64(bp->b_bn);
-               hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
-
-               ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
-       } else {
-               ichdr.magic = XFS_ATTR_LEAF_MAGIC;
-               ichdr.freemap[0].base = sizeof(struct xfs_attr_leaf_hdr);
-       }
-       ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
-
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
-       xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
-
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Split the leaf node, rebalance, then add the new entry.
- */
-int
-xfs_attr3_leaf_split(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *oldblk,
-       struct xfs_da_state_blk *newblk)
-{
-       xfs_dablk_t blkno;
-       int error;
-
-       trace_xfs_attr_leaf_split(state->args);
-
-       /*
-        * Allocate space for a new leaf node.
-        */
-       ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
-       error = xfs_da_grow_inode(state->args, &blkno);
-       if (error)
-               return error;
-       error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
-       if (error)
-               return error;
-       newblk->blkno = blkno;
-       newblk->magic = XFS_ATTR_LEAF_MAGIC;
-
-       /*
-        * Rebalance the entries across the two leaves.
-        * NOTE: rebalance() currently depends on the 2nd block being empty.
-        */
-       xfs_attr3_leaf_rebalance(state, oldblk, newblk);
-       error = xfs_da3_blk_link(state, oldblk, newblk);
-       if (error)
-               return error;
-
-       /*
-        * Save info on the "old" attribute for "atomic rename" ops; leaf_add()
-        * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the
-        * "new" attr's info.  We will need the "old" info to remove it later.
-        *
-        * Insert the "new" entry in the correct block.
-        */
-       if (state->inleaf) {
-               trace_xfs_attr_leaf_add_old(state->args);
-               error = xfs_attr3_leaf_add(oldblk->bp, state->args);
-       } else {
-               trace_xfs_attr_leaf_add_new(state->args);
-               error = xfs_attr3_leaf_add(newblk->bp, state->args);
-       }
-
-       /*
-        * Update last hashval in each block since we added the name.
-        */
-       oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
-       newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
-       return error;
-}
-
-/*
- * Add a name to the leaf attribute list structure.
- */
-int
-xfs_attr3_leaf_add(
-       struct xfs_buf          *bp,
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       int                     tablesize;
-       int                     entsize;
-       int                     sum;
-       int                     tmp;
-       int                     i;
-
-       trace_xfs_attr_leaf_add(args);
-
-       leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       ASSERT(args->index >= 0 && args->index <= ichdr.count);
-       entsize = xfs_attr_leaf_newentsize(args, NULL);
-
-       /*
-        * Search through freemap for first-fit on new name length.
-        * (may need to figure in size of entry struct too)
-        */
-       tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
-                                       + xfs_attr3_leaf_hdr_size(leaf);
-       for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
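-               /*
-                * If the entry table including the new entry would overlap
-                * the name/value area, this region cannot be used without a
-                * compaction; just accumulate its size for the ENOSPC check.
-                */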
-               if (tablesize > ichdr.firstused) {
-                       sum += ichdr.freemap[i].size;
-                       continue;
-               }
-               if (!ichdr.freemap[i].size)
-                       continue;       /* no space in this map */
-               tmp = entsize;
-               if (ichdr.freemap[i].base < ichdr.firstused)
-                       tmp += sizeof(xfs_attr_leaf_entry_t);
-               if (ichdr.freemap[i].size >= tmp) {
-                       tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
-                       goto out_log_hdr;
-               }
-               sum += ichdr.freemap[i].size;
-       }
-
-       /*
-        * If there are no holes in the address space of the block,
-        * and we don't have enough freespace, then compaction will do us
-        * no good and we should just give up.
-        */
-       if (!ichdr.holes && sum < entsize)
-               return ENOSPC;
-
-       /*
-        * Compact the entries to coalesce free space.
-        * This may change the hdr->count via dropping INCOMPLETE entries.
-        */
-       xfs_attr3_leaf_compact(args, &ichdr, bp);
-
-       /*
-        * After compaction, the block is guaranteed to have only one
-        * free region, in freemap[0].  If it is not big enough, give up.
-        */
-       if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
-               tmp = ENOSPC;
-               goto out_log_hdr;
-       }
-
-       tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
-
-out_log_hdr:
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
-       xfs_trans_log_buf(args->trans, bp,
-               XFS_DA_LOGRANGE(leaf, &leaf->hdr,
-                               xfs_attr3_leaf_hdr_size(leaf)));
-       return tmp;
-}
-
-/*
- * Add a name to a leaf attribute list structure.
- */
-STATIC int
-xfs_attr3_leaf_add_work(
-       struct xfs_buf          *bp,
-       struct xfs_attr3_icleaf_hdr *ichdr,
-       struct xfs_da_args      *args,
-       int                     mapindex)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_name_local *name_loc;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       struct xfs_mount        *mp;
-       int                     tmp;
-       int                     i;
-
-       trace_xfs_attr_leaf_add_work(args);
-
-       leaf = bp->b_addr;
-       ASSERT(mapindex >= 0 && mapindex < XFS_ATTR_LEAF_MAPSIZE);
-       ASSERT(args->index >= 0 && args->index <= ichdr->count);
-
-       /*
-        * Force open some space in the entry array and fill it in.
-        */
-       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
-       if (args->index < ichdr->count) {
-               tmp  = ichdr->count - args->index;
-               tmp *= sizeof(xfs_attr_leaf_entry_t);
-               memmove(entry + 1, entry, tmp);
-               xfs_trans_log_buf(args->trans, bp,
-                   XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
-       }
-       ichdr->count++;
-
-       /*
-        * Allocate space for the new string (at the end of the run).
-        */
-       mp = args->trans->t_mountp;
-       ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);
-       ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);
-       ASSERT(ichdr->freemap[mapindex].size >=
-               xfs_attr_leaf_newentsize(args, NULL));
-       ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);
-       ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0);
-
-       ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp);
-
-       entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +
-                                    ichdr->freemap[mapindex].size);
-       entry->hashval = cpu_to_be32(args->hashval);
-       entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
-       entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
-       if (args->op_flags & XFS_DA_OP_RENAME) {
-               entry->flags |= XFS_ATTR_INCOMPLETE;
-               if ((args->blkno2 == args->blkno) &&
-                   (args->index2 <= args->index)) {
-                       args->index2++;
-               }
-       }
-       xfs_trans_log_buf(args->trans, bp,
-                         XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
-       ASSERT((args->index == 0) ||
-              (be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
-       ASSERT((args->index == ichdr->count - 1) ||
-              (be32_to_cpu(entry->hashval) <= be32_to_cpu((entry+1)->hashval)));
-
-       /*
-        * For "remote" attribute values, simply note that we need to
-        * allocate space for the "remote" value.  We can't actually
-        * allocate the extents in this transaction, and we can't decide
-        * which blocks they should be as we might allocate more blocks
-        * as part of this transaction (a split operation for example).
-        */
-       if (entry->flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
-               name_loc->namelen = args->namelen;
-               name_loc->valuelen = cpu_to_be16(args->valuelen);
-               memcpy((char *)name_loc->nameval, args->name, args->namelen);
-               memcpy((char *)&name_loc->nameval[args->namelen], args->value,
-                                  be16_to_cpu(name_loc->valuelen));
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
-               name_rmt->namelen = args->namelen;
-               memcpy((char *)name_rmt->name, args->name, args->namelen);
-               entry->flags |= XFS_ATTR_INCOMPLETE;
-               /* just in case */
-               name_rmt->valuelen = 0;
-               name_rmt->valueblk = 0;
-               args->rmtblkno = 1;
-               args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
-               args->rmtvaluelen = args->valuelen;
-       }
-       xfs_trans_log_buf(args->trans, bp,
-            XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
-                                  xfs_attr_leaf_entsize(leaf, args->index)));
-
-       /*
-        * Update the control info for this leaf node
-        */
-       if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
-               ichdr->firstused = be16_to_cpu(entry->nameidx);
-
-       ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
-                                       + xfs_attr3_leaf_hdr_size(leaf));
-       tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
-                                       + xfs_attr3_leaf_hdr_size(leaf);
-
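-       /*
-        * The entry table grew by one entry, so any freemap region that
-        * started immediately after the old end of the table must now start
-        * one xfs_attr_leaf_entry_t later and shrink by the same amount.
-        */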
-       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-               if (ichdr->freemap[i].base == tmp) {
-                       ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
-                       ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
-               }
-       }
-       ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
-       return 0;
-}
-
-/*
- * Garbage collect a leaf attribute list block by copying it to a new buffer.
- */
-STATIC void
-xfs_attr3_leaf_compact(
-       struct xfs_da_args      *args,
-       struct xfs_attr3_icleaf_hdr *ichdr_dst,
-       struct xfs_buf          *bp)
-{
-       struct xfs_attr_leafblock *leaf_src;
-       struct xfs_attr_leafblock *leaf_dst;
-       struct xfs_attr3_icleaf_hdr ichdr_src;
-       struct xfs_trans        *trans = args->trans;
-       char                    *tmpbuffer;
-
-       trace_xfs_attr_leaf_compact(args);
-
-       tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
-       memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
-       memset(bp->b_addr, 0, args->geo->blksize);
-       leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
-       leaf_dst = bp->b_addr;
-
-       /*
-        * Copy the on-disk header back into the destination buffer to ensure
-        * all the information in the header that is not part of the incore
-        * header structure is preserved.
-        */
-       memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
-
-       /* Initialise the incore headers */
-       ichdr_src = *ichdr_dst; /* struct copy */
-       ichdr_dst->firstused = args->geo->blksize;
-       ichdr_dst->usedbytes = 0;
-       ichdr_dst->count = 0;
-       ichdr_dst->holes = 0;
-       ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
-       ichdr_dst->freemap[0].size = ichdr_dst->firstused -
-                                               ichdr_dst->freemap[0].base;
-
-       /* write the header back to initialise the underlying buffer */
-       xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
-
-       /*
-        * Copy all entries in the same (sorted) order,
-        * but allocate name/value pairs packed and in sequence.
-        */
-       xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0,
-                               leaf_dst, ichdr_dst, 0, ichdr_src.count);
-       /*
-        * this logs the entire buffer, but the caller must write the header
-        * back to the buffer when it is finished modifying it.
-        */
-       xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
-
-       kmem_free(tmpbuffer);
-}
-
-/*
- * Compare the order of two leaf blocks.
- * Return 0 unless leaf2 should go before leaf1.
- */
-static int
-xfs_attr3_leaf_order(
-       struct xfs_buf  *leaf1_bp,
-       struct xfs_attr3_icleaf_hdr *leaf1hdr,
-       struct xfs_buf  *leaf2_bp,
-       struct xfs_attr3_icleaf_hdr *leaf2hdr)
-{
-       struct xfs_attr_leaf_entry *entries1;
-       struct xfs_attr_leaf_entry *entries2;
-
-       entries1 = xfs_attr3_leaf_entryp(leaf1_bp->b_addr);
-       entries2 = xfs_attr3_leaf_entryp(leaf2_bp->b_addr);
-       if (leaf1hdr->count > 0 && leaf2hdr->count > 0 &&
-           ((be32_to_cpu(entries2[0].hashval) <
-             be32_to_cpu(entries1[0].hashval)) ||
-            (be32_to_cpu(entries2[leaf2hdr->count - 1].hashval) <
-             be32_to_cpu(entries1[leaf1hdr->count - 1].hashval)))) {
-               return 1;
-       }
-       return 0;
-}
-
-int
-xfs_attr_leaf_order(
-       struct xfs_buf  *leaf1_bp,
-       struct xfs_buf  *leaf2_bp)
-{
-       struct xfs_attr3_icleaf_hdr ichdr1;
-       struct xfs_attr3_icleaf_hdr ichdr2;
-
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
-       return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
-}
-
-/*
- * Redistribute the attribute list entries between two leaf nodes,
- * taking into account the size of the new entry.
- *
- * NOTE: if new block is empty, then it will get the upper half of the
- * old block.  At present, all (one) callers pass in an empty second block.
- *
- * This code adjusts the args->index/blkno and args->index2/blkno2 fields
- * to match what it is doing in splitting the attribute leaf block.  Those
- * values are used in "atomic rename" operations on attributes.  Note that
- * the "new" and "old" values can end up in different blocks.
- */
-STATIC void
-xfs_attr3_leaf_rebalance(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *blk1,
-       struct xfs_da_state_blk *blk2)
-{
-       struct xfs_da_args      *args;
-       struct xfs_attr_leafblock *leaf1;
-       struct xfs_attr_leafblock *leaf2;
-       struct xfs_attr3_icleaf_hdr ichdr1;
-       struct xfs_attr3_icleaf_hdr ichdr2;
-       struct xfs_attr_leaf_entry *entries1;
-       struct xfs_attr_leaf_entry *entries2;
-       int                     count;
-       int                     totallen;
-       int                     max;
-       int                     space;
-       int                     swap;
-
-       /*
-        * Set up environment.
-        */
-       ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
-       ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
-       leaf1 = blk1->bp->b_addr;
-       leaf2 = blk2->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
-       ASSERT(ichdr2.count == 0);
-       args = state->args;
-
-       trace_xfs_attr_leaf_rebalance(args);
-
-       /*
-        * Check ordering of blocks, reverse if it makes things simpler.
-        *
-        * NOTE: Given that all (current) callers pass in an empty
-        * second block, this code should never set "swap".
-        */
-       swap = 0;
-       if (xfs_attr3_leaf_order(blk1->bp, &ichdr1, blk2->bp, &ichdr2)) {
-               struct xfs_da_state_blk *tmp_blk;
-               struct xfs_attr3_icleaf_hdr tmp_ichdr;
-
-               tmp_blk = blk1;
-               blk1 = blk2;
-               blk2 = tmp_blk;
-
-               /* struct copies to swap them rather than reconverting */
-               tmp_ichdr = ichdr1;
-               ichdr1 = ichdr2;
-               ichdr2 = tmp_ichdr;
-
-               leaf1 = blk1->bp->b_addr;
-               leaf2 = blk2->bp->b_addr;
-               swap = 1;
-       }
-
-       /*
-        * Examine entries until we reduce the absolute difference in
-        * byte usage between the two blocks to a minimum.  Then get
-        * the direction to copy and the number of elements to move.
-        *
-        * "inleaf" is true if the new entry should be inserted into blk1.
-        * If "swap" is also true, then reverse the sense of "inleaf".
-        */
-       state->inleaf = xfs_attr3_leaf_figure_balance(state, blk1, &ichdr1,
-                                                     blk2, &ichdr2,
-                                                     &count, &totallen);
-       if (swap)
-               state->inleaf = !state->inleaf;
-
-       /*
-        * Move any entries required from leaf to leaf:
-        */
-       if (count < ichdr1.count) {
-               /*
-                * Figure the total bytes to be added to the destination leaf.
-                */
-               /* number entries being moved */
-               count = ichdr1.count - count;
-               space  = ichdr1.usedbytes - totallen;
-               space += count * sizeof(xfs_attr_leaf_entry_t);
-
-               /*
-                * leaf2 is the destination, compact it if it looks tight.
-                */
-               max  = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
-               max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
-               if (space > max)
-                       xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
-
-               /*
-                * Move high entries from leaf1 to low end of leaf2.
-                */
-               xfs_attr3_leaf_moveents(args, leaf1, &ichdr1,
-                               ichdr1.count - count, leaf2, &ichdr2, 0, count);
-
-       } else if (count > ichdr1.count) {
-               /*
-                * I assert that since all callers pass in an empty
-                * second buffer, this code should never execute.
-                */
-               ASSERT(0);
-
-               /*
-                * Figure the total bytes to be added to the destination leaf.
-                */
-               /* number entries being moved */
-               count -= ichdr1.count;
-               space  = totallen - ichdr1.usedbytes;
-               space += count * sizeof(xfs_attr_leaf_entry_t);
-
-               /*
-                * leaf1 is the destination, compact it if it looks tight.
-                */
-               max  = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
-               max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
-               if (space > max)
-                       xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
-
-               /*
-                * Move low entries from leaf2 to high end of leaf1.
-                */
-               xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1,
-                                       ichdr1.count, count);
-       }
-
-       xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
-       xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
-       xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
-       xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
-
-       /*
-        * Copy out last hashval in each block for B-tree code.
-        */
-       entries1 = xfs_attr3_leaf_entryp(leaf1);
-       entries2 = xfs_attr3_leaf_entryp(leaf2);
-       blk1->hashval = be32_to_cpu(entries1[ichdr1.count - 1].hashval);
-       blk2->hashval = be32_to_cpu(entries2[ichdr2.count - 1].hashval);
-
-       /*
-        * Adjust the expected index for insertion.
-        * NOTE: this code depends on the (current) situation that the
-        * second block was originally empty.
-        *
-        * If the insertion point moved to the 2nd block, we must adjust
-        * the index.  We must also track the entry just following the
-        * new entry for use in an "atomic rename" operation, that entry
-        * is always the "old" entry and the "new" entry is what we are
-        * inserting.  The index/blkno fields refer to the "old" entry,
-        * while the index2/blkno2 fields refer to the "new" entry.
-        */
-       if (blk1->index > ichdr1.count) {
-               ASSERT(state->inleaf == 0);
-               blk2->index = blk1->index - ichdr1.count;
-               args->index = args->index2 = blk2->index;
-               args->blkno = args->blkno2 = blk2->blkno;
-       } else if (blk1->index == ichdr1.count) {
-               if (state->inleaf) {
-                       args->index = blk1->index;
-                       args->blkno = blk1->blkno;
-                       args->index2 = 0;
-                       args->blkno2 = blk2->blkno;
-               } else {
-                       /*
-                        * On a double leaf split, the original attr location
-                        * is already stored in blkno2/index2, so don't
-                        * overwrite it, otherwise we corrupt the tree.
-                        */
-                       blk2->index = blk1->index - ichdr1.count;
-                       args->index = blk2->index;
-                       args->blkno = blk2->blkno;
-                       if (!state->extravalid) {
-                               /*
-                                * set the new attr location to match the old
-                                * one and let the higher level split code
-                                * decide where in the leaf to place it.
-                                */
-                               args->index2 = blk2->index;
-                               args->blkno2 = blk2->blkno;
-                       }
-               }
-       } else {
-               ASSERT(state->inleaf == 1);
-               args->index = args->index2 = blk1->index;
-               args->blkno = args->blkno2 = blk1->blkno;
-       }
-}
-
-/*
- * Examine entries until we reduce the absolute difference in
- * byte usage between the two blocks to a minimum.
- * GROT: Is this really necessary?  With other than a 512 byte blocksize,
- * GROT: there will always be enough room in either block for a new entry.
- * GROT: Do a double-split for this case?
- */
-STATIC int
-xfs_attr3_leaf_figure_balance(
-       struct xfs_da_state             *state,
-       struct xfs_da_state_blk         *blk1,
-       struct xfs_attr3_icleaf_hdr     *ichdr1,
-       struct xfs_da_state_blk         *blk2,
-       struct xfs_attr3_icleaf_hdr     *ichdr2,
-       int                             *countarg,
-       int                             *usedbytesarg)
-{
-       struct xfs_attr_leafblock       *leaf1 = blk1->bp->b_addr;
-       struct xfs_attr_leafblock       *leaf2 = blk2->bp->b_addr;
-       struct xfs_attr_leaf_entry      *entry;
-       int                             count;
-       int                             max;
-       int                             index;
-       int                             totallen = 0;
-       int                             half;
-       int                             lastdelta;
-       int                             foundit = 0;
-       int                             tmp;
-
-       /*
-        * Examine entries until we reduce the absolute difference in
-        * byte usage between the two blocks to a minimum.
-        */
-       max = ichdr1->count + ichdr2->count;
-       half = (max + 1) * sizeof(*entry);
-       half += ichdr1->usedbytes + ichdr2->usedbytes +
-                       xfs_attr_leaf_newentsize(state->args, NULL);
-       half /= 2;
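-       /*
-        * half is now the target byte count for blk1: half of the combined
-        * entry table plus name/value bytes of both blocks, new entry
-        * included.
-        */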
-       lastdelta = state->args->geo->blksize;
-       entry = xfs_attr3_leaf_entryp(leaf1);
-       for (count = index = 0; count < max; entry++, index++, count++) {
-
-#define XFS_ATTR_ABS(A)        (((A) < 0) ? -(A) : (A))
-               /*
-                * The new entry is in the first block, account for it.
-                */
-               if (count == blk1->index) {
-                       tmp = totallen + sizeof(*entry) +
-                               xfs_attr_leaf_newentsize(state->args, NULL);
-                       if (XFS_ATTR_ABS(half - tmp) > lastdelta)
-                               break;
-                       lastdelta = XFS_ATTR_ABS(half - tmp);
-                       totallen = tmp;
-                       foundit = 1;
-               }
-
-               /*
-                * Wrap around into the second block if necessary.
-                */
-               if (count == ichdr1->count) {
-                       leaf1 = leaf2;
-                       entry = xfs_attr3_leaf_entryp(leaf1);
-                       index = 0;
-               }
-
-               /*
-                * Figure out if next leaf entry would be too much.
-                */
-               tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1,
-                                                                       index);
-               if (XFS_ATTR_ABS(half - tmp) > lastdelta)
-                       break;
-               lastdelta = XFS_ATTR_ABS(half - tmp);
-               totallen = tmp;
-#undef XFS_ATTR_ABS
-       }
-
-       /*
-        * Calculate the number of usedbytes that will end up in lower block.
-        * If new entry not in lower block, fix up the count.
-        */
-       totallen -= count * sizeof(*entry);
-       if (foundit) {
-               totallen -= sizeof(*entry) +
-                               xfs_attr_leaf_newentsize(state->args, NULL);
-       }
-
-       *countarg = count;
-       *usedbytesarg = totallen;
-       return foundit;
-}
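
/*
 * Illustrative sketch (not part of the diff above): the same "stop when the
 * absolute distance from the halfway point starts growing" walk that
 * xfs_attr3_leaf_figure_balance() performs, reduced to a plain array of
 * entry sizes.  All names below (balance_point, sizes, nsizes) are made up
 * for the example.
 */
#include <stdio.h>
#include <stdlib.h>

static int balance_point(const int *sizes, int nsizes)
{
        int total = 0, half = 0, lastdelta, i;

        for (i = 0; i < nsizes; i++)
                half += sizes[i];
        half /= 2;

        lastdelta = half + 1;           /* larger than any first delta */
        for (i = 0; i < nsizes; i++) {
                int tmp = total + sizes[i];
                int delta = abs(half - tmp);

                if (delta > lastdelta)
                        break;          /* we just moved away from the midpoint */
                lastdelta = delta;
                total = tmp;
        }
        return i;                       /* entries [0, i) stay in the first block */
}

int main(void)
{
        int sizes[] = { 40, 16, 64, 8, 32, 24 };

        printf("split after %d entries\n",
               balance_point(sizes, (int)(sizeof(sizes) / sizeof(sizes[0]))));
        return 0;
}
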
-
-/*========================================================================
- * Routines used for shrinking the Btree.
- *========================================================================*/
-
-/*
- * Check a leaf block and its neighbors to see if the block should be
- * collapsed into one or the other neighbor.  Always keep the block
- * with the smaller block number.
- * If the current block is over 50% full, don't try to join it, return 0.
- * If the block is empty, fill in the state structure and return 2.
- * If it can be collapsed, fill in the state structure and return 1.
- * If nothing can be done, return 0.
- *
- * GROT: allow for INCOMPLETE entries in calculation.
- */
-int
-xfs_attr3_leaf_toosmall(
-       struct xfs_da_state     *state,
-       int                     *action)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_da_state_blk *blk;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_buf          *bp;
-       xfs_dablk_t             blkno;
-       int                     bytes;
-       int                     forward;
-       int                     error;
-       int                     retval;
-       int                     i;
-
-       trace_xfs_attr_leaf_toosmall(state->args);
-
-       /*
-        * Check for the degenerate case of the block being over 50% full.
-        * If so, it's not worth even looking to see if we might be able
-        * to coalesce with a sibling.
-        */
-       blk = &state->path.blk[ state->path.active-1 ];
-       leaf = blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       bytes = xfs_attr3_leaf_hdr_size(leaf) +
-               ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
-               ichdr.usedbytes;
-       if (bytes > (state->args->geo->blksize >> 1)) {
-               *action = 0;    /* blk over 50%, don't try to join */
-               return 0;
-       }
-
-       /*
-        * Check for the degenerate case of the block being empty.
-        * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (arbitrarily)
-        * to merge with the forward block unless it is NULL.
-        */
-       if (ichdr.count == 0) {
-               /*
-                * Make altpath point to the block we want to keep and
-                * path point to the block we want to drop (this one).
-                */
-               forward = (ichdr.forw != 0);
-               memcpy(&state->altpath, &state->path, sizeof(state->path));
-               error = xfs_da3_path_shift(state, &state->altpath, forward,
-                                                0, &retval);
-               if (error)
-                       return error;
-               if (retval) {
-                       *action = 0;
-               } else {
-                       *action = 2;
-               }
-               return 0;
-       }
-
-       /*
-        * Examine each sibling block to see if we can coalesce with
-        * at least 25% free space to spare.  We need to figure out
-        * whether to merge with the forward or the backward block.
-        * We prefer coalescing with the lower numbered sibling so as
-        * to shrink an attribute list over time.
-        */
-       /* start with smaller blk num */
-       forward = ichdr.forw < ichdr.back;
-       for (i = 0; i < 2; forward = !forward, i++) {
-               struct xfs_attr3_icleaf_hdr ichdr2;
-               if (forward)
-                       blkno = ichdr.forw;
-               else
-                       blkno = ichdr.back;
-               if (blkno == 0)
-                       continue;
-               error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
-                                       blkno, -1, &bp);
-               if (error)
-                       return error;
-
-               xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
-
-               bytes = state->args->geo->blksize -
-                       (state->args->geo->blksize >> 2) -
-                       ichdr.usedbytes - ichdr2.usedbytes -
-                       ((ichdr.count + ichdr2.count) *
-                                       sizeof(xfs_attr_leaf_entry_t)) -
-                       xfs_attr3_leaf_hdr_size(leaf);
-
-               xfs_trans_brelse(state->args->trans, bp);
-               if (bytes >= 0)
-                       break;  /* fits with at least 25% to spare */
-       }
-       if (i >= 2) {
-               *action = 0;
-               return 0;
-       }
-
-       /*
-        * Make altpath point to the block we want to keep (the lower
-        * numbered block) and path point to the block we want to drop.
-        */
-       memcpy(&state->altpath, &state->path, sizeof(state->path));
-       if (blkno < blk->blkno) {
-               error = xfs_da3_path_shift(state, &state->altpath, forward,
-                                                0, &retval);
-       } else {
-               error = xfs_da3_path_shift(state, &state->path, forward,
-                                                0, &retval);
-       }
-       if (error)
-               return error;
-       if (retval) {
-               *action = 0;
-       } else {
-               *action = 1;
-       }
-       return 0;
-}
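
/*
 * Illustrative sketch (not from the diff): the space test used by
 * xfs_attr3_leaf_toosmall() above, boiled down to plain arithmetic.  Two
 * leaves may be merged only if their combined payload still leaves at least
 * 25% of one block free; the real check also subtracts entry-table and
 * header overhead.  All names here are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

static bool can_merge_leaves(int blksize, int bytes_used_a, int bytes_used_b)
{
        /* space left over after the merge, minus the required 25% reserve */
        int spare = blksize - (blksize >> 2) - bytes_used_a - bytes_used_b;

        return spare >= 0;
}

int main(void)
{
        /* 4096-byte blocks: 1400 + 1500 = 2900 <= 3072, so this pair merges */
        printf("%d\n", can_merge_leaves(4096, 1400, 1500));
        /* 1800 + 1500 = 3300 > 3072, too full to merge */
        printf("%d\n", can_merge_leaves(4096, 1800, 1500));
        return 0;
}
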
-
-/*
- * Remove a name from the leaf attribute list structure.
- *
- * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
- * If two leaves are 37% full, when combined they will leave 25% free.
- */
-int
-xfs_attr3_leaf_remove(
-       struct xfs_buf          *bp,
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_attr_leaf_entry *entry;
-       int                     before;
-       int                     after;
-       int                     smallest;
-       int                     entsize;
-       int                     tablesize;
-       int                     tmp;
-       int                     i;
-
-       trace_xfs_attr_leaf_remove(args);
-
-       leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-
-       ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
-       ASSERT(args->index >= 0 && args->index < ichdr.count);
-       ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
-                                       xfs_attr3_leaf_hdr_size(leaf));
-
-       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
-
-       ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
-       ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
-
-       /*
-        * Scan through free region table:
-        *    check for adjacency of the freed entry with an existing one,
-        *    find the smallest free region in case we need to replace it,
-        *    adjust any map that borders the entry table.
-        */
-       tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
-                                       + xfs_attr3_leaf_hdr_size(leaf);
-       tmp = ichdr.freemap[0].size;
-       before = after = -1;
-       smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
-       entsize = xfs_attr_leaf_entsize(leaf, args->index);
-       for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
-               ASSERT(ichdr.freemap[i].base < args->geo->blksize);
-               ASSERT(ichdr.freemap[i].size < args->geo->blksize);
-               if (ichdr.freemap[i].base == tablesize) {
-                       ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);
-                       ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t);
-               }
-
-               if (ichdr.freemap[i].base + ichdr.freemap[i].size ==
-                               be16_to_cpu(entry->nameidx)) {
-                       before = i;
-               } else if (ichdr.freemap[i].base ==
-                               (be16_to_cpu(entry->nameidx) + entsize)) {
-                       after = i;
-               } else if (ichdr.freemap[i].size < tmp) {
-                       tmp = ichdr.freemap[i].size;
-                       smallest = i;
-               }
-       }
-
-       /*
-        * Coalesce adjacent freemap regions,
-        * or replace the smallest region.
-        */
-       if ((before >= 0) || (after >= 0)) {
-               if ((before >= 0) && (after >= 0)) {
-                       ichdr.freemap[before].size += entsize;
-                       ichdr.freemap[before].size += ichdr.freemap[after].size;
-                       ichdr.freemap[after].base = 0;
-                       ichdr.freemap[after].size = 0;
-               } else if (before >= 0) {
-                       ichdr.freemap[before].size += entsize;
-               } else {
-                       ichdr.freemap[after].base = be16_to_cpu(entry->nameidx);
-                       ichdr.freemap[after].size += entsize;
-               }
-       } else {
-               /*
-                * Replace the smallest region (if it is smaller than the freed entry).
-                */
-               if (ichdr.freemap[smallest].size < entsize) {
-                       ichdr.freemap[smallest].base = be16_to_cpu(entry->nameidx);
-                       ichdr.freemap[smallest].size = entsize;
-               }
-       }
-
-       /*
-        * Did we remove the first entry?
-        */
-       if (be16_to_cpu(entry->nameidx) == ichdr.firstused)
-               smallest = 1;
-       else
-               smallest = 0;
-
-       /*
-        * Compress the remaining entries and zero out the removed stuff.
-        */
-       memset(xfs_attr3_leaf_name(leaf, args->index), 0, entsize);
-       ichdr.usedbytes -= entsize;
-       xfs_trans_log_buf(args->trans, bp,
-            XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
-                                  entsize));
-
-       tmp = (ichdr.count - args->index) * sizeof(xfs_attr_leaf_entry_t);
-       memmove(entry, entry + 1, tmp);
-       ichdr.count--;
-       xfs_trans_log_buf(args->trans, bp,
-           XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(xfs_attr_leaf_entry_t)));
-
-       entry = &xfs_attr3_leaf_entryp(leaf)[ichdr.count];
-       memset(entry, 0, sizeof(xfs_attr_leaf_entry_t));
-
-       /*
-        * If we removed the first entry, re-find the first used byte
-        * in the name area.  Note that if the entry was the "firstused",
-        * then we don't have a "hole" in our block resulting from
-        * removing the name.
-        */
-       if (smallest) {
-               tmp = args->geo->blksize;
-               entry = xfs_attr3_leaf_entryp(leaf);
-               for (i = ichdr.count - 1; i >= 0; entry++, i--) {
-                       ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused);
-                       ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);
-
-                       if (be16_to_cpu(entry->nameidx) < tmp)
-                               tmp = be16_to_cpu(entry->nameidx);
-               }
-               ichdr.firstused = tmp;
-               if (!ichdr.firstused)
-                       ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
-       } else {
-               ichdr.holes = 1;        /* mark as needing compaction */
-       }
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
-       xfs_trans_log_buf(args->trans, bp,
-                         XFS_DA_LOGRANGE(leaf, &leaf->hdr,
-                                         xfs_attr3_leaf_hdr_size(leaf)));
-
-       /*
-        * Check if the leaf is less than 37% full; if so, the caller may
-        * want to "join" the leaf with a sibling.
-        */
-       tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
-             ichdr.count * sizeof(xfs_attr_leaf_entry_t);
-
-       return tmp < args->geo->magicpct; /* leaf is < 37% full */
-}
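
/*
 * Illustrative sketch (not from the diff): the three-entry freemap update
 * performed by xfs_attr3_leaf_remove() above.  A freed range is merged into
 * a region that ends where it starts, merged into a region that starts
 * where it ends, or used to replace the smallest tracked region if it is
 * larger.  The struct and names are simplified for the example.
 */
#include <stdio.h>

#define MAPSIZE 3

struct free_region {
        int base;
        int size;
};

static void free_range(struct free_region map[MAPSIZE], int base, int size)
{
        int before = -1, after = -1, smallest = 0, i;

        for (i = 0; i < MAPSIZE; i++) {
                if (map[i].base + map[i].size == base)
                        before = i;
                else if (map[i].base == base + size)
                        after = i;
                else if (map[i].size < map[smallest].size)
                        smallest = i;
        }

        if (before >= 0 && after >= 0) {
                /* freed range bridges two regions: collapse them into one */
                map[before].size += size + map[after].size;
                map[after].base = map[after].size = 0;
        } else if (before >= 0) {
                map[before].size += size;
        } else if (after >= 0) {
                map[after].base = base;
                map[after].size += size;
        } else if (map[smallest].size < size) {
                map[smallest].base = base;
                map[smallest].size = size;
        }
}

int main(void)
{
        struct free_region map[MAPSIZE] = { { 64, 32 }, { 200, 16 }, { 0, 0 } };

        free_range(map, 96, 24);        /* adjacent to the first region */
        for (int i = 0; i < MAPSIZE; i++)
                printf("region %d: base %d size %d\n", i, map[i].base, map[i].size);
        return 0;
}
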
-
-/*
- * Move all the attribute list entries from drop_leaf into save_leaf.
- */
-void
-xfs_attr3_leaf_unbalance(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *drop_blk,
-       struct xfs_da_state_blk *save_blk)
-{
-       struct xfs_attr_leafblock *drop_leaf = drop_blk->bp->b_addr;
-       struct xfs_attr_leafblock *save_leaf = save_blk->bp->b_addr;
-       struct xfs_attr3_icleaf_hdr drophdr;
-       struct xfs_attr3_icleaf_hdr savehdr;
-       struct xfs_attr_leaf_entry *entry;
-
-       trace_xfs_attr_leaf_unbalance(state->args);
-
-       drop_leaf = drop_blk->bp->b_addr;
-       save_leaf = save_blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
-       xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
-       entry = xfs_attr3_leaf_entryp(drop_leaf);
-
-       /*
-        * Save last hashval from dying block for later Btree fixup.
-        */
-       drop_blk->hashval = be32_to_cpu(entry[drophdr.count - 1].hashval);
-
-       /*
-        * Check if we need a temp buffer, or can we do it in place.
-        * Note that we don't check "leaf" for holes because we will
-        * always be dropping it, toosmall() decided that for us already.
-        */
-       if (savehdr.holes == 0) {
-               /*
-                * dest leaf has no holes, so we add there.  May need
-                * to make some room in the entry array.
-                */
-               if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
-                                        drop_blk->bp, &drophdr)) {
-                       xfs_attr3_leaf_moveents(state->args,
-                                               drop_leaf, &drophdr, 0,
-                                               save_leaf, &savehdr, 0,
-                                               drophdr.count);
-               } else {
-                       xfs_attr3_leaf_moveents(state->args,
-                                               drop_leaf, &drophdr, 0,
-                                               save_leaf, &savehdr,
-                                               savehdr.count, drophdr.count);
-               }
-       } else {
-               /*
-                * Destination has holes, so we make a temporary copy
-                * of the leaf and add them both to that.
-                */
-               struct xfs_attr_leafblock *tmp_leaf;
-               struct xfs_attr3_icleaf_hdr tmphdr;
-
-               tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);
-
-               /*
-                * Copy the header into the temp leaf so that all the stuff
-                * not in the incore header is present and gets copied back in
-                * once we've moved all the entries.
-                */
-               memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
-
-               memset(&tmphdr, 0, sizeof(tmphdr));
-               tmphdr.magic = savehdr.magic;
-               tmphdr.forw = savehdr.forw;
-               tmphdr.back = savehdr.back;
-               tmphdr.firstused = state->args->geo->blksize;
-
-               /* write the header to the temp buffer to initialise it */
-               xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
-
-               if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
-                                        drop_blk->bp, &drophdr)) {
-                       xfs_attr3_leaf_moveents(state->args,
-                                               drop_leaf, &drophdr, 0,
-                                               tmp_leaf, &tmphdr, 0,
-                                               drophdr.count);
-                       xfs_attr3_leaf_moveents(state->args,
-                                               save_leaf, &savehdr, 0,
-                                               tmp_leaf, &tmphdr, tmphdr.count,
-                                               savehdr.count);
-               } else {
-                       xfs_attr3_leaf_moveents(state->args,
-                                               save_leaf, &savehdr, 0,
-                                               tmp_leaf, &tmphdr, 0,
-                                               savehdr.count);
-                       xfs_attr3_leaf_moveents(state->args,
-                                               drop_leaf, &drophdr, 0,
-                                               tmp_leaf, &tmphdr, tmphdr.count,
-                                               drophdr.count);
-               }
-               memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
-               savehdr = tmphdr; /* struct copy */
-               kmem_free(tmp_leaf);
-       }
-
-       xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
-       xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
-                                          state->args->geo->blksize - 1);
-
-       /*
-        * Copy out last hashval in each block for B-tree code.
-        */
-       entry = xfs_attr3_leaf_entryp(save_leaf);
-       save_blk->hashval = be32_to_cpu(entry[savehdr.count - 1].hashval);
-}
-
-/*========================================================================
- * Routines used for finding things in the Btree.
- *========================================================================*/
-
-/*
- * Look up a name in a leaf attribute list structure.
- * This is the internal routine, it uses the caller's buffer.
- *
- * Note that duplicate keys are allowed, but only check within the
- * current leaf node.  The Btree code must check in adjacent leaf nodes.
- *
- * Return in args->index the index into the entry[] array of either
- * the found entry, or where the entry should have been (insert before
- * that entry).
- *
- * Don't change the args->value unless we find the attribute.
- */
-int
-xfs_attr3_leaf_lookup_int(
-       struct xfs_buf          *bp,
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_entry *entries;
-       struct xfs_attr_leaf_name_local *name_loc;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       xfs_dahash_t            hashval;
-       int                     probe;
-       int                     span;
-
-       trace_xfs_attr_leaf_lookup(args);
-
-       leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       entries = xfs_attr3_leaf_entryp(leaf);
-       ASSERT(ichdr.count < args->geo->blksize / 8);
-
-       /*
-        * Binary search.  (note: small blocks will skip this loop)
-        */
-       hashval = args->hashval;
-       probe = span = ichdr.count / 2;
-       for (entry = &entries[probe]; span > 4; entry = &entries[probe]) {
-               span /= 2;
-               if (be32_to_cpu(entry->hashval) < hashval)
-                       probe += span;
-               else if (be32_to_cpu(entry->hashval) > hashval)
-                       probe -= span;
-               else
-                       break;
-       }
-       ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
-       ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
-
-       /*
-        * Since we may have duplicate hashval's, find the first matching
-        * hashval in the leaf.
-        */
-       while (probe > 0 && be32_to_cpu(entry->hashval) >= hashval) {
-               entry--;
-               probe--;
-       }
-       while (probe < ichdr.count &&
-              be32_to_cpu(entry->hashval) < hashval) {
-               entry++;
-               probe++;
-       }
-       if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
-               args->index = probe;
-               return ENOATTR;
-       }
-
-       /*
-        * Duplicate keys may be present, so search all of them for a match.
-        */
-       for (; probe < ichdr.count && (be32_to_cpu(entry->hashval) == hashval);
-                       entry++, probe++) {
-/*
- * GROT: Add code to remove incomplete entries.
- */
-               /*
-                * If we are looking for INCOMPLETE entries, show only those.
-                * If we are looking for complete entries, show only those.
-                */
-               if ((args->flags & XFS_ATTR_INCOMPLETE) !=
-                   (entry->flags & XFS_ATTR_INCOMPLETE)) {
-                       continue;
-               }
-               if (entry->flags & XFS_ATTR_LOCAL) {
-                       name_loc = xfs_attr3_leaf_name_local(leaf, probe);
-                       if (name_loc->namelen != args->namelen)
-                               continue;
-                       if (memcmp(args->name, name_loc->nameval,
-                                                       args->namelen) != 0)
-                               continue;
-                       if (!xfs_attr_namesp_match(args->flags, entry->flags))
-                               continue;
-                       args->index = probe;
-                       return EEXIST;
-               } else {
-                       name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
-                       if (name_rmt->namelen != args->namelen)
-                               continue;
-                       if (memcmp(args->name, name_rmt->name,
-                                                       args->namelen) != 0)
-                               continue;
-                       if (!xfs_attr_namesp_match(args->flags, entry->flags))
-                               continue;
-                       args->index = probe;
-                       args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
-                       args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                       args->rmtblkcnt = xfs_attr3_rmt_blocks(
-                                                       args->dp->i_mount,
-                                                       args->rmtvaluelen);
-                       return EEXIST;
-               }
-       }
-       args->index = probe;
-       return ENOATTR;
-}
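
/*
 * Illustrative sketch (not from the diff): the lookup pattern used by
 * xfs_attr3_leaf_lookup_int() above.  A coarse binary search narrows the
 * span, then a linear walk backs up to the first entry with the target
 * hash so that duplicate hash values are all visited in order.  The array
 * and names are invented for the example.
 */
#include <stdio.h>

/* return index of the first element equal to hash, or -1 if absent */
static int find_first(const unsigned int *hashes, int count, unsigned int hash)
{
        int probe, span;

        probe = span = count / 2;
        while (span > 4) {
                span /= 2;
                if (hashes[probe] < hash)
                        probe += span;
                else if (hashes[probe] > hash)
                        probe -= span;
                else
                        break;
        }

        /* back up to the first duplicate, then forward past smaller hashes */
        while (probe > 0 && hashes[probe] >= hash)
                probe--;
        while (probe < count && hashes[probe] < hash)
                probe++;

        if (probe == count || hashes[probe] != hash)
                return -1;
        return probe;
}

int main(void)
{
        unsigned int hashes[] = { 3, 9, 9, 9, 14, 20, 20, 31, 40, 55, 61, 70 };
        int count = (int)(sizeof(hashes) / sizeof(hashes[0]));

        printf("first 9 at %d, first 20 at %d, 21 -> %d\n",
               find_first(hashes, count, 9),
               find_first(hashes, count, 20),
               find_first(hashes, count, 21));
        return 0;
}
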
-
-/*
- * Get the value associated with an attribute name from a leaf attribute
- * list structure.
- */
-int
-xfs_attr3_leaf_getvalue(
-       struct xfs_buf          *bp,
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_name_local *name_loc;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       int                     valuelen;
-
-       leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       ASSERT(ichdr.count < args->geo->blksize / 8);
-       ASSERT(args->index < ichdr.count);
-
-       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
-       if (entry->flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
-               ASSERT(name_loc->namelen == args->namelen);
-               ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
-               valuelen = be16_to_cpu(name_loc->valuelen);
-               if (args->flags & ATTR_KERNOVAL) {
-                       args->valuelen = valuelen;
-                       return 0;
-               }
-               if (args->valuelen < valuelen) {
-                       args->valuelen = valuelen;
-                       return ERANGE;
-               }
-               args->valuelen = valuelen;
-               memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
-               ASSERT(name_rmt->namelen == args->namelen);
-               ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
-               args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
-               args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-               args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
-                                                      args->rmtvaluelen);
-               if (args->flags & ATTR_KERNOVAL) {
-                       args->valuelen = args->rmtvaluelen;
-                       return 0;
-               }
-               if (args->valuelen < args->rmtvaluelen) {
-                       args->valuelen = args->rmtvaluelen;
-                       return ERANGE;
-               }
-               args->valuelen = args->rmtvaluelen;
-       }
-       return 0;
-}
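
/*
 * Illustrative sketch (not from the diff): the caller-buffer protocol that
 * xfs_attr3_leaf_getvalue() follows.  A size-only query just reports the
 * value length; otherwise a too-small buffer gets the required length back
 * along with a "range" error, and only a large-enough buffer receives the
 * data.  Names and the error convention are simplified for the example.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

/*
 * Fetch a stored value.  If buf is NULL only *buflen is updated.  Returns
 * 0 on success or ERANGE if the caller's buffer is too small.
 */
static int get_value(const char *stored, int storedlen, char *buf, int *buflen)
{
        if (buf == NULL) {                      /* size-only query */
                *buflen = storedlen;
                return 0;
        }
        if (*buflen < storedlen) {
                *buflen = storedlen;            /* tell the caller what it needs */
                return ERANGE;
        }
        *buflen = storedlen;
        memcpy(buf, stored, storedlen);
        return 0;
}

int main(void)
{
        const char value[] = "remote or local attribute value";
        char small[8], big[64];
        int len;

        get_value(value, sizeof(value), NULL, &len);
        printf("size query: %d bytes\n", len);

        len = sizeof(small);
        printf("small buffer: error %d, need %d\n",
               get_value(value, sizeof(value), small, &len), len);

        len = sizeof(big);
        if (get_value(value, sizeof(value), big, &len) == 0)
                printf("got: %s\n", big);
        return 0;
}
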
-
-/*========================================================================
- * Utility routines.
- *========================================================================*/
-
-/*
- * Move the indicated entries from one leaf to another.
- * NOTE: this routine modifies both source and destination leaves.
- */
-/*ARGSUSED*/
-STATIC void
-xfs_attr3_leaf_moveents(
-       struct xfs_da_args              *args,
-       struct xfs_attr_leafblock       *leaf_s,
-       struct xfs_attr3_icleaf_hdr     *ichdr_s,
-       int                             start_s,
-       struct xfs_attr_leafblock       *leaf_d,
-       struct xfs_attr3_icleaf_hdr     *ichdr_d,
-       int                             start_d,
-       int                             count)
-{
-       struct xfs_attr_leaf_entry      *entry_s;
-       struct xfs_attr_leaf_entry      *entry_d;
-       int                             desti;
-       int                             tmp;
-       int                             i;
-
-       /*
-        * Check for nothing to do.
-        */
-       if (count == 0)
-               return;
-
-       /*
-        * Set up environment.
-        */
-       ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||
-              ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
-       ASSERT(ichdr_s->magic == ichdr_d->magic);
-       ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
-       ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
-                                       + xfs_attr3_leaf_hdr_size(leaf_s));
-       ASSERT(ichdr_d->count < args->geo->blksize / 8);
-       ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
-                                       + xfs_attr3_leaf_hdr_size(leaf_d));
-
-       ASSERT(start_s < ichdr_s->count);
-       ASSERT(start_d <= ichdr_d->count);
-       ASSERT(count <= ichdr_s->count);
-
-
-       /*
-        * Move the entries in the destination leaf up to make a hole?
-        */
-       if (start_d < ichdr_d->count) {
-               tmp  = ichdr_d->count - start_d;
-               tmp *= sizeof(xfs_attr_leaf_entry_t);
-               entry_s = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
-               entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d + count];
-               memmove(entry_d, entry_s, tmp);
-       }
-
-       /*
-        * Copy all entries in the same (sorted) order,
-        * but allocate attribute info packed and in sequence.
-        */
-       entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
-       entry_d = &xfs_attr3_leaf_entryp(leaf_d)[start_d];
-       desti = start_d;
-       for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) {
-               ASSERT(be16_to_cpu(entry_s->nameidx) >= ichdr_s->firstused);
-               tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i);
-#ifdef GROT
-               /*
-                * Code to drop INCOMPLETE entries.  Difficult to use as we
-                * may also need to change the insertion index.  Code turned
-                * off for 6.2, should be revisited later.
-                */
-               if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */
-                       memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
-                       ichdr_s->usedbytes -= tmp;
-                       ichdr_s->count -= 1;
-                       entry_d--;      /* to compensate for ++ in loop hdr */
-                       desti--;
-                       if ((start_s + i) < offset)
-                               result++;       /* insertion index adjustment */
-               } else {
-#endif /* GROT */
-                       ichdr_d->firstused -= tmp;
-                       /* both on-disk, don't endian flip twice */
-                       entry_d->hashval = entry_s->hashval;
-                       entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);
-                       entry_d->flags = entry_s->flags;
-                       ASSERT(be16_to_cpu(entry_d->nameidx) + tmp
-                                                       <= args->geo->blksize);
-                       memmove(xfs_attr3_leaf_name(leaf_d, desti),
-                               xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);
-                       ASSERT(be16_to_cpu(entry_s->nameidx) + tmp
-                                                       <= args->geo->blksize);
-                       memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);
-                       ichdr_s->usedbytes -= tmp;
-                       ichdr_d->usedbytes += tmp;
-                       ichdr_s->count -= 1;
-                       ichdr_d->count += 1;
-                       tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
-                                       + xfs_attr3_leaf_hdr_size(leaf_d);
-                       ASSERT(ichdr_d->firstused >= tmp);
-#ifdef GROT
-               }
-#endif /* GROT */
-       }
-
-       /*
-        * Zero out the entries we just copied.
-        */
-       if (start_s == ichdr_s->count) {
-               tmp = count * sizeof(xfs_attr_leaf_entry_t);
-               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
-               ASSERT(((char *)entry_s + tmp) <=
-                      ((char *)leaf_s + args->geo->blksize));
-               memset(entry_s, 0, tmp);
-       } else {
-               /*
-                * Move the remaining entries down to fill the hole,
-                * then zero the entries at the top.
-                */
-               tmp  = (ichdr_s->count - count) * sizeof(xfs_attr_leaf_entry_t);
-               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s + count];
-               entry_d = &xfs_attr3_leaf_entryp(leaf_s)[start_s];
-               memmove(entry_d, entry_s, tmp);
-
-               tmp = count * sizeof(xfs_attr_leaf_entry_t);
-               entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];
-               ASSERT(((char *)entry_s + tmp) <=
-                      ((char *)leaf_s + args->geo->blksize));
-               memset(entry_s, 0, tmp);
-       }
-
-       /*
-        * Fill in the freemap information
-        */
-       ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
-       ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
-       ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
-       ichdr_d->freemap[1].base = 0;
-       ichdr_d->freemap[2].base = 0;
-       ichdr_d->freemap[1].size = 0;
-       ichdr_d->freemap[2].size = 0;
-       ichdr_s->holes = 1;     /* leaf may not be compact */
-}
-
-/*
- * Pick up the last hashvalue from a leaf block.
- */
-xfs_dahash_t
-xfs_attr_leaf_lasthash(
-       struct xfs_buf  *bp,
-       int             *count)
-{
-       struct xfs_attr3_icleaf_hdr ichdr;
-       struct xfs_attr_leaf_entry *entries;
-
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
-       entries = xfs_attr3_leaf_entryp(bp->b_addr);
-       if (count)
-               *count = ichdr.count;
-       if (!ichdr.count)
-               return 0;
-       return be32_to_cpu(entries[ichdr.count - 1].hashval);
-}
-
-/*
- * Calculate the number of bytes used to store the indicated attribute
- * (whether local or remote; only calculate bytes in this block).
- */
-STATIC int
-xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
-{
-       struct xfs_attr_leaf_entry *entries;
-       xfs_attr_leaf_name_local_t *name_loc;
-       xfs_attr_leaf_name_remote_t *name_rmt;
-       int size;
-
-       entries = xfs_attr3_leaf_entryp(leaf);
-       if (entries[index].flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf, index);
-               size = xfs_attr_leaf_entsize_local(name_loc->namelen,
-                                                  be16_to_cpu(name_loc->valuelen));
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, index);
-               size = xfs_attr_leaf_entsize_remote(name_rmt->namelen);
-       }
-       return size;
-}
-
-/*
- * Calculate the number of bytes that would be required to store the new
- * attribute (whether local or remote; only calculate bytes in this block).
- * This routine decides as a side effect whether the attribute will be
- * a "local" or a "remote" attribute.
- */
-int
-xfs_attr_leaf_newentsize(
-       struct xfs_da_args      *args,
-       int                     *local)
-{
-       int                     size;
-
-       size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen);
-       if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) {
-               if (local)
-                       *local = 1;
-               return size;
-       }
-       if (local)
-               *local = 0;
-       return xfs_attr_leaf_entsize_remote(args->namelen);
-}
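
/*
 * Illustrative sketch (not from the diff): the local-versus-remote decision
 * that xfs_attr_leaf_newentsize() makes.  If the name+value pair fits under
 * a per-block-size cap it is stored "local" in the leaf; otherwise only the
 * name is stored there and the value lives in remote blocks.  The size
 * formulas below are simplified stand-ins, not the on-disk XFS ones.
 */
#include <stdio.h>

struct new_ent {
        int size;       /* bytes consumed inside the leaf block */
        int local;      /* 1 if the value is stored in the leaf itself */
};

static struct new_ent newentsize(int namelen, int valuelen, int blksize)
{
        struct new_ent ent;
        int local_size = 4 + namelen + valuelen;        /* small fixed header */
        int local_max = blksize / 2;                    /* illustrative cap */

        if (local_size < local_max) {
                ent.size = local_size;
                ent.local = 1;
        } else {
                ent.size = 8 + namelen; /* name plus remote-value pointer */
                ent.local = 0;
        }
        return ent;
}

int main(void)
{
        struct new_ent a = newentsize(8, 100, 4096);    /* small value: local */
        struct new_ent b = newentsize(8, 60000, 4096);  /* large value: remote */

        printf("a: %d bytes, local=%d\n", a.size, a.local);
        printf("b: %d bytes, local=%d\n", b.size, b.local);
        return 0;
}
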
-
-
-/*========================================================================
- * Manage the INCOMPLETE flag in a leaf entry
- *========================================================================*/
-
-/*
- * Clear the INCOMPLETE flag on an entry in a leaf block.
- */
-int
-xfs_attr3_leaf_clearflag(
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       struct xfs_buf          *bp;
-       int                     error;
-#ifdef DEBUG
-       struct xfs_attr3_icleaf_hdr ichdr;
-       xfs_attr_leaf_name_local_t *name_loc;
-       int namelen;
-       char *name;
-#endif /* DEBUG */
-
-       trace_xfs_attr_leaf_clearflag(args);
-       /*
-        * Set up the operation.
-        */
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
-       if (error)
-               return error;
-
-       leaf = bp->b_addr;
-       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
-       ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
-
-#ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       ASSERT(args->index < ichdr.count);
-       ASSERT(args->index >= 0);
-
-       if (entry->flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
-               namelen = name_loc->namelen;
-               name = (char *)name_loc->nameval;
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
-               namelen = name_rmt->namelen;
-               name = (char *)name_rmt->name;
-       }
-       ASSERT(be32_to_cpu(entry->hashval) == args->hashval);
-       ASSERT(namelen == args->namelen);
-       ASSERT(memcmp(name, args->name, namelen) == 0);
-#endif /* DEBUG */
-
-       entry->flags &= ~XFS_ATTR_INCOMPLETE;
-       xfs_trans_log_buf(args->trans, bp,
-                        XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
-
-       if (args->rmtblkno) {
-               ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
-               name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-               name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
-               xfs_trans_log_buf(args->trans, bp,
-                        XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
-       }
-
-       /*
-        * Commit the flag value change and start the next trans in series.
-        */
-       return xfs_trans_roll(&args->trans, args->dp);
-}
-
-/*
- * Set the INCOMPLETE flag on an entry in a leaf block.
- */
-int
-xfs_attr3_leaf_setflag(
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf;
-       struct xfs_attr_leaf_entry *entry;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       struct xfs_buf          *bp;
-       int error;
-#ifdef DEBUG
-       struct xfs_attr3_icleaf_hdr ichdr;
-#endif
-
-       trace_xfs_attr_leaf_setflag(args);
-
-       /*
-        * Set up the operation.
-        */
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
-       if (error)
-               return error;
-
-       leaf = bp->b_addr;
-#ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-       ASSERT(args->index < ichdr.count);
-       ASSERT(args->index >= 0);
-#endif
-       entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
-
-       ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
-       entry->flags |= XFS_ATTR_INCOMPLETE;
-       xfs_trans_log_buf(args->trans, bp,
-                       XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
-       if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
-               name_rmt->valueblk = 0;
-               name_rmt->valuelen = 0;
-               xfs_trans_log_buf(args->trans, bp,
-                        XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
-       }
-
-       /*
-        * Commit the flag value change and start the next trans in series.
-        */
-       return xfs_trans_roll(&args->trans, args->dp);
-}
-
-/*
- * In a single transaction, clear the INCOMPLETE flag on the leaf entry
- * given by args->blkno/index and set the INCOMPLETE flag on the leaf
- * entry given by args->blkno2/index2.
- *
- * Note that they could be in different blocks, or in the same block.
- */
-int
-xfs_attr3_leaf_flipflags(
-       struct xfs_da_args      *args)
-{
-       struct xfs_attr_leafblock *leaf1;
-       struct xfs_attr_leafblock *leaf2;
-       struct xfs_attr_leaf_entry *entry1;
-       struct xfs_attr_leaf_entry *entry2;
-       struct xfs_attr_leaf_name_remote *name_rmt;
-       struct xfs_buf          *bp1;
-       struct xfs_buf          *bp2;
-       int error;
-#ifdef DEBUG
-       struct xfs_attr3_icleaf_hdr ichdr1;
-       struct xfs_attr3_icleaf_hdr ichdr2;
-       xfs_attr_leaf_name_local_t *name_loc;
-       int namelen1, namelen2;
-       char *name1, *name2;
-#endif /* DEBUG */
-
-       trace_xfs_attr_leaf_flipflags(args);
-
-       /*
-        * Read the block containing the "old" attr
-        */
-       error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
-       if (error)
-               return error;
-
-       /*
-        * Read the block containing the "new" attr, if it is different
-        */
-       if (args->blkno2 != args->blkno) {
-               error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
-                                          -1, &bp2);
-               if (error)
-                       return error;
-       } else {
-               bp2 = bp1;
-       }
-
-       leaf1 = bp1->b_addr;
-       entry1 = &xfs_attr3_leaf_entryp(leaf1)[args->index];
-
-       leaf2 = bp2->b_addr;
-       entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
-
-#ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-       ASSERT(args->index < ichdr1.count);
-       ASSERT(args->index >= 0);
-
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
-       ASSERT(args->index2 < ichdr2.count);
-       ASSERT(args->index2 >= 0);
-
-       if (entry1->flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf1, args->index);
-               namelen1 = name_loc->namelen;
-               name1 = (char *)name_loc->nameval;
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
-               namelen1 = name_rmt->namelen;
-               name1 = (char *)name_rmt->name;
-       }
-       if (entry2->flags & XFS_ATTR_LOCAL) {
-               name_loc = xfs_attr3_leaf_name_local(leaf2, args->index2);
-               namelen2 = name_loc->namelen;
-               name2 = (char *)name_loc->nameval;
-       } else {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
-               namelen2 = name_rmt->namelen;
-               name2 = (char *)name_rmt->name;
-       }
-       ASSERT(be32_to_cpu(entry1->hashval) == be32_to_cpu(entry2->hashval));
-       ASSERT(namelen1 == namelen2);
-       ASSERT(memcmp(name1, name2, namelen1) == 0);
-#endif /* DEBUG */
-
-       ASSERT(entry1->flags & XFS_ATTR_INCOMPLETE);
-       ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0);
-
-       entry1->flags &= ~XFS_ATTR_INCOMPLETE;
-       xfs_trans_log_buf(args->trans, bp1,
-                         XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
-       if (args->rmtblkno) {
-               ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
-               name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
-               name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-               name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
-               xfs_trans_log_buf(args->trans, bp1,
-                        XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
-       }
-
-       entry2->flags |= XFS_ATTR_INCOMPLETE;
-       xfs_trans_log_buf(args->trans, bp2,
-                         XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
-       if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
-               name_rmt = xfs_attr3_leaf_name_remote(leaf2, args->index2);
-               name_rmt->valueblk = 0;
-               name_rmt->valuelen = 0;
-               xfs_trans_log_buf(args->trans, bp2,
-                        XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt)));
-       }
-
-       /*
-        * Commit the flag value change and start the next trans in series.
-        */
-       error = xfs_trans_roll(&args->trans, args->dp);
-
-       return error;
-}
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
deleted file mode 100644 (file)
index a8bbc56..0000000
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
-#include "xfs_error.h"
-
-#define ATTR_RMTVALUE_MAPSIZE  1       /* # of map entries at once */
-
-/*
- * Each contiguous block has a header, so it is not just a simple attribute
- * length to FSB conversion.
- */
-int
-xfs_attr3_rmt_blocks(
-       struct xfs_mount *mp,
-       int             attrlen)
-{
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-               return (attrlen + buflen - 1) / buflen;
-       }
-       return XFS_B_TO_FSB(mp, attrlen);
-}
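
/*
 * Illustrative sketch (not from the diff): the block-count calculation in
 * xfs_attr3_rmt_blocks() above.  With CRC-enabled (v5) formats each remote
 * block carries a header, so the usable payload per block shrinks and the
 * count becomes a ceiling division over that smaller payload.  The header
 * size below (56 bytes) is just a stand-in for the example.
 */
#include <stdio.h>

static int rmt_blocks(int attrlen, int blocksize, int has_crc_header)
{
        int hdr = has_crc_header ? 56 : 0;      /* illustrative header size */
        int usable = blocksize - hdr;

        return (attrlen + usable - 1) / usable; /* round up */
}

int main(void)
{
        /* a 65536-byte value in 4096-byte blocks */
        printf("no header: %d blocks\n", rmt_blocks(65536, 4096, 0));
        printf("with header: %d blocks\n", rmt_blocks(65536, 4096, 1));
        return 0;
}
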
-
-/*
- * Checking of the remote attribute header is split into two parts. The verifier
- * does CRC, location and bounds checking, the unpacking function checks the
- * attribute parameters and owner.
- */
-static bool
-xfs_attr3_rmt_hdr_ok(
-       void                    *ptr,
-       xfs_ino_t               ino,
-       uint32_t                offset,
-       uint32_t                size,
-       xfs_daddr_t             bno)
-{
-       struct xfs_attr3_rmt_hdr *rmt = ptr;
-
-       if (bno != be64_to_cpu(rmt->rm_blkno))
-               return false;
-       if (offset != be32_to_cpu(rmt->rm_offset))
-               return false;
-       if (size != be32_to_cpu(rmt->rm_bytes))
-               return false;
-       if (ino != be64_to_cpu(rmt->rm_owner))
-               return false;
-
-       /* ok */
-       return true;
-}
-
-static bool
-xfs_attr3_rmt_verify(
-       struct xfs_mount        *mp,
-       void                    *ptr,
-       int                     fsbsize,
-       xfs_daddr_t             bno)
-{
-       struct xfs_attr3_rmt_hdr *rmt = ptr;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
-       if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
-               return false;
-       if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
-               return false;
-       if (be64_to_cpu(rmt->rm_blkno) != bno)
-               return false;
-       if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
-               return false;
-       if (be32_to_cpu(rmt->rm_offset) +
-                               be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
-               return false;
-       if (rmt->rm_owner == 0)
-               return false;
-
-       return true;
-}
-
-static void
-xfs_attr3_rmt_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       char            *ptr;
-       int             len;
-       xfs_daddr_t     bno;
-       int             blksize = mp->m_attr_geo->blksize;
-
-       /* no verification of non-crc buffers */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       ptr = bp->b_addr;
-       bno = bp->b_bn;
-       len = BBTOB(bp->b_length);
-       ASSERT(len >= blksize);
-
-       while (len > 0) {
-               if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
-                       xfs_buf_ioerror(bp, EFSBADCRC);
-                       break;
-               }
-               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
-                       xfs_buf_ioerror(bp, EFSCORRUPTED);
-                       break;
-               }
-               len -= blksize;
-               ptr += blksize;
-               bno += BTOBB(blksize);
-       }
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-       else
-               ASSERT(len == 0);
-}
-
-static void
-xfs_attr3_rmt_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       char            *ptr;
-       int             len;
-       xfs_daddr_t     bno;
-       int             blksize = mp->m_attr_geo->blksize;
-
-       /* no verification of non-crc buffers */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       ptr = bp->b_addr;
-       bno = bp->b_bn;
-       len = BBTOB(bp->b_length);
-       ASSERT(len >= blksize);
-
-       while (len > 0) {
-               if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
-                       xfs_buf_ioerror(bp, EFSCORRUPTED);
-                       xfs_verifier_error(bp);
-                       return;
-               }
-               if (bip) {
-                       struct xfs_attr3_rmt_hdr *rmt;
-
-                       rmt = (struct xfs_attr3_rmt_hdr *)ptr;
-                       rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-               }
-               xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
-
-               len -= blksize;
-               ptr += blksize;
-               bno += BTOBB(blksize);
-       }
-       ASSERT(len == 0);
-}
-
-const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
-       .verify_read = xfs_attr3_rmt_read_verify,
-       .verify_write = xfs_attr3_rmt_write_verify,
-};
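
/*
 * Illustrative sketch (not from the diff): the stride pattern used by the
 * remote-attribute verifiers above.  A buffer may cover several filesystem
 * blocks, so verification walks it one block at a time, checking each
 * block's header and checksum independently.  The toy checksum and names
 * here are invented; the real code uses CRC32c at a fixed offset.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define BLKSIZE 512

/* toy per-block checksum stored in the first 4 bytes of the block */
static unsigned int toy_sum(const unsigned char *blk)
{
        unsigned int sum = 0;

        for (size_t i = 4; i < BLKSIZE; i++)
                sum = sum * 31 + blk[i];
        return sum;
}

static bool verify_buffer(const unsigned char *buf, size_t len)
{
        while (len >= BLKSIZE) {
                unsigned int stored;

                memcpy(&stored, buf, sizeof(stored));
                if (stored != toy_sum(buf))
                        return false;   /* stop at the first bad block */
                buf += BLKSIZE;
                len -= BLKSIZE;
        }
        return len == 0;
}

int main(void)
{
        unsigned char buf[2 * BLKSIZE] = { 0 };

        /* fill each block's payload, seal it, then verify in one pass */
        memset(buf + 4, 7, BLKSIZE - 4);
        memset(buf + BLKSIZE + 4, 9, BLKSIZE - 4);
        for (int b = 0; b < 2; b++) {
                unsigned int sum = toy_sum(buf + b * BLKSIZE);
                memcpy(buf + b * BLKSIZE, &sum, sizeof(sum));
        }
        printf("verify: %d\n", verify_buffer(buf, sizeof(buf)));
        return 0;
}
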
-
-STATIC int
-xfs_attr3_rmt_hdr_set(
-       struct xfs_mount        *mp,
-       void                    *ptr,
-       xfs_ino_t               ino,
-       uint32_t                offset,
-       uint32_t                size,
-       xfs_daddr_t             bno)
-{
-       struct xfs_attr3_rmt_hdr *rmt = ptr;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return 0;
-
-       rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
-       rmt->rm_offset = cpu_to_be32(offset);
-       rmt->rm_bytes = cpu_to_be32(size);
-       uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
-       rmt->rm_owner = cpu_to_be64(ino);
-       rmt->rm_blkno = cpu_to_be64(bno);
-
-       return sizeof(struct xfs_attr3_rmt_hdr);
-}
-
-/*
- * Helper functions to copy attribute data in and out of the on-disk extents.
- */
-STATIC int
-xfs_attr_rmtval_copyout(
-       struct xfs_mount *mp,
-       struct xfs_buf  *bp,
-       xfs_ino_t       ino,
-       int             *offset,
-       int             *valuelen,
-       __uint8_t       **dst)
-{
-       char            *src = bp->b_addr;
-       xfs_daddr_t     bno = bp->b_bn;
-       int             len = BBTOB(bp->b_length);
-       int             blksize = mp->m_attr_geo->blksize;
-
-       ASSERT(len >= blksize);
-
-       while (len > 0 && *valuelen > 0) {
-               int hdr_size = 0;
-               int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
-
-               byte_cnt = min(*valuelen, byte_cnt);
-
-               if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                       if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
-                                                 byte_cnt, bno)) {
-                               xfs_alert(mp,
-"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
-                                       bno, *offset, byte_cnt, ino);
-                               return EFSCORRUPTED;
-                       }
-                       hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
-               }
-
-               memcpy(*dst, src + hdr_size, byte_cnt);
-
-               /* roll buffer forwards */
-               len -= blksize;
-               src += blksize;
-               bno += BTOBB(blksize);
-
-               /* roll attribute data forwards */
-               *valuelen -= byte_cnt;
-               *dst += byte_cnt;
-               *offset += byte_cnt;
-       }
-       return 0;
-}
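
/*
 * Illustrative sketch (not from the diff): the roll-forward copy loop that
 * xfs_attr_rmtval_copyout()/copyin() use.  Each block contributes at most
 * (blksize - header) payload bytes, and the cursors advance together:
 * position in the buffer, position in the value, and bytes still to copy.
 * The 16-byte header size is a stand-in for the example.
 */
#include <stdio.h>
#include <string.h>

#define BLKSIZE 64
#define HDRSIZE 16

/* copy up to *valuelen bytes of payload out of a multi-block buffer */
static void copyout(const unsigned char *buf, int buflen,
                    unsigned char **dst, int *valuelen)
{
        while (buflen > 0 && *valuelen > 0) {
                int byte_cnt = BLKSIZE - HDRSIZE;

                if (byte_cnt > *valuelen)
                        byte_cnt = *valuelen;

                memcpy(*dst, buf + HDRSIZE, byte_cnt);

                /* roll the buffer forwards one block */
                buf += BLKSIZE;
                buflen -= BLKSIZE;

                /* roll the attribute value forwards */
                *dst += byte_cnt;
                *valuelen -= byte_cnt;
        }
}

int main(void)
{
        unsigned char buf[2 * BLKSIZE], value[100], *dst = value;
        int remaining = 84;     /* total value length for this example */

        /* fill each block's payload with a recognisable byte */
        memset(buf, 0, sizeof(buf));
        memset(buf + HDRSIZE, 'A', BLKSIZE - HDRSIZE);
        memset(buf + BLKSIZE + HDRSIZE, 'B', BLKSIZE - HDRSIZE);

        copyout(buf, sizeof(buf), &dst, &remaining);
        printf("copied %ld bytes, first %c, last %c\n",
               (long)(dst - value), value[0], value[dst - value - 1]);
        return 0;
}
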
-
-STATIC void
-xfs_attr_rmtval_copyin(
-       struct xfs_mount *mp,
-       struct xfs_buf  *bp,
-       xfs_ino_t       ino,
-       int             *offset,
-       int             *valuelen,
-       __uint8_t       **src)
-{
-       char            *dst = bp->b_addr;
-       xfs_daddr_t     bno = bp->b_bn;
-       int             len = BBTOB(bp->b_length);
-       int             blksize = mp->m_attr_geo->blksize;
-
-       ASSERT(len >= blksize);
-
-       while (len > 0 && *valuelen > 0) {
-               int hdr_size;
-               int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
-
-               byte_cnt = min(*valuelen, byte_cnt);
-               hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
-                                                byte_cnt, bno);
-
-               memcpy(dst + hdr_size, *src, byte_cnt);
-
-               /*
-                * If this is the last block, zero the remainder of it.
-                * Check that we are actually the last block, too.
-                */
-               if (byte_cnt + hdr_size < blksize) {
-                       ASSERT(*valuelen - byte_cnt == 0);
-                       ASSERT(len == blksize);
-                       memset(dst + hdr_size + byte_cnt, 0,
-                                       blksize - hdr_size - byte_cnt);
-               }
-
-               /* roll buffer forwards */
-               len -= blksize;
-               dst += blksize;
-               bno += BTOBB(blksize);
-
-               /* roll attribute data forwards */
-               *valuelen -= byte_cnt;
-               *src += byte_cnt;
-               *offset += byte_cnt;
-       }
-}
-
-/*
- * Read the value associated with an attribute from the out-of-line buffer
- * that we stored it in.
- */
-int
-xfs_attr_rmtval_get(
-       struct xfs_da_args      *args)
-{
-       struct xfs_bmbt_irec    map[ATTR_RMTVALUE_MAPSIZE];
-       struct xfs_mount        *mp = args->dp->i_mount;
-       struct xfs_buf          *bp;
-       xfs_dablk_t             lblkno = args->rmtblkno;
-       __uint8_t               *dst = args->value;
-       int                     valuelen;
-       int                     nmap;
-       int                     error;
-       int                     blkcnt = args->rmtblkcnt;
-       int                     i;
-       int                     offset = 0;
-
-       trace_xfs_attr_rmtval_get(args);
-
-       ASSERT(!(args->flags & ATTR_KERNOVAL));
-       ASSERT(args->rmtvaluelen == args->valuelen);
-
-       valuelen = args->rmtvaluelen;
-       while (valuelen > 0) {
-               nmap = ATTR_RMTVALUE_MAPSIZE;
-               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                      blkcnt, map, &nmap,
-                                      XFS_BMAPI_ATTRFORK);
-               if (error)
-                       return error;
-               ASSERT(nmap >= 1);
-
-               for (i = 0; (i < nmap) && (valuelen > 0); i++) {
-                       xfs_daddr_t     dblkno;
-                       int             dblkcnt;
-
-                       ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
-                              (map[i].br_startblock != HOLESTARTBLOCK));
-                       dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
-                       dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-                       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                                                  dblkno, dblkcnt, 0, &bp,
-                                                  &xfs_attr3_rmt_buf_ops);
-                       if (error)
-                               return error;
-
-                       error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
-                                                       &offset, &valuelen,
-                                                       &dst);
-                       xfs_buf_relse(bp);
-                       if (error)
-                               return error;
-
-                       /* roll attribute extent map forwards */
-                       lblkno += map[i].br_blockcount;
-                       blkcnt -= map[i].br_blockcount;
-               }
-       }
-       ASSERT(valuelen == 0);
-       return 0;
-}
-
-/*
- * Write the value associated with an attribute into the out-of-line buffer
- * that we have defined for it.
- */
-int
-xfs_attr_rmtval_set(
-       struct xfs_da_args      *args)
-{
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_bmbt_irec    map;
-       xfs_dablk_t             lblkno;
-       xfs_fileoff_t           lfileoff = 0;
-       __uint8_t               *src = args->value;
-       int                     blkcnt;
-       int                     valuelen;
-       int                     nmap;
-       int                     error;
-       int                     offset = 0;
-
-       trace_xfs_attr_rmtval_set(args);
-
-       /*
-        * Find a "hole" in the attribute address space large enough for
-                * us to drop the new attribute's value into. Because CRC enabled
-        * attributes have headers, we can't just do a straight byte to FSB
-        * conversion and have to take the header space into account.
-        */
-       blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
-       error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
-                                                  XFS_ATTR_FORK);
-       if (error)
-               return error;
-
-       args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
-       args->rmtblkcnt = blkcnt;
-
-       /*
-        * Roll through the "value", allocating blocks on disk as required.
-        */
-       while (blkcnt > 0) {
-               int     committed;
-
-               /*
-                * Allocate a single extent, up to the size of the value.
-                */
-               xfs_bmap_init(args->flist, args->firstblock);
-               nmap = 1;
-               error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
-                                 blkcnt,
-                                 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                 args->firstblock, args->total, &map, &nmap,
-                                 args->flist);
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       return error;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, dp, 0);
-
-               ASSERT(nmap == 1);
-               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-                      (map.br_startblock != HOLESTARTBLOCK));
-               lblkno += map.br_blockcount;
-               blkcnt -= map.br_blockcount;
-
-               /*
-                * Start the next trans in the chain.
-                */
-               error = xfs_trans_roll(&args->trans, dp);
-               if (error)
-                       return error;
-       }
-
-       /*
-        * Roll through the "value", copying the attribute value to the
-        * already-allocated blocks.  Blocks are written synchronously
-        * so that we can know they are all on disk before we turn off
-        * the INCOMPLETE flag.
-        */
-       lblkno = args->rmtblkno;
-       blkcnt = args->rmtblkcnt;
-       valuelen = args->rmtvaluelen;
-       while (valuelen > 0) {
-               struct xfs_buf  *bp;
-               xfs_daddr_t     dblkno;
-               int             dblkcnt;
-
-               ASSERT(blkcnt > 0);
-
-               xfs_bmap_init(args->flist, args->firstblock);
-               nmap = 1;
-               error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
-                                      blkcnt, &map, &nmap,
-                                      XFS_BMAPI_ATTRFORK);
-               if (error)
-                       return error;
-               ASSERT(nmap == 1);
-               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-                      (map.br_startblock != HOLESTARTBLOCK));
-
-               dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-               dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
-
-               bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
-               if (!bp)
-                       return ENOMEM;
-               bp->b_ops = &xfs_attr3_rmt_buf_ops;
-
-               xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
-                                      &valuelen, &src);
-
-               error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
-               xfs_buf_relse(bp);
-               if (error)
-                       return error;
-
-
-               /* roll attribute extent map forwards */
-               lblkno += map.br_blockcount;
-               blkcnt -= map.br_blockcount;
-       }
-       ASSERT(valuelen == 0);
-       return 0;
-}
-
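xfs_attr_rmtval_set() works in two passes: the first loop allocates extents until the whole value is covered, rolling the transaction after each allocation; the second loop re-maps those extents and writes the data synchronously so the INCOMPLETE flag can be cleared safely afterwards. A toy sketch of the first pass, where fake_alloc() is an invented stand-in for xfs_bmapi_write(), which may return a shorter extent than requested:

/*
 * Sketch of the allocation pass above: keep asking for extents until
 * the whole remote value is covered.  fake_alloc() is an invented
 * stand-in for xfs_bmapi_write(); a short return is why the kernel
 * loops and rolls the transaction each time.
 */
#include <stdio.h>

static int fake_alloc(int lblkno, int want)
{
	(void)lblkno;
	return want > 3 ? 3 : want;	/* pretend free runs max out at 3 blocks */
}

int main(void)
{
	int lblkno = 0;		/* args->rmtblkno */
	int blkcnt = 10;	/* what xfs_attr3_rmt_blocks() might return, say */

	while (blkcnt > 0) {
		int got = fake_alloc(lblkno, blkcnt);

		printf("allocated %d block(s) at logical block %d\n",
		       got, lblkno);
		lblkno += got;	/* roll forwards, as the kernel loop does */
		blkcnt -= got;
	}
	return 0;
}
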
-/*
- * Remove the value associated with an attribute by deleting the
- * out-of-line buffer that it is stored on.
- */
-int
-xfs_attr_rmtval_remove(
-       struct xfs_da_args      *args)
-{
-       struct xfs_mount        *mp = args->dp->i_mount;
-       xfs_dablk_t             lblkno;
-       int                     blkcnt;
-       int                     error;
-       int                     done;
-
-       trace_xfs_attr_rmtval_remove(args);
-
-       /*
-        * Roll through the "value", invalidating the attribute value's blocks.
-        */
-       lblkno = args->rmtblkno;
-       blkcnt = args->rmtblkcnt;
-       while (blkcnt > 0) {
-               struct xfs_bmbt_irec    map;
-               struct xfs_buf          *bp;
-               xfs_daddr_t             dblkno;
-               int                     dblkcnt;
-               int                     nmap;
-
-               /*
-                * Try to remember where we decided to put the value.
-                */
-               nmap = 1;
-               error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                      blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
-               if (error)
-                       return error;
-               ASSERT(nmap == 1);
-               ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-                      (map.br_startblock != HOLESTARTBLOCK));
-
-               dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-               dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
-
-               /*
-                * If the "remote" value is in the cache, remove it.
-                */
-               bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
-               if (bp) {
-                       xfs_buf_stale(bp);
-                       xfs_buf_relse(bp);
-                       bp = NULL;
-               }
-
-               lblkno += map.br_blockcount;
-               blkcnt -= map.br_blockcount;
-       }
-
-       /*
-        * Keep de-allocating extents until the remote-value region is gone.
-        */
-       lblkno = args->rmtblkno;
-       blkcnt = args->rmtblkcnt;
-       done = 0;
-       while (!done) {
-               int committed;
-
-               xfs_bmap_init(args->flist, args->firstblock);
-               error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
-                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                   1, args->firstblock, args->flist,
-                                   &done);
-               if (!error) {
-                       error = xfs_bmap_finish(&args->trans, args->flist,
-                                               &committed);
-               }
-               if (error) {
-                       ASSERT(committed);
-                       args->trans = NULL;
-                       xfs_bmap_cancel(args->flist);
-                       return error;
-               }
-
-               /*
-                * bmap_finish() may have committed the last trans and started
-                * a new one.  We need the inode to be in all transactions.
-                */
-               if (committed)
-                       xfs_trans_ijoin(args->trans, args->dp, 0);
-
-               /*
-                * Close out trans and start the next one in the chain.
-                */
-               error = xfs_trans_roll(&args->trans, args->dp);
-               if (error)
-                       return error;
-       }
-       return 0;
-}
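The remove path is likewise two loops: first invalidate any cached buffers covering the remote value, then call xfs_bunmapi() (one extent per rolled transaction) until it reports done. A toy model of that second loop, with fake_bunmapi() as an invented stand-in:

/*
 * Sketch of the deallocation loop above.  fake_bunmapi() is an
 * invented stand-in for xfs_bunmapi(); like the real call (limited
 * here to one extent per transaction), it may need several rolled
 * transactions before it reports "done".
 */
#include <stdbool.h>
#include <stdio.h>

static int extents_left = 4;	/* pretend the remote value spans 4 extents */

static void fake_bunmapi(int max_extents, bool *done)
{
	int freed = extents_left > max_extents ? max_extents : extents_left;

	extents_left -= freed;
	printf("unmapped %d extent(s), %d left\n", freed, extents_left);
	*done = (extents_left == 0);
}

int main(void)
{
	bool done = false;

	while (!done)
		fake_bunmapi(1, &done);		/* nexts == 1, as above */
	return 0;
}
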
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
deleted file mode 100644 (file)
index b44d631..0000000
+++ /dev/null
@@ -1,5609 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_dir2.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_trans_space.h"
-#include "xfs_buf_item.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_dinode.h"
-#include "xfs_filestream.h"
-
-
-kmem_zone_t            *xfs_bmap_free_item_zone;
-
-/*
- * Miscellaneous helper functions
- */
-
-/*
- * Compute and fill in the value of the maximum depth of a bmap btree
- * in this filesystem.  Done once, during mount.
- */
-void
-xfs_bmap_compute_maxlevels(
-       xfs_mount_t     *mp,            /* file system mount structure */
-       int             whichfork)      /* data or attr fork */
-{
-       int             level;          /* btree level */
-       uint            maxblocks;      /* max blocks at this level */
-       uint            maxleafents;    /* max leaf entries possible */
-       int             maxrootrecs;    /* max records in root block */
-       int             minleafrecs;    /* min records in leaf block */
-       int             minnoderecs;    /* min records in node block */
-       int             sz;             /* root block size */
-
-       /*
-        * The maximum number of extents in a file, hence the maximum
-        * number of leaf entries, is controlled by the type of di_nextents
-        * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
-        * (a signed 16-bit number, xfs_aextnum_t).
-        *
-        * Note that we can no longer assume that if we are in ATTR1 that
-        * the fork offset of all the inodes will be
-        * (xfs_default_attroffset(ip) >> 3) because we could have mounted
-        * with ATTR2 and then mounted back with ATTR1, keeping the
-        * di_forkoff's fixed but probably at various positions. Therefore,
-        * for both ATTR1 and ATTR2 we have to assume the worst case scenario
-        * of a minimum size available.
-        */
-       if (whichfork == XFS_DATA_FORK) {
-               maxleafents = MAXEXTNUM;
-               sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
-       } else {
-               maxleafents = MAXAEXTNUM;
-               sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
-       }
-       maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
-       minleafrecs = mp->m_bmap_dmnr[0];
-       minnoderecs = mp->m_bmap_dmnr[1];
-       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
-       for (level = 1; maxblocks > 1; level++) {
-               if (maxblocks <= maxrootrecs)
-                       maxblocks = 1;
-               else
-                       maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
-       }
-       mp->m_bm_maxlevels[whichfork] = level;
-}
-
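As a sanity check of the arithmetic above, here is a standalone version of the same loop. The record counts in main() are invented for illustration; the kernel takes them from xfs_bmdr_maxrecs() and mp->m_bmap_dmnr[]:

/*
 * Simplified, standalone model of the level computation above.
 * The record counts are illustrative only.
 */
#include <stdio.h>

static int bmap_max_levels(unsigned int maxleafents, unsigned int maxrootrecs,
			   unsigned int minleafrecs, unsigned int minnoderecs)
{
	unsigned int maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	int level;

	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;	/* remainder fits in the inode root */
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	return level;
}

int main(void)
{
	/* e.g. 2^31 - 1 leaf entries, 9 root records, 62/125 min records */
	printf("max bmap btree levels = %d\n",
	       bmap_max_levels(2147483647u, 9, 62, 125));
	return 0;
}
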
-STATIC int                             /* error */
-xfs_bmbt_lookup_eq(
-       struct xfs_btree_cur    *cur,
-       xfs_fileoff_t           off,
-       xfs_fsblock_t           bno,
-       xfs_filblks_t           len,
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.b.br_startoff = off;
-       cur->bc_rec.b.br_startblock = bno;
-       cur->bc_rec.b.br_blockcount = len;
-       return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-
-STATIC int                             /* error */
-xfs_bmbt_lookup_ge(
-       struct xfs_btree_cur    *cur,
-       xfs_fileoff_t           off,
-       xfs_fsblock_t           bno,
-       xfs_filblks_t           len,
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.b.br_startoff = off;
-       cur->bc_rec.b.br_startblock = bno;
-       cur->bc_rec.b.br_blockcount = len;
-       return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
-}
-
-/*
- * Check if the inode needs to be converted to btree format.
- */
-static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
-{
-       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-               XFS_IFORK_NEXTENTS(ip, whichfork) >
-                       XFS_IFORK_MAXEXT(ip, whichfork);
-}
-
-/*
- * Check if the inode should be converted to extent format.
- */
-static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
-{
-       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
-               XFS_IFORK_NEXTENTS(ip, whichfork) <=
-                       XFS_IFORK_MAXEXT(ip, whichfork);
-}
-
-/*
- * Update the record referred to by cur to the value given
- * by [off, bno, len, state].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-STATIC int
-xfs_bmbt_update(
-       struct xfs_btree_cur    *cur,
-       xfs_fileoff_t           off,
-       xfs_fsblock_t           bno,
-       xfs_filblks_t           len,
-       xfs_exntst_t            state)
-{
-       union xfs_btree_rec     rec;
-
-       xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
-       return xfs_btree_update(cur, &rec);
-}
-
-/*
- * Compute the worst-case number of indirect blocks that will be used
- * for ip's delayed extent of length "len".
- */
-STATIC xfs_filblks_t
-xfs_bmap_worst_indlen(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_filblks_t   len)            /* delayed extent length */
-{
-       int             level;          /* btree level number */
-       int             maxrecs;        /* maximum record count at this level */
-       xfs_mount_t     *mp;            /* mount structure */
-       xfs_filblks_t   rval;           /* return value */
-
-       mp = ip->i_mount;
-       maxrecs = mp->m_bmap_dmxr[0];
-       for (level = 0, rval = 0;
-            level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
-            level++) {
-               len += maxrecs - 1;
-               do_div(len, maxrecs);
-               rval += len;
-               if (len == 1)
-                       return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
-                               level - 1;
-               if (level == 0)
-                       maxrecs = mp->m_bmap_dmxr[1];
-       }
-       return rval;
-}
-
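The same estimate, extracted into a standalone function so the rounding can be checked in isolation. The per-block record counts and level limit are illustrative stand-ins for mp->m_bmap_dmxr[] and XFS_BM_MAXLEVELS():

/*
 * Standalone model of the worst-case indirect block estimate above.
 * leaf_maxrecs/node_maxrecs stand in for mp->m_bmap_dmxr[]; the sample
 * values are not taken from any real filesystem.
 */
#include <stdio.h>

static unsigned long long worst_indlen(unsigned long long len,
				       int leaf_maxrecs, int node_maxrecs,
				       int maxlevels)
{
	unsigned long long rval = 0;
	int maxrecs = leaf_maxrecs;
	int level;

	for (level = 0; level < maxlevels; level++) {
		len = (len + maxrecs - 1) / maxrecs;	/* blocks at this level */
		rval += len;
		if (len == 1)
			return rval + maxlevels - level - 1;
		if (level == 0)
			maxrecs = node_maxrecs;
	}
	return rval;
}

int main(void)
{
	/* a 1000000 block delayed extent, 125-record leaves, 250-record nodes */
	printf("worst case indirect blocks = %llu\n",
	       worst_indlen(1000000ULL, 125, 250, 5));
	return 0;
}
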
-/*
- * Calculate the default attribute fork offset for newly created inodes.
- */
-uint
-xfs_default_attroffset(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       uint                    offset;
-
-       if (mp->m_sb.sb_inodesize == 256) {
-               offset = XFS_LITINO(mp, ip->i_d.di_version) -
-                               XFS_BMDR_SPACE_CALC(MINABTPTRS);
-       } else {
-               offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
-       }
-
-       ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
-       return offset;
-}
-
-/*
- * Helper routine to reset inode di_forkoff field when switching
- * attribute fork from local to extent format - we reset it where
- * possible to make space available for inline data fork extents.
- */
-STATIC void
-xfs_bmap_forkoff_reset(
-       xfs_inode_t     *ip,
-       int             whichfork)
-{
-       if (whichfork == XFS_ATTR_FORK &&
-           ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
-           ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
-           ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
-               uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
-
-               if (dfl_forkoff > ip->i_d.di_forkoff)
-                       ip->i_d.di_forkoff = dfl_forkoff;
-       }
-}
-
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp,
-       int                     level)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-
-       if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-           block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-               return 0;
-
-       if (be16_to_cpu(block->bb_level) != level ||
-           be16_to_cpu(block->bb_numrecs) == 0 ||
-           be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return 0;
-
-       return 1;
-}
-
-#ifdef DEBUG
-STATIC struct xfs_buf *
-xfs_bmap_get_bp(
-       struct xfs_btree_cur    *cur,
-       xfs_fsblock_t           bno)
-{
-       struct xfs_log_item_desc *lidp;
-       int                     i;
-
-       if (!cur)
-               return NULL;
-
-       for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
-               if (!cur->bc_bufs[i])
-                       break;
-               if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
-                       return cur->bc_bufs[i];
-       }
-
-       /* Chase down all the log items to see if the bp is there */
-       list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
-               struct xfs_buf_log_item *bip;
-               bip = (struct xfs_buf_log_item *)lidp->lid_item;
-               if (bip->bli_item.li_type == XFS_LI_BUF &&
-                   XFS_BUF_ADDR(bip->bli_buf) == bno)
-                       return bip->bli_buf;
-       }
-
-       return NULL;
-}
-
-STATIC void
-xfs_check_block(
-       struct xfs_btree_block  *block,
-       xfs_mount_t             *mp,
-       int                     root,
-       short                   sz)
-{
-       int                     i, j, dmxr;
-       __be64                  *pp, *thispa;   /* pointer to block address */
-       xfs_bmbt_key_t          *prevp, *keyp;
-
-       ASSERT(be16_to_cpu(block->bb_level) > 0);
-
-       prevp = NULL;
-       for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
-               dmxr = mp->m_bmap_dmxr[0];
-               keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
-
-               if (prevp) {
-                       ASSERT(be64_to_cpu(prevp->br_startoff) <
-                              be64_to_cpu(keyp->br_startoff));
-               }
-               prevp = keyp;
-
-               /*
-                * Compare the block numbers to see if there are dups.
-                */
-               if (root)
-                       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
-               else
-                       pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
-
-               for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
-                       if (root)
-                               thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
-                       else
-                               thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
-                       if (*thispa == *pp) {
-                               xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
-                                       __func__, j, i,
-                                       (unsigned long long)be64_to_cpu(*thispa));
-                               panic("%s: ptrs are equal in node\n",
-                                       __func__);
-                       }
-               }
-       }
-}
-
-/*
- * Check that the extents for the inode ip are in the right order in all
- * btree leaves.
- */
-
-STATIC void
-xfs_bmap_check_leaf_extents(
-       xfs_btree_cur_t         *cur,   /* btree cursor or null */
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       int                     whichfork)      /* data or attr fork */
-{
-       struct xfs_btree_block  *block; /* current btree block */
-       xfs_fsblock_t           bno;    /* block # of "block" */
-       xfs_buf_t               *bp;    /* buffer for "block" */
-       int                     error;  /* error return value */
-       xfs_extnum_t            i=0, j; /* index into the extents list */
-       xfs_ifork_t             *ifp;   /* fork structure */
-       int                     level;  /* btree level, for checking */
-       xfs_mount_t             *mp;    /* file system mount structure */
-       __be64                  *pp;    /* pointer to block address */
-       xfs_bmbt_rec_t          *ep;    /* pointer to current extent */
-       xfs_bmbt_rec_t          last = {0, 0}; /* last extent in prev block */
-       xfs_bmbt_rec_t          *nextp; /* pointer to next extent */
-       int                     bp_release = 0;
-
-       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
-               return;
-       }
-
-       bno = NULLFSBLOCK;
-       mp = ip->i_mount;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       block = ifp->if_broot;
-       /*
-        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
-        */
-       level = be16_to_cpu(block->bb_level);
-       ASSERT(level > 0);
-       xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
-       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
-       bno = be64_to_cpu(*pp);
-
-       ASSERT(bno != NULLDFSBNO);
-       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
-       /*
-        * Go down the tree until leaf level is reached, following the first
-        * pointer (leftmost) at each level.
-        */
-       while (level-- > 0) {
-               /* See if buf is in cur first */
-               bp_release = 0;
-               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
-               if (!bp) {
-                       bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
-                                               XFS_BMAP_BTREE_REF,
-                                               &xfs_bmbt_buf_ops);
-                       if (error)
-                               goto error_norelse;
-               }
-               block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
-               if (level == 0)
-                       break;
-
-               /*
-                * Check this block for basic sanity (increasing keys and
-                * no duplicate blocks).
-                */
-
-               xfs_check_block(block, mp, 0, 0);
-               pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
-               bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
-               if (bp_release) {
-                       bp_release = 0;
-                       xfs_trans_brelse(NULL, bp);
-               }
-       }
-
-       /*
-        * Here with bp and block set to the leftmost leaf node in the tree.
-        */
-       i = 0;
-
-       /*
-        * Loop over all leaf nodes checking that all extents are in the right order.
-        */
-       for (;;) {
-               xfs_fsblock_t   nextbno;
-               xfs_extnum_t    num_recs;
-
-
-               num_recs = xfs_btree_get_numrecs(block);
-
-               /*
-                * Read-ahead the next leaf block, if any.
-                */
-
-               nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
-
-               /*
-                * Check all the extents to make sure they are OK.
-                * If we had a previous block, the last entry should
-                * conform with the first entry in this one.
-                */
-
-               ep = XFS_BMBT_REC_ADDR(mp, block, 1);
-               if (i) {
-                       ASSERT(xfs_bmbt_disk_get_startoff(&last) +
-                              xfs_bmbt_disk_get_blockcount(&last) <=
-                              xfs_bmbt_disk_get_startoff(ep));
-               }
-               for (j = 1; j < num_recs; j++) {
-                       nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
-                       ASSERT(xfs_bmbt_disk_get_startoff(ep) +
-                              xfs_bmbt_disk_get_blockcount(ep) <=
-                              xfs_bmbt_disk_get_startoff(nextp));
-                       ep = nextp;
-               }
-
-               last = *ep;
-               i += num_recs;
-               if (bp_release) {
-                       bp_release = 0;
-                       xfs_trans_brelse(NULL, bp);
-               }
-               bno = nextbno;
-               /*
-                * If we've reached the end, stop.
-                */
-               if (bno == NULLFSBLOCK)
-                       break;
-
-               bp_release = 0;
-               bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
-               if (!bp) {
-                       bp_release = 1;
-                       error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
-                                               XFS_BMAP_BTREE_REF,
-                                               &xfs_bmbt_buf_ops);
-                       if (error)
-                               goto error_norelse;
-               }
-               block = XFS_BUF_TO_BLOCK(bp);
-       }
-       if (bp_release) {
-               bp_release = 0;
-               xfs_trans_brelse(NULL, bp);
-       }
-       return;
-
-error0:
-       xfs_warn(mp, "%s: at error0", __func__);
-       if (bp_release)
-               xfs_trans_brelse(NULL, bp);
-error_norelse:
-       xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
-               __func__, i);
-       panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
-       return;
-}
-
-/*
- * Add bmap trace insert entries for all the contents of the extent records.
- */
-void
-xfs_bmap_trace_exlist(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_extnum_t    cnt,            /* count of entries in the list */
-       int             whichfork,      /* data or attr fork */
-       unsigned long   caller_ip)
-{
-       xfs_extnum_t    idx;            /* extent record index */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       int             state = 0;
-
-       if (whichfork == XFS_ATTR_FORK)
-               state |= BMAP_ATTRFORK;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
-       for (idx = 0; idx < cnt; idx++)
-               trace_xfs_extlist(ip, idx, whichfork, caller_ip);
-}
-
-/*
- * Validate that the bmbt_irecs being returned from bmapi are valid
- * given the caller's original parameters.  Specifically check the
- * ranges of the returned irecs to ensure that they only extend beyond
- * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
- */
-STATIC void
-xfs_bmap_validate_ret(
-       xfs_fileoff_t           bno,
-       xfs_filblks_t           len,
-       int                     flags,
-       xfs_bmbt_irec_t         *mval,
-       int                     nmap,
-       int                     ret_nmap)
-{
-       int                     i;              /* index to map values */
-
-       ASSERT(ret_nmap <= nmap);
-
-       for (i = 0; i < ret_nmap; i++) {
-               ASSERT(mval[i].br_blockcount > 0);
-               if (!(flags & XFS_BMAPI_ENTIRE)) {
-                       ASSERT(mval[i].br_startoff >= bno);
-                       ASSERT(mval[i].br_blockcount <= len);
-                       ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
-                              bno + len);
-               } else {
-                       ASSERT(mval[i].br_startoff < bno + len);
-                       ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
-                              bno);
-               }
-               ASSERT(i == 0 ||
-                      mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
-                      mval[i].br_startoff);
-               ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
-                      mval[i].br_startblock != HOLESTARTBLOCK);
-               ASSERT(mval[i].br_state == XFS_EXT_NORM ||
-                      mval[i].br_state == XFS_EXT_UNWRITTEN);
-       }
-}
-
-#else
-#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)                do { } while (0)
-#define        xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
-#endif /* DEBUG */
-
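For reference, a simplified userspace rendering of the DEBUG-only xfs_bmap_validate_ret() checks above: returned mappings must be non-empty, contiguous, and (unless XFS_BMAPI_ENTIRE was set) stay inside [bno, bno + len). The startblock and extent-state assertions are dropped to keep the sketch self-contained:

/*
 * Simplified stand-in for the DEBUG-only xfs_bmap_validate_ret()
 * checks; types are invented and only the range/contiguity asserts
 * are kept.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct irec {
	unsigned long long	startoff;
	unsigned long long	blockcount;
};

static void validate_ret(unsigned long long bno, unsigned long long len,
			 bool entire, const struct irec *mval, int ret_nmap)
{
	int i;

	for (i = 0; i < ret_nmap; i++) {
		assert(mval[i].blockcount > 0);
		if (!entire) {
			assert(mval[i].startoff >= bno);
			assert(mval[i].startoff + mval[i].blockcount <=
			       bno + len);
		}
		/* each mapping starts where the previous one ended */
		assert(i == 0 ||
		       mval[i - 1].startoff + mval[i - 1].blockcount ==
		       mval[i].startoff);
	}
}

int main(void)
{
	struct irec mval[] = { { 10, 4 }, { 14, 6 } };

	validate_ret(10, 10, false, mval, 2);
	printf("mappings look sane\n");
	return 0;
}
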
-/*
- * bmap free list manipulation functions
- */
-
-/*
- * Add the extent to the list of extents to be free at transaction end.
- * The list is maintained sorted (by block number).
- */
-void
-xfs_bmap_add_free(
-       xfs_fsblock_t           bno,            /* fs block number of extent */
-       xfs_filblks_t           len,            /* length of extent */
-       xfs_bmap_free_t         *flist,         /* list of extents */
-       xfs_mount_t             *mp)            /* mount point structure */
-{
-       xfs_bmap_free_item_t    *cur;           /* current (next) element */
-       xfs_bmap_free_item_t    *new;           /* new element */
-       xfs_bmap_free_item_t    *prev;          /* previous element */
-#ifdef DEBUG
-       xfs_agnumber_t          agno;
-       xfs_agblock_t           agbno;
-
-       ASSERT(bno != NULLFSBLOCK);
-       ASSERT(len > 0);
-       ASSERT(len <= MAXEXTLEN);
-       ASSERT(!isnullstartblock(bno));
-       agno = XFS_FSB_TO_AGNO(mp, bno);
-       agbno = XFS_FSB_TO_AGBNO(mp, bno);
-       ASSERT(agno < mp->m_sb.sb_agcount);
-       ASSERT(agbno < mp->m_sb.sb_agblocks);
-       ASSERT(len < mp->m_sb.sb_agblocks);
-       ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
-#endif
-       ASSERT(xfs_bmap_free_item_zone != NULL);
-       new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
-       new->xbfi_startblock = bno;
-       new->xbfi_blockcount = (xfs_extlen_t)len;
-       for (prev = NULL, cur = flist->xbf_first;
-            cur != NULL;
-            prev = cur, cur = cur->xbfi_next) {
-               if (cur->xbfi_startblock >= bno)
-                       break;
-       }
-       if (prev)
-               prev->xbfi_next = new;
-       else
-               flist->xbf_first = new;
-       new->xbfi_next = cur;
-       flist->xbf_count++;
-}
-
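xfs_bmap_add_free() is a plain sorted insert into a singly linked list keyed on startblock. A minimal standalone sketch of the same insert, with malloc() standing in for the kmem zone allocator:

/*
 * Minimal sketch of the sorted, singly linked free list insert above,
 * with malloc() standing in for the kmem zone allocator.
 */
#include <stdio.h>
#include <stdlib.h>

struct free_item {
	unsigned long long	startblock;
	unsigned int		blockcount;
	struct free_item	*next;
};

/* insert keeping the list sorted by startblock, as xfs_bmap_add_free() does */
static void add_free(struct free_item **head, unsigned long long bno,
		     unsigned int len)
{
	struct free_item *new = malloc(sizeof(*new));
	struct free_item *cur, *prev = NULL;

	if (!new)
		return;
	new->startblock = bno;
	new->blockcount = len;
	for (cur = *head; cur; prev = cur, cur = cur->next) {
		if (cur->startblock >= bno)
			break;
	}
	if (prev)
		prev->next = new;
	else
		*head = new;
	new->next = cur;
}

int main(void)
{
	struct free_item *head = NULL, *p;

	add_free(&head, 500, 8);
	add_free(&head, 100, 4);
	add_free(&head, 300, 16);
	for (p = head; p; p = p->next)
		printf("block %llu (%u blocks)\n", p->startblock, p->blockcount);
	return 0;
}
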
-/*
- * Remove the entry "free" from the free item list.  Prev points to the
- * previous entry, unless "free" is the head of the list.
- */
-void
-xfs_bmap_del_free(
-       xfs_bmap_free_t         *flist, /* free item list header */
-       xfs_bmap_free_item_t    *prev,  /* previous item on list, if any */
-       xfs_bmap_free_item_t    *free)  /* list item to be freed */
-{
-       if (prev)
-               prev->xbfi_next = free->xbfi_next;
-       else
-               flist->xbf_first = free->xbfi_next;
-       flist->xbf_count--;
-       kmem_zone_free(xfs_bmap_free_item_zone, free);
-}
-
-/*
- * Free up any items left in the list.
- */
-void
-xfs_bmap_cancel(
-       xfs_bmap_free_t         *flist) /* list of bmap_free_items */
-{
-       xfs_bmap_free_item_t    *free;  /* free list item */
-       xfs_bmap_free_item_t    *next;
-
-       if (flist->xbf_count == 0)
-               return;
-       ASSERT(flist->xbf_first != NULL);
-       for (free = flist->xbf_first; free; free = next) {
-               next = free->xbfi_next;
-               xfs_bmap_del_free(flist, NULL, free);
-       }
-       ASSERT(flist->xbf_count == 0);
-}
-
-/*
- * Inode fork format manipulation functions
- */
-
-/*
- * Transform a btree format file with only one leaf node, where the
- * extents list will fit in the inode, into an extents format file.
- * Since the file extents are already in-core, all we have to do is
- * give up the space for the btree root and pitch the leaf block.
- */
-STATIC int                             /* error */
-xfs_bmap_btree_to_extents(
-       xfs_trans_t             *tp,    /* transaction pointer */
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork)  /* data or attr fork */
-{
-       /* REFERENCED */
-       struct xfs_btree_block  *cblock;/* child btree block */
-       xfs_fsblock_t           cbno;   /* child block number */
-       xfs_buf_t               *cbp;   /* child block's buffer */
-       int                     error;  /* error return value */
-       xfs_ifork_t             *ifp;   /* inode fork data */
-       xfs_mount_t             *mp;    /* mount point structure */
-       __be64                  *pp;    /* ptr to block address */
-       struct xfs_btree_block  *rblock;/* root btree block */
-
-       mp = ip->i_mount;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
-       rblock = ifp->if_broot;
-       ASSERT(be16_to_cpu(rblock->bb_level) == 1);
-       ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
-       ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
-       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
-       cbno = be64_to_cpu(*pp);
-       *logflagsp = 0;
-#ifdef DEBUG
-       if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
-               return error;
-#endif
-       error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
-                               &xfs_bmbt_buf_ops);
-       if (error)
-               return error;
-       cblock = XFS_BUF_TO_BLOCK(cbp);
-       if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
-               return error;
-       xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
-       ip->i_d.di_nblocks--;
-       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-       xfs_trans_binval(tp, cbp);
-       if (cur->bc_bufs[0] == cbp)
-               cur->bc_bufs[0] = NULL;
-       xfs_iroot_realloc(ip, -1, whichfork);
-       ASSERT(ifp->if_broot == NULL);
-       ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
-       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-       *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
-       return 0;
-}
-
-/*
- * Convert an extents-format file into a btree-format file.
- * The new file will have a root block (in the inode) and a single child block.
- */
-STATIC int                                     /* error */
-xfs_bmap_extents_to_btree(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       xfs_fsblock_t           *firstblock,    /* first-block-allocated */
-       xfs_bmap_free_t         *flist,         /* blocks freed in xaction */
-       xfs_btree_cur_t         **curp,         /* cursor returned to caller */
-       int                     wasdel,         /* converting a delayed alloc */
-       int                     *logflagsp,     /* inode logging flags */
-       int                     whichfork)      /* data or attr fork */
-{
-       struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
-       xfs_buf_t               *abp;           /* buffer for ablock */
-       xfs_alloc_arg_t         args;           /* allocation arguments */
-       xfs_bmbt_rec_t          *arp;           /* child record pointer */
-       struct xfs_btree_block  *block;         /* btree root block */
-       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
-       xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
-       int                     error;          /* error return value */
-       xfs_extnum_t            i, cnt;         /* extent record index */
-       xfs_ifork_t             *ifp;           /* inode fork pointer */
-       xfs_bmbt_key_t          *kp;            /* root block key pointer */
-       xfs_mount_t             *mp;            /* mount structure */
-       xfs_extnum_t            nextents;       /* number of file extents */
-       xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
-
-       mp = ip->i_mount;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
-
-       /*
-        * Make space in the inode incore.
-        */
-       xfs_iroot_realloc(ip, 1, whichfork);
-       ifp->if_flags |= XFS_IFBROOT;
-
-       /*
-        * Fill in the root.
-        */
-       block = ifp->if_broot;
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
-                                XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
-                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
-       else
-               xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
-                                XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
-                                XFS_BTREE_LONG_PTRS);
-
-       /*
-        * Need a cursor.  Can't allocate until bb_level is filled in.
-        */
-       cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-       cur->bc_private.b.firstblock = *firstblock;
-       cur->bc_private.b.flist = flist;
-       cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
-       /*
-        * Convert to a btree with two levels, one record in root.
-        */
-       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
-       memset(&args, 0, sizeof(args));
-       args.tp = tp;
-       args.mp = mp;
-       args.firstblock = *firstblock;
-       if (*firstblock == NULLFSBLOCK) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
-       } else if (flist->xbf_low) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = *firstblock;
-       } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.fsbno = *firstblock;
-       }
-       args.minlen = args.maxlen = args.prod = 1;
-       args.wasdel = wasdel;
-       *logflagsp = 0;
-       if ((error = xfs_alloc_vextent(&args))) {
-               xfs_iroot_realloc(ip, -1, whichfork);
-               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-               return error;
-       }
-       /*
-        * Allocation can't fail, the space was reserved.
-        */
-       ASSERT(args.fsbno != NULLFSBLOCK);
-       ASSERT(*firstblock == NULLFSBLOCK ||
-              args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-              (flist->xbf_low &&
-               args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
-       *firstblock = cur->bc_private.b.firstblock = args.fsbno;
-       cur->bc_private.b.allocated++;
-       ip->i_d.di_nblocks++;
-       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
-       abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
-       /*
-        * Fill in the child block.
-        */
-       abp->b_ops = &xfs_bmbt_buf_ops;
-       ablock = XFS_BUF_TO_BLOCK(abp);
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               xfs_btree_init_block_int(mp, ablock, abp->b_bn,
-                               XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
-                               XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
-       else
-               xfs_btree_init_block_int(mp, ablock, abp->b_bn,
-                               XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
-                               XFS_BTREE_LONG_PTRS);
-
-       arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       for (cnt = i = 0; i < nextents; i++) {
-               ep = xfs_iext_get_ext(ifp, i);
-               if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
-                       arp->l0 = cpu_to_be64(ep->l0);
-                       arp->l1 = cpu_to_be64(ep->l1);
-                       arp++; cnt++;
-               }
-       }
-       ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
-       xfs_btree_set_numrecs(ablock, cnt);
-
-       /*
-        * Fill in the root key and pointer.
-        */
-       kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
-       arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
-       kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
-       pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
-                                               be16_to_cpu(block->bb_level)));
-       *pp = cpu_to_be64(args.fsbno);
-
-       /*
-        * Do all this logging at the end so that
-        * the root is at the right level.
-        */
-       xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
-       xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
-       ASSERT(*curp == NULL);
-       *curp = cur;
-       *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
-       return 0;
-}
-
-/*
- * Convert a local file to an extents file.
- * This code is out of bounds for data forks of regular files,
- * since the file data needs to get logged so things will stay consistent.
- * (The bmap-level manipulations are ok, though).
- */
-void
-xfs_bmap_local_to_extents_empty(
-       struct xfs_inode        *ip,
-       int                     whichfork)
-{
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
-
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
-       ASSERT(ifp->if_bytes == 0);
-       ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
-
-       xfs_bmap_forkoff_reset(ip, whichfork);
-       ifp->if_flags &= ~XFS_IFINLINE;
-       ifp->if_flags |= XFS_IFEXTENTS;
-       XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
-}
-
-
-STATIC int                             /* error */
-xfs_bmap_local_to_extents(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fsblock_t   *firstblock,    /* first block allocated in xaction */
-       xfs_extlen_t    total,          /* total blocks needed by transaction */
-       int             *logflagsp,     /* inode logging flags */
-       int             whichfork,
-       void            (*init_fn)(struct xfs_trans *tp,
-                                  struct xfs_buf *bp,
-                                  struct xfs_inode *ip,
-                                  struct xfs_ifork *ifp))
-{
-       int             error = 0;
-       int             flags;          /* logging flags returned */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_alloc_arg_t args;           /* allocation arguments */
-       xfs_buf_t       *bp;            /* buffer for extent block */
-       xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
-
-       /*
-        * We don't want to deal with the case of keeping inode data inline yet.
-        * So sending the data fork of a regular inode is invalid.
-        */
-       ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
-
-       if (!ifp->if_bytes) {
-               xfs_bmap_local_to_extents_empty(ip, whichfork);
-               flags = XFS_ILOG_CORE;
-               goto done;
-       }
-
-       flags = 0;
-       error = 0;
-       ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
-                                                               XFS_IFINLINE);
-       memset(&args, 0, sizeof(args));
-       args.tp = tp;
-       args.mp = ip->i_mount;
-       args.firstblock = *firstblock;
-       /*
-        * Allocate a block.  We know we need only one, since the
-        * file currently fits in an inode.
-        */
-       if (*firstblock == NULLFSBLOCK) {
-               args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
-               args.type = XFS_ALLOCTYPE_START_BNO;
-       } else {
-               args.fsbno = *firstblock;
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       }
-       args.total = total;
-       args.minlen = args.maxlen = args.prod = 1;
-       error = xfs_alloc_vextent(&args);
-       if (error)
-               goto done;
-
-       /* Can't fail, the space was reserved. */
-       ASSERT(args.fsbno != NULLFSBLOCK);
-       ASSERT(args.len == 1);
-       *firstblock = args.fsbno;
-       bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-
-       /* initialise the block and copy the data */
-       init_fn(tp, bp, ip, ifp);
-
-       /* account for the change in fork size and log everything */
-       xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
-       xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
-       xfs_bmap_local_to_extents_empty(ip, whichfork);
-       flags |= XFS_ILOG_CORE;
-
-       xfs_iext_add(ifp, 0, 1);
-       ep = xfs_iext_get_ext(ifp, 0);
-       xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
-       trace_xfs_bmap_post_update(ip, 0,
-                       whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
-                       _THIS_IP_);
-       XFS_IFORK_NEXT_SET(ip, whichfork, 1);
-       ip->i_d.di_nblocks = 1;
-       xfs_trans_mod_dquot_byino(tp, ip,
-               XFS_TRANS_DQ_BCOUNT, 1L);
-       flags |= xfs_ilog_fext(whichfork);
-
-done:
-       *logflagsp = flags;
-       return error;
-}
-
-/*
- * Called from xfs_bmap_add_attrfork to handle btree format files.
- */
-STATIC int                                     /* error */
-xfs_bmap_add_attrfork_btree(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
-       int                     *flags)         /* inode logging flags */
-{
-       xfs_btree_cur_t         *cur;           /* btree cursor */
-       int                     error;          /* error return value */
-       xfs_mount_t             *mp;            /* file system mount struct */
-       int                     stat;           /* newroot status */
-
-       mp = ip->i_mount;
-       if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
-               *flags |= XFS_ILOG_DBROOT;
-       else {
-               cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
-               cur->bc_private.b.flist = flist;
-               cur->bc_private.b.firstblock = *firstblock;
-               if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
-                       goto error0;
-               /* must be at least one entry */
-               XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
-               if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
-                       goto error0;
-               if (stat == 0) {
-                       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-                       return ENOSPC;
-               }
-               *firstblock = cur->bc_private.b.firstblock;
-               cur->bc_private.b.allocated = 0;
-               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       }
-       return 0;
-error0:
-       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Called from xfs_bmap_add_attrfork to handle extents format files.
- */
-STATIC int                                     /* error */
-xfs_bmap_add_attrfork_extents(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
-       int                     *flags)         /* inode logging flags */
-{
-       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
-       int                     error;          /* error return value */
-
-       if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
-               return 0;
-       cur = NULL;
-       error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
-               flags, XFS_DATA_FORK);
-       if (cur) {
-               cur->bc_private.b.allocated = 0;
-               xfs_btree_del_cursor(cur,
-                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       }
-       return error;
-}
-
-/*
- * Called from xfs_bmap_add_attrfork to handle local format files. Each
- * different data fork content type needs a different callout to do the
- * conversion. Some are basic and only require special block initialisation
- * callouts for the data formatting; others (directories) are so specialised they
- * handle everything themselves.
- *
- * XXX (dgc): investigate whether directory conversion can use the generic
- * formatting callout. It should be possible - it's just a very complex
- * formatter.
- */
-STATIC int                                     /* error */
-xfs_bmap_add_attrfork_local(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       xfs_fsblock_t           *firstblock,    /* first block allocated */
-       xfs_bmap_free_t         *flist,         /* blocks to free at commit */
-       int                     *flags)         /* inode logging flags */
-{
-       xfs_da_args_t           dargs;          /* args for dir/attr code */
-
-       if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
-               return 0;
-
-       if (S_ISDIR(ip->i_d.di_mode)) {
-               memset(&dargs, 0, sizeof(dargs));
-               dargs.geo = ip->i_mount->m_dir_geo;
-               dargs.dp = ip;
-               dargs.firstblock = firstblock;
-               dargs.flist = flist;
-               dargs.total = dargs.geo->fsbcount;
-               dargs.whichfork = XFS_DATA_FORK;
-               dargs.trans = tp;
-               return xfs_dir2_sf_to_block(&dargs);
-       }
-
-       if (S_ISLNK(ip->i_d.di_mode))
-               return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
-                                                flags, XFS_DATA_FORK,
-                                                xfs_symlink_local_to_remote);
-
-       /* should only be called for types that support local format data */
-       ASSERT(0);
-       return EFSCORRUPTED;
-}
-
-/*
- * Convert inode from non-attributed to attributed.
- * Must not be in a transaction, ip must not be locked.
- */
-int                                            /* error code */
-xfs_bmap_add_attrfork(
-       xfs_inode_t             *ip,            /* incore inode pointer */
-       int                     size,           /* space new attribute needs */
-       int                     rsvd)           /* xact may use reserved blks */
-{
-       xfs_fsblock_t           firstblock;     /* 1st block/ag allocated */
-       xfs_bmap_free_t         flist;          /* freed extent records */
-       xfs_mount_t             *mp;            /* mount structure */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       int                     blks;           /* space reservation */
-       int                     version = 1;    /* superblock attr version */
-       int                     committed;      /* xaction was committed */
-       int                     logflags;       /* logging flags */
-       int                     error;          /* error return value */
-       int                     cancel_flags = 0;
-
-       ASSERT(XFS_IFORK_Q(ip) == 0);
-
-       mp = ip->i_mount;
-       ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-       tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
-       blks = XFS_ADDAFORK_SPACE_RES(mp);
-       if (rsvd)
-               tp->t_flags |= XFS_TRANS_RESERVE;
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
-                       XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
-                       XFS_QMOPT_RES_REGBLKS);
-       if (error)
-               goto trans_cancel;
-       cancel_flags |= XFS_TRANS_ABORT;
-       if (XFS_IFORK_Q(ip))
-               goto trans_cancel;
-       if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
-               /*
-                * For inodes coming from pre-6.2 filesystems.
-                */
-               ASSERT(ip->i_d.di_aformat == 0);
-               ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-       }
-       ASSERT(ip->i_d.di_anextents == 0);
-
-       xfs_trans_ijoin(tp, ip, 0);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       switch (ip->i_d.di_format) {
-       case XFS_DINODE_FMT_DEV:
-               ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
-               break;
-       case XFS_DINODE_FMT_UUID:
-               ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
-               break;
-       case XFS_DINODE_FMT_LOCAL:
-       case XFS_DINODE_FMT_EXTENTS:
-       case XFS_DINODE_FMT_BTREE:
-               ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
-               if (!ip->i_d.di_forkoff)
-                       ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
-               else if (mp->m_flags & XFS_MOUNT_ATTR2)
-                       version = 2;
-               break;
-       default:
-               ASSERT(0);
-               error = EINVAL;
-               goto trans_cancel;
-       }
-
-       ASSERT(ip->i_afp == NULL);
-       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
-       ip->i_afp->if_flags = XFS_IFEXTENTS;
-       logflags = 0;
-       xfs_bmap_init(&flist, &firstblock);
-       switch (ip->i_d.di_format) {
-       case XFS_DINODE_FMT_LOCAL:
-               error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
-                       &logflags);
-               break;
-       case XFS_DINODE_FMT_EXTENTS:
-               error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
-                       &flist, &logflags);
-               break;
-       case XFS_DINODE_FMT_BTREE:
-               error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
-                       &logflags);
-               break;
-       default:
-               error = 0;
-               break;
-       }
-       if (logflags)
-               xfs_trans_log_inode(tp, ip, logflags);
-       if (error)
-               goto bmap_cancel;
-       if (!xfs_sb_version_hasattr(&mp->m_sb) ||
-          (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
-               __int64_t sbfields = 0;
-
-               spin_lock(&mp->m_sb_lock);
-               if (!xfs_sb_version_hasattr(&mp->m_sb)) {
-                       xfs_sb_version_addattr(&mp->m_sb);
-                       sbfields |= XFS_SB_VERSIONNUM;
-               }
-               if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
-                       xfs_sb_version_addattr2(&mp->m_sb);
-                       sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
-               }
-               if (sbfields) {
-                       spin_unlock(&mp->m_sb_lock);
-                       xfs_mod_sb(tp, sbfields);
-               } else
-                       spin_unlock(&mp->m_sb_lock);
-       }
-
-       error = xfs_bmap_finish(&tp, &flist, &committed);
-       if (error)
-               goto bmap_cancel;
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return error;
-
-bmap_cancel:
-       xfs_bmap_cancel(&flist);
-trans_cancel:
-       xfs_trans_cancel(tp, cancel_flags);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return error;
-}
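
The DEV and UUID branches above store the new attribute fork offset in units of 8 bytes, which is why the rounded structure size is shifted right by three. A minimal userspace sketch of that arithmetic, using stand-in byte sizes rather than the kernel's xfs_dev_t and uuid_t:

/*
 * Hypothetical example: di_forkoff-style arithmetic.  The fork offset is
 * kept in 8-byte units, so the payload size is rounded up to a multiple of
 * 8 and then shifted down by 3.  Sizes below are illustrative stand-ins.
 */
#include <stddef.h>
#include <stdio.h>

static unsigned int forkoff_from_bytes(size_t bytes)
{
	return (unsigned int)(((bytes + 7) & ~(size_t)7) >> 3);
}

int main(void)
{
	printf("4-byte dev field  -> forkoff %u\n", forkoff_from_bytes(4));
	printf("16-byte uuid field -> forkoff %u\n", forkoff_from_bytes(16));
	return 0;
}
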
-
-/*
- * Internal and external extent tree search functions.
- */
-
-/*
- * Read in the extents to if_extents.
- * All inode fields are set up by the caller; we just traverse the btree
- * and copy the records in. If the file system cannot contain unwritten
- * extents, the records are checked to ensure no "state" flags are set.
- */
-int                                    /* error */
-xfs_bmap_read_extents(
-       xfs_trans_t             *tp,    /* transaction pointer */
-       xfs_inode_t             *ip,    /* incore inode */
-       int                     whichfork) /* data or attr fork */
-{
-       struct xfs_btree_block  *block; /* current btree block */
-       xfs_fsblock_t           bno;    /* block # of "block" */
-       xfs_buf_t               *bp;    /* buffer for "block" */
-       int                     error;  /* error return value */
-       xfs_exntfmt_t           exntf;  /* XFS_EXTFMT_NOSTATE, if checking */
-       xfs_extnum_t            i, j;   /* index into the extents list */
-       xfs_ifork_t             *ifp;   /* fork structure */
-       int                     level;  /* btree level, for checking */
-       xfs_mount_t             *mp;    /* file system mount structure */
-       __be64                  *pp;    /* pointer to block address */
-       /* REFERENCED */
-       xfs_extnum_t            room;   /* number of entries there's room for */
-
-       bno = NULLFSBLOCK;
-       mp = ip->i_mount;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
-                                       XFS_EXTFMT_INODE(ip);
-       block = ifp->if_broot;
-       /*
-        * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
-        */
-       level = be16_to_cpu(block->bb_level);
-       ASSERT(level > 0);
-       pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
-       bno = be64_to_cpu(*pp);
-       ASSERT(bno != NULLDFSBNO);
-       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-       /*
-        * Go down the tree until leaf level is reached, following the first
-        * pointer (leftmost) at each level.
-        */
-       while (level-- > 0) {
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
-               if (error)
-                       return error;
-               block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
-               if (level == 0)
-                       break;
-               pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
-               bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
-               xfs_trans_brelse(tp, bp);
-       }
-       /*
-        * Here with bp and block set to the leftmost leaf node in the tree.
-        */
-       room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       i = 0;
-       /*
-        * Loop over all leaf nodes.  Copy information to the extent records.
-        */
-       for (;;) {
-               xfs_bmbt_rec_t  *frp;
-               xfs_fsblock_t   nextbno;
-               xfs_extnum_t    num_recs;
-               xfs_extnum_t    start;
-
-               num_recs = xfs_btree_get_numrecs(block);
-               if (unlikely(i + num_recs > room)) {
-                       ASSERT(i + num_recs <= room);
-                       xfs_warn(ip->i_mount,
-                               "corrupt dinode %Lu, (btree extents).",
-                               (unsigned long long) ip->i_ino);
-                       XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
-                               XFS_ERRLEVEL_LOW, ip->i_mount, block);
-                       goto error0;
-               }
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, 0),
-                       error0);
-               /*
-                * Read-ahead the next leaf block, if any.
-                */
-               nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
-               if (nextbno != NULLFSBLOCK)
-                       xfs_btree_reada_bufl(mp, nextbno, 1,
-                                            &xfs_bmbt_buf_ops);
-               /*
-                * Copy records into the extent records.
-                */
-               frp = XFS_BMBT_REC_ADDR(mp, block, 1);
-               start = i;
-               for (j = 0; j < num_recs; j++, i++, frp++) {
-                       xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
-                       trp->l0 = be64_to_cpu(frp->l0);
-                       trp->l1 = be64_to_cpu(frp->l1);
-               }
-               if (exntf == XFS_EXTFMT_NOSTATE) {
-                       /*
-                        * Check all attribute bmap btree records and
-                        * any "older" data bmap btree records for a
-                        * set bit in the "extent flag" position.
-                        */
-                       if (unlikely(xfs_check_nostate_extents(ifp,
-                                       start, num_recs))) {
-                               XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
-                                                XFS_ERRLEVEL_LOW,
-                                                ip->i_mount);
-                               goto error0;
-                       }
-               }
-               xfs_trans_brelse(tp, bp);
-               bno = nextbno;
-               /*
-                * If we've reached the end, stop.
-                */
-               if (bno == NULLFSBLOCK)
-                       break;
-               error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-                               XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
-               if (error)
-                       return error;
-               block = XFS_BUF_TO_BLOCK(bp);
-       }
-       ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
-       ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
-       XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
-       return 0;
-error0:
-       xfs_trans_brelse(tp, bp);
-       return EFSCORRUPTED;
-}
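
The traversal above has two phases: descend the bmap btree along the leftmost pointer at each level until the leaf level is reached, then walk the leaf chain through the right-sibling links and copy every record. A self-contained sketch of that pattern over invented toy types (not the XFS buffer or btree API):

/*
 * Illustrative only: descend via the leftmost child, then scan leaves via
 * their right-sibling links, copying records into a flat array.
 */
#include <stddef.h>
#include <stdint.h>

struct toy_node {
	int		 level;		/* 0 == leaf */
	int		 numrecs;
	uint64_t	 recs[8];	/* records held by a leaf */
	struct toy_node	*children[8];	/* child pointers in interior nodes */
	struct toy_node	*rightsib;	/* next leaf at the same level */
};

size_t read_all_records(struct toy_node *root, uint64_t *out, size_t max)
{
	struct toy_node	*node = root;
	size_t		 n = 0;

	/* go down the tree, always following the leftmost pointer */
	while (node && node->level > 0)
		node = node->children[0];

	/* copy records from each leaf, then hop to its right sibling */
	for (; node; node = node->rightsib)
		for (int j = 0; j < node->numrecs && n < max; j++)
			out[n++] = node->recs[j];

	return n;
}
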
-
-
-/*
- * Search the extent records for the entry containing block bno.
- * If bno lies in a hole, point to the next entry.  If bno lies
- * past eof, *eofp will be set, and *prevp will contain the last
- * entry (null if none).  Else, *lastxp will be set to the index
- * of the found entry; *gotp will contain the entry.
- */
-STATIC xfs_bmbt_rec_host_t *           /* pointer to found extent entry */
-xfs_bmap_search_multi_extents(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_fileoff_t   bno,            /* block number searched for */
-       int             *eofp,          /* out: end of file found */
-       xfs_extnum_t    *lastxp,        /* out: last extent index */
-       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
-       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
-{
-       xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
-       xfs_extnum_t    lastx;          /* last extent index */
-
-       /*
-        * Initialize the extent entry structure to catch access to
-        * uninitialized br_startblock field.
-        */
-       gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
-       gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
-       gotp->br_state = XFS_EXT_INVALID;
-#if XFS_BIG_BLKNOS
-       gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
-       gotp->br_startblock = 0xffffa5a5;
-#endif
-       prevp->br_startoff = NULLFILEOFF;
-
-       ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
-       if (lastx > 0) {
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
-       }
-       if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
-               xfs_bmbt_get_all(ep, gotp);
-               *eofp = 0;
-       } else {
-               if (lastx > 0) {
-                       *gotp = *prevp;
-               }
-               *eofp = 1;
-               ep = NULL;
-       }
-       *lastxp = lastx;
-       return ep;
-}
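
A simplified model of the lookup semantics documented above, using an invented extent type and a linear scan in place of xfs_iext_bno_to_ext(): return the extent containing bno, or the next one if bno falls in a hole, and report end-of-file (handing back the previous extent, if any) when bno lies past the last extent.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct toy_extent {
	uint64_t startoff;
	uint64_t blockcount;
};

/* returns the index of the found/next extent, or @nr with *eof set */
size_t toy_search_extents(const struct toy_extent *ext, size_t nr,
			  uint64_t bno, bool *eof,
			  struct toy_extent *got, struct toy_extent *prev)
{
	size_t i;

	prev->startoff = UINT64_MAX;		/* models NULLFILEOFF */
	for (i = 0; i < nr; i++) {
		if (bno < ext[i].startoff + ext[i].blockcount)
			break;
		*prev = ext[i];
	}
	if (i < nr) {
		*got = ext[i];
		*eof = false;
	} else {
		if (nr)
			*got = *prev;
		*eof = true;
	}
	return i;
}
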
-
-/*
- * Search the extents list for the inode, for the extent containing bno.
- * If bno lies in a hole, point to the next entry.  If bno lies past eof,
- * *eofp will be set, and *prevp will contain the last entry (null if none).
- * Else, *lastxp will be set to the index of the found
- * entry; *gotp will contain the entry.
- */
-STATIC xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
-xfs_bmap_search_extents(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_fileoff_t   bno,            /* block number searched for */
-       int             fork,           /* data or attr fork */
-       int             *eofp,          /* out: end of file found */
-       xfs_extnum_t    *lastxp,        /* out: last extent index */
-       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
-       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
-{
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */
-
-       XFS_STATS_INC(xs_look_exlist);
-       ifp = XFS_IFORK_PTR(ip, fork);
-
-       ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
-
-       if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
-                    !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
-               xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
-                               "Access to block zero in inode %llu "
-                               "start_block: %llx start_off: %llx "
-                               "blkcnt: %llx extent-state: %x lastx: %x",
-                       (unsigned long long)ip->i_ino,
-                       (unsigned long long)gotp->br_startblock,
-                       (unsigned long long)gotp->br_startoff,
-                       (unsigned long long)gotp->br_blockcount,
-                       gotp->br_state, *lastxp);
-               *lastxp = NULLEXTNUM;
-               *eofp = 1;
-               return NULL;
-       }
-       return ep;
-}
-
-/*
- * Returns the file-relative block number of the first unused block(s)
- * in the file with at least "len" logically contiguous blocks free.
- * This is the lowest-address hole if the file has holes, else the first block
- * past the end of file.
- * Return 0 if the file is currently local (in-inode).
- */
-int                                            /* error */
-xfs_bmap_first_unused(
-       xfs_trans_t     *tp,                    /* transaction pointer */
-       xfs_inode_t     *ip,                    /* incore inode */
-       xfs_extlen_t    len,                    /* size of hole to find */
-       xfs_fileoff_t   *first_unused,          /* unused block */
-       int             whichfork)              /* data or attr fork */
-{
-       int             error;                  /* error return value */
-       int             idx;                    /* extent record index */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_fileoff_t   lastaddr;               /* last block number seen */
-       xfs_fileoff_t   lowest;                 /* lowest useful block */
-       xfs_fileoff_t   max;                    /* starting useful block */
-       xfs_fileoff_t   off;                    /* offset for this block */
-       xfs_extnum_t    nextents;               /* number of extent entries */
-
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
-              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
-              XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-               *first_unused = 0;
-               return 0;
-       }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               return error;
-       lowest = *first_unused;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
-               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
-               off = xfs_bmbt_get_startoff(ep);
-               /*
-                * See if the hole before this extent will work.
-                */
-               if (off >= lowest + len && off - max >= len) {
-                       *first_unused = max;
-                       return 0;
-               }
-               lastaddr = off + xfs_bmbt_get_blockcount(ep);
-               max = XFS_FILEOFF_MAX(lastaddr, lowest);
-       }
-       *first_unused = max;
-       return 0;
-}
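
The loop above scans the sorted extent list and returns the start of the first hole of at least len blocks at or after the requested offset, falling back to the offset just past the last extent. A standalone restatement of the same search over invented types:

#include <stddef.h>
#include <stdint.h>

struct toy_mapping {
	uint64_t startoff;
	uint64_t blockcount;
};

uint64_t toy_first_unused(const struct toy_mapping *ext, size_t nr,
			  uint64_t lowest, uint64_t len)
{
	uint64_t max = lowest;

	for (size_t i = 0; i < nr; i++) {
		uint64_t off = ext[i].startoff;

		/* does the hole before this extent fit? */
		if (off >= lowest + len && off - max >= len)
			return max;

		uint64_t end = off + ext[i].blockcount;
		if (end > max)
			max = end;
	}
	return max;	/* first block past the last extent (or @lowest) */
}
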
-
-/*
- * Returns the file-relative block number of the last block - 1 before
- * last_block (input value) in the file.
- * This is not based on i_size, it is based on the extent records.
- * Returns 0 for local files, as they do not have extent records.
- */
-int                                            /* error */
-xfs_bmap_last_before(
-       xfs_trans_t     *tp,                    /* transaction pointer */
-       xfs_inode_t     *ip,                    /* incore inode */
-       xfs_fileoff_t   *last_block,            /* last block */
-       int             whichfork)              /* data or attr fork */
-{
-       xfs_fileoff_t   bno;                    /* input file offset */
-       int             eof;                    /* hit end of file */
-       xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
-       int             error;                  /* error return value */
-       xfs_bmbt_irec_t got;                    /* current extent value */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    lastx;                  /* last extent used */
-       xfs_bmbt_irec_t prev;                   /* previous extent value */
-
-       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
-              return EIO;
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-               *last_block = 0;
-               return 0;
-       }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               return error;
-       bno = *last_block - 1;
-       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
-       if (eof || xfs_bmbt_get_startoff(ep) > bno) {
-               if (prev.br_startoff == NULLFILEOFF)
-                       *last_block = 0;
-               else
-                       *last_block = prev.br_startoff + prev.br_blockcount;
-       }
-       /*
-        * Otherwise *last_block is already the right answer.
-        */
-       return 0;
-}
-
-int
-xfs_bmap_last_extent(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       int                     whichfork,
-       struct xfs_bmbt_irec    *rec,
-       int                     *is_empty)
-{
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
-       int                     error;
-       int                     nextents;
-
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               error = xfs_iread_extents(tp, ip, whichfork);
-               if (error)
-                       return error;
-       }
-
-       nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
-               *is_empty = 1;
-               return 0;
-       }
-
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
-       *is_empty = 0;
-       return 0;
-}
-
-/*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- *
- * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will
- * be at or past EOF.
- */
-STATIC int
-xfs_bmap_isaeof(
-       struct xfs_bmalloca     *bma,
-       int                     whichfork)
-{
-       struct xfs_bmbt_irec    rec;
-       int                     is_empty;
-       int                     error;
-
-       bma->aeof = 0;
-       error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
-                                    &is_empty);
-       if (error)
-               return error;
-
-       if (is_empty) {
-               bma->aeof = 1;
-               return 0;
-       }
-
-       /*
-        * Check if we are allocating at or past the last extent, or at least
-        * into the last delayed allocated extent.
-        */
-       bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
-               (bma->offset >= rec.br_startoff &&
-                isnullstartblock(rec.br_startblock));
-       return 0;
-}
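
Restated with invented types, the predicate above treats an allocation as an end-of-file allocation when it starts at or past the end of the last extent, or when it lands inside the last extent and that extent is still a delayed allocation (a null start block):

#include <stdbool.h>
#include <stdint.h>

struct toy_last_extent {
	uint64_t startoff;
	uint64_t blockcount;
	bool	 delalloc;	/* models isnullstartblock() */
};

bool toy_alloc_at_eof(uint64_t alloc_offset, bool fork_empty,
		      const struct toy_last_extent *last)
{
	if (fork_empty)
		return true;
	return alloc_offset >= last->startoff + last->blockcount ||
	       (alloc_offset >= last->startoff && last->delalloc);
}
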
-
-/*
- * Returns the file-relative block number of the first block past eof in
- * the file.  This is not based on i_size, it is based on the extent records.
- * Returns 0 for local files, as they do not have extent records.
- */
-int
-xfs_bmap_last_offset(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           *last_block,
-       int                     whichfork)
-{
-       struct xfs_bmbt_irec    rec;
-       int                     is_empty;
-       int                     error;
-
-       *last_block = 0;
-
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
-               return 0;
-
-       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-              return EIO;
-
-       error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
-       if (error || is_empty)
-               return error;
-
-       *last_block = rec.br_startoff + rec.br_blockcount;
-       return 0;
-}
-
-/*
- * Returns whether the selected fork of the inode has exactly one
- * block or not.  For the data fork we check this matches di_size,
- * implying the file's range is 0..bsize-1.
- */
-int                                    /* 1=>1 block, 0=>otherwise */
-xfs_bmap_one_block(
-       xfs_inode_t     *ip,            /* incore inode */
-       int             whichfork)      /* data or attr fork */
-{
-       xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
-       xfs_ifork_t     *ifp;           /* inode fork pointer */
-       int             rval;           /* return value */
-       xfs_bmbt_irec_t s;              /* internal version of extent */
-
-#ifndef DEBUG
-       if (whichfork == XFS_DATA_FORK)
-               return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
-#endif /* !DEBUG */
-       if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
-               return 0;
-       if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-               return 0;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-       ep = xfs_iext_get_ext(ifp, 0);
-       xfs_bmbt_get_all(ep, &s);
-       rval = s.br_startoff == 0 && s.br_blockcount == 1;
-       if (rval && whichfork == XFS_DATA_FORK)
-               ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
-       return rval;
-}
-
-/*
- * Extent tree manipulation functions used during allocation.
- */
-
-/*
- * Convert a delayed allocation to a real allocation.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_delay_real(
-       struct xfs_bmalloca     *bma)
-{
-       struct xfs_bmbt_irec    *new = &bma->got;
-       int                     diff;   /* temp value */
-       xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
-       int                     error;  /* error return value */
-       int                     i;      /* temp state */
-       xfs_ifork_t             *ifp;   /* inode fork pointer */
-       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
-       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
-                                       /* left is 0, right is 1, prev is 2 */
-       int                     rval=0; /* return value (logging flags) */
-       int                     state = 0;/* state bits, accessed thru macros */
-       xfs_filblks_t           da_new; /* new count del alloc blocks used */
-       xfs_filblks_t           da_old; /* old count del alloc blocks used */
-       xfs_filblks_t           temp=0; /* value for da_new calculations */
-       xfs_filblks_t           temp2=0;/* value for da_new calculations */
-       int                     tmp_rval;       /* partial logging flags */
-
-       ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
-
-       ASSERT(bma->idx >= 0);
-       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
-       ASSERT(!isnullstartblock(new->br_startblock));
-       ASSERT(!bma->cur ||
-              (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
-
-       XFS_STATS_INC(xs_add_exlist);
-
-#define        LEFT            r[0]
-#define        RIGHT           r[1]
-#define        PREV            r[2]
-
-       /*
-        * Set up a bunch of variables to make the tests simpler.
-        */
-       ep = xfs_iext_get_ext(ifp, bma->idx);
-       xfs_bmbt_get_all(ep, &PREV);
-       new_endoff = new->br_startoff + new->br_blockcount;
-       ASSERT(PREV.br_startoff <= new->br_startoff);
-       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
-
-       da_old = startblockval(PREV.br_startblock);
-       da_new = 0;
-
-       /*
-        * Set flags determining what part of the previous delayed allocation
-        * extent is being replaced by a real allocation.
-        */
-       if (PREV.br_startoff == new->br_startoff)
-               state |= BMAP_LEFT_FILLING;
-       if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
-               state |= BMAP_RIGHT_FILLING;
-
-       /*
-        * Check and set flags if this segment has a left neighbor.
-        * Don't set contiguous if the combined extent would be too large.
-        */
-       if (bma->idx > 0) {
-               state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
-
-               if (isnullstartblock(LEFT.br_startblock))
-                       state |= BMAP_LEFT_DELAY;
-       }
-
-       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
-           LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
-           LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
-           LEFT.br_state == new->br_state &&
-           LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
-               state |= BMAP_LEFT_CONTIG;
-
-       /*
-        * Check and set flags if this segment has a right neighbor.
-        * Don't set contiguous if the combined extent would be too large.
-        * Also check for all-three-contiguous being too large.
-        */
-       if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
-               state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
-
-               if (isnullstartblock(RIGHT.br_startblock))
-                       state |= BMAP_RIGHT_DELAY;
-       }
-
-       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
-           new_endoff == RIGHT.br_startoff &&
-           new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
-           new->br_state == RIGHT.br_state &&
-           new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
-           ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
-                      BMAP_RIGHT_FILLING)) !=
-                     (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
-                      BMAP_RIGHT_FILLING) ||
-            LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
-                       <= MAXEXTLEN))
-               state |= BMAP_RIGHT_CONTIG;
-
-       error = 0;
-       /*
-        * Switch out based on the FILLING and CONTIG state bits.
-        */
-       switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
-                        BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
-            BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Filling in all of a previously delayed allocation extent.
-                * The left and right neighbors are both contiguous with new.
-                */
-               bma->idx--;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-                       LEFT.br_blockcount + PREV.br_blockcount +
-                       RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
-               bma->ip->i_d.di_nextents--;
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-                                       RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_btree_delete(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_btree_decrement(bma->cur, 0, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-                                       LEFT.br_startblock,
-                                       LEFT.br_blockcount +
-                                       PREV.br_blockcount +
-                                       RIGHT.br_blockcount, LEFT.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
-               /*
-                * Filling in all of a previously delayed allocation extent.
-                * The left neighbor is contiguous, the right is not.
-                */
-               bma->idx--;
-
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-                       LEFT.br_blockcount + PREV.br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
-                                       LEFT.br_startblock, LEFT.br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-                                       LEFT.br_startblock,
-                                       LEFT.br_blockcount +
-                                       PREV.br_blockcount, LEFT.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Filling in all of a previously delayed allocation extent.
-                * The right neighbor is contiguous, the left is not.
-                */
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(ep, new->br_startblock);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount + RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-                                       RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
-                                       new->br_startblock,
-                                       PREV.br_blockcount +
-                                       RIGHT.br_blockcount, PREV.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
-               /*
-                * Filling in all of a previously delayed allocation extent.
-                * Neither the left nor right neighbors are contiguous with
-                * the new one.
-                */
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(ep, new->br_startblock);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               bma->ip->i_d.di_nextents++;
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       error = xfs_btree_insert(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
-               /*
-                * Filling in the first part of a previous delayed allocation.
-                * The left neighbor is contiguous.
-                */
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
-                       LEFT.br_blockcount + new->br_blockcount);
-               xfs_bmbt_set_startoff(ep,
-                       PREV.br_startoff + new->br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
-
-               temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
-                                       LEFT.br_startblock, LEFT.br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
-                                       LEFT.br_startblock,
-                                       LEFT.br_blockcount +
-                                       new->br_blockcount,
-                                       LEFT.br_state);
-                       if (error)
-                               goto done;
-               }
-               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
-                       startblockval(PREV.br_startblock));
-               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               bma->idx--;
-               break;
-
-       case BMAP_LEFT_FILLING:
-               /*
-                * Filling in the first part of a previous delayed allocation.
-                * The left neighbor is not contiguous.
-                */
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_startoff(ep, new_endoff);
-               temp = PREV.br_blockcount - new->br_blockcount;
-               xfs_bmbt_set_blockcount(ep, temp);
-               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
-               bma->ip->i_d.di_nextents++;
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       error = xfs_btree_insert(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-
-               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
-                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                                       bma->firstblock, bma->flist,
-                                       &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
-                       rval |= tmp_rval;
-                       if (error)
-                               goto done;
-               }
-               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
-                       startblockval(PREV.br_startblock) -
-                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
-               ep = xfs_iext_get_ext(ifp, bma->idx + 1);
-               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
-               break;
-
-       case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Filling in the last part of a previous delayed allocation.
-                * The right neighbor is contiguous with the new allocation.
-                */
-               temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
-                       new->br_startoff, new->br_startblock,
-                       new->br_blockcount + RIGHT.br_blockcount,
-                       RIGHT.br_state);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
-                                       RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
-                                       new->br_startblock,
-                                       new->br_blockcount +
-                                       RIGHT.br_blockcount,
-                                       RIGHT.br_state);
-                       if (error)
-                               goto done;
-               }
-
-               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
-                       startblockval(PREV.br_startblock));
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               bma->idx++;
-               break;
-
-       case BMAP_RIGHT_FILLING:
-               /*
-                * Filling in the last part of a previous delayed allocation.
-                * The right neighbor is not contiguous.
-                */
-               temp = PREV.br_blockcount - new->br_blockcount;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
-               bma->ip->i_d.di_nextents++;
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       error = xfs_btree_insert(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-
-               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
-                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur, 1,
-                               &tmp_rval, XFS_DATA_FORK);
-                       rval |= tmp_rval;
-                       if (error)
-                               goto done;
-               }
-               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
-                       startblockval(PREV.br_startblock) -
-                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
-               ep = xfs_iext_get_ext(ifp, bma->idx);
-               xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               bma->idx++;
-               break;
-
-       case 0:
-               /*
-                * Filling in the middle part of a previous delayed allocation.
-                * Contiguity is impossible here.
-                * This case is avoided almost all the time.
-                *
-                * We start with a delayed allocation:
-                *
-                * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
-                *  PREV @ idx
-                *
-                * and we are allocating:
-                *                     +rrrrrrrrrrrrrrrrr+
-                *                            new
-                *
-                * and we set it up for insertion as:
-                * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
-                *                            new
-                *  PREV @ idx          LEFT              RIGHT
-                *                      inserted at idx + 1
-                */
-               temp = new->br_startoff - PREV.br_startoff;
-               temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
-               LEFT = *new;
-               RIGHT.br_state = PREV.br_state;
-               RIGHT.br_startblock = nullstartblock(
-                               (int)xfs_bmap_worst_indlen(bma->ip, temp2));
-               RIGHT.br_startoff = new_endoff;
-               RIGHT.br_blockcount = temp2;
-               /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
-               xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
-               bma->ip->i_d.di_nextents++;
-               if (bma->cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       error = xfs_btree_insert(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-
-               if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
-                       error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                                       bma->firstblock, bma->flist, &bma->cur,
-                                       1, &tmp_rval, XFS_DATA_FORK);
-                       rval |= tmp_rval;
-                       if (error)
-                               goto done;
-               }
-               temp = xfs_bmap_worst_indlen(bma->ip, temp);
-               temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
-               diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
-                       (bma->cur ? bma->cur->bc_private.b.allocated : 0));
-               if (diff > 0) {
-                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       -((int64_t)diff), 0);
-                       ASSERT(!error);
-                       if (error)
-                               goto done;
-               }
-
-               ep = xfs_iext_get_ext(ifp, bma->idx);
-               xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
-                       nullstartblock((int)temp2));
-               trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
-
-               bma->idx++;
-               da_new = temp + temp2;
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
-       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
-       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_LEFT_CONTIG:
-       case BMAP_RIGHT_CONTIG:
-               /*
-                * These cases are all impossible.
-                */
-               ASSERT(0);
-       }
-
-       /* convert to a btree if necessary */
-       if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
-               int     tmp_logflags;   /* partial log flag return val */
-
-               ASSERT(bma->cur == NULL);
-               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur,
-                               da_old > 0, &tmp_logflags, XFS_DATA_FORK);
-               bma->logflags |= tmp_logflags;
-               if (error)
-                       goto done;
-       }
-
-       /* adjust for changes in reserved delayed indirect blocks */
-       if (da_old || da_new) {
-               temp = da_new;
-               if (bma->cur)
-                       temp += bma->cur->bc_private.b.allocated;
-               ASSERT(temp <= da_old);
-               if (temp < da_old)
-                       xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       (int64_t)(da_old - temp), 0);
-       }
-
-       /* clear out the allocated field, done with it now in any case. */
-       if (bma->cur)
-               bma->cur->bc_private.b.allocated = 0;
-
-       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
-done:
-       bma->logflags |= rval;
-       return error;
-#undef LEFT
-#undef RIGHT
-#undef PREV
-}
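
The switch above is driven entirely by the FILLING/CONTIG state bits computed at the top of the function. Below is a toy restatement of that classification with invented types and flag values; the kernel version additionally checks extent state, delayed-allocation neighbours and the three-way MAXEXTLEN limit on a full merge.

#include <stdbool.h>
#include <stdint.h>

#define TOY_LEFT_FILLING	(1u << 0)
#define TOY_RIGHT_FILLING	(1u << 1)
#define TOY_LEFT_CONTIG		(1u << 2)
#define TOY_RIGHT_CONTIG	(1u << 3)
#define TOY_MAXEXTLEN		((uint64_t)1 << 21)	/* illustrative limit */

struct toy_irec {
	uint64_t startoff;
	uint64_t startblock;
	uint64_t blockcount;
};

unsigned int toy_delay_real_state(const struct toy_irec *prev,
				  const struct toy_irec *new,
				  const struct toy_irec *left,	/* may be NULL */
				  const struct toy_irec *right)	/* may be NULL */
{
	uint64_t	new_end = new->startoff + new->blockcount;
	unsigned int	state = 0;

	/* which ends of the delayed extent does the real extent fill? */
	if (prev->startoff == new->startoff)
		state |= TOY_LEFT_FILLING;
	if (prev->startoff + prev->blockcount == new_end)
		state |= TOY_RIGHT_FILLING;

	/* can we merge with the left neighbour? */
	if (left &&
	    left->startoff + left->blockcount == new->startoff &&
	    left->startblock + left->blockcount == new->startblock &&
	    left->blockcount + new->blockcount <= TOY_MAXEXTLEN)
		state |= TOY_LEFT_CONTIG;

	/* can we merge with the right neighbour? */
	if (right &&
	    new_end == right->startoff &&
	    new->startblock + new->blockcount == right->startblock &&
	    new->blockcount + right->blockcount <= TOY_MAXEXTLEN)
		state |= TOY_RIGHT_CONTIG;

	return state;
}

The sixteen possible bit combinations map one-to-one onto the cases of the switch; the seven combinations asserted as impossible above are exactly those where a CONTIG bit is set without the matching FILLING bit.
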
-
-/*
- * Convert an unwritten allocation to a real allocation or vice versa.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_unwritten_real(
-       struct xfs_trans        *tp,
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            *idx,   /* extent number to update/insert */
-       xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
-       xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
-       xfs_fsblock_t           *first, /* pointer to firstblock variable */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       int                     *logflagsp) /* inode logging flags */
-{
-       xfs_btree_cur_t         *cur;   /* btree cursor */
-       xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
-       int                     error;  /* error return value */
-       int                     i;      /* temp state */
-       xfs_ifork_t             *ifp;   /* inode fork pointer */
-       xfs_fileoff_t           new_endoff;     /* end offset of new entry */
-       xfs_exntst_t            newext; /* new extent state */
-       xfs_exntst_t            oldext; /* old extent state */
-       xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
-                                       /* left is 0, right is 1, prev is 2 */
-       int                     rval=0; /* return value (logging flags) */
-       int                     state = 0;/* state bits, accessed thru macros */
-
-       *logflagsp = 0;
-
-       cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-
-       ASSERT(*idx >= 0);
-       ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
-       ASSERT(!isnullstartblock(new->br_startblock));
-
-       XFS_STATS_INC(xs_add_exlist);
-
-#define        LEFT            r[0]
-#define        RIGHT           r[1]
-#define        PREV            r[2]
-
-       /*
-        * Set up a bunch of variables to make the tests simpler.
-        */
-       error = 0;
-       ep = xfs_iext_get_ext(ifp, *idx);
-       xfs_bmbt_get_all(ep, &PREV);
-       newext = new->br_state;
-       oldext = (newext == XFS_EXT_UNWRITTEN) ?
-               XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
-       ASSERT(PREV.br_state == oldext);
-       new_endoff = new->br_startoff + new->br_blockcount;
-       ASSERT(PREV.br_startoff <= new->br_startoff);
-       ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
-
-       /*
-        * Set flags determining what part of the previous oldext allocation
-        * extent is being replaced by a newext allocation.
-        */
-       if (PREV.br_startoff == new->br_startoff)
-               state |= BMAP_LEFT_FILLING;
-       if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
-               state |= BMAP_RIGHT_FILLING;
-
-       /*
-        * Check and set flags if this segment has a left neighbor.
-        * Don't set contiguous if the combined extent would be too large.
-        */
-       if (*idx > 0) {
-               state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
-
-               if (isnullstartblock(LEFT.br_startblock))
-                       state |= BMAP_LEFT_DELAY;
-       }
-
-       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
-           LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
-           LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
-           LEFT.br_state == newext &&
-           LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
-               state |= BMAP_LEFT_CONTIG;
-
-       /*
-        * Check and set flags if this segment has a right neighbor.
-        * Don't set contiguous if the combined extent would be too large.
-        * Also check for all-three-contiguous being too large.
-        */
-       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
-               state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
-               if (isnullstartblock(RIGHT.br_startblock))
-                       state |= BMAP_RIGHT_DELAY;
-       }
-
-       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
-           new_endoff == RIGHT.br_startoff &&
-           new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
-           newext == RIGHT.br_state &&
-           new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
-           ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
-                      BMAP_RIGHT_FILLING)) !=
-                     (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
-                      BMAP_RIGHT_FILLING) ||
-            LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
-                       <= MAXEXTLEN))
-               state |= BMAP_RIGHT_CONTIG;
-
-       /*
-        * Switch out based on the FILLING and CONTIG state bits.
-        */
-       switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
-                        BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
-            BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Setting all of a previous oldext extent to newext.
-                * The left and right neighbors are both contiguous with new.
-                */
-               --*idx;
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-                       LEFT.br_blockcount + PREV.br_blockcount +
-                       RIGHT.br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               xfs_iext_remove(ip, *idx + 1, 2, state);
-               ip->i_d.di_nextents -= 2;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
-                                       RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
-                               LEFT.br_startblock,
-                               LEFT.br_blockcount + PREV.br_blockcount +
-                               RIGHT.br_blockcount, LEFT.br_state)))
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
-               /*
-                * Setting all of a previous oldext extent to newext.
-                * The left neighbor is contiguous, the right is not.
-                */
-               --*idx;
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
-                       LEFT.br_blockcount + PREV.br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock, PREV.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
-                               LEFT.br_startblock,
-                               LEFT.br_blockcount + PREV.br_blockcount,
-                               LEFT.br_state)))
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Setting all of a previous oldext extent to newext.
-                * The right neighbor is contiguous, the left is not.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount + RIGHT.br_blockcount);
-               xfs_bmbt_set_state(ep, newext);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
-                                       RIGHT.br_startblock,
-                                       RIGHT.br_blockcount, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_delete(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
-                               new->br_startblock,
-                               new->br_blockcount + RIGHT.br_blockcount,
-                               newext)))
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
-               /*
-                * Setting all of a previous oldext extent to newext.
-                * Neither the left nor right neighbors are contiguous with
-                * the new one.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_state(ep, newext);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               if (cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
-                               new->br_startblock, new->br_blockcount,
-                               newext)))
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
-               /*
-                * Setting the first part of a previous oldext extent to newext.
-                * The left neighbor is contiguous.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
-                       LEFT.br_blockcount + new->br_blockcount);
-               xfs_bmbt_set_startoff(ep,
-                       PREV.br_startoff + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_startblock(ep,
-                       new->br_startblock + new->br_blockcount);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               --*idx;
-
-               if (cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock, PREV.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur,
-                               PREV.br_startoff + new->br_blockcount,
-                               PREV.br_startblock + new->br_blockcount,
-                               PREV.br_blockcount - new->br_blockcount,
-                               oldext)))
-                               goto done;
-                       if ((error = xfs_btree_decrement(cur, 0, &i)))
-                               goto done;
-                       error = xfs_bmbt_update(cur, LEFT.br_startoff,
-                               LEFT.br_startblock,
-                               LEFT.br_blockcount + new->br_blockcount,
-                               LEFT.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_FILLING:
-               /*
-                * Setting the first part of a previous oldext extent to newext.
-                * The left neighbor is not contiguous.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
-               xfs_bmbt_set_startoff(ep, new_endoff);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount - new->br_blockcount);
-               xfs_bmbt_set_startblock(ep,
-                       new->br_startblock + new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               xfs_iext_insert(ip, *idx, 1, new, state);
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock, PREV.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur,
-                               PREV.br_startoff + new->br_blockcount,
-                               PREV.br_startblock + new->br_blockcount,
-                               PREV.br_blockcount - new->br_blockcount,
-                               oldext)))
-                               goto done;
-                       cur->bc_rec.b = *new;
-                       if ((error = xfs_btree_insert(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               break;
-
-       case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
-               /*
-                * Setting the last part of a previous oldext extent to newext.
-                * The right neighbor is contiguous with the new allocation.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               ++*idx;
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-                       new->br_startoff, new->br_startblock,
-                       new->br_blockcount + RIGHT.br_blockcount, newext);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               if (cur == NULL)
-                       rval = XFS_ILOG_DEXT;
-               else {
-                       rval = 0;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock,
-                                       PREV.br_blockcount, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
-                               PREV.br_startblock,
-                               PREV.br_blockcount - new->br_blockcount,
-                               oldext)))
-                               goto done;
-                       if ((error = xfs_btree_increment(cur, 0, &i)))
-                               goto done;
-                       if ((error = xfs_bmbt_update(cur, new->br_startoff,
-                               new->br_startblock,
-                               new->br_blockcount + RIGHT.br_blockcount,
-                               newext)))
-                               goto done;
-               }
-               break;
-
-       case BMAP_RIGHT_FILLING:
-               /*
-                * Setting the last part of a previous oldext extent to newext.
-                * The right neighbor is not contiguous.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep,
-                       PREV.br_blockcount - new->br_blockcount);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               ++*idx;
-               xfs_iext_insert(ip, *idx, 1, new, state);
-
-               ip->i_d.di_nextents++;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock, PREV.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
-                               PREV.br_startblock,
-                               PREV.br_blockcount - new->br_blockcount,
-                               oldext)))
-                               goto done;
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       cur->bc_rec.b.br_state = XFS_EXT_NORM;
-                       if ((error = xfs_btree_insert(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               break;
-
-       case 0:
-               /*
-                * Setting the middle part of a previous oldext extent to
-                * newext.  Contiguity is impossible here.
-                * One extent becomes three extents.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep,
-                       new->br_startoff - PREV.br_startoff);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               r[0] = *new;
-               r[1].br_startoff = new_endoff;
-               r[1].br_blockcount =
-                       PREV.br_startoff + PREV.br_blockcount - new_endoff;
-               r[1].br_startblock = new->br_startblock + new->br_blockcount;
-               r[1].br_state = oldext;
-
-               ++*idx;
-               xfs_iext_insert(ip, *idx, 2, &r[0], state);
-
-               ip->i_d.di_nextents += 2;
-               if (cur == NULL)
-                       rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
-               else {
-                       rval = XFS_ILOG_CORE;
-                       if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
-                                       PREV.br_startblock, PREV.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       /* new right extent - oldext */
-                       if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
-                               r[1].br_startblock, r[1].br_blockcount,
-                               r[1].br_state)))
-                               goto done;
-                       /* new left extent - oldext */
-                       cur->bc_rec.b = PREV;
-                       cur->bc_rec.b.br_blockcount =
-                               new->br_startoff - PREV.br_startoff;
-                       if ((error = xfs_btree_insert(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       /*
-                        * Reset the cursor to the position of the new extent
-                        * we are about to insert as we can't trust it after
-                        * the previous insert.
-                        */
-                       if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
-                                       new->br_startblock, new->br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       /* new middle extent - newext */
-                       cur->bc_rec.b.br_state = new->br_state;
-                       if ((error = xfs_btree_insert(cur, &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               break;
-
-       case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
-       case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
-       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-       case BMAP_LEFT_CONTIG:
-       case BMAP_RIGHT_CONTIG:
-               /*
-                * These cases are all impossible.
-                */
-               ASSERT(0);
-       }
-
-       /* convert to a btree if necessary */
-       if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
-               int     tmp_logflags;   /* partial log flag return val */
-
-               ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
-                               0, &tmp_logflags, XFS_DATA_FORK);
-               *logflagsp |= tmp_logflags;
-               if (error)
-                       goto done;
-       }
-
-       /* clear out the allocated field, done with it now in any case. */
-       if (cur) {
-               cur->bc_private.b.allocated = 0;
-               *curp = cur;
-       }
-
-       xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
-done:
-       *logflagsp |= rval;
-       return error;
-#undef LEFT
-#undef RIGHT
-#undef PREV
-}
-
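The switch above, and the hole-conversion helpers that follow, all dispatch on the same four state bits: whether the new extent fills the start and/or the end of the existing record, and whether the left and/or right neighbour can be merged into it. Below is a minimal standalone sketch of how those bits compose into the case labels; the flag names echo the kernel's, but the struct, the helper and the checks are simplified stand-ins (the real code also requires block adjacency, matching extent state and the MAXEXTLEN cap before setting the CONTIG bits).

/*
 * Minimal sketch of how the BMAP_* state bits above combine into case
 * labels.  Simplified: the kernel also checks block adjacency, extent
 * state and MAXEXTLEN before setting the CONTIG bits.
 */
#include <stdint.h>
#include <stdio.h>

#define BMAP_LEFT_CONTIG	(1 << 0)
#define BMAP_RIGHT_CONTIG	(1 << 1)
#define BMAP_LEFT_FILLING	(1 << 2)
#define BMAP_RIGHT_FILLING	(1 << 3)

struct irec {				/* cut-down extent record */
	uint64_t	startoff;
	uint64_t	blockcount;
};

int classify(const struct irec *left, const struct irec *prev,
	     const struct irec *right, const struct irec *new,
	     int have_left, int have_right)
{
	int state = 0;

	if (new->startoff == prev->startoff)
		state |= BMAP_LEFT_FILLING;
	if (new->startoff + new->blockcount ==
	    prev->startoff + prev->blockcount)
		state |= BMAP_RIGHT_FILLING;
	if (have_left && (state & BMAP_LEFT_FILLING) &&
	    left->startoff + left->blockcount == new->startoff)
		state |= BMAP_LEFT_CONTIG;
	if (have_right && (state & BMAP_RIGHT_FILLING) &&
	    new->startoff + new->blockcount == right->startoff)
		state |= BMAP_RIGHT_CONTIG;
	return state;
}

int main(void)
{
	struct irec left = { 0, 10 }, prev = { 10, 30 }, right = { 40, 5 };
	struct irec new = { 10, 30 };	/* converts all of prev */

	/* prints 15: both FILLING bits and both CONTIG bits are set */
	printf("state = %d\n", classify(&left, &prev, &right, &new, 1, 1));
	return 0;
}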
-/*
- * Convert a hole to a delayed allocation.
- */
-STATIC void
-xfs_bmap_add_extent_hole_delay(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_extnum_t            *idx,   /* extent number to update/insert */
-       xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
-{
-       xfs_ifork_t             *ifp;   /* inode fork pointer */
-       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
-       xfs_filblks_t           newlen=0;       /* new indirect size */
-       xfs_filblks_t           oldlen=0;       /* old indirect size */
-       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
-       int                     state;  /* state bits, accessed thru macros */
-       xfs_filblks_t           temp=0; /* temp for indirect calculations */
-
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       state = 0;
-       ASSERT(isnullstartblock(new->br_startblock));
-
-       /*
-        * Check and set flags if this segment has a left neighbor
-        */
-       if (*idx > 0) {
-               state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
-
-               if (isnullstartblock(left.br_startblock))
-                       state |= BMAP_LEFT_DELAY;
-       }
-
-       /*
-        * Check and set flags if the current (right) segment exists.
-        * If it doesn't exist, we're converting the hole at end-of-file.
-        */
-       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
-               state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
-
-               if (isnullstartblock(right.br_startblock))
-                       state |= BMAP_RIGHT_DELAY;
-       }
-
-       /*
-        * Set contiguity flags on the left and right neighbors.
-        * Don't let extents get too large, even if the pieces are contiguous.
-        */
-       if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
-           left.br_startoff + left.br_blockcount == new->br_startoff &&
-           left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
-               state |= BMAP_LEFT_CONTIG;
-
-       if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
-           new->br_startoff + new->br_blockcount == right.br_startoff &&
-           new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
-           (!(state & BMAP_LEFT_CONTIG) ||
-            (left.br_blockcount + new->br_blockcount +
-             right.br_blockcount <= MAXEXTLEN)))
-               state |= BMAP_RIGHT_CONTIG;
-
-       /*
-        * Switch out based on the contiguity flags.
-        */
-       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-               /*
-                * New allocation is contiguous with delayed allocations
-                * on the left and on the right.
-                * Merge all three into a single extent record.
-                */
-               --*idx;
-               temp = left.br_blockcount + new->br_blockcount +
-                       right.br_blockcount;
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
-               oldlen = startblockval(left.br_startblock) +
-                       startblockval(new->br_startblock) +
-                       startblockval(right.br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-                       nullstartblock((int)newlen));
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-
-               xfs_iext_remove(ip, *idx + 1, 1, state);
-               break;
-
-       case BMAP_LEFT_CONTIG:
-               /*
-                * New allocation is contiguous with a delayed allocation
-                * on the left.
-                * Merge the new allocation with the left neighbor.
-                */
-               --*idx;
-               temp = left.br_blockcount + new->br_blockcount;
-
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
-               oldlen = startblockval(left.br_startblock) +
-                       startblockval(new->br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
-                       nullstartblock((int)newlen));
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               break;
-
-       case BMAP_RIGHT_CONTIG:
-               /*
-                * New allocation is contiguous with a delayed allocation
-                * on the right.
-                * Merge the new allocation with the right neighbor.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               temp = new->br_blockcount + right.br_blockcount;
-               oldlen = startblockval(new->br_startblock) +
-                       startblockval(right.br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
-                       new->br_startoff,
-                       nullstartblock((int)newlen), temp, right.br_state);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               break;
-
-       case 0:
-               /*
-                * New allocation is not contiguous with another
-                * delayed allocation.
-                * Insert a new entry.
-                */
-               oldlen = newlen = 0;
-               xfs_iext_insert(ip, *idx, 1, new, state);
-               break;
-       }
-       if (oldlen != newlen) {
-               ASSERT(oldlen > newlen);
-               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                       (int64_t)(oldlen - newlen), 0);
-               /*
-                * Nothing to do for disk quota accounting here.
-                */
-       }
-}
-
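One detail worth pulling out of xfs_bmap_add_extent_hole_delay() above: for delayed allocations the startblock field carries the indirect-block reservation, so merging neighbouring pieces means recomputing one worst-case reservation for the combined length and returning the surplus to the free-block counter (the oldlen/newlen arithmetic). A standalone sketch of that bookkeeping follows, with worst_indlen() as a made-up stand-in for xfs_bmap_worst_indlen(); any monotone, sub-additive estimate shows the same effect.

/*
 * Illustrative sketch of the reservation merge done above.  worst_indlen()
 * is a hypothetical stand-in for xfs_bmap_worst_indlen().
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t worst_indlen(uint64_t len)
{
	/* toy model: one indirect block per 128 data blocks, rounded up */
	return (len + 127) / 128;
}

/* Merge two delalloc pieces and return how many reserved blocks are freed. */
static uint64_t merge_delalloc(uint64_t left_len, uint64_t left_resv,
			       uint64_t new_len, uint64_t new_resv)
{
	uint64_t oldlen = left_resv + new_resv;
	uint64_t newlen = worst_indlen(left_len + new_len);

	assert(newlen <= oldlen);	/* mirrors the ASSERT(oldlen > newlen) path */
	return oldlen - newlen;		/* handed back to the fdblocks counter */
}

int main(void)
{
	/* a 100-block and a 20-block piece, each with 1 reserved block */
	printf("freed %llu reserved blocks\n",
	       (unsigned long long)merge_delalloc(100, 1, 20, 1));
	return 0;
}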
-/*
- * Convert a hole to a real allocation.
- */
-STATIC int                             /* error */
-xfs_bmap_add_extent_hole_real(
-       struct xfs_bmalloca     *bma,
-       int                     whichfork)
-{
-       struct xfs_bmbt_irec    *new = &bma->got;
-       int                     error;  /* error return value */
-       int                     i;      /* temp state */
-       xfs_ifork_t             *ifp;   /* inode fork pointer */
-       xfs_bmbt_irec_t         left;   /* left neighbor extent entry */
-       xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
-       int                     rval=0; /* return value (logging flags) */
-       int                     state;  /* state bits, accessed thru macros */
-
-       ifp = XFS_IFORK_PTR(bma->ip, whichfork);
-
-       ASSERT(bma->idx >= 0);
-       ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
-       ASSERT(!isnullstartblock(new->br_startblock));
-       ASSERT(!bma->cur ||
-              !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
-
-       XFS_STATS_INC(xs_add_exlist);
-
-       state = 0;
-       if (whichfork == XFS_ATTR_FORK)
-               state |= BMAP_ATTRFORK;
-
-       /*
-        * Check and set flags if this segment has a left neighbor.
-        */
-       if (bma->idx > 0) {
-               state |= BMAP_LEFT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
-               if (isnullstartblock(left.br_startblock))
-                       state |= BMAP_LEFT_DELAY;
-       }
-
-       /*
-        * Check and set flags if this segment has a current value.
-        * Not true if we're inserting into the "hole" at eof.
-        */
-       if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
-               state |= BMAP_RIGHT_VALID;
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
-               if (isnullstartblock(right.br_startblock))
-                       state |= BMAP_RIGHT_DELAY;
-       }
-
-       /*
-        * We're inserting a real allocation between "left" and "right".
-        * Set the contiguity flags.  Don't let extents get too large.
-        */
-       if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
-           left.br_startoff + left.br_blockcount == new->br_startoff &&
-           left.br_startblock + left.br_blockcount == new->br_startblock &&
-           left.br_state == new->br_state &&
-           left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
-               state |= BMAP_LEFT_CONTIG;
-
-       if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
-           new->br_startoff + new->br_blockcount == right.br_startoff &&
-           new->br_startblock + new->br_blockcount == right.br_startblock &&
-           new->br_state == right.br_state &&
-           new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
-           (!(state & BMAP_LEFT_CONTIG) ||
-            left.br_blockcount + new->br_blockcount +
-            right.br_blockcount <= MAXEXTLEN))
-               state |= BMAP_RIGHT_CONTIG;
-
-       error = 0;
-       /*
-        * Select which case we're in here, and implement it.
-        */
-       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
-       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
-               /*
-                * New allocation is contiguous with real allocations on the
-                * left and on the right.
-                * Merge all three into a single extent record.
-                */
-               --bma->idx;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-                       left.br_blockcount + new->br_blockcount +
-                       right.br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
-
-               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
-                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
-               if (bma->cur == NULL) {
-                       rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
-               } else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
-                                       right.br_startblock, right.br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_btree_delete(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_btree_decrement(bma->cur, 0, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
-                                       left.br_startblock,
-                                       left.br_blockcount +
-                                               new->br_blockcount +
-                                               right.br_blockcount,
-                                       left.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_LEFT_CONTIG:
-               /*
-                * New allocation is contiguous with a real allocation
-                * on the left.
-                * Merge the new allocation with the left neighbor.
-                */
-               --bma->idx;
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
-                       left.br_blockcount + new->br_blockcount);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               if (bma->cur == NULL) {
-                       rval = xfs_ilog_fext(whichfork);
-               } else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
-                                       left.br_startblock, left.br_blockcount,
-                                       &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, left.br_startoff,
-                                       left.br_startblock,
-                                       left.br_blockcount +
-                                               new->br_blockcount,
-                                       left.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case BMAP_RIGHT_CONTIG:
-               /*
-                * New allocation is contiguous with a real allocation
-                * on the right.
-                * Merge the new allocation with the right neighbor.
-                */
-               trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
-               xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
-                       new->br_startoff, new->br_startblock,
-                       new->br_blockcount + right.br_blockcount,
-                       right.br_state);
-               trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
-
-               if (bma->cur == NULL) {
-                       rval = xfs_ilog_fext(whichfork);
-               } else {
-                       rval = 0;
-                       error = xfs_bmbt_lookup_eq(bma->cur,
-                                       right.br_startoff,
-                                       right.br_startblock,
-                                       right.br_blockcount, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       error = xfs_bmbt_update(bma->cur, new->br_startoff,
-                                       new->br_startblock,
-                                       new->br_blockcount +
-                                               right.br_blockcount,
-                                       right.br_state);
-                       if (error)
-                               goto done;
-               }
-               break;
-
-       case 0:
-               /*
-                * New allocation is not contiguous with another
-                * real allocation.
-                * Insert a new entry.
-                */
-               xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
-               XFS_IFORK_NEXT_SET(bma->ip, whichfork,
-                       XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
-               if (bma->cur == NULL) {
-                       rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
-               } else {
-                       rval = XFS_ILOG_CORE;
-                       error = xfs_bmbt_lookup_eq(bma->cur,
-                                       new->br_startoff,
-                                       new->br_startblock,
-                                       new->br_blockcount, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
-                       bma->cur->bc_rec.b.br_state = new->br_state;
-                       error = xfs_btree_insert(bma->cur, &i);
-                       if (error)
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               break;
-       }
-
-       /* convert to a btree if necessary */
-       if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
-               int     tmp_logflags;   /* partial log flag return val */
-
-               ASSERT(bma->cur == NULL);
-               error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
-                               bma->firstblock, bma->flist, &bma->cur,
-                               0, &tmp_logflags, whichfork);
-               bma->logflags |= tmp_logflags;
-               if (error)
-                       goto done;
-       }
-
-       /* clear out the allocated field, done with it now in any case. */
-       if (bma->cur)
-               bma->cur->bc_private.b.allocated = 0;
-
-       xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
-done:
-       bma->logflags |= rval;
-       return error;
-}
-
-/*
- * Functions used in the extent read, allocate and remove paths
- */
-
-/*
- * Adjust the size of the new extent based on di_extsize and rt extsize.
- */
-int
-xfs_bmap_extsize_align(
-       xfs_mount_t     *mp,
-       xfs_bmbt_irec_t *gotp,          /* next extent pointer */
-       xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
-       xfs_extlen_t    extsz,          /* align to this extent size */
-       int             rt,             /* is this a realtime inode? */
-       int             eof,            /* is extent at end-of-file? */
-       int             delay,          /* creating delalloc extent? */
-       int             convert,        /* overwriting unwritten extent? */
-       xfs_fileoff_t   *offp,          /* in/out: aligned offset */
-       xfs_extlen_t    *lenp)          /* in/out: aligned length */
-{
-       xfs_fileoff_t   orig_off;       /* original offset */
-       xfs_extlen_t    orig_alen;      /* original length */
-       xfs_fileoff_t   orig_end;       /* original off+len */
-       xfs_fileoff_t   nexto;          /* next file offset */
-       xfs_fileoff_t   prevo;          /* previous file offset */
-       xfs_fileoff_t   align_off;      /* temp for offset */
-       xfs_extlen_t    align_alen;     /* temp for length */
-       xfs_extlen_t    temp;           /* temp for calculations */
-
-       if (convert)
-               return 0;
-
-       orig_off = align_off = *offp;
-       orig_alen = align_alen = *lenp;
-       orig_end = orig_off + orig_alen;
-
-       /*
-        * If this request overlaps an existing extent, then don't
-        * attempt to perform any additional alignment.
-        */
-       if (!delay && !eof &&
-           (orig_off >= gotp->br_startoff) &&
-           (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
-               return 0;
-       }
-
-       /*
-        * If the file offset is unaligned vs. the extent size
-        * we need to align it.  This will be possible unless
-        * the file was previously written with a kernel that didn't
-        * perform this alignment, or if a truncate shot us in the
-        * foot.
-        */
-       temp = do_mod(orig_off, extsz);
-       if (temp) {
-               align_alen += temp;
-               align_off -= temp;
-       }
-       /*
-        * Same adjustment for the end of the requested area.
-        */
-       if ((temp = (align_alen % extsz))) {
-               align_alen += extsz - temp;
-       }
-       /*
-        * If the previous block overlaps with this proposed allocation
-        * then move the start forward without adjusting the length.
-        */
-       if (prevp->br_startoff != NULLFILEOFF) {
-               if (prevp->br_startblock == HOLESTARTBLOCK)
-                       prevo = prevp->br_startoff;
-               else
-                       prevo = prevp->br_startoff + prevp->br_blockcount;
-       } else
-               prevo = 0;
-       if (align_off != orig_off && align_off < prevo)
-               align_off = prevo;
-       /*
-        * If the next block overlaps with this proposed allocation
-        * then move the start back without adjusting the length,
-        * but not before offset 0.
-        * This may of course make the start overlap previous block,
-        * and if we hit the offset 0 limit then the next block
-        * can still overlap too.
-        */
-       if (!eof && gotp->br_startoff != NULLFILEOFF) {
-               if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
-                   (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
-                       nexto = gotp->br_startoff + gotp->br_blockcount;
-               else
-                       nexto = gotp->br_startoff;
-       } else
-               nexto = NULLFILEOFF;
-       if (!eof &&
-           align_off + align_alen != orig_end &&
-           align_off + align_alen > nexto)
-               align_off = nexto > align_alen ? nexto - align_alen : 0;
-       /*
-        * If we're now overlapping the next or previous extent that
-        * means we can't fit an extsz piece in this hole.  Just move
-        * the start forward to the first valid spot and set
-        * the length so we hit the end.
-        */
-       if (align_off != orig_off && align_off < prevo)
-               align_off = prevo;
-       if (align_off + align_alen != orig_end &&
-           align_off + align_alen > nexto &&
-           nexto != NULLFILEOFF) {
-               ASSERT(nexto > prevo);
-               align_alen = nexto - align_off;
-       }
-
-       /*
-        * If realtime, and the result isn't a multiple of the realtime
-        * extent size we need to remove blocks until it is.
-        */
-       if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
-               /*
-                * We're not covering the original request, or
-                * we won't be able to once we fix the length.
-                */
-               if (orig_off < align_off ||
-                   orig_end > align_off + align_alen ||
-                   align_alen - temp < orig_alen)
-                       return EINVAL;
-               /*
-                * Try to fix it by moving the start up.
-                */
-               if (align_off + temp <= orig_off) {
-                       align_alen -= temp;
-                       align_off += temp;
-               }
-               /*
-                * Try to fix it by moving the end in.
-                */
-               else if (align_off + align_alen - temp >= orig_end)
-                       align_alen -= temp;
-               /*
-                * Set the start to the minimum then trim the length.
-                */
-               else {
-                       align_alen -= orig_off - align_off;
-                       align_off = orig_off;
-                       align_alen -= align_alen % mp->m_sb.sb_rextsize;
-               }
-               /*
-                * Result doesn't cover the request, fail it.
-                */
-               if (orig_off < align_off || orig_end > align_off + align_alen)
-                       return EINVAL;
-       } else {
-               ASSERT(orig_off >= align_off);
-               ASSERT(orig_end <= align_off + align_alen);
-       }
-
-#ifdef DEBUG
-       if (!eof && gotp->br_startoff != NULLFILEOFF)
-               ASSERT(align_off + align_alen <= gotp->br_startoff);
-       if (prevp->br_startoff != NULLFILEOFF)
-               ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
-#endif
-
-       *lenp = align_alen;
-       *offp = align_off;
-       return 0;
-}
-
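The opening step of xfs_bmap_extsize_align() is plain modulo arithmetic: pull the start offset back onto an extsz boundary, then pad the length up to a whole number of extsz units, so the aligned range still covers the original request. A small illustration with bare integers is below; the real routine then goes on to clamp against the neighbouring extents and the realtime extent size.

/*
 * Illustration of the initial alignment step above: round the start down
 * and the length up to multiples of 'extsz'.  Plain integers only.
 */
#include <stdint.h>
#include <stdio.h>

static void extsize_align(uint64_t off, uint64_t len, uint64_t extsz,
			  uint64_t *aoff, uint64_t *alen)
{
	uint64_t head = off % extsz;		/* do_mod(orig_off, extsz) */
	uint64_t tail;

	*aoff = off - head;
	*alen = len + head;
	tail = *alen % extsz;
	if (tail)
		*alen += extsz - tail;
}

int main(void)
{
	uint64_t aoff, alen;

	extsize_align(10, 5, 8, &aoff, &alen);	/* request covers [10, 15) */
	printf("aligned to offset %llu, length %llu\n",	/* prints 8, 8 */
	       (unsigned long long)aoff, (unsigned long long)alen);
	return 0;
}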
-#define XFS_ALLOC_GAP_UNITS    4
-
-void
-xfs_bmap_adjacent(
-       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
-{
-       xfs_fsblock_t   adjust;         /* adjustment to block numbers */
-       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
-       xfs_mount_t     *mp;            /* mount point structure */
-       int             nullfb;         /* true if ap->firstblock isn't set */
-       int             rt;             /* true if inode is realtime */
-
-#define        ISVALID(x,y)    \
-       (rt ? \
-               (x) < mp->m_sb.sb_rblocks : \
-               XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
-               XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
-               XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
-
-       mp = ap->ip->i_mount;
-       nullfb = *ap->firstblock == NULLFSBLOCK;
-       rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
-       /*
-        * If allocating at eof, and there's a previous real block,
-        * try to use its last block as our starting point.
-        */
-       if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
-           !isnullstartblock(ap->prev.br_startblock) &&
-           ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
-                   ap->prev.br_startblock)) {
-               ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
-               /*
-                * Adjust for the gap between prevp and us.
-                */
-               adjust = ap->offset -
-                       (ap->prev.br_startoff + ap->prev.br_blockcount);
-               if (adjust &&
-                   ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
-                       ap->blkno += adjust;
-       }
-       /*
-        * If not at eof, then compare the two neighbor blocks.
-        * Figure out whether either one gives us a good starting point,
-        * and pick the better one.
-        */
-       else if (!ap->eof) {
-               xfs_fsblock_t   gotbno;         /* right side block number */
-               xfs_fsblock_t   gotdiff=0;      /* right side difference */
-               xfs_fsblock_t   prevbno;        /* left side block number */
-               xfs_fsblock_t   prevdiff=0;     /* left side difference */
-
-               /*
-                * If there's a previous (left) block, select a requested
-                * start block based on it.
-                */
-               if (ap->prev.br_startoff != NULLFILEOFF &&
-                   !isnullstartblock(ap->prev.br_startblock) &&
-                   (prevbno = ap->prev.br_startblock +
-                              ap->prev.br_blockcount) &&
-                   ISVALID(prevbno, ap->prev.br_startblock)) {
-                       /*
-                        * Calculate gap to end of previous block.
-                        */
-                       adjust = prevdiff = ap->offset -
-                               (ap->prev.br_startoff +
-                                ap->prev.br_blockcount);
-                       /*
-                        * Figure the startblock based on the previous block's
-                        * end and the gap size.
-                        * Heuristic!
-                        * If the gap is large relative to the piece we're
-                        * allocating, or using it gives us an invalid block
-                        * number, then just use the end of the previous block.
-                        */
-                       if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
-                           ISVALID(prevbno + prevdiff,
-                                   ap->prev.br_startblock))
-                               prevbno += adjust;
-                       else
-                               prevdiff += adjust;
-                       /*
-                        * If the firstblock forbids it, can't use it,
-                        * must use default.
-                        */
-                       if (!rt && !nullfb &&
-                           XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
-                               prevbno = NULLFSBLOCK;
-               }
-               /*
-                * No previous block or can't follow it, just default.
-                */
-               else
-                       prevbno = NULLFSBLOCK;
-               /*
-                * If there's a following (right) block, select a requested
-                * start block based on it.
-                */
-               if (!isnullstartblock(ap->got.br_startblock)) {
-                       /*
-                        * Calculate gap to start of next block.
-                        */
-                       adjust = gotdiff = ap->got.br_startoff - ap->offset;
-                       /*
-                        * Figure the startblock based on the next block's
-                        * start and the gap size.
-                        */
-                       gotbno = ap->got.br_startblock;
-                       /*
-                        * Heuristic!
-                        * If the gap is large relative to the piece we're
-                        * allocating, or using it gives us an invalid block
-                        * number, then just use the start of the next block
-                        * offset by our length.
-                        */
-                       if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
-                           ISVALID(gotbno - gotdiff, gotbno))
-                               gotbno -= adjust;
-                       else if (ISVALID(gotbno - ap->length, gotbno)) {
-                               gotbno -= ap->length;
-                               gotdiff += adjust - ap->length;
-                       } else
-                               gotdiff += adjust;
-                       /*
-                        * If the firstblock forbids it, can't use it,
-                        * must use default.
-                        */
-                       if (!rt && !nullfb &&
-                           XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
-                               gotbno = NULLFSBLOCK;
-               }
-               /*
-                * No next block, just default.
-                */
-               else
-                       gotbno = NULLFSBLOCK;
-               /*
-                * If both valid, pick the better one, else the only good
-                * one, else ap->blkno is already set (to 0 or the inode block).
-                */
-               if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
-                       ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
-               else if (prevbno != NULLFSBLOCK)
-                       ap->blkno = prevbno;
-               else if (gotbno != NULLFSBLOCK)
-                       ap->blkno = gotbno;
-       }
-#undef ISVALID
-}
-
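The heart of xfs_bmap_adjacent() is a distance heuristic: the end of the previous extent and the start of the next one are both candidate targets; a candidate separated from the request by a gap of at most XFS_ALLOC_GAP_UNITS times the allocation length is shifted across that gap at no extra cost, a larger gap doubles its penalty, and the cheaper side wins. A condensed model of that choice follows, leaving out the block-validity, firstblock and length-offset fallbacks in the real code, so it is illustration only.

/*
 * Condensed model of the candidate choice above; validity and AG
 * restrictions are omitted.
 */
#include <stdint.h>

#define ALLOC_GAP_UNITS	4		/* mirrors XFS_ALLOC_GAP_UNITS */

struct candidate {
	uint64_t	bno;		/* end of prev / start of next extent */
	int		valid;
};

uint64_t pick_blkno(struct candidate prev, uint64_t gap_prev,
		    struct candidate got, uint64_t gap_got,
		    uint64_t len, uint64_t fallback)
{
	uint64_t prevdiff = gap_prev;
	uint64_t gotdiff = gap_got;

	if (prev.valid) {
		if (gap_prev <= ALLOC_GAP_UNITS * len)
			prev.bno += gap_prev;	/* land just past the gap */
		else
			prevdiff += gap_prev;	/* big gap: double the penalty */
	}
	if (got.valid) {
		if (gap_got <= ALLOC_GAP_UNITS * len)
			got.bno -= gap_got;
		else
			gotdiff += gap_got;
	}

	if (prev.valid && got.valid)
		return prevdiff <= gotdiff ? prev.bno : got.bno;
	if (prev.valid)
		return prev.bno;
	if (got.valid)
		return got.bno;
	return fallback;			/* ap->blkno stays as preset */
}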
-static int
-xfs_bmap_longest_free_extent(
-       struct xfs_trans        *tp,
-       xfs_agnumber_t          ag,
-       xfs_extlen_t            *blen,
-       int                     *notinit)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_perag        *pag;
-       xfs_extlen_t            longest;
-       int                     error = 0;
-
-       pag = xfs_perag_get(mp, ag);
-       if (!pag->pagf_init) {
-               error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
-               if (error)
-                       goto out;
-
-               if (!pag->pagf_init) {
-                       *notinit = 1;
-                       goto out;
-               }
-       }
-
-       longest = xfs_alloc_longest_free_extent(mp, pag);
-       if (*blen < longest)
-               *blen = longest;
-
-out:
-       xfs_perag_put(pag);
-       return error;
-}
-
-static void
-xfs_bmap_select_minlen(
-       struct xfs_bmalloca     *ap,
-       struct xfs_alloc_arg    *args,
-       xfs_extlen_t            *blen,
-       int                     notinit)
-{
-       if (notinit || *blen < ap->minlen) {
-               /*
-                * Since we did a BUF_TRYLOCK above, it is possible that
-                * there is space for this request.
-                */
-               args->minlen = ap->minlen;
-       } else if (*blen < args->maxlen) {
-               /*
-                * If the best seen length is less than the request length,
-                * use the best as the minimum.
-                */
-               args->minlen = *blen;
-       } else {
-               /*
-                * Otherwise we've seen an extent as big as maxlen, use that
-                * as the minimum.
-                */
-               args->minlen = args->maxlen;
-       }
-}
-
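xfs_bmap_select_minlen() above is a three-way clamp: if the AG headers could not be read under trylock (or nothing at least as large as the caller's minimum was seen), fall back to the caller's minimum and stay optimistic; otherwise use the best free-extent length seen, capped at the requested maximum. Restated as a bare function over plain integers:

/* Standalone restatement of the minlen choice above; plain integers only. */
#include <stdint.h>

uint32_t select_minlen(uint32_t best_seen, uint32_t want_min,
		       uint32_t want_max, int notinit)
{
	if (notinit || best_seen < want_min)
		return want_min;	/* trylock path: be optimistic */
	if (best_seen < want_max)
		return best_seen;	/* best free extent seen sets the floor */
	return want_max;		/* something >= maxlen exists */
}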
-STATIC int
-xfs_bmap_btalloc_nullfb(
-       struct xfs_bmalloca     *ap,
-       struct xfs_alloc_arg    *args,
-       xfs_extlen_t            *blen)
-{
-       struct xfs_mount        *mp = ap->ip->i_mount;
-       xfs_agnumber_t          ag, startag;
-       int                     notinit = 0;
-       int                     error;
-
-       args->type = XFS_ALLOCTYPE_START_BNO;
-       args->total = ap->total;
-
-       startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
-       if (startag == NULLAGNUMBER)
-               startag = ag = 0;
-
-       while (*blen < args->maxlen) {
-               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
-                                                    &notinit);
-               if (error)
-                       return error;
-
-               if (++ag == mp->m_sb.sb_agcount)
-                       ag = 0;
-               if (ag == startag)
-                       break;
-       }
-
-       xfs_bmap_select_minlen(ap, args, blen, notinit);
-       return 0;
-}
-
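The loop in xfs_bmap_btalloc_nullfb() is a wrap-around scan: start at the AG implied by the hint, walk forward, wrap past the last AG, and stop once the best free extent seen reaches maxlen or the scan arrives back at its starting AG. A skeleton of just that iteration order is below, with the per-AG probe reduced to a caller-supplied stub in place of xfs_bmap_longest_free_extent().

/*
 * Skeleton of the wrap-around AG scan above.  probe_ag() only has to
 * update *blen with the longest free extent it finds in that AG.
 */
#include <stdint.h>

typedef uint32_t agnumber_t;

int scan_ags(agnumber_t startag, agnumber_t agcount, uint32_t maxlen,
	     uint32_t *blen, int (*probe_ag)(agnumber_t ag, uint32_t *blen))
{
	agnumber_t ag = startag;

	while (*blen < maxlen) {
		int error = probe_ag(ag, blen);

		if (error)
			return error;
		if (++ag == agcount)	/* wrap past the last AG */
			ag = 0;
		if (ag == startag)	/* full lap, stop improving */
			break;
	}
	return 0;
}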
-STATIC int
-xfs_bmap_btalloc_filestreams(
-       struct xfs_bmalloca     *ap,
-       struct xfs_alloc_arg    *args,
-       xfs_extlen_t            *blen)
-{
-       struct xfs_mount        *mp = ap->ip->i_mount;
-       xfs_agnumber_t          ag;
-       int                     notinit = 0;
-       int                     error;
-
-       args->type = XFS_ALLOCTYPE_NEAR_BNO;
-       args->total = ap->total;
-
-       ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
-       if (ag == NULLAGNUMBER)
-               ag = 0;
-
-       error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
-       if (error)
-               return error;
-
-       if (*blen < args->maxlen) {
-               error = xfs_filestream_new_ag(ap, &ag);
-               if (error)
-                       return error;
-
-               error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
-                                                    &notinit);
-               if (error)
-                       return error;
-
-       }
-
-       xfs_bmap_select_minlen(ap, args, blen, notinit);
-
-       /*
-        * Set the failure fallback case to look in the selected AG as stream
-        * may have moved.
-        */
-       ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
-       return 0;
-}
-
-STATIC int
-xfs_bmap_btalloc(
-       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
-{
-       xfs_mount_t     *mp;            /* mount point structure */
-       xfs_alloctype_t atype = 0;      /* type for allocation routines */
-       xfs_extlen_t    align;          /* minimum allocation alignment */
-       xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
-       xfs_agnumber_t  ag;
-       xfs_alloc_arg_t args;
-       xfs_extlen_t    blen;
-       xfs_extlen_t    nextminlen = 0;
-       int             nullfb;         /* true if ap->firstblock isn't set */
-       int             isaligned;
-       int             tryagain;
-       int             error;
-       int             stripe_align;
-
-       ASSERT(ap->length);
-
-       mp = ap->ip->i_mount;
-
-       /* stripe alignment for allocation is determined by mount parameters */
-       stripe_align = 0;
-       if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
-               stripe_align = mp->m_swidth;
-       else if (mp->m_dalign)
-               stripe_align = mp->m_dalign;
-
-       align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
-       if (unlikely(align)) {
-               error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
-                                               align, 0, ap->eof, 0, ap->conv,
-                                               &ap->offset, &ap->length);
-               ASSERT(!error);
-               ASSERT(ap->length);
-       }
-
-
-       nullfb = *ap->firstblock == NULLFSBLOCK;
-       fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
-       if (nullfb) {
-               if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
-                       ag = xfs_filestream_lookup_ag(ap->ip);
-                       ag = (ag != NULLAGNUMBER) ? ag : 0;
-                       ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
-               } else {
-                       ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-               }
-       } else
-               ap->blkno = *ap->firstblock;
-
-       xfs_bmap_adjacent(ap);
-
-       /*
-        * If allowed, use ap->blkno; otherwise must use firstblock since
-        * it's in the right allocation group.
-        */
-       if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
-               ;
-       else
-               ap->blkno = *ap->firstblock;
-       /*
-        * Normal allocation, done through xfs_alloc_vextent.
-        */
-       tryagain = isaligned = 0;
-       memset(&args, 0, sizeof(args));
-       args.tp = ap->tp;
-       args.mp = mp;
-       args.fsbno = ap->blkno;
-
-       /* Trim the allocation back to the maximum an AG can fit. */
-       args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
-       args.firstblock = *ap->firstblock;
-       blen = 0;
-       if (nullfb) {
-               /*
-                * Search for an allocation group with a single extent large
-                * enough for the request.  If one isn't found, then adjust
-                * the minimum allocation size to the largest space found.
-                */
-               if (ap->userdata && xfs_inode_is_filestream(ap->ip))
-                       error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
-               else
-                       error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
-               if (error)
-                       return error;
-       } else if (ap->flist->xbf_low) {
-               if (xfs_inode_is_filestream(ap->ip))
-                       args.type = XFS_ALLOCTYPE_FIRST_AG;
-               else
-                       args.type = XFS_ALLOCTYPE_START_BNO;
-               args.total = args.minlen = ap->minlen;
-       } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.total = ap->total;
-               args.minlen = ap->minlen;
-       }
-       /* apply extent size hints if obtained earlier */
-       if (unlikely(align)) {
-               args.prod = align;
-               if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
-                       args.mod = (xfs_extlen_t)(args.prod - args.mod);
-       } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
-               args.prod = 1;
-               args.mod = 0;
-       } else {
-               args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
-               if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
-                       args.mod = (xfs_extlen_t)(args.prod - args.mod);
-       }
-       /*
-        * If we are not low on available data blocks, and the
-        * underlying logical volume manager is a stripe, and
-        * the file offset is zero then try to allocate data
-        * blocks on stripe unit boundary.
-        * NOTE: ap->aeof is only set if the allocation length
-        * is >= the stripe unit and the allocation offset is
-        * at the end of file.
-        */
-       if (!ap->flist->xbf_low && ap->aeof) {
-               if (!ap->offset) {
-                       args.alignment = stripe_align;
-                       atype = args.type;
-                       isaligned = 1;
-                       /*
-                        * Adjust for alignment
-                        */
-                       if (blen > args.alignment && blen <= args.maxlen)
-                               args.minlen = blen - args.alignment;
-                       args.minalignslop = 0;
-               } else {
-                       /*
-                        * First try an exact bno allocation.
-                        * If it fails then do a near or start bno
-                        * allocation with alignment turned on.
-                        */
-                       atype = args.type;
-                       tryagain = 1;
-                       args.type = XFS_ALLOCTYPE_THIS_BNO;
-                       args.alignment = 1;
-                       /*
-                        * Compute the minlen+alignment for the
-                        * next case.  Set slop so that the value
-                        * of minlen+alignment+slop doesn't go up
-                        * between the calls.
-                        */
-                       if (blen > stripe_align && blen <= args.maxlen)
-                               nextminlen = blen - stripe_align;
-                       else
-                               nextminlen = args.minlen;
-                       if (nextminlen + stripe_align > args.minlen + 1)
-                               args.minalignslop =
-                                       nextminlen + stripe_align -
-                                       args.minlen - 1;
-                       else
-                               args.minalignslop = 0;
-               }
-       } else {
-               args.alignment = 1;
-               args.minalignslop = 0;
-       }
-       args.minleft = ap->minleft;
-       args.wasdel = ap->wasdel;
-       args.isfl = 0;
-       args.userdata = ap->userdata;
-       if ((error = xfs_alloc_vextent(&args)))
-               return error;
-       if (tryagain && args.fsbno == NULLFSBLOCK) {
-               /*
-                * Exact allocation failed. Now try with alignment
-                * turned on.
-                */
-               args.type = atype;
-               args.fsbno = ap->blkno;
-               args.alignment = stripe_align;
-               args.minlen = nextminlen;
-               args.minalignslop = 0;
-               isaligned = 1;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-       if (isaligned && args.fsbno == NULLFSBLOCK) {
-               /*
-                * allocation failed, so turn off alignment and
-                * try again.
-                */
-               args.type = atype;
-               args.fsbno = ap->blkno;
-               args.alignment = 0;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-       if (args.fsbno == NULLFSBLOCK && nullfb &&
-           args.minlen > ap->minlen) {
-               args.minlen = ap->minlen;
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               args.fsbno = ap->blkno;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-       if (args.fsbno == NULLFSBLOCK && nullfb) {
-               args.fsbno = 0;
-               args.type = XFS_ALLOCTYPE_FIRST_AG;
-               args.total = ap->minlen;
-               args.minleft = 0;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-               ap->flist->xbf_low = 1;
-       }
-       if (args.fsbno != NULLFSBLOCK) {
-               /*
-                * Check that the allocation happened in the same or a higher
-                * AG than the first block that was allocated.
-                */
-               ASSERT(*ap->firstblock == NULLFSBLOCK ||
-                      XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
-                      XFS_FSB_TO_AGNO(mp, args.fsbno) ||
-                      (ap->flist->xbf_low &&
-                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
-                       XFS_FSB_TO_AGNO(mp, args.fsbno)));
-
-               ap->blkno = args.fsbno;
-               if (*ap->firstblock == NULLFSBLOCK)
-                       *ap->firstblock = args.fsbno;
-               ASSERT(nullfb || fb_agno == args.agno ||
-                      (ap->flist->xbf_low && fb_agno < args.agno));
-               ap->length = args.len;
-               ap->ip->i_d.di_nblocks += args.len;
-               xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-               if (ap->wasdel)
-                       ap->ip->i_delayed_blks -= args.len;
-               /*
-                * Adjust the disk quota also. This was reserved
-                * earlier.
-                */
-               xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-                       ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
-                                       XFS_TRANS_DQ_BCOUNT,
-                       (long) args.len);
-       } else {
-               ap->blkno = NULLFSBLOCK;
-               ap->length = 0;
-       }
-       return 0;
-}
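
The minalignslop logic above keeps the worst-case space requirement constant
across the exact-bno attempt and the aligned retry.  A minimal sketch of that
arithmetic, using hypothetical values (stripe_align = 16, args.minlen = 1,
blen = 48) rather than anything taken from this patch:

        unsigned int stripe_align = 16;         /* hypothetical stripe unit */
        unsigned int minlen = 1;                /* caller's minimum length */
        unsigned int blen = 48;                 /* best free length found so far */
        unsigned int nextminlen = blen - stripe_align;                  /* 32 */
        unsigned int slop = nextminlen + stripe_align - minlen - 1;     /* 46 */

        /*
         * Exact (XFS_ALLOCTYPE_THIS_BNO) attempt: minlen + alignment + slop
         *      = 1 + 1 + 46 = 48.
         * Aligned retry: nextminlen + stripe_align + 0 = 32 + 16 = 48.
         * So minlen + alignment + minalignslop never goes up between the two
         * xfs_alloc_vextent() calls, exactly as the comment above requires.
         */
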
-
-/*
- * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
- * It figures out where to ask the underlying allocator to put the new extent.
- */
-STATIC int
-xfs_bmap_alloc(
-       struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
-{
-       if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
-               return xfs_bmap_rtalloc(ap);
-       return xfs_bmap_btalloc(ap);
-}
-
-/*
- * Trim the returned map to the required bounds
- */
-STATIC void
-xfs_bmapi_trim_map(
-       struct xfs_bmbt_irec    *mval,
-       struct xfs_bmbt_irec    *got,
-       xfs_fileoff_t           *bno,
-       xfs_filblks_t           len,
-       xfs_fileoff_t           obno,
-       xfs_fileoff_t           end,
-       int                     n,
-       int                     flags)
-{
-       if ((flags & XFS_BMAPI_ENTIRE) ||
-           got->br_startoff + got->br_blockcount <= obno) {
-               *mval = *got;
-               if (isnullstartblock(got->br_startblock))
-                       mval->br_startblock = DELAYSTARTBLOCK;
-               return;
-       }
-
-       if (obno > *bno)
-               *bno = obno;
-       ASSERT((*bno >= obno) || (n == 0));
-       ASSERT(*bno < end);
-       mval->br_startoff = *bno;
-       if (isnullstartblock(got->br_startblock))
-               mval->br_startblock = DELAYSTARTBLOCK;
-       else
-               mval->br_startblock = got->br_startblock +
-                                       (*bno - got->br_startoff);
-       /*
-        * Return the minimum of what we got and what we asked for as
-        * the length.  We can use the len variable here because it is
-        * modified below and we could have been there before coming
-        * here if the first part of the allocation didn't overlap what
-        * was asked for.
-        */
-       mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
-                       got->br_blockcount - (*bno - got->br_startoff));
-       mval->br_state = got->br_state;
-       ASSERT(mval->br_blockcount <= len);
-       return;
-}
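
To make the trimming concrete, a small sketch with hypothetical numbers and
plain C types in place of the xfs_* typedefs: the found extent is clipped to
the block range the caller actually asked for.

        /* Found extent: file blocks [100, 150) mapped at filesystem block 1000. */
        unsigned long long got_startoff = 100, got_blockcount = 50, got_startblock = 1000;
        /* The caller asked for file blocks [120, 140). */
        unsigned long long bno = 120, end = 140;

        unsigned long long off = bno;                                    /* 120 */
        unsigned long long blk = got_startblock + (bno - got_startoff);  /* 1020 */
        unsigned long long left = got_blockcount - (bno - got_startoff); /* 30 */
        unsigned long long cnt = (end - bno < left) ? end - bno : left;  /* 20 */
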
-
-/*
- * Update and validate the extent map to return
- */
-STATIC void
-xfs_bmapi_update_map(
-       struct xfs_bmbt_irec    **map,
-       xfs_fileoff_t           *bno,
-       xfs_filblks_t           *len,
-       xfs_fileoff_t           obno,
-       xfs_fileoff_t           end,
-       int                     *n,
-       int                     flags)
-{
-       xfs_bmbt_irec_t *mval = *map;
-
-       ASSERT((flags & XFS_BMAPI_ENTIRE) ||
-              ((mval->br_startoff + mval->br_blockcount) <= end));
-       ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
-              (mval->br_startoff < obno));
-
-       *bno = mval->br_startoff + mval->br_blockcount;
-       *len = end - *bno;
-       if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
-               /* update previous map with new information */
-               ASSERT(mval->br_startblock == mval[-1].br_startblock);
-               ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
-               ASSERT(mval->br_state == mval[-1].br_state);
-               mval[-1].br_blockcount = mval->br_blockcount;
-               mval[-1].br_state = mval->br_state;
-       } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
-                  mval[-1].br_startblock != DELAYSTARTBLOCK &&
-                  mval[-1].br_startblock != HOLESTARTBLOCK &&
-                  mval->br_startblock == mval[-1].br_startblock +
-                                         mval[-1].br_blockcount &&
-                  ((flags & XFS_BMAPI_IGSTATE) ||
-                       mval[-1].br_state == mval->br_state)) {
-               ASSERT(mval->br_startoff ==
-                      mval[-1].br_startoff + mval[-1].br_blockcount);
-               mval[-1].br_blockcount += mval->br_blockcount;
-       } else if (*n > 0 &&
-                  mval->br_startblock == DELAYSTARTBLOCK &&
-                  mval[-1].br_startblock == DELAYSTARTBLOCK &&
-                  mval->br_startoff ==
-                  mval[-1].br_startoff + mval[-1].br_blockcount) {
-               mval[-1].br_blockcount += mval->br_blockcount;
-               mval[-1].br_state = mval->br_state;
-       } else if (!((*n == 0) &&
-                    ((mval->br_startoff + mval->br_blockcount) <=
-                     obno))) {
-               mval++;
-               (*n)++;
-       }
-       *map = mval;
-}
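
The contiguity branch above is what lets one bmapi call return fewer, larger
mappings than it found extents.  A minimal sketch of that merge, with
hypothetical values:

        struct irec { unsigned long long off, blk, cnt; };

        struct irec prev = { .off = 100, .blk = 1000, .cnt = 20 };
        struct irec next = { .off = 120, .blk = 1020, .cnt = 10 };

        /* next starts exactly where prev ends, logically and physically, so */
        /* the caller sees the two as a single 30-block mapping.             */
        if (next.off == prev.off + prev.cnt && next.blk == prev.blk + prev.cnt)
                prev.cnt += next.cnt;
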
-
-/*
- * Map file blocks to filesystem blocks without allocation.
- */
-int
-xfs_bmapi_read(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           bno,
-       xfs_filblks_t           len,
-       struct xfs_bmbt_irec    *mval,
-       int                     *nmap,
-       int                     flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp;
-       struct xfs_bmbt_irec    got;
-       struct xfs_bmbt_irec    prev;
-       xfs_fileoff_t           obno;
-       xfs_fileoff_t           end;
-       xfs_extnum_t            lastx;
-       int                     error;
-       int                     eof;
-       int                     n = 0;
-       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
-
-       ASSERT(*nmap >= 1);
-       ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
-                          XFS_BMAPI_IGSTATE)));
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
-
-       if (unlikely(XFS_TEST_ERROR(
-           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-               XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return EIO;
-
-       XFS_STATS_INC(xs_blk_mapr);
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               error = xfs_iread_extents(NULL, ip, whichfork);
-               if (error)
-                       return error;
-       }
-
-       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
-       end = bno + len;
-       obno = bno;
-
-       while (bno < end && n < *nmap) {
-               /* Reading past eof, act as though there's a hole up to end. */
-               if (eof)
-                       got.br_startoff = end;
-               if (got.br_startoff > bno) {
-                       /* Reading in a hole.  */
-                       mval->br_startoff = bno;
-                       mval->br_startblock = HOLESTARTBLOCK;
-                       mval->br_blockcount =
-                               XFS_FILBLKS_MIN(len, got.br_startoff - bno);
-                       mval->br_state = XFS_EXT_NORM;
-                       bno += mval->br_blockcount;
-                       len -= mval->br_blockcount;
-                       mval++;
-                       n++;
-                       continue;
-               }
-
-               /* set up the extent map to return. */
-               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
-               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
-
-               /* If we're done, stop now. */
-               if (bno >= end || n >= *nmap)
-                       break;
-
-               /* Else go on to the next record. */
-               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
-               else
-                       eof = 1;
-       }
-       *nmap = n;
-       return 0;
-}
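
A sketch of the usual calling pattern for xfs_bmapi_read(); the inode, the
offset_fsb/end_fsb range and the surrounding locking context are assumptions
for illustration, not taken from this patch.  Holes and delayed allocations
are told apart by the special start blocks set above.

        struct xfs_bmbt_irec    imap;
        int                     nimaps = 1;
        int                     error;

        xfs_ilock(ip, XFS_ILOCK_SHARED);
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
                               &imap, &nimaps, 0);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        if (error)
                return error;

        if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
                /* hole: nothing is mapped over this range */
        } else if (imap.br_startblock == DELAYSTARTBLOCK) {
                /* delayed allocation: space reserved but not yet allocated */
        }
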
-
-STATIC int
-xfs_bmapi_reserve_delalloc(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           aoff,
-       xfs_filblks_t           len,
-       struct xfs_bmbt_irec    *got,
-       struct xfs_bmbt_irec    *prev,
-       xfs_extnum_t            *lastx,
-       int                     eof)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       xfs_extlen_t            alen;
-       xfs_extlen_t            indlen;
-       char                    rt = XFS_IS_REALTIME_INODE(ip);
-       xfs_extlen_t            extsz;
-       int                     error;
-
-       alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
-       if (!eof)
-               alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
-
-       /* Figure out the extent size, adjust alen */
-       extsz = xfs_get_extsz_hint(ip);
-       if (extsz) {
-               /*
-                * Make sure we don't exceed a single extent length when we
-                * align the extent, by reducing the length we are going to
-                * allocate by the maximum amount extent size alignment may
-                * require.
-                */
-               alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
-               error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
-                                              1, 0, &aoff, &alen);
-               ASSERT(!error);
-       }
-
-       if (rt)
-               extsz = alen / mp->m_sb.sb_rextsize;
-
-       /*
-        * Make a transaction-less quota reservation for delayed allocation
-        * blocks.  This number gets adjusted later.  We return if we haven't
-        * allocated blocks already inside this loop.
-        */
-       error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
-                       rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
-       if (error)
-               return error;
-
-       /*
-        * Split the in-core superblock updates for alen and indlen since
-        * they may come from different counters (realtime extents vs. free
-        * data blocks).
-        */
-       indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
-       ASSERT(indlen > 0);
-
-       if (rt) {
-               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)extsz), 0);
-       } else {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                -((int64_t)alen), 0);
-       }
-
-       if (error)
-               goto out_unreserve_quota;
-
-       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                        -((int64_t)indlen), 0);
-       if (error)
-               goto out_unreserve_blocks;
-
-
-       ip->i_delayed_blks += alen;
-
-       got->br_startoff = aoff;
-       got->br_startblock = nullstartblock(indlen);
-       got->br_blockcount = alen;
-       got->br_state = XFS_EXT_NORM;
-       xfs_bmap_add_extent_hole_delay(ip, lastx, got);
-
-       /*
-        * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
-        * might have merged it into one of the neighbouring ones.
-        */
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
-
-       ASSERT(got->br_startoff <= aoff);
-       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
-       ASSERT(isnullstartblock(got->br_startblock));
-       ASSERT(got->br_state == XFS_EXT_NORM);
-       return 0;
-
-out_unreserve_blocks:
-       if (rt)
-               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
-       else
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
-out_unreserve_quota:
-       if (XFS_IS_QUOTA_ON(mp))
-               xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
-                               XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
-       return error;
-}
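
The alen clamp above leaves room for extent size alignment: rounding both ends
of the range out to extsz boundaries can grow it by at most 2 * extsz - 1
blocks, so starting from MAXEXTLEN minus that amount guarantees the aligned
result still fits in a single extent.  A worked example with hypothetical
numbers (MAXEXTLEN is the 21-bit on-disk extent length limit, 2097151 blocks):

        unsigned int maxextlen = (1u << 21) - 1;                /* 2097151 */
        unsigned int extsz = 16;                                /* extent size hint */
        unsigned int alen = maxextlen - (2 * extsz - 1);        /* 2097120 */

        /*
         * Even if xfs_bmap_extsize_align() extends the range by the full
         * 2 * extsz - 1 = 31 blocks, 2097120 + 31 == 2097151 <= MAXEXTLEN.
         */
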
-
-/*
- * Map file blocks to filesystem blocks, adding delayed allocations as needed.
- */
-int
-xfs_bmapi_delay(
-       struct xfs_inode        *ip,    /* incore inode */
-       xfs_fileoff_t           bno,    /* starting file offs. mapped */
-       xfs_filblks_t           len,    /* length to map in file */
-       struct xfs_bmbt_irec    *mval,  /* output: map values */
-       int                     *nmap,  /* i/o: mval size/count */
-       int                     flags)  /* XFS_BMAPI_... */
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       struct xfs_bmbt_irec    got;    /* current file extent record */
-       struct xfs_bmbt_irec    prev;   /* previous file extent record */
-       xfs_fileoff_t           obno;   /* old block number (offset) */
-       xfs_fileoff_t           end;    /* end of mapped file region */
-       xfs_extnum_t            lastx;  /* last useful extent number */
-       int                     eof;    /* we've hit the end of extents */
-       int                     n = 0;  /* current extent index */
-       int                     error = 0;
-
-       ASSERT(*nmap >= 1);
-       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
-       ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (unlikely(XFS_TEST_ERROR(
-           (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-               XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return EIO;
-
-       XFS_STATS_INC(xs_blk_mapw);
-
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
-               if (error)
-                       return error;
-       }
-
-       xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
-       end = bno + len;
-       obno = bno;
-
-       while (bno < end && n < *nmap) {
-               if (eof || got.br_startoff > bno) {
-                       error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
-                                                          &prev, &lastx, eof);
-                       if (error) {
-                               if (n == 0) {
-                                       *nmap = 0;
-                                       return error;
-                               }
-                               break;
-                       }
-               }
-
-               /* set up the extent map to return. */
-               xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
-               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
-
-               /* If we're done, stop now. */
-               if (bno >= end || n >= *nmap)
-                       break;
-
-               /* Else go on to the next record. */
-               prev = got;
-               if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
-               else
-                       eof = 1;
-       }
-
-       *nmap = n;
-       return 0;
-}
-
-
-int
-__xfs_bmapi_allocate(
-       struct xfs_bmalloca     *bma)
-{
-       struct xfs_mount        *mp = bma->ip->i_mount;
-       int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
-       int                     tmp_logflags = 0;
-       int                     error;
-
-       ASSERT(bma->length > 0);
-
-       /*
-        * For the wasdelay case, we could also just allocate the blocks asked
-        * for in this bmap call, but that wouldn't be as good.
-        */
-       if (bma->wasdel) {
-               bma->length = (xfs_extlen_t)bma->got.br_blockcount;
-               bma->offset = bma->got.br_startoff;
-               if (bma->idx != NULLEXTNUM && bma->idx) {
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
-                                        &bma->prev);
-               }
-       } else {
-               bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
-               if (!bma->eof)
-                       bma->length = XFS_FILBLKS_MIN(bma->length,
-                                       bma->got.br_startoff - bma->offset);
-       }
-
-       /*
-        * Indicate if this is the first user data in the file, or just any
-        * user data.
-        */
-       if (!(bma->flags & XFS_BMAPI_METADATA)) {
-               bma->userdata = (bma->offset == 0) ?
-                       XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
-       }
-
-       bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
-
-       /*
-        * We only want to do the alignment at EOF if it is user data and the
-        * allocation length is larger than a stripe unit.
-        */
-       if (mp->m_dalign && bma->length >= mp->m_dalign &&
-           !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
-               error = xfs_bmap_isaeof(bma, whichfork);
-               if (error)
-                       return error;
-       }
-
-       error = xfs_bmap_alloc(bma);
-       if (error)
-               return error;
-
-       if (bma->flist->xbf_low)
-               bma->minleft = 0;
-       if (bma->cur)
-               bma->cur->bc_private.b.firstblock = *bma->firstblock;
-       if (bma->blkno == NULLFSBLOCK)
-               return 0;
-       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
-               bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
-               bma->cur->bc_private.b.firstblock = *bma->firstblock;
-               bma->cur->bc_private.b.flist = bma->flist;
-       }
-       /*
-        * Bump the number of extents we've allocated
-        * in this call.
-        */
-       bma->nallocs++;
-
-       if (bma->cur)
-               bma->cur->bc_private.b.flags =
-                       bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
-
-       bma->got.br_startoff = bma->offset;
-       bma->got.br_startblock = bma->blkno;
-       bma->got.br_blockcount = bma->length;
-       bma->got.br_state = XFS_EXT_NORM;
-
-       /*
-        * A wasdelay extent has been initialized, so shouldn't be flagged
-        * as unwritten.
-        */
-       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
-           xfs_sb_version_hasextflgbit(&mp->m_sb))
-               bma->got.br_state = XFS_EXT_UNWRITTEN;
-
-       if (bma->wasdel)
-               error = xfs_bmap_add_extent_delay_real(bma);
-       else
-               error = xfs_bmap_add_extent_hole_real(bma, whichfork);
-
-       bma->logflags |= tmp_logflags;
-       if (error)
-               return error;
-
-       /*
-        * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
-        * or xfs_bmap_add_extent_hole_real might have merged it into one of
-        * the neighbouring ones.
-        */
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
-
-       ASSERT(bma->got.br_startoff <= bma->offset);
-       ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
-              bma->offset + bma->length);
-       ASSERT(bma->got.br_state == XFS_EXT_NORM ||
-              bma->got.br_state == XFS_EXT_UNWRITTEN);
-       return 0;
-}
-
-STATIC int
-xfs_bmapi_convert_unwritten(
-       struct xfs_bmalloca     *bma,
-       struct xfs_bmbt_irec    *mval,
-       xfs_filblks_t           len,
-       int                     flags)
-{
-       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
-       int                     tmp_logflags = 0;
-       int                     error;
-
-       /* check if we need to do unwritten->real conversion */
-       if (mval->br_state == XFS_EXT_UNWRITTEN &&
-           (flags & XFS_BMAPI_PREALLOC))
-               return 0;
-
-       /* check if we need to do real->unwritten conversion */
-       if (mval->br_state == XFS_EXT_NORM &&
-           (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
-                       (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
-               return 0;
-
-       /*
-        * Toggle the extent state: unwritten becomes written and vice versa.
-        */
-       ASSERT(mval->br_blockcount <= len);
-       if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
-               bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
-                                       bma->ip, whichfork);
-               bma->cur->bc_private.b.firstblock = *bma->firstblock;
-               bma->cur->bc_private.b.flist = bma->flist;
-       }
-       mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
-                               ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
-
-       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
-                       &bma->cur, mval, bma->firstblock, bma->flist,
-                       &tmp_logflags);
-       bma->logflags |= tmp_logflags;
-       if (error)
-               return error;
-
-       /*
-        * Update our extent pointer, given that
-        * xfs_bmap_add_extent_unwritten_real might have merged it into one
-        * of the neighbouring ones.
-        */
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
-
-       /*
-        * We may have combined previously unwritten space with written space,
-        * so generate another request.
-        */
-       if (mval->br_blockcount < len)
-               return EAGAIN;
-       return 0;
-}
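
The two early returns above boil down to one rule: unwritten extents are
converted to written only on a plain data write (XFS_BMAPI_PREALLOC clear),
and written extents are converted to unwritten only when XFS_BMAPI_PREALLOC
and XFS_BMAPI_CONVERT are both set.  A minimal sketch of that decision, using
stand-in flag values rather than the real ones:

        enum { PREALLOC = 0x1, CONVERT = 0x2 }; /* stand-ins, not the real flags */

        int unwritten = 1;                      /* extent state before the call */
        int flags = 0;                          /* plain data write */

        int to_written   = unwritten && !(flags & PREALLOC);
        int to_unwritten = !unwritten &&
                           (flags & (PREALLOC | CONVERT)) == (PREALLOC | CONVERT);
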
-
-/*
- * Map file blocks to filesystem blocks, and allocate blocks or convert the
- * extent state if necessary.  Detailed behaviour is controlled by the flags
- * parameter.  Only allocates blocks from a single allocation group, to avoid
- * locking problems.
- *
- * The returned value in "firstblock" from the first call in a transaction
- * must be remembered and presented to subsequent calls in "firstblock".
- * An upper bound for the number of blocks to be allocated is supplied to
- * the first call in "total"; if no allocation group has that many free
- * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
- */
-int
-xfs_bmapi_write(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           bno,            /* starting file offs. mapped */
-       xfs_filblks_t           len,            /* length to map in file */
-       int                     flags,          /* XFS_BMAPI_... */
-       xfs_fsblock_t           *firstblock,    /* first allocated block
-                                                  controls a.g. for allocs */
-       xfs_extlen_t            total,          /* total blocks needed */
-       struct xfs_bmbt_irec    *mval,          /* output: map values */
-       int                     *nmap,          /* i/o: mval size/count */
-       struct xfs_bmap_free    *flist)         /* i/o: list extents to free */
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp;
-       struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
-       xfs_fileoff_t           end;            /* end of mapped file region */
-       int                     eof;            /* after the end of extents */
-       int                     error;          /* error return */
-       int                     n;              /* current extent index */
-       xfs_fileoff_t           obno;           /* old block number (offset) */
-       int                     whichfork;      /* data or attr fork */
-       char                    inhole;         /* current location is hole in file */
-       char                    wasdelay;       /* old extent was delayed */
-
-#ifdef DEBUG
-       xfs_fileoff_t           orig_bno;       /* original block number value */
-       int                     orig_flags;     /* original flags arg value */
-       xfs_filblks_t           orig_len;       /* original value of len arg */
-       struct xfs_bmbt_irec    *orig_mval;     /* original value of mval */
-       int                     orig_nmap;      /* original value of *nmap */
-
-       orig_bno = bno;
-       orig_len = len;
-       orig_flags = flags;
-       orig_mval = mval;
-       orig_nmap = *nmap;
-#endif
-       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-               XFS_ATTR_FORK : XFS_DATA_FORK;
-
-       ASSERT(*nmap >= 1);
-       ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
-       ASSERT(!(flags & XFS_BMAPI_IGSTATE));
-       ASSERT(tp != NULL);
-       ASSERT(len > 0);
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (unlikely(XFS_TEST_ERROR(
-           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-               XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return EIO;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-
-       XFS_STATS_INC(xs_blk_mapw);
-
-       if (*firstblock == NULLFSBLOCK) {
-               if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
-                       bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-               else
-                       bma.minleft = 1;
-       } else {
-               bma.minleft = 0;
-       }
-
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               error = xfs_iread_extents(tp, ip, whichfork);
-               if (error)
-                       goto error0;
-       }
-
-       xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
-                               &bma.prev);
-       n = 0;
-       end = bno + len;
-       obno = bno;
-
-       bma.tp = tp;
-       bma.ip = ip;
-       bma.total = total;
-       bma.userdata = 0;
-       bma.flist = flist;
-       bma.firstblock = firstblock;
-
-       if (flags & XFS_BMAPI_STACK_SWITCH)
-               bma.stack_switch = 1;
-
-       while (bno < end && n < *nmap) {
-               inhole = eof || bma.got.br_startoff > bno;
-               wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
-
-               /*
-                * First, deal with the hole before the allocated space
-                * that we found, if any.
-                */
-               if (inhole || wasdelay) {
-                       bma.eof = eof;
-                       bma.conv = !!(flags & XFS_BMAPI_CONVERT);
-                       bma.wasdel = wasdelay;
-                       bma.offset = bno;
-                       bma.flags = flags;
-
-                       /*
-                        * There's a 32/64 bit type mismatch between the
-                        * allocation length request (which can be 64 bits in
-                        * length) and the bma length request, which is
-                        * xfs_extlen_t and therefore 32 bits. Hence we have to
-                        * check for 32-bit overflows and handle them here.
-                        */
-                       if (len > (xfs_filblks_t)MAXEXTLEN)
-                               bma.length = MAXEXTLEN;
-                       else
-                               bma.length = len;
-
-                       ASSERT(len > 0);
-                       ASSERT(bma.length > 0);
-                       error = xfs_bmapi_allocate(&bma);
-                       if (error)
-                               goto error0;
-                       if (bma.blkno == NULLFSBLOCK)
-                               break;
-               }
-
-               /* Deal with the allocated space we found.  */
-               xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
-                                                       end, n, flags);
-
-               /* Execute unwritten extent conversion if necessary */
-               error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
-               if (error == EAGAIN)
-                       continue;
-               if (error)
-                       goto error0;
-
-               /* update the extent map to return */
-               xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
-
-               /*
-                * If we're done, stop now.  Stop when we've allocated
-                * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
-                * the transaction may get too big.
-                */
-               if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
-                       break;
-
-               /* Else go on to the next record. */
-               bma.prev = bma.got;
-               if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
-                                        &bma.got);
-               } else
-                       eof = 1;
-       }
-       *nmap = n;
-
-       /*
-        * Transform from btree to extents, give it cur.
-        */
-       if (xfs_bmap_wants_extents(ip, whichfork)) {
-               int             tmp_logflags = 0;
-
-               ASSERT(bma.cur);
-               error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
-                       &tmp_logflags, whichfork);
-               bma.logflags |= tmp_logflags;
-               if (error)
-                       goto error0;
-       }
-
-       ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
-              XFS_IFORK_NEXTENTS(ip, whichfork) >
-               XFS_IFORK_MAXEXT(ip, whichfork));
-       error = 0;
-error0:
-       /*
-        * Log everything.  Do this after the conversion; there's no point in
-        * logging the extent records if we've converted to btree format.
-        */
-       if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-               bma.logflags &= ~xfs_ilog_fext(whichfork);
-       else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
-                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
-               bma.logflags &= ~xfs_ilog_fbroot(whichfork);
-       /*
-        * Log whatever the flags say, even on error.  Otherwise we might miss
-        * a case where the data was changed and an error occurred but the
-        * change was never logged, so we don't shut down when we should.
-        */
-       if (bma.logflags)
-               xfs_trans_log_inode(tp, ip, bma.logflags);
-
-       if (bma.cur) {
-               if (!error) {
-                       ASSERT(*firstblock == NULLFSBLOCK ||
-                              XFS_FSB_TO_AGNO(mp, *firstblock) ==
-                              XFS_FSB_TO_AGNO(mp,
-                                      bma.cur->bc_private.b.firstblock) ||
-                              (flist->xbf_low &&
-                               XFS_FSB_TO_AGNO(mp, *firstblock) <
-                               XFS_FSB_TO_AGNO(mp,
-                                       bma.cur->bc_private.b.firstblock)));
-                       *firstblock = bma.cur->bc_private.b.firstblock;
-               }
-               xfs_btree_del_cursor(bma.cur,
-                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       }
-       if (!error)
-               xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
-                       orig_nmap, *nmap);
-       return error;
-}
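
A sketch of the calling convention described in the comment above
xfs_bmapi_write(): firstblock and the free list are initialised once, threaded
through every bmapi call in the transaction, and the free list is processed
afterwards.  The transaction, inode, file range and reservation (resblks) here
are assumptions for illustration, not taken from this patch.

        xfs_fsblock_t           firstfsb;
        struct xfs_bmap_free    free_list;
        struct xfs_bmbt_irec    imap;
        int                     nimaps = 1;
        int                     committed;
        int                     error;

        xfs_bmap_init(&free_list, &firstfsb);
        error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                XFS_BMAPI_PREALLOC, &firstfsb, resblks,
                                &imap, &nimaps, &free_list);
        if (!error)
                error = xfs_bmap_finish(&tp, &free_list, &committed);
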
-
-/*
- * Called by xfs_bmapi to update file extent records and the btree
- * after removing space (or undoing a delayed allocation).
- */
-STATIC int                             /* error */
-xfs_bmap_del_extent(
-       xfs_inode_t             *ip,    /* incore inode pointer */
-       xfs_trans_t             *tp,    /* current transaction pointer */
-       xfs_extnum_t            *idx,   /* extent number to update/delete */
-       xfs_bmap_free_t         *flist, /* list of extents to be freed */
-       xfs_btree_cur_t         *cur,   /* if null, not a btree */
-       xfs_bmbt_irec_t         *del,   /* data to remove from extents */
-       int                     *logflagsp, /* inode logging flags */
-       int                     whichfork) /* data or attr fork */
-{
-       xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
-       xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
-       xfs_fsblock_t           del_endblock=0; /* first block past del */
-       xfs_fileoff_t           del_endoff;     /* first offset past del */
-       int                     delay;  /* current block is delayed allocated */
-       int                     do_fx;  /* free extent at end of routine */
-       xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
-       int                     error;  /* error return value */
-       int                     flags;  /* inode logging flags */
-       xfs_bmbt_irec_t         got;    /* current extent entry */
-       xfs_fileoff_t           got_endoff;     /* first offset past got */
-       int                     i;      /* temp state */
-       xfs_ifork_t             *ifp;   /* inode fork pointer */
-       xfs_mount_t             *mp;    /* mount structure */
-       xfs_filblks_t           nblks;  /* quota/sb block count */
-       xfs_bmbt_irec_t         new;    /* new record to be inserted */
-       /* REFERENCED */
-       uint                    qfield; /* quota field to update */
-       xfs_filblks_t           temp;   /* for indirect length calculations */
-       xfs_filblks_t           temp2;  /* for indirect length calculations */
-       int                     state = 0;
-
-       XFS_STATS_INC(xs_del_exlist);
-
-       if (whichfork == XFS_ATTR_FORK)
-               state |= BMAP_ATTRFORK;
-
-       mp = ip->i_mount;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
-               (uint)sizeof(xfs_bmbt_rec_t)));
-       ASSERT(del->br_blockcount > 0);
-       ep = xfs_iext_get_ext(ifp, *idx);
-       xfs_bmbt_get_all(ep, &got);
-       ASSERT(got.br_startoff <= del->br_startoff);
-       del_endoff = del->br_startoff + del->br_blockcount;
-       got_endoff = got.br_startoff + got.br_blockcount;
-       ASSERT(got_endoff >= del_endoff);
-       delay = isnullstartblock(got.br_startblock);
-       ASSERT(isnullstartblock(del->br_startblock) == delay);
-       flags = 0;
-       qfield = 0;
-       error = 0;
-       /*
-        * If deleting a real allocation, must free up the disk space.
-        */
-       if (!delay) {
-               flags = XFS_ILOG_CORE;
-               /*
-                * Realtime allocation.  Free it and record di_nblocks update.
-                */
-               if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
-                       xfs_fsblock_t   bno;
-                       xfs_filblks_t   len;
-
-                       ASSERT(do_mod(del->br_blockcount,
-                                     mp->m_sb.sb_rextsize) == 0);
-                       ASSERT(do_mod(del->br_startblock,
-                                     mp->m_sb.sb_rextsize) == 0);
-                       bno = del->br_startblock;
-                       len = del->br_blockcount;
-                       do_div(bno, mp->m_sb.sb_rextsize);
-                       do_div(len, mp->m_sb.sb_rextsize);
-                       error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
-                       if (error)
-                               goto done;
-                       do_fx = 0;
-                       nblks = len * mp->m_sb.sb_rextsize;
-                       qfield = XFS_TRANS_DQ_RTBCOUNT;
-               }
-               /*
-                * Ordinary allocation.
-                */
-               else {
-                       do_fx = 1;
-                       nblks = del->br_blockcount;
-                       qfield = XFS_TRANS_DQ_BCOUNT;
-               }
-               /*
-                * Set up del_endblock and cur for later.
-                */
-               del_endblock = del->br_startblock + del->br_blockcount;
-               if (cur) {
-                       if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-                                       got.br_startblock, got.br_blockcount,
-                                       &i)))
-                               goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               }
-               da_old = da_new = 0;
-       } else {
-               da_old = startblockval(got.br_startblock);
-               da_new = 0;
-               nblks = 0;
-               do_fx = 0;
-       }
-       /*
-        * Set the flag value to use in the switch statement.
-        * Bit 1 (value 2) is set if del starts at the start of got;
-        * bit 0 (value 1) is set if del ends at the end of got.
-        */
-       switch (((got.br_startoff == del->br_startoff) << 1) |
-               (got_endoff == del_endoff)) {
-       case 3:
-               /*
-                * Matches the whole extent.  Delete the entry.
-                */
-               xfs_iext_remove(ip, *idx, 1,
-                               whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-               --*idx;
-               if (delay)
-                       break;
-
-               XFS_IFORK_NEXT_SET(ip, whichfork,
-                       XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-               flags |= XFS_ILOG_CORE;
-               if (!cur) {
-                       flags |= xfs_ilog_fext(whichfork);
-                       break;
-               }
-               if ((error = xfs_btree_delete(cur, &i)))
-                       goto done;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-               break;
-
-       case 2:
-               /*
-                * Deleting the first part of the extent.
-                */
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_startoff(ep, del_endoff);
-               temp = got.br_blockcount - del->br_blockcount;
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               xfs_bmbt_set_startblock(ep, del_endblock);
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               if (!cur) {
-                       flags |= xfs_ilog_fext(whichfork);
-                       break;
-               }
-               if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
-                               got.br_blockcount - del->br_blockcount,
-                               got.br_state)))
-                       goto done;
-               break;
-
-       case 1:
-               /*
-                * Deleting the last part of the extent.
-                */
-               temp = got.br_blockcount - del->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               if (!cur) {
-                       flags |= xfs_ilog_fext(whichfork);
-                       break;
-               }
-               if ((error = xfs_bmbt_update(cur, got.br_startoff,
-                               got.br_startblock,
-                               got.br_blockcount - del->br_blockcount,
-                               got.br_state)))
-                       goto done;
-               break;
-
-       case 0:
-               /*
-                * Deleting the middle of the extent.
-                */
-               temp = del->br_startoff - got.br_startoff;
-               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               new.br_startoff = del_endoff;
-               temp2 = got_endoff - del_endoff;
-               new.br_blockcount = temp2;
-               new.br_state = got.br_state;
-               if (!delay) {
-                       new.br_startblock = del_endblock;
-                       flags |= XFS_ILOG_CORE;
-                       if (cur) {
-                               if ((error = xfs_bmbt_update(cur,
-                                               got.br_startoff,
-                                               got.br_startblock, temp,
-                                               got.br_state)))
-                                       goto done;
-                               if ((error = xfs_btree_increment(cur, 0, &i)))
-                                       goto done;
-                               cur->bc_rec.b = new;
-                               error = xfs_btree_insert(cur, &i);
-                               if (error && error != ENOSPC)
-                                       goto done;
-                               /*
-                                * If we get no space back from the btree
-                                * insert, it tried a split and we have a zero
-                                * block reservation.
-                                * Fix up our state and return the error.
-                                */
-                               if (error == ENOSPC) {
-                                       /*
-                                        * Reset the cursor, don't trust
-                                        * it after any insert operation.
-                                        */
-                                       if ((error = xfs_bmbt_lookup_eq(cur,
-                                                       got.br_startoff,
-                                                       got.br_startblock,
-                                                       temp, &i)))
-                                               goto done;
-                                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                                       /*
-                                        * Update the btree record back
-                                        * to the original value.
-                                        */
-                                       if ((error = xfs_bmbt_update(cur,
-                                                       got.br_startoff,
-                                                       got.br_startblock,
-                                                       got.br_blockcount,
-                                                       got.br_state)))
-                                               goto done;
-                                       /*
-                                        * Reset the extent record back
-                                        * to the original value.
-                                        */
-                                       xfs_bmbt_set_blockcount(ep,
-                                               got.br_blockcount);
-                                       flags = 0;
-                                       error = ENOSPC;
-                                       goto done;
-                               }
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                       } else
-                               flags |= xfs_ilog_fext(whichfork);
-                       XFS_IFORK_NEXT_SET(ip, whichfork,
-                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
-               } else {
-                       ASSERT(whichfork == XFS_DATA_FORK);
-                       temp = xfs_bmap_worst_indlen(ip, temp);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
-                       new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
-               }
-               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
-               xfs_iext_insert(ip, *idx + 1, 1, &new, state);
-               ++*idx;
-               break;
-       }
-       /*
-        * If we need to, add to list of extents to delete.
-        */
-       if (do_fx)
-               xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
-                       mp);
-       /*
-        * Adjust inode # blocks in the file.
-        */
-       if (nblks)
-               ip->i_d.di_nblocks -= nblks;
-       /*
-        * Adjust quota data.
-        */
-       if (qfield)
-               xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
-
-       /*
-        * Account for change in delayed indirect blocks.
-        * Nothing to do for disk quota accounting here.
-        */
-       ASSERT(da_old >= da_new);
-       if (da_old > da_new) {
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                       (int64_t)(da_old - da_new), 0);
-       }
-done:
-       *logflagsp = flags;
-       return error;
-}
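
The switch in xfs_bmap_del_extent() is driven by how the range being removed
lines up with the edges of the existing extent.  A small sketch of the case
encoding, with hypothetical offsets:

        /* Existing extent covers file blocks [100, 200); delete [100, 150). */
        unsigned long long got_startoff = 100, got_endoff = 200;
        unsigned long long del_startoff = 100, del_endoff = 150;

        int which = ((got_startoff == del_startoff) << 1) |
                    (got_endoff == del_endoff);         /* 2 in this example */
        /*
         * 3: the whole extent is deleted
         * 2: the front of the extent is deleted (this example)
         * 1: the back of the extent is deleted
         * 0: the middle is deleted, splitting the extent in two
         */
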
-
-/*
- * Unmap (remove) blocks from a file.
- * If nexts is nonzero then the number of extents to remove is limited to
- * that value.  If not all extents in the block range can be removed then
- * *done is set.
- */
-int                                            /* error */
-xfs_bunmapi(
-       xfs_trans_t             *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* incore inode */
-       xfs_fileoff_t           bno,            /* starting offset to unmap */
-       xfs_filblks_t           len,            /* length to unmap in file */
-       int                     flags,          /* misc flags */
-       xfs_extnum_t            nexts,          /* number of extents max */
-       xfs_fsblock_t           *firstblock,    /* first allocated block
-                                                  controls a.g. for allocs */
-       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
-       int                     *done)          /* set if not done yet */
-{
-       xfs_btree_cur_t         *cur;           /* bmap btree cursor */
-       xfs_bmbt_irec_t         del;            /* extent being deleted */
-       int                     eof;            /* is deleting at eof */
-       xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
-       int                     error;          /* error return value */
-       xfs_extnum_t            extno;          /* extent number in list */
-       xfs_bmbt_irec_t         got;            /* current extent record */
-       xfs_ifork_t             *ifp;           /* inode fork pointer */
-       int                     isrt;           /* freeing in rt area */
-       xfs_extnum_t            lastx;          /* last extent index used */
-       int                     logflags;       /* transaction logging flags */
-       xfs_extlen_t            mod;            /* rt extent offset */
-       xfs_mount_t             *mp;            /* mount structure */
-       xfs_extnum_t            nextents;       /* number of file extents */
-       xfs_bmbt_irec_t         prev;           /* previous extent record */
-       xfs_fileoff_t           start;          /* first file offset deleted */
-       int                     tmp_logflags;   /* partial logging flags */
-       int                     wasdel;         /* was a delayed alloc extent */
-       int                     whichfork;      /* data or attribute fork */
-       xfs_fsblock_t           sum;
-
-       trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
-
-       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-               XFS_ATTR_FORK : XFS_DATA_FORK;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (unlikely(
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
-               XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
-                                ip->i_mount);
-               return EFSCORRUPTED;
-       }
-       mp = ip->i_mount;
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return EIO;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(len > 0);
-       ASSERT(nexts >= 0);
-
-       if (!(ifp->if_flags & XFS_IFEXTENTS) &&
-           (error = xfs_iread_extents(tp, ip, whichfork)))
-               return error;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
-               *done = 1;
-               return 0;
-       }
-       XFS_STATS_INC(xs_blk_unmap);
-       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
-       start = bno;
-       bno = start + len - 1;
-       ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
-               &prev);
-
-       /*
-        * Check to see if the given block number is past the end of the
-        * file, and back up to the last block if so.
-        */
-       if (eof) {
-               ep = xfs_iext_get_ext(ifp, --lastx);
-               xfs_bmbt_get_all(ep, &got);
-               bno = got.br_startoff + got.br_blockcount - 1;
-       }
-       logflags = 0;
-       if (ifp->if_flags & XFS_IFBROOT) {
-               ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
-               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-               cur->bc_private.b.firstblock = *firstblock;
-               cur->bc_private.b.flist = flist;
-               cur->bc_private.b.flags = 0;
-       } else
-               cur = NULL;
-
-       if (isrt) {
-               /*
-                * Synchronize by locking the bitmap inode.
-                */
-               xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
-               xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
-       }
-
-       extno = 0;
-       while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
-              (nexts == 0 || extno < nexts)) {
-               /*
-                * Is the found extent after a hole in which bno lives?
-                * Just back up to the previous extent, if so.
-                */
-               if (got.br_startoff > bno) {
-                       if (--lastx < 0)
-                               break;
-                       ep = xfs_iext_get_ext(ifp, lastx);
-                       xfs_bmbt_get_all(ep, &got);
-               }
-               /*
-                * Is the last block of this extent before the range
-                * we're supposed to delete?  If so, we're done.
-                */
-               bno = XFS_FILEOFF_MIN(bno,
-                       got.br_startoff + got.br_blockcount - 1);
-               if (bno < start)
-                       break;
-               /*
-                * Then deal with the (possibly delayed) allocated space
-                * we found.
-                */
-               ASSERT(ep != NULL);
-               del = got;
-               wasdel = isnullstartblock(del.br_startblock);
-               if (got.br_startoff < start) {
-                       del.br_startoff = start;
-                       del.br_blockcount -= start - got.br_startoff;
-                       if (!wasdel)
-                               del.br_startblock += start - got.br_startoff;
-               }
-               if (del.br_startoff + del.br_blockcount > bno + 1)
-                       del.br_blockcount = bno + 1 - del.br_startoff;
-               sum = del.br_startblock + del.br_blockcount;
-               if (isrt &&
-                   (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
-                       /*
-                        * Realtime extent not lined up at the end.
-                        * The extent could have been split into written
-                        * and unwritten pieces, or we could just be
-                        * unmapping part of it.  But we can't really
-                        * get rid of part of a realtime extent.
-                        */
-                       if (del.br_state == XFS_EXT_UNWRITTEN ||
-                           !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-                               /*
-                                * This piece is unwritten, or we're not
-                                * using unwritten extents.  Skip over it.
-                                */
-                               ASSERT(bno >= mod);
-                               bno -= mod > del.br_blockcount ?
-                                       del.br_blockcount : mod;
-                               if (bno < got.br_startoff) {
-                                       if (--lastx >= 0)
-                                               xfs_bmbt_get_all(xfs_iext_get_ext(
-                                                       ifp, lastx), &got);
-                               }
-                               continue;
-                       }
-                       /*
-                        * It's written, turn it unwritten.
-                        * This is better than zeroing it.
-                        */
-                       ASSERT(del.br_state == XFS_EXT_NORM);
-                       ASSERT(xfs_trans_get_block_res(tp) > 0);
-                       /*
-                        * If this spans a realtime extent boundary,
-                        * chop it back to the start of the one we end at.
-                        */
-                       if (del.br_blockcount > mod) {
-                               del.br_startoff += del.br_blockcount - mod;
-                               del.br_startblock += del.br_blockcount - mod;
-                               del.br_blockcount = mod;
-                       }
-                       del.br_state = XFS_EXT_UNWRITTEN;
-                       error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-                                       &lastx, &cur, &del, firstblock, flist,
-                                       &logflags);
-                       if (error)
-                               goto error0;
-                       goto nodelete;
-               }
-               if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
-                       /*
-                        * Realtime extent is lined up at the end but not
-                        * at the front.  We'll get rid of full extents if
-                        * we can.
-                        */
-                       mod = mp->m_sb.sb_rextsize - mod;
-                       if (del.br_blockcount > mod) {
-                               del.br_blockcount -= mod;
-                               del.br_startoff += mod;
-                               del.br_startblock += mod;
-                       } else if ((del.br_startoff == start &&
-                                   (del.br_state == XFS_EXT_UNWRITTEN ||
-                                    xfs_trans_get_block_res(tp) == 0)) ||
-                                  !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-                               /*
-                                * Can't make it unwritten.  There isn't
-                                * a full extent here so just skip it.
-                                */
-                               ASSERT(bno >= del.br_blockcount);
-                               bno -= del.br_blockcount;
-                               if (got.br_startoff > bno) {
-                                       if (--lastx >= 0) {
-                                               ep = xfs_iext_get_ext(ifp,
-                                                                     lastx);
-                                               xfs_bmbt_get_all(ep, &got);
-                                       }
-                               }
-                               continue;
-                       } else if (del.br_state == XFS_EXT_UNWRITTEN) {
-                               /*
-                                * This one is already unwritten.
-                                * It must have a written left neighbor.
-                                * Unwrite the killed part of that one and
-                                * try again.
-                                */
-                               ASSERT(lastx > 0);
-                               xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-                                               lastx - 1), &prev);
-                               ASSERT(prev.br_state == XFS_EXT_NORM);
-                               ASSERT(!isnullstartblock(prev.br_startblock));
-                               ASSERT(del.br_startblock ==
-                                      prev.br_startblock + prev.br_blockcount);
-                               if (prev.br_startoff < start) {
-                                       mod = start - prev.br_startoff;
-                                       prev.br_blockcount -= mod;
-                                       prev.br_startblock += mod;
-                                       prev.br_startoff = start;
-                               }
-                               prev.br_state = XFS_EXT_UNWRITTEN;
-                               lastx--;
-                               error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &prev,
-                                               firstblock, flist, &logflags);
-                               if (error)
-                                       goto error0;
-                               goto nodelete;
-                       } else {
-                               ASSERT(del.br_state == XFS_EXT_NORM);
-                               del.br_state = XFS_EXT_UNWRITTEN;
-                               error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &del,
-                                               firstblock, flist, &logflags);
-                               if (error)
-                                       goto error0;
-                               goto nodelete;
-                       }
-               }
-               if (wasdel) {
-                       ASSERT(startblockval(del.br_startblock) > 0);
-                       /* Update realtime/data freespace, unreserve quota */
-                       if (isrt) {
-                               xfs_filblks_t rtexts;
-
-                               rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
-                               do_div(rtexts, mp->m_sb.sb_rextsize);
-                               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                               (int64_t)rtexts, 0);
-                               (void)xfs_trans_reserve_quota_nblks(NULL,
-                                       ip, -((long)del.br_blockcount), 0,
-                                       XFS_QMOPT_RES_RTBLKS);
-                       } else {
-                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                               (int64_t)del.br_blockcount, 0);
-                               (void)xfs_trans_reserve_quota_nblks(NULL,
-                                       ip, -((long)del.br_blockcount), 0,
-                                       XFS_QMOPT_RES_REGBLKS);
-                       }
-                       ip->i_delayed_blks -= del.br_blockcount;
-                       if (cur)
-                               cur->bc_private.b.flags |=
-                                       XFS_BTCUR_BPRV_WASDEL;
-               } else if (cur)
-                       cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
-               /*
-                * If it's the case where the directory code is running
-                * with no block reservation, and the deleted block is in
-                * the middle of its extent, and the resulting insert
-                * of an extent would cause transformation to btree format,
-                * then reject it.  The calling code will then swap
-                * blocks around instead.
-                * We have to do this now, rather than waiting for the
-                * conversion to btree format, since the transaction
-                * will be dirty.
-                */
-               if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
-                   XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
-                   XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
-                       XFS_IFORK_MAXEXT(ip, whichfork) &&
-                   del.br_startoff > got.br_startoff &&
-                   del.br_startoff + del.br_blockcount <
-                   got.br_startoff + got.br_blockcount) {
-                       error = ENOSPC;
-                       goto error0;
-               }
-               error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
-                               &tmp_logflags, whichfork);
-               logflags |= tmp_logflags;
-               if (error)
-                       goto error0;
-               bno = del.br_startoff - 1;
-nodelete:
-               /*
-                * If not done go on to the next (previous) record.
-                */
-               if (bno != (xfs_fileoff_t)-1 && bno >= start) {
-                       if (lastx >= 0) {
-                               ep = xfs_iext_get_ext(ifp, lastx);
-                               if (xfs_bmbt_get_startoff(ep) > bno) {
-                                       if (--lastx >= 0)
-                                               ep = xfs_iext_get_ext(ifp,
-                                                                     lastx);
-                               }
-                               xfs_bmbt_get_all(ep, &got);
-                       }
-                       extno++;
-               }
-       }
-       *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
-
-       /*
-        * Convert to a btree if necessary.
-        */
-       if (xfs_bmap_needs_btree(ip, whichfork)) {
-               ASSERT(cur == NULL);
-               error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
-                       &cur, 0, &tmp_logflags, whichfork);
-               logflags |= tmp_logflags;
-               if (error)
-                       goto error0;
-       }
-       /*
-        * transform from btree to extents, give it cur
-        */
-       else if (xfs_bmap_wants_extents(ip, whichfork)) {
-               ASSERT(cur != NULL);
-               error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
-                       whichfork);
-               logflags |= tmp_logflags;
-               if (error)
-                       goto error0;
-       }
-       /*
-        * transform from extents to local?
-        */
-       error = 0;
-error0:
-       /*
-        * Log everything.  Do this after conversion, there's no point in
-        * logging the extent records if we've converted to btree format.
-        */
-       if ((logflags & xfs_ilog_fext(whichfork)) &&
-           XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
-               logflags &= ~xfs_ilog_fext(whichfork);
-       else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
-                XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
-               logflags &= ~xfs_ilog_fbroot(whichfork);
-       /*
-        * Log inode even in the error case, if the transaction
-        * is dirty we'll need to shut down the filesystem.
-        */
-       if (logflags)
-               xfs_trans_log_inode(tp, ip, logflags);
-       if (cur) {
-               if (!error) {
-                       *firstblock = cur->bc_private.b.firstblock;
-                       cur->bc_private.b.allocated = 0;
-               }
-               xfs_btree_del_cursor(cur,
-                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       }
-       return error;
-}
-
-/*
- * Shift extent records to the left to cover a hole.
- *
- * The maximum number of extents to be shifted in a single operation
- * is @num_exts, and @current_ext keeps track of the current extent
- * index we have shifted. @offset_shift_fsb is the length by which each
- * extent is shifted. If there is no hole to shift the extents
- * into, this will be considered an invalid operation and we abort immediately.
- */
-int
-xfs_bmap_shift_extents(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       int                     *done,
-       xfs_fileoff_t           start_fsb,
-       xfs_fileoff_t           offset_shift_fsb,
-       xfs_extnum_t            *current_ext,
-       xfs_fsblock_t           *firstblock,
-       struct xfs_bmap_free    *flist,
-       int                     num_exts)
-{
-       struct xfs_btree_cur            *cur;
-       struct xfs_bmbt_rec_host        *gotp;
-       struct xfs_bmbt_irec            got;
-       struct xfs_bmbt_irec            left;
-       struct xfs_mount                *mp = ip->i_mount;
-       struct xfs_ifork                *ifp;
-       xfs_extnum_t                    nexts = 0;
-       xfs_fileoff_t                   startoff;
-       int                             error = 0;
-       int                             i;
-       int                             whichfork = XFS_DATA_FORK;
-       int                             logflags;
-       xfs_filblks_t                   blockcount = 0;
-       int                             total_extents;
-
-       if (unlikely(XFS_TEST_ERROR(
-           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
-            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-               XFS_ERROR_REPORT("xfs_bmap_shift_extents",
-                                XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return EIO;
-
-       ASSERT(current_ext != NULL);
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               /* Read in all the extents */
-               error = xfs_iread_extents(tp, ip, whichfork);
-               if (error)
-                       return error;
-       }
-
-       /*
-        * If *current_ext is 0, we need to look up the extent
-        * from which we start shifting and store it in gotp.
-        */
-       if (!*current_ext) {
-               gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
-               /*
-                * gotp can be null in 2 cases: 1) if there are no extents
-                * or 2) start_fsb lies in a hole beyond which there are
-                * no extents. Either way, we are done.
-                */
-               if (!gotp) {
-                       *done = 1;
-                       return 0;
-               }
-       }
-
-       /* We are going to change core inode */
-       logflags = XFS_ILOG_CORE;
-       if (ifp->if_flags & XFS_IFBROOT) {
-               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
-               cur->bc_private.b.firstblock = *firstblock;
-               cur->bc_private.b.flist = flist;
-               cur->bc_private.b.flags = 0;
-       } else {
-               cur = NULL;
-               logflags |= XFS_ILOG_DEXT;
-       }
-
-       /*
-        * There may be delalloc extents in the data fork before the range we
-        * are collapsing out, so we cannot use the count of real extents
-        * here. Instead we have to calculate it from the incore fork.
-        */
-       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       while (nexts++ < num_exts && *current_ext < total_extents) {
-
-               gotp = xfs_iext_get_ext(ifp, *current_ext);
-               xfs_bmbt_get_all(gotp, &got);
-               startoff = got.br_startoff - offset_shift_fsb;
-
-               /*
-                * Before shifting extent into hole, make sure that the hole
-                * is large enough to accommodate the shift.
-                */
-               if (*current_ext) {
-                       xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
-                                               *current_ext - 1), &left);
-
-                       if (startoff < left.br_startoff + left.br_blockcount)
-                               error = EINVAL;
-               } else if (offset_shift_fsb > got.br_startoff) {
-                       /*
-                        * When the first extent is shifted, offset_shift_fsb
-                        * must not be larger than the starting offset of
-                        * the first extent.
-                        */
-                       error = EINVAL;
-               }
-
-               if (error)
-                       goto del_cursor;
-
-               if (cur) {
-                       error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-                                                  got.br_startblock,
-                                                  got.br_blockcount,
-                                                  &i);
-                       if (error)
-                               goto del_cursor;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-               }
-
-               /* Check if we can merge 2 adjacent extents */
-               if (*current_ext &&
-                   left.br_startoff + left.br_blockcount == startoff &&
-                   left.br_startblock + left.br_blockcount ==
-                               got.br_startblock &&
-                   left.br_state == got.br_state &&
-                   left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
-                       blockcount = left.br_blockcount +
-                               got.br_blockcount;
-                       xfs_iext_remove(ip, *current_ext, 1, 0);
-                       if (cur) {
-                               error = xfs_btree_delete(cur, &i);
-                               if (error)
-                                       goto del_cursor;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-                       }
-                       XFS_IFORK_NEXT_SET(ip, whichfork,
-                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
-                       gotp = xfs_iext_get_ext(ifp, --*current_ext);
-                       xfs_bmbt_get_all(gotp, &got);
-
-                       /* Make cursor point to the extent we will update */
-                       if (cur) {
-                               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
-                                                          got.br_startblock,
-                                                          got.br_blockcount,
-                                                          &i);
-                               if (error)
-                                       goto del_cursor;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
-                       }
-
-                       xfs_bmbt_set_blockcount(gotp, blockcount);
-                       got.br_blockcount = blockcount;
-               } else {
-                       /* We have to update the startoff */
-                       xfs_bmbt_set_startoff(gotp, startoff);
-                       got.br_startoff = startoff;
-               }
-
-               if (cur) {
-                       error = xfs_bmbt_update(cur, got.br_startoff,
-                                               got.br_startblock,
-                                               got.br_blockcount,
-                                               got.br_state);
-                       if (error)
-                               goto del_cursor;
-               }
-
-               (*current_ext)++;
-               total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       }
-
-       /* Check if we are done */
-       if (*current_ext == total_extents)
-               *done = 1;
-
-del_cursor:
-       if (cur)
-               xfs_btree_del_cursor(cur,
-                       error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-
-       xfs_trans_log_inode(tp, ip, logflags);
-       return error;
-}
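
A minimal user-space sketch of the shift-and-merge loop in xfs_bmap_shift_extents() above, using a plain array of extents. It keeps only the core logic (the hole check, and the merge with the left neighbour when the shift makes two extents contiguous) and ignores delayed allocation, unwritten state, the MAXEXTLEN limit and all btree cursor bookkeeping; every name in it is illustrative, not a kernel interface.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ext {
	uint64_t	startoff;	/* file offset, in blocks */
	uint64_t	startblock;	/* disk block number */
	uint64_t	blockcount;	/* length, in blocks */
};

/*
 * Shift every extent from index 'cur' onwards left by 'shift' blocks,
 * merging an extent into its left neighbour when the shift makes them
 * contiguous both in the file and on disk.  Returns the new extent count,
 * or -1 if a shift would collide with the extent (or file start) to the left.
 */
static int
shift_extents(struct ext *e, int nexts, int cur, uint64_t shift)
{
	for (; cur < nexts; cur++) {
		uint64_t newoff = e[cur].startoff - shift;

		/* The hole to the left must be large enough for the shift. */
		if (cur == 0) {
			if (shift > e[0].startoff)
				return -1;
		} else if (newoff <
			   e[cur - 1].startoff + e[cur - 1].blockcount) {
			return -1;
		}

		if (cur > 0 &&
		    e[cur - 1].startoff + e[cur - 1].blockcount == newoff &&
		    e[cur - 1].startblock + e[cur - 1].blockcount ==
							e[cur].startblock) {
			/* Merge into the left neighbour and drop this slot. */
			e[cur - 1].blockcount += e[cur].blockcount;
			memmove(&e[cur], &e[cur + 1],
				(nexts - cur - 1) * sizeof(*e));
			nexts--;
			cur--;	/* the next extent now occupies this slot */
		} else {
			e[cur].startoff = newoff;
		}
	}
	return nexts;
}

int
main(void)
{
	/* [10,15) at disk 100 and [20,25) at disk 105; shift the 2nd by 5. */
	struct ext e[] = { { 10, 100, 5 }, { 20, 105, 5 } };
	int n = shift_extents(e, 2, 1, 5);

	printf("%d extent(s), first is [%llu,%llu) @ %llu\n", n,
	       (unsigned long long)e[0].startoff,
	       (unsigned long long)(e[0].startoff + e[0].blockcount),
	       (unsigned long long)e[0].startblock);
	return 0;
}

With the sample extents [10,15) at disk block 100 and [20,25) at disk block 105, shifting the second extent left by 5 blocks makes it contiguous with the first, so the sketch ends up with a single merged extent [10,20) at block 100.
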
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
deleted file mode 100644 (file)
index de65bb8..0000000
+++ /dev/null
@@ -1,967 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_dinode.h"
-
-/*
- * Determine the extent state.
- */
-/* ARGSUSED */
-STATIC xfs_exntst_t
-xfs_extent_state(
-       xfs_filblks_t           blks,
-       int                     extent_flag)
-{
-       if (extent_flag) {
-               ASSERT(blks != 0);      /* saved for DMIG */
-               return XFS_EXT_UNWRITTEN;
-       }
-       return XFS_EXT_NORM;
-}
-
-/*
- * Convert on-disk form of btree root to in-memory form.
- */
-void
-xfs_bmdr_to_bmbt(
-       struct xfs_inode        *ip,
-       xfs_bmdr_block_t        *dblock,
-       int                     dblocklen,
-       struct xfs_btree_block  *rblock,
-       int                     rblocklen)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     dmxr;
-       xfs_bmbt_key_t          *fkp;
-       __be64                  *fpp;
-       xfs_bmbt_key_t          *tkp;
-       __be64                  *tpp;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
-                                XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
-                                XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
-       else
-               xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
-                                XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
-                                XFS_BTREE_LONG_PTRS);
-
-       rblock->bb_level = dblock->bb_level;
-       ASSERT(be16_to_cpu(rblock->bb_level) > 0);
-       rblock->bb_numrecs = dblock->bb_numrecs;
-       dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
-       fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
-       tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
-       fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
-       tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
-       dmxr = be16_to_cpu(dblock->bb_numrecs);
-       memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
-       memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
-}
-
-/*
- * Convert a compressed bmap extent record to an uncompressed form.
- * This code must be in sync with the routines xfs_bmbt_get_startoff,
- * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
- */
-STATIC void
-__xfs_bmbt_get_all(
-               __uint64_t l0,
-               __uint64_t l1,
-               xfs_bmbt_irec_t *s)
-{
-       int     ext_flag;
-       xfs_exntst_t st;
-
-       ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
-       s->br_startoff = ((xfs_fileoff_t)l0 &
-                          xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-#if XFS_BIG_BLKNOS
-       s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
-                          (((xfs_fsblock_t)l1) >> 21);
-#else
-#ifdef DEBUG
-       {
-               xfs_dfsbno_t    b;
-
-               b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
-                   (((xfs_dfsbno_t)l1) >> 21);
-               ASSERT((b >> 32) == 0 || isnulldstartblock(b));
-               s->br_startblock = (xfs_fsblock_t)b;
-       }
-#else  /* !DEBUG */
-       s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
-       s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
-       /* This is xfs_extent_state() in-line */
-       if (ext_flag) {
-               ASSERT(s->br_blockcount != 0);  /* saved for DMIG */
-               st = XFS_EXT_UNWRITTEN;
-       } else
-               st = XFS_EXT_NORM;
-       s->br_state = st;
-}
-
-void
-xfs_bmbt_get_all(
-       xfs_bmbt_rec_host_t *r,
-       xfs_bmbt_irec_t *s)
-{
-       __xfs_bmbt_get_all(r->l0, r->l1, s);
-}
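
The masks and shifts above define the packed 128-bit in-core extent record: bit 63 of l0 is the extent-state flag, the next 54 bits hold the file offset, the low 9 bits of l0 together with the top 43 bits of l1 hold the 52-bit start block, and the low 21 bits of l1 hold the block count. A standalone round-trip sketch of that layout with plain integers, mirroring xfs_bmbt_set_allf() and __xfs_bmbt_get_all() but using none of the kernel types:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MASK64LO(n)	(((uint64_t)1 << (n)) - 1)

struct irec {				/* uncompressed form, cf. xfs_bmbt_irec */
	uint64_t	startoff;	/* must fit in 54 bits */
	uint64_t	startblock;	/* must fit in 52 bits */
	uint64_t	blockcount;	/* must fit in 21 bits */
	int		unwritten;	/* extent state flag */
};

/* Pack as in xfs_bmbt_set_allf(): flag, offset, then startblock split 9/43. */
static void
pack(const struct irec *s, uint64_t *l0, uint64_t *l1)
{
	*l0 = ((uint64_t)(s->unwritten ? 1 : 0) << 63) |
	      (s->startoff << 9) |
	      (s->startblock >> 43);
	*l1 = (s->startblock << 21) |
	      (s->blockcount & MASK64LO(21));
}

/* Unpack as in __xfs_bmbt_get_all() above. */
static void
unpack(uint64_t l0, uint64_t l1, struct irec *s)
{
	s->unwritten  = (int)(l0 >> 63);
	s->startoff   = (l0 & MASK64LO(63)) >> 9;
	s->startblock = ((l0 & MASK64LO(9)) << 43) | (l1 >> 21);
	s->blockcount = l1 & MASK64LO(21);
}

int
main(void)
{
	struct irec	in = { 12345, 0x123456789abULL, 100, 1 }, out;
	uint64_t	l0, l1;

	pack(&in, &l0, &l1);
	unpack(l0, l1, &out);
	assert(out.startoff == in.startoff);
	assert(out.startblock == in.startblock);
	assert(out.blockcount == in.blockcount);
	assert(out.unwritten == in.unwritten);
	printf("l0=%#llx l1=%#llx\n",
	       (unsigned long long)l0, (unsigned long long)l1);
	return 0;
}
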
-
-/*
- * Extract the blockcount field from an in memory bmap extent record.
- */
-xfs_filblks_t
-xfs_bmbt_get_blockcount(
-       xfs_bmbt_rec_host_t     *r)
-{
-       return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
-}
-
-/*
- * Extract the startblock field from an in memory bmap extent record.
- */
-xfs_fsblock_t
-xfs_bmbt_get_startblock(
-       xfs_bmbt_rec_host_t     *r)
-{
-#if XFS_BIG_BLKNOS
-       return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
-              (((xfs_fsblock_t)r->l1) >> 21);
-#else
-#ifdef DEBUG
-       xfs_dfsbno_t    b;
-
-       b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
-           (((xfs_dfsbno_t)r->l1) >> 21);
-       ASSERT((b >> 32) == 0 || isnulldstartblock(b));
-       return (xfs_fsblock_t)b;
-#else  /* !DEBUG */
-       return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
-}
-
-/*
- * Extract the startoff field from an in memory bmap extent record.
- */
-xfs_fileoff_t
-xfs_bmbt_get_startoff(
-       xfs_bmbt_rec_host_t     *r)
-{
-       return ((xfs_fileoff_t)r->l0 &
-                xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-}
-
-xfs_exntst_t
-xfs_bmbt_get_state(
-       xfs_bmbt_rec_host_t     *r)
-{
-       int     ext_flag;
-
-       ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
-       return xfs_extent_state(xfs_bmbt_get_blockcount(r),
-                               ext_flag);
-}
-
-/*
- * Extract the blockcount field from an on disk bmap extent record.
- */
-xfs_filblks_t
-xfs_bmbt_disk_get_blockcount(
-       xfs_bmbt_rec_t  *r)
-{
-       return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
-}
-
-/*
- * Extract the startoff field from a disk format bmap extent record.
- */
-xfs_fileoff_t
-xfs_bmbt_disk_get_startoff(
-       xfs_bmbt_rec_t  *r)
-{
-       return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
-                xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
-}
-
-
-/*
- * Set all the fields in a bmap extent record from the arguments.
- */
-void
-xfs_bmbt_set_allf(
-       xfs_bmbt_rec_host_t     *r,
-       xfs_fileoff_t           startoff,
-       xfs_fsblock_t           startblock,
-       xfs_filblks_t           blockcount,
-       xfs_exntst_t            state)
-{
-       int             extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-       ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-       ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-       ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-#if XFS_BIG_BLKNOS
-       ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-       r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-               ((xfs_bmbt_rec_base_t)startoff << 9) |
-               ((xfs_bmbt_rec_base_t)startblock >> 43);
-       r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
-               ((xfs_bmbt_rec_base_t)blockcount &
-               (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-#else  /* !XFS_BIG_BLKNOS */
-       if (isnullstartblock(startblock)) {
-               r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-                       ((xfs_bmbt_rec_base_t)startoff << 9) |
-                        (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
-               r->l1 = xfs_mask64hi(11) |
-                         ((xfs_bmbt_rec_base_t)startblock << 21) |
-                         ((xfs_bmbt_rec_base_t)blockcount &
-                          (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-       } else {
-               r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-                       ((xfs_bmbt_rec_base_t)startoff << 9);
-               r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
-                        ((xfs_bmbt_rec_base_t)blockcount &
-                        (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-       }
-#endif /* XFS_BIG_BLKNOS */
-}
-
-/*
- * Set all the fields in a bmap extent record from the uncompressed form.
- */
-void
-xfs_bmbt_set_all(
-       xfs_bmbt_rec_host_t *r,
-       xfs_bmbt_irec_t *s)
-{
-       xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
-                            s->br_blockcount, s->br_state);
-}
-
-
-/*
- * Set all the fields in a disk format bmap extent record from the arguments.
- */
-void
-xfs_bmbt_disk_set_allf(
-       xfs_bmbt_rec_t          *r,
-       xfs_fileoff_t           startoff,
-       xfs_fsblock_t           startblock,
-       xfs_filblks_t           blockcount,
-       xfs_exntst_t            state)
-{
-       int                     extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
-
-       ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
-       ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
-       ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
-
-#if XFS_BIG_BLKNOS
-       ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
-
-       r->l0 = cpu_to_be64(
-               ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-                ((xfs_bmbt_rec_base_t)startoff << 9) |
-                ((xfs_bmbt_rec_base_t)startblock >> 43));
-       r->l1 = cpu_to_be64(
-               ((xfs_bmbt_rec_base_t)startblock << 21) |
-                ((xfs_bmbt_rec_base_t)blockcount &
-                 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-#else  /* !XFS_BIG_BLKNOS */
-       if (isnullstartblock(startblock)) {
-               r->l0 = cpu_to_be64(
-                       ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-                        ((xfs_bmbt_rec_base_t)startoff << 9) |
-                         (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
-               r->l1 = cpu_to_be64(xfs_mask64hi(11) |
-                         ((xfs_bmbt_rec_base_t)startblock << 21) |
-                         ((xfs_bmbt_rec_base_t)blockcount &
-                          (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-       } else {
-               r->l0 = cpu_to_be64(
-                       ((xfs_bmbt_rec_base_t)extent_flag << 63) |
-                        ((xfs_bmbt_rec_base_t)startoff << 9));
-               r->l1 = cpu_to_be64(
-                       ((xfs_bmbt_rec_base_t)startblock << 21) |
-                        ((xfs_bmbt_rec_base_t)blockcount &
-                         (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
-       }
-#endif /* XFS_BIG_BLKNOS */
-}
-
-/*
- * Set all the fields in a bmap extent record from the uncompressed form.
- */
-STATIC void
-xfs_bmbt_disk_set_all(
-       xfs_bmbt_rec_t  *r,
-       xfs_bmbt_irec_t *s)
-{
-       xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
-                                 s->br_blockcount, s->br_state);
-}
-
-/*
- * Set the blockcount field in a bmap extent record.
- */
-void
-xfs_bmbt_set_blockcount(
-       xfs_bmbt_rec_host_t *r,
-       xfs_filblks_t   v)
-{
-       ASSERT((v & xfs_mask64hi(43)) == 0);
-       r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
-                 (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
-}
-
-/*
- * Set the startblock field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startblock(
-       xfs_bmbt_rec_host_t *r,
-       xfs_fsblock_t   v)
-{
-#if XFS_BIG_BLKNOS
-       ASSERT((v & xfs_mask64hi(12)) == 0);
-       r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
-                 (xfs_bmbt_rec_base_t)(v >> 43);
-       r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
-                 (xfs_bmbt_rec_base_t)(v << 21);
-#else  /* !XFS_BIG_BLKNOS */
-       if (isnullstartblock(v)) {
-               r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
-               r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
-                         ((xfs_bmbt_rec_base_t)v << 21) |
-                         (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-       } else {
-               r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
-               r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
-                         (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
-       }
-#endif /* XFS_BIG_BLKNOS */
-}
-
-/*
- * Set the startoff field in a bmap extent record.
- */
-void
-xfs_bmbt_set_startoff(
-       xfs_bmbt_rec_host_t *r,
-       xfs_fileoff_t   v)
-{
-       ASSERT((v & xfs_mask64hi(9)) == 0);
-       r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
-               ((xfs_bmbt_rec_base_t)v << 9) |
-                 (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
-}
-
-/*
- * Set the extent state field in a bmap extent record.
- */
-void
-xfs_bmbt_set_state(
-       xfs_bmbt_rec_host_t *r,
-       xfs_exntst_t    v)
-{
-       ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
-       if (v == XFS_EXT_NORM)
-               r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
-       else
-               r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
-}
-
-/*
- * Convert in-memory form of btree root to on-disk form.
- */
-void
-xfs_bmbt_to_bmdr(
-       struct xfs_mount        *mp,
-       struct xfs_btree_block  *rblock,
-       int                     rblocklen,
-       xfs_bmdr_block_t        *dblock,
-       int                     dblocklen)
-{
-       int                     dmxr;
-       xfs_bmbt_key_t          *fkp;
-       __be64                  *fpp;
-       xfs_bmbt_key_t          *tkp;
-       __be64                  *tpp;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
-               ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
-               ASSERT(rblock->bb_u.l.bb_blkno ==
-                      cpu_to_be64(XFS_BUF_DADDR_NULL));
-       } else
-               ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
-       ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
-       ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
-       ASSERT(rblock->bb_level != 0);
-       dblock->bb_level = rblock->bb_level;
-       dblock->bb_numrecs = rblock->bb_numrecs;
-       dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
-       fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
-       tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
-       fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
-       tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
-       dmxr = be16_to_cpu(dblock->bb_numrecs);
-       memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
-       memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
-}
-
-/*
- * Check extent records, which have just been read, for
- * any bit in the extent flag field. ASSERT on debug
- * kernels, as this condition should not occur.
- * Return an error condition (1) if any flags are found,
- * otherwise return 0.
- */
-
-int
-xfs_check_nostate_extents(
-       xfs_ifork_t             *ifp,
-       xfs_extnum_t            idx,
-       xfs_extnum_t            num)
-{
-       for (; num > 0; num--, idx++) {
-               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
-               if ((ep->l0 >>
-                    (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
-                       ASSERT(0);
-                       return 1;
-               }
-       }
-       return 0;
-}
-
-
-STATIC struct xfs_btree_cur *
-xfs_bmbt_dup_cursor(
-       struct xfs_btree_cur    *cur)
-{
-       struct xfs_btree_cur    *new;
-
-       new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
-                       cur->bc_private.b.ip, cur->bc_private.b.whichfork);
-
-       /*
-        * Copy the firstblock, flist, and flags values,
-        * since init cursor doesn't get them.
-        */
-       new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
-       new->bc_private.b.flist = cur->bc_private.b.flist;
-       new->bc_private.b.flags = cur->bc_private.b.flags;
-
-       return new;
-}
-
-STATIC void
-xfs_bmbt_update_cursor(
-       struct xfs_btree_cur    *src,
-       struct xfs_btree_cur    *dst)
-{
-       ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
-              (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
-       ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
-
-       dst->bc_private.b.allocated += src->bc_private.b.allocated;
-       dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
-
-       src->bc_private.b.allocated = 0;
-}
-
-STATIC int
-xfs_bmbt_alloc_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *start,
-       union xfs_btree_ptr     *new,
-       int                     *stat)
-{
-       xfs_alloc_arg_t         args;           /* block allocation args */
-       int                     error;          /* error return value */
-
-       memset(&args, 0, sizeof(args));
-       args.tp = cur->bc_tp;
-       args.mp = cur->bc_mp;
-       args.fsbno = cur->bc_private.b.firstblock;
-       args.firstblock = args.fsbno;
-
-       if (args.fsbno == NULLFSBLOCK) {
-               args.fsbno = be64_to_cpu(start->l);
-               args.type = XFS_ALLOCTYPE_START_BNO;
-               /*
-                * Make sure there is sufficient room left in the AG to
-                * complete a full tree split for an extent insert.  If
-                * we are converting the middle part of an extent then
-                * we may need space for two tree splits.
-                *
-                * We are relying on the caller to make the correct block
-                * reservation for this operation to succeed.  If the
-                * reservation amount is insufficient then we may fail a
-                * block allocation here and corrupt the filesystem.
-                */
-               args.minleft = xfs_trans_get_block_res(args.tp);
-       } else if (cur->bc_private.b.flist->xbf_low) {
-               args.type = XFS_ALLOCTYPE_START_BNO;
-       } else {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       }
-
-       args.minlen = args.maxlen = args.prod = 1;
-       args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
-       if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
-               error = ENOSPC;
-               goto error0;
-       }
-       error = xfs_alloc_vextent(&args);
-       if (error)
-               goto error0;
-
-       if (args.fsbno == NULLFSBLOCK && args.minleft) {
-               /*
-                * Could not find an AG with enough free space to satisfy
-                * a full btree split.  Try again without minleft and if
-                * successful activate the lowspace algorithm.
-                */
-               args.fsbno = 0;
-               args.type = XFS_ALLOCTYPE_FIRST_AG;
-               args.minleft = 0;
-               error = xfs_alloc_vextent(&args);
-               if (error)
-                       goto error0;
-               cur->bc_private.b.flist->xbf_low = 1;
-       }
-       if (args.fsbno == NULLFSBLOCK) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-       ASSERT(args.len == 1);
-       cur->bc_private.b.firstblock = args.fsbno;
-       cur->bc_private.b.allocated++;
-       cur->bc_private.b.ip->i_d.di_nblocks++;
-       xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
-       xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
-                       XFS_TRANS_DQ_BCOUNT, 1L);
-
-       new->l = cpu_to_be64(args.fsbno);
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
- error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
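
xfs_bmbt_alloc_block() above is a two-pass allocation: first try near the hint while requiring enough space left over (minleft) in the AG to complete a full tree split, and only if that fails retry with no headroom and switch the free list to the low-space algorithm. A toy single-AG sketch of the same retry pattern; the real code goes through xfs_alloc_vextent() and per-AG selection, so everything below is illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NULLBLOCK	((uint64_t)-1)

/* Toy allocation group: a free-block counter and the next block number. */
struct ag {
	uint64_t	free;
	uint64_t	next;
};

/* Allocate one block, but only if 'minleft' blocks would remain free. */
static uint64_t
ag_alloc(struct ag *ag, uint64_t minleft)
{
	if (ag->free < 1 + minleft)
		return NULLBLOCK;
	ag->free--;
	return ag->next++;
}

/*
 * Two-pass strategy: first ask for a block while reserving headroom for a
 * full tree split; only if that fails, retry with no headroom and note that
 * we are now running the low-space algorithm.
 */
static uint64_t
alloc_btree_block(struct ag *ag, uint64_t split_reservation, bool *lowspace)
{
	uint64_t	bno = ag_alloc(ag, split_reservation);

	if (bno == NULLBLOCK) {
		bno = ag_alloc(ag, 0);
		if (bno != NULLBLOCK)
			*lowspace = true;
	}
	return bno;
}

int
main(void)
{
	struct ag	ag = { .free = 3, .next = 1000 };
	bool		lowspace = false;
	uint64_t	bno = alloc_btree_block(&ag, 8, &lowspace);

	printf("got block %llu, lowspace=%d\n",
	       (unsigned long long)bno, (int)lowspace);
	return 0;
}

With only 3 free blocks and an 8-block split reservation the first pass fails, so the sketch falls back to the second pass and reports lowspace=1.
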
-
-STATIC int
-xfs_bmbt_free_block(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = cur->bc_mp;
-       struct xfs_inode        *ip = cur->bc_private.b.ip;
-       struct xfs_trans        *tp = cur->bc_tp;
-       xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
-
-       xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
-       ip->i_d.di_nblocks--;
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-       xfs_trans_binval(tp, bp);
-       return 0;
-}
-
-STATIC int
-xfs_bmbt_get_minrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       if (level == cur->bc_nlevels - 1) {
-               struct xfs_ifork        *ifp;
-
-               ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
-                                   cur->bc_private.b.whichfork);
-
-               return xfs_bmbt_maxrecs(cur->bc_mp,
-                                       ifp->if_broot_bytes, level == 0) / 2;
-       }
-
-       return cur->bc_mp->m_bmap_dmnr[level != 0];
-}
-
-int
-xfs_bmbt_get_maxrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       if (level == cur->bc_nlevels - 1) {
-               struct xfs_ifork        *ifp;
-
-               ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
-                                   cur->bc_private.b.whichfork);
-
-               return xfs_bmbt_maxrecs(cur->bc_mp,
-                                       ifp->if_broot_bytes, level == 0);
-       }
-
-       return cur->bc_mp->m_bmap_dmxr[level != 0];
-
-}
-
-/*
- * Get the maximum records we could store in the on-disk format.
- *
- * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
- * for the root node this checks the available space in the dinode fork
- * so that we can resize the in-memory buffer to match it.  After a
- * resize to the maximum size this function returns the same value
- * as xfs_bmbt_get_maxrecs for the root node, too.
- */
-STATIC int
-xfs_bmbt_get_dmaxrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       if (level != cur->bc_nlevels - 1)
-               return cur->bc_mp->m_bmap_dmxr[level != 0];
-       return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
-}
-
-STATIC void
-xfs_bmbt_init_key_from_rec(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       key->bmbt.br_startoff =
-               cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
-}
-
-STATIC void
-xfs_bmbt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       ASSERT(key->bmbt.br_startoff != 0);
-
-       xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
-                              0, 0, XFS_EXT_NORM);
-}
-
-STATIC void
-xfs_bmbt_init_rec_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *rec)
-{
-       xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
-}
-
-STATIC void
-xfs_bmbt_init_ptr_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       ptr->l = 0;
-}
-
-STATIC __int64_t
-xfs_bmbt_key_diff(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *key)
-{
-       return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
-                                     cur->bc_rec.b.br_startoff;
-}
-
-static bool
-xfs_bmbt_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       unsigned int            level;
-
-       switch (block->bb_magic) {
-       case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
-               if (!xfs_sb_version_hascrc(&mp->m_sb))
-                       return false;
-               if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
-                       return false;
-               /*
-                * XXX: need a better way of verifying the owner here. Right now
-                * just make sure there has been one set.
-                */
-               if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
-                       return false;
-               /* fall through */
-       case cpu_to_be32(XFS_BMAP_MAGIC):
-               break;
-       default:
-               return false;
-       }
-
-       /*
-        * numrecs and level verification.
-        *
-        * We don't know what fork we belong to, so just verify that the level
-        * is less than the maximum of the two. Later checks will be more
-        * precise.
-        */
-       level = be16_to_cpu(block->bb_level);
-       if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
-               return false;
-       if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return false;
-
-       /* sibling pointer verification */
-       if (!block->bb_u.l.bb_leftsib ||
-           (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
-            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
-               return false;
-       if (!block->bb_u.l.bb_rightsib ||
-           (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
-            !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
-               return false;
-
-       return true;
-}
-
-static void
-xfs_bmbt_read_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_btree_lblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_bmbt_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
-}
-
-static void
-xfs_bmbt_write_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_bmbt_verify(bp)) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-       xfs_btree_lblock_calc_crc(bp);
-}
-
-const struct xfs_buf_ops xfs_bmbt_buf_ops = {
-       .verify_read = xfs_bmbt_read_verify,
-       .verify_write = xfs_bmbt_write_verify,
-};
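
The pair of verifiers above split the work by direction: the read verifier checks the CRC before the structural checks (a bad CRC and a corrupt structure are reported as different errors), while the write verifier runs the structural checks and then stamps a fresh CRC once the block contents are final. A self-contained sketch of that split, with a toy checksum standing in for the real CRC and none of the kernel buffer API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy on-disk block: a magic number, a checksum and some payload. */
struct blk {
	uint32_t	magic;
	uint32_t	csum;
	char		data[56];
};

/* Stand-in for the real CRC; only the call pattern matters here. */
static uint32_t
checksum(const struct blk *b)
{
	uint32_t	sum = b->magic;
	size_t		i;

	for (i = 0; i < sizeof(b->data); i++)
		sum = sum * 31 + (unsigned char)b->data[i];
	return sum;
}

/* Structural verification shared by the read and write paths. */
static bool
blk_verify(const struct blk *b)
{
	return b->magic == 0x424d4150;		/* "BMAP" */
}

/* Read side: checksum first, then structure (two distinct failures). */
static int
blk_read_verify(const struct blk *b)
{
	if (b->csum != checksum(b))
		return -1;
	if (!blk_verify(b))
		return -2;
	return 0;
}

/* Write side: check structure, then stamp a fresh checksum. */
static int
blk_write_verify(struct blk *b)
{
	if (!blk_verify(b))
		return -2;
	b->csum = checksum(b);
	return 0;
}

int
main(void)
{
	struct blk	b = { .magic = 0x424d4150 };

	memset(b.data, 'x', sizeof(b.data));
	blk_write_verify(&b);
	printf("read verify: %d\n", blk_read_verify(&b));
	return 0;
}
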
-
-
-#if defined(DEBUG) || defined(XFS_WARN)
-STATIC int
-xfs_bmbt_keys_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *k1,
-       union xfs_btree_key     *k2)
-{
-       return be64_to_cpu(k1->bmbt.br_startoff) <
-               be64_to_cpu(k2->bmbt.br_startoff);
-}
-
-STATIC int
-xfs_bmbt_recs_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *r1,
-       union xfs_btree_rec     *r2)
-{
-       return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
-               xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
-               xfs_bmbt_disk_get_startoff(&r2->bmbt);
-}
-#endif /* DEBUG */
-
-static const struct xfs_btree_ops xfs_bmbt_ops = {
-       .rec_len                = sizeof(xfs_bmbt_rec_t),
-       .key_len                = sizeof(xfs_bmbt_key_t),
-
-       .dup_cursor             = xfs_bmbt_dup_cursor,
-       .update_cursor          = xfs_bmbt_update_cursor,
-       .alloc_block            = xfs_bmbt_alloc_block,
-       .free_block             = xfs_bmbt_free_block,
-       .get_maxrecs            = xfs_bmbt_get_maxrecs,
-       .get_minrecs            = xfs_bmbt_get_minrecs,
-       .get_dmaxrecs           = xfs_bmbt_get_dmaxrecs,
-       .init_key_from_rec      = xfs_bmbt_init_key_from_rec,
-       .init_rec_from_key      = xfs_bmbt_init_rec_from_key,
-       .init_rec_from_cur      = xfs_bmbt_init_rec_from_cur,
-       .init_ptr_from_cur      = xfs_bmbt_init_ptr_from_cur,
-       .key_diff               = xfs_bmbt_key_diff,
-       .buf_ops                = &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
-       .keys_inorder           = xfs_bmbt_keys_inorder,
-       .recs_inorder           = xfs_bmbt_recs_inorder,
-#endif
-};
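
xfs_bmbt_ops is one instance of the xfs_btree_ops vtable: the generic btree code never touches bmap-specific structures directly, it calls back through this table of function pointers. A stripped-down sketch of that pattern in plain C; the names and numbers below are illustrative, not the kernel interface:

#include <stdio.h>

/* Per-btree-type callbacks consumed by generic code (the vtable pattern). */
struct btree_ops {
	int		rec_len;
	int		key_len;
	int		(*get_maxrecs)(int level);
	long long	(*key_diff)(long long key, long long rec);
};

/* One concrete implementation standing in for the bmbt callbacks. */
static int
demo_get_maxrecs(int level)
{
	return level == 0 ? 254 : 170;	/* leaf vs. node; numbers made up */
}

static long long
demo_key_diff(long long key, long long rec)
{
	return key - rec;		/* <0, 0 or >0, like a comparator */
}

static const struct btree_ops demo_ops = {
	.rec_len	= 16,
	.key_len	= 8,
	.get_maxrecs	= demo_get_maxrecs,
	.key_diff	= demo_key_diff,
};

/* Generic code sees only the ops table, never the concrete btree type. */
static void
describe(const struct btree_ops *ops)
{
	printf("leaf holds %d records of %d bytes, key_diff(5,3)=%lld\n",
	       ops->get_maxrecs(0), ops->rec_len, ops->key_diff(5, 3));
}

int
main(void)
{
	describe(&demo_ops);
	return 0;
}
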
-
-/*
- * Allocate a new bmap btree cursor.
- */
-struct xfs_btree_cur *                         /* new bmap btree cursor */
-xfs_bmbt_init_cursor(
-       struct xfs_mount        *mp,            /* file system mount point */
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_inode        *ip,            /* inode owning the btree */
-       int                     whichfork)      /* data or attr fork */
-{
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
-       struct xfs_btree_cur    *cur;
-
-       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-
-       cur->bc_tp = tp;
-       cur->bc_mp = mp;
-       cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-       cur->bc_btnum = XFS_BTNUM_BMAP;
-       cur->bc_blocklog = mp->m_sb.sb_blocklog;
-
-       cur->bc_ops = &xfs_bmbt_ops;
-       cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
-       cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
-       cur->bc_private.b.ip = ip;
-       cur->bc_private.b.firstblock = NULLFSBLOCK;
-       cur->bc_private.b.flist = NULL;
-       cur->bc_private.b.allocated = 0;
-       cur->bc_private.b.flags = 0;
-       cur->bc_private.b.whichfork = whichfork;
-
-       return cur;
-}
-
-/*
- * Calculate number of records in a bmap btree block.
- */
-int
-xfs_bmbt_maxrecs(
-       struct xfs_mount        *mp,
-       int                     blocklen,
-       int                     leaf)
-{
-       blocklen -= XFS_BMBT_BLOCK_LEN(mp);
-
-       if (leaf)
-               return blocklen / sizeof(xfs_bmbt_rec_t);
-       return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
-}
-
-/*
- * Calculate number of records in a bmap btree inode root.
- */
-int
-xfs_bmdr_maxrecs(
-       int                     blocklen,
-       int                     leaf)
-{
-       blocklen -= sizeof(xfs_bmdr_block_t);
-
-       if (leaf)
-               return blocklen / sizeof(xfs_bmdr_rec_t);
-       return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
-}
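
Both helpers above are simple capacity calculations: subtract the header from the block length, then divide by the per-entry footprint (a whole record in a leaf, a key plus a pointer in a node). A worked example with plain integers; the 16-byte record and 8-byte key and pointer sizes mirror the bmbt entries, and the 24-byte header is an assumption of this sketch for the non-CRC long-format block:

#include <stdio.h>

/* Subtract the block header, then divide by the per-entry footprint. */
static int
maxrecs(int blocklen, int hdrlen, int leaf)
{
	blocklen -= hdrlen;
	if (leaf)
		return blocklen / 16;		/* one record per entry */
	return blocklen / (8 + 8);		/* one key + one pointer */
}

int
main(void)
{
	printf("4k leaf: %d records\n", maxrecs(4096, 24, 1));
	printf("4k node: %d key/ptr pairs\n", maxrecs(4096, 24, 0));
	return 0;
}
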
-
-/*
- * Change the owner of a btree format fork of the inode passed in. Change it to
- * the owner that is passed in so that we can change owners before or after
- * we switch forks between inodes. The operation that the caller is doing will
- * determine whether it needs to change owner before or after the switch.
- *
- * For demand paged transactional modification, the fork switch should be done
- * after reading in all the blocks, modifying them and pinning them in the
- * transaction. For modification when the buffers are already pinned in memory,
- * the fork switch can be done before changing the owner as we won't need to
- * validate the owner until the btree buffers are unpinned and writes can occur
- * again.
- *
- * For recovery based ownership change, there is no transactional context and
- * so a buffer list must be supplied so that we can record the buffers that we
- * modified for the caller to issue IO on.
- */
-int
-xfs_bmbt_change_owner(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       int                     whichfork,
-       xfs_ino_t               new_owner,
-       struct list_head        *buffer_list)
-{
-       struct xfs_btree_cur    *cur;
-       int                     error;
-
-       ASSERT(tp || buffer_list);
-       ASSERT(!(tp && buffer_list));
-       if (whichfork == XFS_DATA_FORK)
-               ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
-       else
-               ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
-
-       cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
-       if (!cur)
-               return ENOMEM;
-
-       error = xfs_btree_change_owner(cur, new_owner, buffer_list);
-       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       return error;
-}
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
deleted file mode 100644 (file)
index 036b4fd..0000000
+++ /dev/null
@@ -1,3989 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_buf_item.h"
-#include "xfs_btree.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-
-/*
- * Cursor allocation zone.
- */
-kmem_zone_t    *xfs_btree_cur_zone;
-
-/*
- * Btree magic numbers.
- */
-static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
-       { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
-         XFS_FIBT_MAGIC },
-       { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
-         XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
-};
-#define xfs_btree_magic(cur) \
-       xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
-
-
-STATIC int                             /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lblock(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       struct xfs_btree_block  *block, /* btree long form block pointer */
-       int                     level,  /* level of the btree block */
-       struct xfs_buf          *bp)    /* buffer for block, if any */
-{
-       int                     lblock_ok = 1; /* block passes checks */
-       struct xfs_mount        *mp;    /* file system mount point */
-
-       mp = cur->bc_mp;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               lblock_ok = lblock_ok &&
-                       uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
-                       block->bb_u.l.bb_blkno == cpu_to_be64(
-                               bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
-       }
-
-       lblock_ok = lblock_ok &&
-               be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
-               be16_to_cpu(block->bb_level) == level &&
-               be16_to_cpu(block->bb_numrecs) <=
-                       cur->bc_ops->get_maxrecs(cur, level) &&
-               block->bb_u.l.bb_leftsib &&
-               (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
-                XFS_FSB_SANITY_CHECK(mp,
-                       be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
-               block->bb_u.l.bb_rightsib &&
-               (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
-                XFS_FSB_SANITY_CHECK(mp,
-                       be64_to_cpu(block->bb_u.l.bb_rightsib)));
-
-       if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
-                       XFS_ERRTAG_BTREE_CHECK_LBLOCK,
-                       XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
-               if (bp)
-                       trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-       return 0;
-}
-
-STATIC int                             /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sblock(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       struct xfs_btree_block  *block, /* btree short form block pointer */
-       int                     level,  /* level of the btree block */
-       struct xfs_buf          *bp)    /* buffer containing block */
-{
-       struct xfs_mount        *mp;    /* file system mount point */
-       struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
-       struct xfs_agf          *agf;   /* ag. freespace structure */
-       xfs_agblock_t           agflen; /* native ag. freespace length */
-       int                     sblock_ok = 1; /* block passes checks */
-
-       mp = cur->bc_mp;
-       agbp = cur->bc_private.a.agbp;
-       agf = XFS_BUF_TO_AGF(agbp);
-       agflen = be32_to_cpu(agf->agf_length);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               sblock_ok = sblock_ok &&
-                       uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
-                       block->bb_u.s.bb_blkno == cpu_to_be64(
-                               bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
-       }
-
-       sblock_ok = sblock_ok &&
-               be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
-               be16_to_cpu(block->bb_level) == level &&
-               be16_to_cpu(block->bb_numrecs) <=
-                       cur->bc_ops->get_maxrecs(cur, level) &&
-               (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
-                be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
-               block->bb_u.s.bb_leftsib &&
-               (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
-                be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
-               block->bb_u.s.bb_rightsib;
-
-       if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
-                       XFS_ERRTAG_BTREE_CHECK_SBLOCK,
-                       XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
-               if (bp)
-                       trace_xfs_btree_corrupt(bp, _RET_IP_);
-               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-       return 0;
-}
-
-/*
- * Debug routine: check that block header is ok.
- */
-int
-xfs_btree_check_block(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       struct xfs_btree_block  *block, /* generic btree block pointer */
-       int                     level,  /* level of the btree block */
-       struct xfs_buf          *bp)    /* buffer containing block, if any */
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return xfs_btree_check_lblock(cur, block, level, bp);
-       else
-               return xfs_btree_check_sblock(cur, block, level, bp);
-}
-
-/*
- * Check that (long) pointer is ok.
- */
-int                                    /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lptr(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_dfsbno_t            bno,    /* btree block disk address */
-       int                     level)  /* btree block level */
-{
-       XFS_WANT_CORRUPTED_RETURN(
-               level > 0 &&
-               bno != NULLDFSBNO &&
-               XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
-       return 0;
-}
-
-#ifdef DEBUG
-/*
- * Check that (short) pointer is ok.
- */
-STATIC int                             /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sptr(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agblock_t           bno,    /* btree block disk address */
-       int                     level)  /* btree block level */
-{
-       xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
-
-       XFS_WANT_CORRUPTED_RETURN(
-               level > 0 &&
-               bno != NULLAGBLOCK &&
-               bno != 0 &&
-               bno < agblocks);
-       return 0;
-}
-
-/*
- * Check that block ptr is ok.
- */
-STATIC int                             /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_ptr(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       union xfs_btree_ptr     *ptr,   /* btree block disk address */
-       int                     index,  /* offset from ptr to check */
-       int                     level)  /* btree block level */
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               return xfs_btree_check_lptr(cur,
-                               be64_to_cpu((&ptr->l)[index]), level);
-       } else {
-               return xfs_btree_check_sptr(cur,
-                               be32_to_cpu((&ptr->s)[index]), level);
-       }
-}
-#endif
-
-/*
- * Calculate CRC on the whole btree block and stuff it into the
- * long-form btree header.
- *
- * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modification was that made
- * it to disk.
- */
-void
-xfs_btree_lblock_calc_crc(
-       struct xfs_buf          *bp)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return;
-       if (bip)
-               block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
-}
-
-bool
-xfs_btree_lblock_verify_crc(
-       struct xfs_buf          *bp)
-{
-       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
-
-       return true;
-}
-
-/*
- * Calculate CRC on the whole btree block and stuff it into the
- * short-form btree header.
- *
- * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
- * it into the buffer so recovery knows what the last modification was that made
- * it to disk.
- */
-void
-xfs_btree_sblock_calc_crc(
-       struct xfs_buf          *bp)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return;
-       if (bip)
-               block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
-}
-
-bool
-xfs_btree_sblock_verify_crc(
-       struct xfs_buf          *bp)
-{
-       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
-               return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
-
-       return true;
-}
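-
-/*
- * Illustrative sketch (editor's addition, not part of the original file): a
- * per-btree pair of buffer verifiers, wired up through a struct xfs_buf_ops,
- * typically checks the CRC on read before any structural checks and
- * recomputes it on write once those checks pass.  The example_* helpers and
- * the structural validation itself are hypothetical and btree specific.
- */
-static bool
-example_sblock_structure_ok(
-       struct xfs_buf          *bp)
-{
-       /* per-btree magic/level/numrecs/sibling checks would go here */
-       return true;
-}
-
-static void
-example_sblock_read_verify(
-       struct xfs_buf          *bp)
-{
-       if (!xfs_btree_sblock_verify_crc(bp) ||
-           !example_sblock_structure_ok(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-}
-
-static void
-example_sblock_write_verify(
-       struct xfs_buf          *bp)
-{
-       if (!example_sblock_structure_ok(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               return;
-       }
-       xfs_btree_sblock_calc_crc(bp);
-}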
-
-/*
- * Delete the btree cursor.
- */
-void
-xfs_btree_del_cursor(
-       xfs_btree_cur_t *cur,           /* btree cursor */
-       int             error)          /* del because of error */
-{
-       int             i;              /* btree level */
-
-       /*
-        * Clear the buffer pointers, and release the buffers.
-        * If we're doing this in the face of an error, we
-        * need to make sure to inspect all of the entries
-        * in the bc_bufs array for buffers to be unlocked.
-        * This is because some of the btree code works from
-        * level n down to 0, and if we get an error along
-        * the way we won't have initialized all the entries
-        * down to 0.
-        */
-       for (i = 0; i < cur->bc_nlevels; i++) {
-               if (cur->bc_bufs[i])
-                       xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
-               else if (!error)
-                       break;
-       }
-       /*
-        * Can't free a bmap cursor without having dealt with the
-        * allocated indirect blocks' accounting.
-        */
-       ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
-              cur->bc_private.b.allocated == 0);
-       /*
-        * Free the cursor.
-        */
-       kmem_zone_free(xfs_btree_cur_zone, cur);
-}
-
-/*
- * Duplicate the btree cursor.
- * Allocate a new one, copy the record, re-get the buffers.
- */
-int                                    /* error */
-xfs_btree_dup_cursor(
-       xfs_btree_cur_t *cur,           /* input cursor */
-       xfs_btree_cur_t **ncur)         /* output cursor */
-{
-       xfs_buf_t       *bp;            /* btree block's buffer pointer */
-       int             error;          /* error return value */
-       int             i;              /* level number of btree block */
-       xfs_mount_t     *mp;            /* mount structure for filesystem */
-       xfs_btree_cur_t *new;           /* new cursor value */
-       xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
-
-       tp = cur->bc_tp;
-       mp = cur->bc_mp;
-
-       /*
-        * Allocate a new cursor like the old one.
-        */
-       new = cur->bc_ops->dup_cursor(cur);
-
-       /*
-        * Copy the record currently in the cursor.
-        */
-       new->bc_rec = cur->bc_rec;
-
-       /*
-        * For each level in the cursor, re-get the buffer and copy the ptr value.
-        */
-       for (i = 0; i < new->bc_nlevels; i++) {
-               new->bc_ptrs[i] = cur->bc_ptrs[i];
-               new->bc_ra[i] = cur->bc_ra[i];
-               bp = cur->bc_bufs[i];
-               if (bp) {
-                       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                                  XFS_BUF_ADDR(bp), mp->m_bsize,
-                                                  0, &bp,
-                                                  cur->bc_ops->buf_ops);
-                       if (error) {
-                               xfs_btree_del_cursor(new, error);
-                               *ncur = NULL;
-                               return error;
-                       }
-               }
-               new->bc_bufs[i] = bp;
-       }
-       *ncur = new;
-       return 0;
-}
-
-/*
- * XFS btree block layout and addressing:
- *
- * There are two types of blocks in the btree: leaf and non-leaf blocks.
- *
- * A leaf block starts with a header, followed by records containing the
- * values.  A non-leaf block also starts with the same header, and then
- * contains lookup keys followed by an equal number of pointers to the
- * btree blocks at the previous level.
- *
- *             +--------+-------+-------+-------+-------+-------+-------+
- * Leaf:       | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
- *             +--------+-------+-------+-------+-------+-------+-------+
- *
- *             +--------+-------+-------+-------+-------+-------+-------+
- * Non-Leaf:   | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
- *             +--------+-------+-------+-------+-------+-------+-------+
- *
- * The header is called struct xfs_btree_block for reasons better left unknown
- * and comes in different versions for short (32bit) and long (64bit) block
- * pointers.  The record and key structures are defined by the btree instances
- * and opaque to the btree core.  The block pointers are simple disk endian
- * integers, available in a short (32bit) and long (64bit) variant.
- *
- * The helpers below calculate the offset of a given record, key or pointer
- * into a btree block (xfs_btree_*_offset) or return a pointer to the given
- * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
- * inside the btree block is done using indices starting at one, not zero!
- */
-
-/*
- * Return size of the btree block header for this btree instance.
- */
-static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
-                       return XFS_BTREE_LBLOCK_CRC_LEN;
-               return XFS_BTREE_LBLOCK_LEN;
-       }
-       if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
-               return XFS_BTREE_SBLOCK_CRC_LEN;
-       return XFS_BTREE_SBLOCK_LEN;
-}
-
-/*
- * Return size of btree block pointers for this btree instance.
- */
-static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
-{
-       return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
-               sizeof(__be64) : sizeof(__be32);
-}
-
-/*
- * Calculate offset of the n-th record in a btree block.
- */
-STATIC size_t
-xfs_btree_rec_offset(
-       struct xfs_btree_cur    *cur,
-       int                     n)
-{
-       return xfs_btree_block_len(cur) +
-               (n - 1) * cur->bc_ops->rec_len;
-}
-
-/*
- * Calculate offset of the n-th key in a btree block.
- */
-STATIC size_t
-xfs_btree_key_offset(
-       struct xfs_btree_cur    *cur,
-       int                     n)
-{
-       return xfs_btree_block_len(cur) +
-               (n - 1) * cur->bc_ops->key_len;
-}
-
-/*
- * Calculate offset of the n-th block pointer in a btree block.
- */
-STATIC size_t
-xfs_btree_ptr_offset(
-       struct xfs_btree_cur    *cur,
-       int                     n,
-       int                     level)
-{
-       return xfs_btree_block_len(cur) +
-               cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
-               (n - 1) * xfs_btree_ptr_len(cur);
-}
-
-/*
- * Return a pointer to the n-th record in the btree block.
- */
-STATIC union xfs_btree_rec *
-xfs_btree_rec_addr(
-       struct xfs_btree_cur    *cur,
-       int                     n,
-       struct xfs_btree_block  *block)
-{
-       return (union xfs_btree_rec *)
-               ((char *)block + xfs_btree_rec_offset(cur, n));
-}
-
-/*
- * Return a pointer to the n-th key in the btree block.
- */
-STATIC union xfs_btree_key *
-xfs_btree_key_addr(
-       struct xfs_btree_cur    *cur,
-       int                     n,
-       struct xfs_btree_block  *block)
-{
-       return (union xfs_btree_key *)
-               ((char *)block + xfs_btree_key_offset(cur, n));
-}
-
-/*
- * Return a pointer to the n-th block pointer in the btree block.
- */
-STATIC union xfs_btree_ptr *
-xfs_btree_ptr_addr(
-       struct xfs_btree_cur    *cur,
-       int                     n,
-       struct xfs_btree_block  *block)
-{
-       int                     level = xfs_btree_get_level(block);
-
-       ASSERT(block->bb_level != 0);
-
-       return (union xfs_btree_ptr *)
-               ((char *)block + xfs_btree_ptr_offset(cur, n, level));
-}
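-
-/*
- * Illustrative sketch (editor's addition): walking the keys and block
- * pointers of a non-leaf block with the 1-based addressing helpers above.
- * The example_* name is hypothetical; the cursor and block are assumed to
- * have been obtained through xfs_btree_get_block() below.
- */
-static void
-example_walk_node_block(
-       struct xfs_btree_cur    *cur,
-       struct xfs_btree_block  *block)
-{
-       int                     numrecs = xfs_btree_get_numrecs(block);
-       int                     i;
-
-       for (i = 1; i <= numrecs; i++) {
-               union xfs_btree_key     *kp = xfs_btree_key_addr(cur, i, block);
-               union xfs_btree_ptr     *pp = xfs_btree_ptr_addr(cur, i, block);
-
-               /* key i bounds the subtree reached through pointer i */
-               (void)kp;
-               (void)pp;
-       }
-}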
-
-/*
- * Get the root block which is stored in the inode.
- *
- * For now this btree implementation assumes the btree root is always
- * stored in the if_broot field of an inode fork.
- */
-STATIC struct xfs_btree_block *
-xfs_btree_get_iroot(
-       struct xfs_btree_cur    *cur)
-{
-       struct xfs_ifork        *ifp;
-
-       ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
-       return (struct xfs_btree_block *)ifp->if_broot;
-}
-
-/*
- * Retrieve the block pointer from the cursor at the given level.
- * This may be a btree root stored in the inode fork or a block from a buffer.
- */
-STATIC struct xfs_btree_block *                /* generic btree block pointer */
-xfs_btree_get_block(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       int                     level,  /* level in btree */
-       struct xfs_buf          **bpp)  /* buffer containing the block */
-{
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           (level == cur->bc_nlevels - 1)) {
-               *bpp = NULL;
-               return xfs_btree_get_iroot(cur);
-       }
-
-       *bpp = cur->bc_bufs[level];
-       return XFS_BUF_TO_BLOCK(*bpp);
-}
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-xfs_buf_t *                            /* buffer for fsbno */
-xfs_btree_get_bufl(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_fsblock_t   fsbno,          /* file system block number */
-       uint            lock)           /* lock flags for get_buf */
-{
-       xfs_daddr_t             d;              /* real disk block address */
-
-       ASSERT(fsbno != NULLFSBLOCK);
-       d = XFS_FSB_TO_DADDR(mp, fsbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-}
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-xfs_buf_t *                            /* buffer for agno/agbno */
-xfs_btree_get_bufs(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_agblock_t   agbno,          /* allocation group block number */
-       uint            lock)           /* lock flags for get_buf */
-{
-       xfs_daddr_t             d;              /* real disk block address */
-
-       ASSERT(agno != NULLAGNUMBER);
-       ASSERT(agbno != NULLAGBLOCK);
-       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-}
-
-/*
- * Check for the cursor referring to the last block at the given level.
- */
-int                                    /* 1=is last block, 0=not last block */
-xfs_btree_islastblock(
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     level)  /* level to check */
-{
-       struct xfs_btree_block  *block; /* generic btree block pointer */
-       xfs_buf_t               *bp;    /* buffer containing block */
-
-       block = xfs_btree_get_block(cur, level, &bp);
-       xfs_btree_check_block(cur, block, level, bp);
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
-       else
-               return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
-}
-
-/*
- * Change the cursor to point to the first record at the given level.
- * Other levels are unaffected.
- */
-STATIC int                             /* success=1, failure=0 */
-xfs_btree_firstrec(
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     level)  /* level to change */
-{
-       struct xfs_btree_block  *block; /* generic btree block pointer */
-       xfs_buf_t               *bp;    /* buffer containing block */
-
-       /*
-        * Get the block pointer for this level.
-        */
-       block = xfs_btree_get_block(cur, level, &bp);
-       xfs_btree_check_block(cur, block, level, bp);
-       /*
-        * It's empty, there is no such record.
-        */
-       if (!block->bb_numrecs)
-               return 0;
-       /*
-        * Set the ptr value to 1, that's the first record/key.
-        */
-       cur->bc_ptrs[level] = 1;
-       return 1;
-}
-
-/*
- * Change the cursor to point to the last record in the current block
- * at the given level.  Other levels are unaffected.
- */
-STATIC int                             /* success=1, failure=0 */
-xfs_btree_lastrec(
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     level)  /* level to change */
-{
-       struct xfs_btree_block  *block; /* generic btree block pointer */
-       xfs_buf_t               *bp;    /* buffer containing block */
-
-       /*
-        * Get the block pointer for this level.
-        */
-       block = xfs_btree_get_block(cur, level, &bp);
-       xfs_btree_check_block(cur, block, level, bp);
-       /*
-        * It's empty, there is no such record.
-        */
-       if (!block->bb_numrecs)
-               return 0;
-       /*
-        * Set the ptr value to numrecs, that's the last record/key.
-        */
-       cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
-       return 1;
-}
-
-/*
- * Compute first and last byte offsets for the fields given.
- * Interprets the offsets table, which contains struct field offsets.
- */
-void
-xfs_btree_offsets(
-       __int64_t       fields,         /* bitmask of fields */
-       const short     *offsets,       /* table of field offsets */
-       int             nbits,          /* number of bits to inspect */
-       int             *first,         /* output: first byte offset */
-       int             *last)          /* output: last byte offset */
-{
-       int             i;              /* current bit number */
-       __int64_t       imask;          /* mask for current bit number */
-
-       ASSERT(fields != 0);
-       /*
-        * Find the lowest bit, so the first byte offset.
-        */
-       for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
-               if (imask & fields) {
-                       *first = offsets[i];
-                       break;
-               }
-       }
-       /*
-        * Find the highest bit, so the last byte offset.
-        */
-       for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
-               if (imask & fields) {
-                       *last = offsets[i + 1] - 1;
-                       break;
-               }
-       }
-}
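-
-/*
- * Illustrative sketch (editor's addition): with a trimmed copy of the
- * short-form offsets table used by xfs_btree_log_block() below, a fields
- * mask of XFS_BB_LEVEL | XFS_BB_NUMRECS resolves to the contiguous byte
- * range covering exactly those two header members.  The example_* name is
- * hypothetical.
- */
-static void
-example_header_byte_range(
-       int                     *first,
-       int                     *last)
-{
-       static const short      offsets[] = {
-               offsetof(struct xfs_btree_block, bb_magic),
-               offsetof(struct xfs_btree_block, bb_level),
-               offsetof(struct xfs_btree_block, bb_numrecs),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
-               XFS_BTREE_SBLOCK_LEN
-       };
-
-       xfs_btree_offsets(XFS_BB_LEVEL | XFS_BB_NUMRECS, offsets, 4,
-                         first, last);
-       /* *first == offsetof(bb_level), *last == offsetof(bb_u.s.bb_leftsib) - 1 */
-}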
-
-/*
- * Get a buffer for the block, return it read in.
- * Long-form addressing.
- */
-int
-xfs_btree_read_bufl(
-       struct xfs_mount        *mp,            /* file system mount point */
-       struct xfs_trans        *tp,            /* transaction pointer */
-       xfs_fsblock_t           fsbno,          /* file system block number */
-       uint                    lock,           /* lock flags for read_buf */
-       struct xfs_buf          **bpp,          /* buffer for fsbno */
-       int                     refval,         /* ref count value for buffer */
-       const struct xfs_buf_ops *ops)
-{
-       struct xfs_buf          *bp;            /* return value */
-       xfs_daddr_t             d;              /* real disk block address */
-       int                     error;
-
-       ASSERT(fsbno != NULLFSBLOCK);
-       d = XFS_FSB_TO_DADDR(mp, fsbno);
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, lock, &bp, ops);
-       if (error)
-               return error;
-       if (bp)
-               xfs_buf_set_ref(bp, refval);
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Long-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufl(
-       struct xfs_mount        *mp,            /* file system mount point */
-       xfs_fsblock_t           fsbno,          /* file system block number */
-       xfs_extlen_t            count,          /* count of filesystem blocks */
-       const struct xfs_buf_ops *ops)
-{
-       xfs_daddr_t             d;
-
-       ASSERT(fsbno != NULLFSBLOCK);
-       d = XFS_FSB_TO_DADDR(mp, fsbno);
-       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
-}
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Short-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufs(
-       struct xfs_mount        *mp,            /* file system mount point */
-       xfs_agnumber_t          agno,           /* allocation group number */
-       xfs_agblock_t           agbno,          /* allocation group block number */
-       xfs_extlen_t            count,          /* count of filesystem blocks */
-       const struct xfs_buf_ops *ops)
-{
-       xfs_daddr_t             d;
-
-       ASSERT(agno != NULLAGNUMBER);
-       ASSERT(agbno != NULLAGBLOCK);
-       d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
-}
-
-STATIC int
-xfs_btree_readahead_lblock(
-       struct xfs_btree_cur    *cur,
-       int                     lr,
-       struct xfs_btree_block  *block)
-{
-       int                     rval = 0;
-       xfs_dfsbno_t            left = be64_to_cpu(block->bb_u.l.bb_leftsib);
-       xfs_dfsbno_t            right = be64_to_cpu(block->bb_u.l.bb_rightsib);
-
-       if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
-               xfs_btree_reada_bufl(cur->bc_mp, left, 1,
-                                    cur->bc_ops->buf_ops);
-               rval++;
-       }
-
-       if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
-               xfs_btree_reada_bufl(cur->bc_mp, right, 1,
-                                    cur->bc_ops->buf_ops);
-               rval++;
-       }
-
-       return rval;
-}
-
-STATIC int
-xfs_btree_readahead_sblock(
-       struct xfs_btree_cur    *cur,
-       int                     lr,
-       struct xfs_btree_block *block)
-{
-       int                     rval = 0;
-       xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
-       xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
-
-       if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
-               xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-                                    left, 1, cur->bc_ops->buf_ops);
-               rval++;
-       }
-
-       if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
-               xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-                                    right, 1, cur->bc_ops->buf_ops);
-               rval++;
-       }
-
-       return rval;
-}
-
-/*
- * Read-ahead btree blocks, at the given level.
- * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
- */
-STATIC int
-xfs_btree_readahead(
-       struct xfs_btree_cur    *cur,           /* btree cursor */
-       int                     lev,            /* level in btree */
-       int                     lr)             /* left/right bits */
-{
-       struct xfs_btree_block  *block;
-
-       /*
-        * No readahead needed if we are at the root level and the
-        * btree root is stored in the inode.
-        */
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           (lev == cur->bc_nlevels - 1))
-               return 0;
-
-       if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
-               return 0;
-
-       cur->bc_ra[lev] |= lr;
-       block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
-
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return xfs_btree_readahead_lblock(cur, lr, block);
-       return xfs_btree_readahead_sblock(cur, lr, block);
-}
-
-STATIC xfs_daddr_t
-xfs_btree_ptr_to_daddr(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
-
-               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
-       } else {
-               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
-
-               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
-                                       be32_to_cpu(ptr->s));
-       }
-}
-
-/*
- * Readahead @count btree blocks at the given @ptr location.
- *
- * We don't need to care about long or short form btrees here as we have a
- * method of converting the ptr directly to a daddr available to us.
- */
-STATIC void
-xfs_btree_readahead_ptr(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       xfs_extlen_t            count)
-{
-       xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
-                         xfs_btree_ptr_to_daddr(cur, ptr),
-                         cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
-}
-
-/*
- * Set the buffer for level "lev" in the cursor to bp, releasing
- * any previous buffer.
- */
-STATIC void
-xfs_btree_setbuf(
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     lev,    /* level in btree */
-       xfs_buf_t               *bp)    /* new buffer to set */
-{
-       struct xfs_btree_block  *b;     /* btree block */
-
-       if (cur->bc_bufs[lev])
-               xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
-       cur->bc_bufs[lev] = bp;
-       cur->bc_ra[lev] = 0;
-
-       b = XFS_BUF_TO_BLOCK(bp);
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
-                       cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-               if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
-                       cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
-       } else {
-               if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
-                       cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-               if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
-                       cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
-       }
-}
-
-STATIC int
-xfs_btree_ptr_is_null(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               return ptr->l == cpu_to_be64(NULLDFSBNO);
-       else
-               return ptr->s == cpu_to_be32(NULLAGBLOCK);
-}
-
-STATIC void
-xfs_btree_set_ptr_null(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               ptr->l = cpu_to_be64(NULLDFSBNO);
-       else
-               ptr->s = cpu_to_be32(NULLAGBLOCK);
-}
-
-/*
- * Get/set/init sibling pointers
- */
-STATIC void
-xfs_btree_get_sibling(
-       struct xfs_btree_cur    *cur,
-       struct xfs_btree_block  *block,
-       union xfs_btree_ptr     *ptr,
-       int                     lr)
-{
-       ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
-
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               if (lr == XFS_BB_RIGHTSIB)
-                       ptr->l = block->bb_u.l.bb_rightsib;
-               else
-                       ptr->l = block->bb_u.l.bb_leftsib;
-       } else {
-               if (lr == XFS_BB_RIGHTSIB)
-                       ptr->s = block->bb_u.s.bb_rightsib;
-               else
-                       ptr->s = block->bb_u.s.bb_leftsib;
-       }
-}
-
-STATIC void
-xfs_btree_set_sibling(
-       struct xfs_btree_cur    *cur,
-       struct xfs_btree_block  *block,
-       union xfs_btree_ptr     *ptr,
-       int                     lr)
-{
-       ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
-
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               if (lr == XFS_BB_RIGHTSIB)
-                       block->bb_u.l.bb_rightsib = ptr->l;
-               else
-                       block->bb_u.l.bb_leftsib = ptr->l;
-       } else {
-               if (lr == XFS_BB_RIGHTSIB)
-                       block->bb_u.s.bb_rightsib = ptr->s;
-               else
-                       block->bb_u.s.bb_leftsib = ptr->s;
-       }
-}
-
-void
-xfs_btree_init_block_int(
-       struct xfs_mount        *mp,
-       struct xfs_btree_block  *buf,
-       xfs_daddr_t             blkno,
-       __u32                   magic,
-       __u16                   level,
-       __u16                   numrecs,
-       __u64                   owner,
-       unsigned int            flags)
-{
-       buf->bb_magic = cpu_to_be32(magic);
-       buf->bb_level = cpu_to_be16(level);
-       buf->bb_numrecs = cpu_to_be16(numrecs);
-
-       if (flags & XFS_BTREE_LONG_PTRS) {
-               buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
-               buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
-               if (flags & XFS_BTREE_CRC_BLOCKS) {
-                       buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
-                       buf->bb_u.l.bb_owner = cpu_to_be64(owner);
-                       uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
-                       buf->bb_u.l.bb_pad = 0;
-                       buf->bb_u.l.bb_lsn = 0;
-               }
-       } else {
-               /* owner is a 32 bit value on short blocks */
-               __u32 __owner = (__u32)owner;
-
-               buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
-               buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
-               if (flags & XFS_BTREE_CRC_BLOCKS) {
-                       buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
-                       buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
-                       uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
-                       buf->bb_u.s.bb_lsn = 0;
-               }
-       }
-}
-
-void
-xfs_btree_init_block(
-       struct xfs_mount *mp,
-       struct xfs_buf  *bp,
-       __u32           magic,
-       __u16           level,
-       __u16           numrecs,
-       __u64           owner,
-       unsigned int    flags)
-{
-       xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-                                magic, level, numrecs, owner, flags);
-}
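-
-/*
- * Illustrative sketch (editor's addition): a freshly allocated short-form
- * btree block is typically grabbed without reading it and then stamped as
- * an empty leaf before any records are inserted.  The example_* name is
- * hypothetical, the allocation of "agbno" and all error handling are
- * omitted, and a v5 filesystem would also pass XFS_BTREE_CRC_BLOCKS.
- */
-static void
-example_init_empty_leaf(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           agbno,
-       __u32                   magic)
-{
-       struct xfs_buf          *bp;
-
-       bp = xfs_btree_get_bufs(mp, tp, agno, agbno, 0);
-       xfs_btree_init_block(mp, bp, magic, 0, 0, agno, 0);
-}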
-
-STATIC void
-xfs_btree_init_block_cur(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     level,
-       int                     numrecs)
-{
-       __u64 owner;
-
-       /*
-        * We can pull the owner from the cursor right now as the different
-        * owners align directly with the pointer size of the btree. This may
-        * change in the future, but is safe for current users of the generic
-        * btree code.
-        */
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               owner = cur->bc_private.b.ip->i_ino;
-       else
-               owner = cur->bc_private.a.agno;
-
-       xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
-                                xfs_btree_magic(cur), level, numrecs,
-                                owner, cur->bc_flags);
-}
-
-/*
- * Return true if ptr is the last record in the btree and
- * we need to track updates to this record.  The decision
- * will be further refined in the update_lastrec method.
- */
-STATIC int
-xfs_btree_is_lastrec(
-       struct xfs_btree_cur    *cur,
-       struct xfs_btree_block  *block,
-       int                     level)
-{
-       union xfs_btree_ptr     ptr;
-
-       if (level > 0)
-               return 0;
-       if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
-               return 0;
-
-       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
-       if (!xfs_btree_ptr_is_null(cur, &ptr))
-               return 0;
-       return 1;
-}
-
-STATIC void
-xfs_btree_buf_to_ptr(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
-                                       XFS_BUF_ADDR(bp)));
-       else {
-               ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
-                                       XFS_BUF_ADDR(bp)));
-       }
-}
-
-STATIC void
-xfs_btree_set_refs(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
-{
-       switch (cur->bc_btnum) {
-       case XFS_BTNUM_BNO:
-       case XFS_BTNUM_CNT:
-               xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
-               break;
-       case XFS_BTNUM_INO:
-       case XFS_BTNUM_FINO:
-               xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
-               break;
-       case XFS_BTNUM_BMAP:
-               xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
-               break;
-       default:
-               ASSERT(0);
-       }
-}
-
-STATIC int
-xfs_btree_get_buf_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       int                     flags,
-       struct xfs_btree_block  **block,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_mount        *mp = cur->bc_mp;
-       xfs_daddr_t             d;
-
-       /* need to sort out how callers deal with failures first */
-       ASSERT(!(flags & XBF_TRYLOCK));
-
-       d = xfs_btree_ptr_to_daddr(cur, ptr);
-       *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
-                                mp->m_bsize, flags);
-
-       if (!*bpp)
-               return ENOMEM;
-
-       (*bpp)->b_ops = cur->bc_ops->buf_ops;
-       *block = XFS_BUF_TO_BLOCK(*bpp);
-       return 0;
-}
-
-/*
- * Read in the buffer at the given ptr and return the buffer and
- * the block pointer within the buffer.
- */
-STATIC int
-xfs_btree_read_buf_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       int                     flags,
-       struct xfs_btree_block  **block,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_mount        *mp = cur->bc_mp;
-       xfs_daddr_t             d;
-       int                     error;
-
-       /* need to sort out how callers deal with failures first */
-       ASSERT(!(flags & XBF_TRYLOCK));
-
-       d = xfs_btree_ptr_to_daddr(cur, ptr);
-       error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
-                                  mp->m_bsize, flags, bpp,
-                                  cur->bc_ops->buf_ops);
-       if (error)
-               return error;
-
-       xfs_btree_set_refs(cur, *bpp);
-       *block = XFS_BUF_TO_BLOCK(*bpp);
-       return 0;
-}
-
-/*
- * Copy keys from one btree block to another.
- */
-STATIC void
-xfs_btree_copy_keys(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *dst_key,
-       union xfs_btree_key     *src_key,
-       int                     numkeys)
-{
-       ASSERT(numkeys >= 0);
-       memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
-}
-
-/*
- * Copy records from one btree block to another.
- */
-STATIC void
-xfs_btree_copy_recs(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *dst_rec,
-       union xfs_btree_rec     *src_rec,
-       int                     numrecs)
-{
-       ASSERT(numrecs >= 0);
-       memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
-}
-
-/*
- * Copy block pointers from one btree block to another.
- */
-STATIC void
-xfs_btree_copy_ptrs(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *dst_ptr,
-       union xfs_btree_ptr     *src_ptr,
-       int                     numptrs)
-{
-       ASSERT(numptrs >= 0);
-       memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
-}
-
-/*
- * Shift keys one index left/right inside a single btree block.
- */
-STATIC void
-xfs_btree_shift_keys(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *key,
-       int                     dir,
-       int                     numkeys)
-{
-       char                    *dst_key;
-
-       ASSERT(numkeys >= 0);
-       ASSERT(dir == 1 || dir == -1);
-
-       dst_key = (char *)key + (dir * cur->bc_ops->key_len);
-       memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
-}
-
-/*
- * Shift records one index left/right inside a single btree block.
- */
-STATIC void
-xfs_btree_shift_recs(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *rec,
-       int                     dir,
-       int                     numrecs)
-{
-       char                    *dst_rec;
-
-       ASSERT(numrecs >= 0);
-       ASSERT(dir == 1 || dir == -1);
-
-       dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
-       memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
-}
-
-/*
- * Shift block pointers one index left/right inside a single btree block.
- */
-STATIC void
-xfs_btree_shift_ptrs(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr,
-       int                     dir,
-       int                     numptrs)
-{
-       char                    *dst_ptr;
-
-       ASSERT(numptrs >= 0);
-       ASSERT(dir == 1 || dir == -1);
-
-       dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
-       memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
-}
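-
-/*
- * Illustrative sketch (editor's addition): at its core, inserting a record
- * at position "ptr" of a leaf block is a shift-right of the trailing
- * records followed by a copy of the new record, which is roughly what the
- * full insert path does.  Block splitting, key updates and error checking
- * are omitted; the example_* name is hypothetical.
- */
-static void
-example_insert_leaf_rec(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       struct xfs_btree_block  *block,
-       union xfs_btree_rec     *rec,
-       int                     ptr)
-{
-       int                     numrecs = xfs_btree_get_numrecs(block);
-       union xfs_btree_rec     *rp = xfs_btree_rec_addr(cur, ptr, block);
-
-       xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
-       xfs_btree_copy_recs(cur, rp, rec, 1);
-       xfs_btree_set_numrecs(block, numrecs + 1);
-       xfs_btree_log_recs(cur, bp, ptr, numrecs + 1);
-       xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
-}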
-
-/*
- * Log key values from the btree block.
- */
-STATIC void
-xfs_btree_log_keys(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     first,
-       int                     last)
-{
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
-
-       if (bp) {
-               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
-               xfs_trans_log_buf(cur->bc_tp, bp,
-                                 xfs_btree_key_offset(cur, first),
-                                 xfs_btree_key_offset(cur, last + 1) - 1);
-       } else {
-               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
-                               xfs_ilog_fbroot(cur->bc_private.b.whichfork));
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-}
-
-/*
- * Log record values from the btree block.
- */
-void
-xfs_btree_log_recs(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     first,
-       int                     last)
-{
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
-
-       xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
-       xfs_trans_log_buf(cur->bc_tp, bp,
-                         xfs_btree_rec_offset(cur, first),
-                         xfs_btree_rec_offset(cur, last + 1) - 1);
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-}
-
-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
-STATIC void
-xfs_btree_log_ptrs(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       struct xfs_buf          *bp,    /* buffer containing btree block */
-       int                     first,  /* index of first pointer to log */
-       int                     last)   /* index of last pointer to log */
-{
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
-
-       if (bp) {
-               struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-               int                     level = xfs_btree_get_level(block);
-
-               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
-               xfs_trans_log_buf(cur->bc_tp, bp,
-                               xfs_btree_ptr_offset(cur, first, level),
-                               xfs_btree_ptr_offset(cur, last + 1, level) - 1);
-       } else {
-               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
-                       xfs_ilog_fbroot(cur->bc_private.b.whichfork));
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-}
-
-/*
- * Log fields from a btree block header.
- */
-void
-xfs_btree_log_block(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       struct xfs_buf          *bp,    /* buffer containing btree block */
-       int                     fields) /* mask of fields: XFS_BB_... */
-{
-       int                     first;  /* first byte offset logged */
-       int                     last;   /* last byte offset logged */
-       static const short      soffsets[] = {  /* table of offsets (short) */
-               offsetof(struct xfs_btree_block, bb_magic),
-               offsetof(struct xfs_btree_block, bb_level),
-               offsetof(struct xfs_btree_block, bb_numrecs),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
-               offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
-               XFS_BTREE_SBLOCK_CRC_LEN
-       };
-       static const short      loffsets[] = {  /* table of offsets (long) */
-               offsetof(struct xfs_btree_block, bb_magic),
-               offsetof(struct xfs_btree_block, bb_level),
-               offsetof(struct xfs_btree_block, bb_numrecs),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
-               offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
-               XFS_BTREE_LBLOCK_CRC_LEN
-       };
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
-
-       if (bp) {
-               int nbits;
-
-               if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
-                       /*
-                        * We don't log the CRC when updating a btree
-                        * block but instead recreate it during log
-                        * recovery.  As the log buffers have checksums
-                        * of their own this is safe and avoids logging a crc
-                        * update in a lot of places.
-                        */
-                       if (fields == XFS_BB_ALL_BITS)
-                               fields = XFS_BB_ALL_BITS_CRC;
-                       nbits = XFS_BB_NUM_BITS_CRC;
-               } else {
-                       nbits = XFS_BB_NUM_BITS;
-               }
-               xfs_btree_offsets(fields,
-                                 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
-                                       loffsets : soffsets,
-                                 nbits, &first, &last);
-               xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
-               xfs_trans_log_buf(cur->bc_tp, bp, first, last);
-       } else {
-               xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
-                       xfs_ilog_fbroot(cur->bc_private.b.whichfork));
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-}
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int                                            /* error */
-xfs_btree_increment(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     *stat)          /* success/failure */
-{
-       struct xfs_btree_block  *block;
-       union xfs_btree_ptr     ptr;
-       struct xfs_buf          *bp;
-       int                     error;          /* error return value */
-       int                     lev;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, level);
-
-       ASSERT(level < cur->bc_nlevels);
-
-       /* Read-ahead to the right at this level. */
-       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
-
-       /* Get a pointer to the btree block. */
-       block = xfs_btree_get_block(cur, level, &bp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, level, bp);
-       if (error)
-               goto error0;
-#endif
-
-       /* We're done if we remain in the block after the increment. */
-       if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
-               goto out1;
-
-       /* Fail if we just went off the right edge of the tree. */
-       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
-       if (xfs_btree_ptr_is_null(cur, &ptr))
-               goto out0;
-
-       XFS_BTREE_STATS_INC(cur, increment);
-
-       /*
-        * March up the tree incrementing pointers.
-        * Stop when we don't go off the right edge of a block.
-        */
-       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
-               block = xfs_btree_get_block(cur, lev, &bp);
-
-#ifdef DEBUG
-               error = xfs_btree_check_block(cur, block, lev, bp);
-               if (error)
-                       goto error0;
-#endif
-
-               if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
-                       break;
-
-               /* Read-ahead the right block for the next loop. */
-               xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
-       }
-
-       /*
-        * If we went off the root then we are either seriously
-        * confused or have the tree root in an inode.
-        */
-       if (lev == cur->bc_nlevels) {
-               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-                       goto out0;
-               ASSERT(0);
-               error = EFSCORRUPTED;
-               goto error0;
-       }
-       ASSERT(lev < cur->bc_nlevels);
-
-       /*
-        * Now walk back down the tree, fixing up the cursor's buffer
-        * pointers and key numbers.
-        */
-       for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
-               union xfs_btree_ptr     *ptrp;
-
-               ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-               --lev;
-               error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
-               if (error)
-                       goto error0;
-
-               xfs_btree_setbuf(cur, lev, bp);
-               cur->bc_ptrs[lev] = 1;
-       }
-out1:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int                                            /* error */
-xfs_btree_decrement(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     *stat)          /* success/failure */
-{
-       struct xfs_btree_block  *block;
-       xfs_buf_t               *bp;
-       int                     error;          /* error return value */
-       int                     lev;
-       union xfs_btree_ptr     ptr;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, level);
-
-       ASSERT(level < cur->bc_nlevels);
-
-       /* Read-ahead to the left at this level. */
-       xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
-
-       /* We're done if we remain in the block after the decrement. */
-       if (--cur->bc_ptrs[level] > 0)
-               goto out1;
-
-       /* Get a pointer to the btree block. */
-       block = xfs_btree_get_block(cur, level, &bp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, level, bp);
-       if (error)
-               goto error0;
-#endif
-
-       /* Fail if we just went off the left edge of the tree. */
-       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
-       if (xfs_btree_ptr_is_null(cur, &ptr))
-               goto out0;
-
-       XFS_BTREE_STATS_INC(cur, decrement);
-
-       /*
-        * March up the tree decrementing pointers.
-        * Stop when we don't go off the left edge of a block.
-        */
-       for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
-               if (--cur->bc_ptrs[lev] > 0)
-                       break;
-               /* Read-ahead the left block for the next loop. */
-               xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
-       }
-
-       /*
-        * If we went off the root then we are either seriously
-        * confused or have the tree root in an inode.
-        */
-       if (lev == cur->bc_nlevels) {
-               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-                       goto out0;
-               ASSERT(0);
-               error = EFSCORRUPTED;
-               goto error0;
-       }
-       ASSERT(lev < cur->bc_nlevels);
-
-       /*
-        * Now walk back down the tree, fixing up the cursor's buffer
-        * pointers and key numbers.
-        */
-       for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
-               union xfs_btree_ptr     *ptrp;
-
-               ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-               --lev;
-               error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
-               if (error)
-                       goto error0;
-               xfs_btree_setbuf(cur, lev, bp);
-               cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
-       }
-out1:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
-
-STATIC int
-xfs_btree_lookup_get_block(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       int                     level,  /* level in the btree */
-       union xfs_btree_ptr     *pp,    /* ptr to btree block */
-       struct xfs_btree_block  **blkp) /* return btree block */
-{
-       struct xfs_buf          *bp;    /* buffer pointer for btree block */
-       int                     error = 0;
-
-       /* special case the root block if in an inode */
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           (level == cur->bc_nlevels - 1)) {
-               *blkp = xfs_btree_get_iroot(cur);
-               return 0;
-       }
-
-       /*
-        * If the old buffer at this level is for the disk address we are
-        * looking for, re-use it.
-        *
-        * Otherwise throw it away and get a new one.
-        */
-       bp = cur->bc_bufs[level];
-       if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
-               *blkp = XFS_BUF_TO_BLOCK(bp);
-               return 0;
-       }
-
-       error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
-       if (error)
-               return error;
-
-       xfs_btree_setbuf(cur, level, bp);
-       return 0;
-}
-
-/*
- * Get current search key.  For level 0 we don't actually have a key
- * structure so we make one up from the record.  For all other levels
- * we just return the right key.
- */
-STATIC union xfs_btree_key *
-xfs_lookup_get_search_key(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     keyno,
-       struct xfs_btree_block  *block,
-       union xfs_btree_key     *kp)
-{
-       if (level == 0) {
-               cur->bc_ops->init_key_from_rec(kp,
-                               xfs_btree_rec_addr(cur, keyno, block));
-               return kp;
-       }
-
-       return xfs_btree_key_addr(cur, keyno, block);
-}
-
-/*
- * Lookup the record.  The cursor is made to point to it, based on dir.
- * stat is set to 0 if we can't find any such record, 1 for success.
- */
-int                                    /* error */
-xfs_btree_lookup(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_lookup_t            dir,    /* <=, ==, or >= */
-       int                     *stat)  /* success/failure */
-{
-       struct xfs_btree_block  *block; /* current btree block */
-       __int64_t               diff;   /* difference for the current key */
-       int                     error;  /* error return value */
-       int                     keyno;  /* current key number */
-       int                     level;  /* level in the btree */
-       union xfs_btree_ptr     *pp;    /* ptr to btree block */
-       union xfs_btree_ptr     ptr;    /* ptr to btree block */
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, dir);
-
-       XFS_BTREE_STATS_INC(cur, lookup);
-
-       block = NULL;
-       keyno = 0;
-
-       /* initialise start pointer from cursor */
-       cur->bc_ops->init_ptr_from_cur(cur, &ptr);
-       pp = &ptr;
-
-       /*
-        * Iterate over each level in the btree, starting at the root.
-        * For each level above the leaves, find the key we need, based
-        * on the lookup record, then follow the corresponding block
-        * pointer down to the next level.
-        */
-       for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
-               /* Get the block we need to do the lookup on. */
-               error = xfs_btree_lookup_get_block(cur, level, pp, &block);
-               if (error)
-                       goto error0;
-
-               if (diff == 0) {
-                       /*
-                        * If we already had a key match at a higher level, we
-                        * know we need to use the first entry in this block.
-                        */
-                       keyno = 1;
-               } else {
-                       /* Otherwise search this block. Do a binary search. */
-
-                       int     high;   /* high entry number */
-                       int     low;    /* low entry number */
-
-                       /* Set low and high entry numbers, 1-based. */
-                       low = 1;
-                       high = xfs_btree_get_numrecs(block);
-                       if (!high) {
-                               /* Block is empty, must be an empty leaf. */
-                               ASSERT(level == 0 && cur->bc_nlevels == 1);
-
-                               cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
-                               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-                               *stat = 0;
-                               return 0;
-                       }
-
-                       /* Binary search the block. */
-                       while (low <= high) {
-                               union xfs_btree_key     key;
-                               union xfs_btree_key     *kp;
-
-                               XFS_BTREE_STATS_INC(cur, compare);
-
-                               /* keyno is average of low and high. */
-                               keyno = (low + high) >> 1;
-
-                               /* Get current search key */
-                               kp = xfs_lookup_get_search_key(cur, level,
-                                               keyno, block, &key);
-
-                               /*
-                                * Compute difference to get next direction:
-                                *  - less than, move right
-                                *  - greater than, move left
-                                *  - equal, we're done
-                                */
-                               diff = cur->bc_ops->key_diff(cur, kp);
-                               if (diff < 0)
-                                       low = keyno + 1;
-                               else if (diff > 0)
-                                       high = keyno - 1;
-                               else
-                                       break;
-                       }
-               }
-
-               /*
-                * If there are more levels, set up for the next level
-                * by getting the block number and filling in the cursor.
-                */
-               if (level > 0) {
-                       /*
-                        * If we moved left, need the previous key number,
-                        * unless there isn't one.
-                        */
-                       if (diff > 0 && --keyno < 1)
-                               keyno = 1;
-                       pp = xfs_btree_ptr_addr(cur, keyno, block);
-
-#ifdef DEBUG
-                       error = xfs_btree_check_ptr(cur, pp, 0, level);
-                       if (error)
-                               goto error0;
-#endif
-                       cur->bc_ptrs[level] = keyno;
-               }
-       }
-
-       /* Done with the search. See if we need to adjust the results. */
-       if (dir != XFS_LOOKUP_LE && diff < 0) {
-               keyno++;
-               /*
-                * If ge search and we went off the end of the block, but it's
-                * not the last block, we're in the wrong block.
-                */
-               xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
-               if (dir == XFS_LOOKUP_GE &&
-                   keyno > xfs_btree_get_numrecs(block) &&
-                   !xfs_btree_ptr_is_null(cur, &ptr)) {
-                       int     i;
-
-                       cur->bc_ptrs[0] = keyno;
-                       error = xfs_btree_increment(cur, 0, &i);
-                       if (error)
-                               goto error0;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
-                       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-                       *stat = 1;
-                       return 0;
-               }
-       } else if (dir == XFS_LOOKUP_LE && diff > 0)
-               keyno--;
-       cur->bc_ptrs[0] = keyno;
-
-       /* Return if we succeeded or not. */
-       if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
-               *stat = 0;
-       else if (dir != XFS_LOOKUP_EQ || diff == 0)
-               *stat = 1;
-       else
-               *stat = 0;
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
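
The lookup above steers with the sign of ->key_diff(): a binary search over the block's 1-based entries, followed by a small adjustment for the <=/==/>= direction. Below is a minimal, standalone C sketch of that pattern over a plain int array; lookup_keyno and lookup_dir are invented names for illustration only and none of this is the actual XFS API.

#include <stdio.h>

/* Lookup direction, mirroring the XFS_LOOKUP_LE/EQ/GE semantics. */
enum lookup_dir { LOOKUP_LE, LOOKUP_EQ, LOOKUP_GE };

/*
 * Binary search one block's 1-based key array the way xfs_btree_lookup
 * does: diff < 0 means the block key is smaller than the search key
 * (move right), diff > 0 means it is bigger (move left), diff == 0 is
 * an exact match.  Returns the 1-based index the cursor would be left
 * at, or 0 when the lookup fails within this block.
 */
static int lookup_keyno(const int *keys, int nrecs, int want,
			enum lookup_dir dir)
{
	int low = 1, high = nrecs, keyno = 0;
	long diff = 1;

	if (!nrecs)	/* the kernel handles the empty-leaf case early */
		return 0;

	while (low <= high) {
		keyno = (low + high) >> 1;
		diff = (long)keys[keyno - 1] - want;
		if (diff < 0)
			low = keyno + 1;
		else if (diff > 0)
			high = keyno - 1;
		else
			break;
	}

	/* Post-search adjustment, as at the end of xfs_btree_lookup. */
	if (dir != LOOKUP_LE && diff < 0)
		keyno++;	/* GE/EQ: step past a smaller key */
	else if (dir == LOOKUP_LE && diff > 0)
		keyno--;	/* LE: step back from a bigger key */

	if (keyno == 0 || keyno > nrecs)
		return 0;
	if (dir == LOOKUP_EQ && diff != 0)
		return 0;
	return keyno;
}

int main(void)
{
	int keys[] = { 10, 20, 30, 40 };

	printf("LE 25 -> %d\n", lookup_keyno(keys, 4, 25, LOOKUP_LE)); /* 2 */
	printf("GE 25 -> %d\n", lookup_keyno(keys, 4, 25, LOOKUP_GE)); /* 3 */
	printf("EQ 25 -> %d\n", lookup_keyno(keys, 4, 25, LOOKUP_EQ)); /* 0 */
	return 0;
}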
-
-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int
-xfs_btree_updkey(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *keyp,
-       int                     level)
-{
-       struct xfs_btree_block  *block;
-       struct xfs_buf          *bp;
-       union xfs_btree_key     *kp;
-       int                     ptr;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
-
-       ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
-
-       /*
-        * Go up the tree from this level toward the root.
-        * At each level, update the key value to the value input.
-        * Stop when we reach a level where the cursor isn't pointing
-        * at the first entry in the block.
-        */
-       for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
-#ifdef DEBUG
-               int             error;
-#endif
-               block = xfs_btree_get_block(cur, level, &bp);
-#ifdef DEBUG
-               error = xfs_btree_check_block(cur, block, level, bp);
-               if (error) {
-                       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-                       return error;
-               }
-#endif
-               ptr = cur->bc_ptrs[level];
-               kp = xfs_btree_key_addr(cur, ptr, block);
-               xfs_btree_copy_keys(cur, kp, keyp, 1);
-               xfs_btree_log_keys(cur, bp, ptr, ptr);
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-}
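
The loop above only climbs while the cursor sits at the first entry of each block, since only then does the parent's key for that subtree change. A toy, self-contained model of that walk follows; toy_level and toy_updkey are made-up names, and the real code operates on xfs_btree_block buffers and logs every change.

#include <stdio.h>

#define NLEVELS 3

/*
 * One "block" per level of the cursor's path: a key array and the
 * 1-based slot (ptr) the cursor points at in that block.  path[0]
 * stands for the leaf's parent block.
 */
struct toy_level {
	int keys[8];
	int ptr;
};

static void toy_updkey(struct toy_level *path, int level, int newkey)
{
	/*
	 * Walk toward the root; stop once a block's cursor slot isn't 1,
	 * because then the key the parent holds for this subtree is
	 * unchanged.
	 */
	for (int ptr = 1; ptr == 1 && level < NLEVELS; level++) {
		ptr = path[level].ptr;
		path[level].keys[ptr - 1] = newkey;
	}
}

int main(void)
{
	struct toy_level path[NLEVELS] = {
		{ .keys = { 15, 30 }, .ptr = 1 },	/* leaf's parent */
		{ .keys = { 15, 90 }, .ptr = 1 },
		{ .keys = { 15 },     .ptr = 1 },	/* root */
	};

	/* Record 15 at the leaf was rewritten to 12; push the key up. */
	toy_updkey(path, 0, 12);

	for (int l = 0; l < NLEVELS; l++)
		printf("level %d first key: %d\n", l + 1, path[l].keys[0]);
	return 0;
}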
-
-/*
- * Update the record referred to by cur to the value in the
- * given record. This either works (return 0) or gets an
- * EFSCORRUPTED error.
- */
-int
-xfs_btree_update(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *rec)
-{
-       struct xfs_btree_block  *block;
-       struct xfs_buf          *bp;
-       int                     error;
-       int                     ptr;
-       union xfs_btree_rec     *rp;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGR(cur, rec);
-
-       /* Pick up the current block. */
-       block = xfs_btree_get_block(cur, 0, &bp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, 0, bp);
-       if (error)
-               goto error0;
-#endif
-       /* Get the address of the rec to be updated. */
-       ptr = cur->bc_ptrs[0];
-       rp = xfs_btree_rec_addr(cur, ptr, block);
-
-       /* Fill in the new contents and log them. */
-       xfs_btree_copy_recs(cur, rp, rec, 1);
-       xfs_btree_log_recs(cur, bp, ptr, ptr);
-
-       /*
-        * If we are tracking the last record in the tree and
-        * we are at the far right edge of the tree, update it.
-        */
-       if (xfs_btree_is_lastrec(cur, block, 0)) {
-               cur->bc_ops->update_lastrec(cur, block, rec,
-                                           ptr, LASTREC_UPDATE);
-       }
-
-       /* Updating first rec in leaf. Pass new key value up to our parent. */
-       if (ptr == 1) {
-               union xfs_btree_key     key;
-
-               cur->bc_ops->init_key_from_rec(&key, rec);
-               error = xfs_btree_updkey(cur, &key, 1);
-               if (error)
-                       goto error0;
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
-
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int                                     /* error */
-xfs_btree_lshift(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     *stat)          /* success/failure */
-{
-       union xfs_btree_key     key;            /* btree key */
-       struct xfs_buf          *lbp;           /* left buffer pointer */
-       struct xfs_btree_block  *left;          /* left btree block */
-       int                     lrecs;          /* left record count */
-       struct xfs_buf          *rbp;           /* right buffer pointer */
-       struct xfs_btree_block  *right;         /* right btree block */
-       int                     rrecs;          /* right record count */
-       union xfs_btree_ptr     lptr;           /* left btree pointer */
-       union xfs_btree_key     *rkp = NULL;    /* right btree key */
-       union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
-       union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
-       int                     error;          /* error return value */
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, level);
-
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           level == cur->bc_nlevels - 1)
-               goto out0;
-
-       /* Set up variables for this block as "right". */
-       right = xfs_btree_get_block(cur, level, &rbp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, right, level, rbp);
-       if (error)
-               goto error0;
-#endif
-
-       /* If we've got no left sibling then we can't shift an entry left. */
-       xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-       if (xfs_btree_ptr_is_null(cur, &lptr))
-               goto out0;
-
-       /*
-        * If the cursor entry is the one that would be moved, don't
-        * do it... it's too complicated.
-        */
-       if (cur->bc_ptrs[level] <= 1)
-               goto out0;
-
-       /* Set up the left neighbor as "left". */
-       error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
-       if (error)
-               goto error0;
-
-       /* If it's full, it can't take another entry. */
-       lrecs = xfs_btree_get_numrecs(left);
-       if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
-               goto out0;
-
-       rrecs = xfs_btree_get_numrecs(right);
-
-       /*
-        * We add one entry to the left side and remove one from the right side.
-        * Account for it here; the changes will be updated on disk and logged
-        * later.
-        */
-       lrecs++;
-       rrecs--;
-
-       XFS_BTREE_STATS_INC(cur, lshift);
-       XFS_BTREE_STATS_ADD(cur, moves, 1);
-
-       /*
-        * If non-leaf, copy a key and a ptr to the left block.
-        * Log the changes to the left block.
-        */
-       if (level > 0) {
-               /* It's a non-leaf.  Move keys and pointers. */
-               union xfs_btree_key     *lkp;   /* left btree key */
-               union xfs_btree_ptr     *lpp;   /* left address pointer */
-
-               lkp = xfs_btree_key_addr(cur, lrecs, left);
-               rkp = xfs_btree_key_addr(cur, 1, right);
-
-               lpp = xfs_btree_ptr_addr(cur, lrecs, left);
-               rpp = xfs_btree_ptr_addr(cur, 1, right);
-#ifdef DEBUG
-               error = xfs_btree_check_ptr(cur, rpp, 0, level);
-               if (error)
-                       goto error0;
-#endif
-               xfs_btree_copy_keys(cur, lkp, rkp, 1);
-               xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
-
-               xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
-               xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
-
-               ASSERT(cur->bc_ops->keys_inorder(cur,
-                       xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
-       } else {
-               /* It's a leaf.  Move records.  */
-               union xfs_btree_rec     *lrp;   /* left record pointer */
-
-               lrp = xfs_btree_rec_addr(cur, lrecs, left);
-               rrp = xfs_btree_rec_addr(cur, 1, right);
-
-               xfs_btree_copy_recs(cur, lrp, rrp, 1);
-               xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
-
-               ASSERT(cur->bc_ops->recs_inorder(cur,
-                       xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
-       }
-
-       xfs_btree_set_numrecs(left, lrecs);
-       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
-
-       xfs_btree_set_numrecs(right, rrecs);
-       xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
-
-       /*
-        * Slide the contents of right down one entry.
-        */
-       XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
-       if (level > 0) {
-               /* It's a nonleaf. operate on keys and ptrs */
-#ifdef DEBUG
-               int                     i;              /* loop index */
-
-               for (i = 0; i < rrecs; i++) {
-                       error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
-                       if (error)
-                               goto error0;
-               }
-#endif
-               xfs_btree_shift_keys(cur,
-                               xfs_btree_key_addr(cur, 2, right),
-                               -1, rrecs);
-               xfs_btree_shift_ptrs(cur,
-                               xfs_btree_ptr_addr(cur, 2, right),
-                               -1, rrecs);
-
-               xfs_btree_log_keys(cur, rbp, 1, rrecs);
-               xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
-       } else {
-               /* It's a leaf. operate on records */
-               xfs_btree_shift_recs(cur,
-                       xfs_btree_rec_addr(cur, 2, right),
-                       -1, rrecs);
-               xfs_btree_log_recs(cur, rbp, 1, rrecs);
-
-               /*
-                * If it's the first record in the block, we'll need a key
-                * structure to pass up to the next level (updkey).
-                */
-               cur->bc_ops->init_key_from_rec(&key,
-                       xfs_btree_rec_addr(cur, 1, right));
-               rkp = &key;
-       }
-
-       /* Update the parent key values of right. */
-       error = xfs_btree_updkey(cur, rkp, level + 1);
-       if (error)
-               goto error0;
-
-       /* Slide the cursor value left one. */
-       cur->bc_ptrs[level]--;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
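
A toy illustration of the leaf-side record movement above: take right's first record onto the end of left, slide the remaining right records down one slot (the xfs_btree_shift_recs call with a -1 offset), and fix up both counts. shift_left_one is an invented helper, blocks are 0-based here rather than 1-based, and the non-leaf keys/ptrs case is analogous.

#include <stdio.h>
#include <string.h>

/* Move one record from the head of 'right' to the tail of 'left'. */
static void shift_left_one(int *left, int *lrecs, int *right, int *rrecs)
{
	left[(*lrecs)++] = right[0];
	/* Slide the remaining right records down by one slot. */
	memmove(&right[0], &right[1], (*rrecs - 1) * sizeof(right[0]));
	(*rrecs)--;
}

int main(void)
{
	int left[8]  = { 1, 2, 3 };
	int right[8] = { 10, 20, 30, 40 };
	int lrecs = 3, rrecs = 4;

	shift_left_one(left, &lrecs, right, &rrecs);

	printf("left (%d):", lrecs);
	for (int i = 0; i < lrecs; i++)
		printf(" %d", left[i]);
	printf("\nright (%d):", rrecs);
	for (int i = 0; i < rrecs; i++)
		printf(" %d", right[i]);
	printf("\n");		/* left: 1 2 3 10, right: 20 30 40 */
	return 0;
}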
-
-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int                                     /* error */
-xfs_btree_rshift(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     *stat)          /* success/failure */
-{
-       union xfs_btree_key     key;            /* btree key */
-       struct xfs_buf          *lbp;           /* left buffer pointer */
-       struct xfs_btree_block  *left;          /* left btree block */
-       struct xfs_buf          *rbp;           /* right buffer pointer */
-       struct xfs_btree_block  *right;         /* right btree block */
-       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
-       union xfs_btree_ptr     rptr;           /* right block pointer */
-       union xfs_btree_key     *rkp;           /* right btree key */
-       int                     rrecs;          /* right record count */
-       int                     lrecs;          /* left record count */
-       int                     error;          /* error return value */
-       int                     i;              /* loop counter */
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, level);
-
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           (level == cur->bc_nlevels - 1))
-               goto out0;
-
-       /* Set up variables for this block as "left". */
-       left = xfs_btree_get_block(cur, level, &lbp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, left, level, lbp);
-       if (error)
-               goto error0;
-#endif
-
-       /* If we've got no right sibling then we can't shift an entry right. */
-       xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
-       if (xfs_btree_ptr_is_null(cur, &rptr))
-               goto out0;
-
-       /*
-        * If the cursor entry is the one that would be moved, don't
-        * do it... it's too complicated.
-        */
-       lrecs = xfs_btree_get_numrecs(left);
-       if (cur->bc_ptrs[level] >= lrecs)
-               goto out0;
-
-       /* Set up the right neighbor as "right". */
-       error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
-       if (error)
-               goto error0;
-
-       /* If it's full, it can't take another entry. */
-       rrecs = xfs_btree_get_numrecs(right);
-       if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
-               goto out0;
-
-       XFS_BTREE_STATS_INC(cur, rshift);
-       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
-
-       /*
-        * Make a hole at the start of the right neighbor block, then
-        * copy the last left block entry to the hole.
-        */
-       if (level > 0) {
-               /* It's a nonleaf. make a hole in the keys and ptrs */
-               union xfs_btree_key     *lkp;
-               union xfs_btree_ptr     *lpp;
-               union xfs_btree_ptr     *rpp;
-
-               lkp = xfs_btree_key_addr(cur, lrecs, left);
-               lpp = xfs_btree_ptr_addr(cur, lrecs, left);
-               rkp = xfs_btree_key_addr(cur, 1, right);
-               rpp = xfs_btree_ptr_addr(cur, 1, right);
-
-#ifdef DEBUG
-               for (i = rrecs - 1; i >= 0; i--) {
-                       error = xfs_btree_check_ptr(cur, rpp, i, level);
-                       if (error)
-                               goto error0;
-               }
-#endif
-
-               xfs_btree_shift_keys(cur, rkp, 1, rrecs);
-               xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
-
-#ifdef DEBUG
-               error = xfs_btree_check_ptr(cur, lpp, 0, level);
-               if (error)
-                       goto error0;
-#endif
-
-               /* Now put the new data in, and log it. */
-               xfs_btree_copy_keys(cur, rkp, lkp, 1);
-               xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
-
-               xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
-               xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
-
-               ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
-                       xfs_btree_key_addr(cur, 2, right)));
-       } else {
-               /* It's a leaf. make a hole in the records */
-               union xfs_btree_rec     *lrp;
-               union xfs_btree_rec     *rrp;
-
-               lrp = xfs_btree_rec_addr(cur, lrecs, left);
-               rrp = xfs_btree_rec_addr(cur, 1, right);
-
-               xfs_btree_shift_recs(cur, rrp, 1, rrecs);
-
-               /* Now put the new data in, and log it. */
-               xfs_btree_copy_recs(cur, rrp, lrp, 1);
-               xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
-
-               cur->bc_ops->init_key_from_rec(&key, rrp);
-               rkp = &key;
-
-               ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
-                       xfs_btree_rec_addr(cur, 2, right)));
-       }
-
-       /*
-        * Decrement and log left's numrecs, bump and log right's numrecs.
-        */
-       xfs_btree_set_numrecs(left, --lrecs);
-       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
-
-       xfs_btree_set_numrecs(right, ++rrecs);
-       xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
-
-       /*
-        * Using a temporary cursor, update the parent key values of the
-        * block on the right.
-        */
-       error = xfs_btree_dup_cursor(cur, &tcur);
-       if (error)
-               goto error0;
-       i = xfs_btree_lastrec(tcur, level);
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-       error = xfs_btree_increment(tcur, level, &i);
-       if (error)
-               goto error1;
-
-       error = xfs_btree_updkey(tcur, rkp, level + 1);
-       if (error)
-               goto error1;
-
-       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-
-error1:
-       XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
-       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
-       return error;
-}
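
The right shift is the mirror image: open a hole at the front of the right block (the xfs_btree_shift_recs call with a +1 offset) and drop left's last record into it. Again a standalone toy on int arrays; shift_right_one is not an XFS function.

#include <stdio.h>
#include <string.h>

/* Move one record from the tail of 'left' to the head of 'right'. */
static void shift_right_one(int *left, int *lrecs, int *right, int *rrecs)
{
	/* Slide right's records up one slot to make a hole at the front. */
	memmove(&right[1], &right[0], *rrecs * sizeof(right[0]));
	right[0] = left[--(*lrecs)];
	(*rrecs)++;
}

int main(void)
{
	int left[8]  = { 1, 2, 3, 4 };
	int right[8] = { 10, 20 };
	int lrecs = 4, rrecs = 2;

	shift_right_one(left, &lrecs, right, &rrecs);

	printf("left (%d):", lrecs);
	for (int i = 0; i < lrecs; i++)
		printf(" %d", left[i]);
	printf("\nright (%d):", rrecs);
	for (int i = 0; i < rrecs; i++)
		printf(" %d", right[i]);
	printf("\n");		/* left: 1 2 3, right: 4 10 20 */
	return 0;
}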
-
-/*
- * Split cur/level block in half.
- * Return new block number and the key to its first
- * record (to be inserted into parent).
- */
-STATIC int                                     /* error */
-xfs_btree_split(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       union xfs_btree_ptr     *ptrp,
-       union xfs_btree_key     *key,
-       struct xfs_btree_cur    **curp,
-       int                     *stat)          /* success/failure */
-{
-       union xfs_btree_ptr     lptr;           /* left sibling block ptr */
-       struct xfs_buf          *lbp;           /* left buffer pointer */
-       struct xfs_btree_block  *left;          /* left btree block */
-       union xfs_btree_ptr     rptr;           /* right sibling block ptr */
-       struct xfs_buf          *rbp;           /* right buffer pointer */
-       struct xfs_btree_block  *right;         /* right btree block */
-       union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
-       struct xfs_buf          *rrbp;          /* right-right buffer pointer */
-       struct xfs_btree_block  *rrblock;       /* right-right btree block */
-       int                     lrecs;
-       int                     rrecs;
-       int                     src_index;
-       int                     error;          /* error return value */
-#ifdef DEBUG
-       int                     i;
-#endif
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
-
-       XFS_BTREE_STATS_INC(cur, split);
-
-       /* Set up left block (current one). */
-       left = xfs_btree_get_block(cur, level, &lbp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, left, level, lbp);
-       if (error)
-               goto error0;
-#endif
-
-       xfs_btree_buf_to_ptr(cur, lbp, &lptr);
-
-       /* Allocate the new block. If we can't do it, we're toast. Give up. */
-       error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
-       if (error)
-               goto error0;
-       if (*stat == 0)
-               goto out0;
-       XFS_BTREE_STATS_INC(cur, alloc);
-
-       /* Set up the new block as "right". */
-       error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
-       if (error)
-               goto error0;
-
-       /* Fill in the btree header for the new right block. */
-       xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
-
-       /*
-        * Split the entries between the old and the new block evenly.
-        * Make sure that if there's an odd number of entries now, both
-        * blocks will end up with the same number of entries once the
-        * pending insert is done.
-        */
-       lrecs = xfs_btree_get_numrecs(left);
-       rrecs = lrecs / 2;
-       if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
-               rrecs++;
-       src_index = (lrecs - rrecs + 1);
-
-       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
-
-       /*
-        * Copy btree block entries from the left block over to the
-        * new block, the right. Update the right block and log the
-        * changes.
-        */
-       if (level > 0) {
-               /* It's a non-leaf.  Move keys and pointers. */
-               union xfs_btree_key     *lkp;   /* left btree key */
-               union xfs_btree_ptr     *lpp;   /* left address pointer */
-               union xfs_btree_key     *rkp;   /* right btree key */
-               union xfs_btree_ptr     *rpp;   /* right address pointer */
-
-               lkp = xfs_btree_key_addr(cur, src_index, left);
-               lpp = xfs_btree_ptr_addr(cur, src_index, left);
-               rkp = xfs_btree_key_addr(cur, 1, right);
-               rpp = xfs_btree_ptr_addr(cur, 1, right);
-
-#ifdef DEBUG
-               for (i = src_index; i < rrecs; i++) {
-                       error = xfs_btree_check_ptr(cur, lpp, i, level);
-                       if (error)
-                               goto error0;
-               }
-#endif
-
-               xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
-               xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
-
-               xfs_btree_log_keys(cur, rbp, 1, rrecs);
-               xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
-
-               /* Grab the keys to the entries moved to the right block */
-               xfs_btree_copy_keys(cur, key, rkp, 1);
-       } else {
-               /* It's a leaf.  Move records.  */
-               union xfs_btree_rec     *lrp;   /* left record pointer */
-               union xfs_btree_rec     *rrp;   /* right record pointer */
-
-               lrp = xfs_btree_rec_addr(cur, src_index, left);
-               rrp = xfs_btree_rec_addr(cur, 1, right);
-
-               xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
-               xfs_btree_log_recs(cur, rbp, 1, rrecs);
-
-               cur->bc_ops->init_key_from_rec(key,
-                       xfs_btree_rec_addr(cur, 1, right));
-       }
-
-
-       /*
-        * Find the left block number by looking in the buffer.
-        * Adjust numrecs, sibling pointers.
-        */
-       xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
-       xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
-       xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-       xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
-
-       lrecs -= rrecs;
-       xfs_btree_set_numrecs(left, lrecs);
-       xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
-
-       xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
-       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
-
-       /*
-        * If there's a block to the new block's right, make that block
-        * point back to right instead of to left.
-        */
-       if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
-               error = xfs_btree_read_buf_block(cur, &rrptr,
-                                                       0, &rrblock, &rrbp);
-               if (error)
-                       goto error0;
-               xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
-               xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
-       }
-       /*
-        * If the cursor is really in the right block, move it there.
-        * If it's just pointing past the last entry in left, then we'll
-        * insert there, so don't change anything in that case.
-        */
-       if (cur->bc_ptrs[level] > lrecs + 1) {
-               xfs_btree_setbuf(cur, level, rbp);
-               cur->bc_ptrs[level] -= lrecs;
-       }
-       /*
-        * If there are more levels, we'll need another cursor that refers
-        * to the right block, no matter where this cursor was.
-        */
-       if (level + 1 < cur->bc_nlevels) {
-               error = xfs_btree_dup_cursor(cur, curp);
-               if (error)
-                       goto error0;
-               (*curp)->bc_ptrs[level + 1]++;
-       }
-       *ptrp = rptr;
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
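
The split arithmetic above biases the new right block's share so that, once the record that triggered the split is inserted, the two halves hold the same number of entries. A quick standalone check of that arithmetic; insert_at stands in for cur->bc_ptrs[level] and the counts are arbitrary.

#include <stdio.h>

int main(void)
{
	/*
	 * The right (new) block gets lrecs/2 records, plus one more when
	 * lrecs is odd and the pending insert will land in the left half,
	 * so the two blocks come out balanced after the insert.
	 */
	int lrecs = 7;			/* records in the full block */

	for (int insert_at = 1; insert_at <= lrecs + 1; insert_at++) {
		int rrecs = lrecs / 2;
		int src_index;

		if ((lrecs & 1) && insert_at <= rrecs + 1)
			rrecs++;
		src_index = lrecs - rrecs + 1;

		printf("insert at %d: left keeps %d, right gets %d "
		       "(copied from index %d)\n",
		       insert_at, lrecs - rrecs, rrecs, src_index);
	}
	return 0;
}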
-
-/*
- * Copy the old inode root contents into a real block and make the
- * broot point to it.
- */
-int                                            /* error */
-xfs_btree_new_iroot(
-       struct xfs_btree_cur    *cur,           /* btree cursor */
-       int                     *logflags,      /* logging flags for inode */
-       int                     *stat)          /* return status - 0 fail */
-{
-       struct xfs_buf          *cbp;           /* buffer for cblock */
-       struct xfs_btree_block  *block;         /* btree block */
-       struct xfs_btree_block  *cblock;        /* child btree block */
-       union xfs_btree_key     *ckp;           /* child key pointer */
-       union xfs_btree_ptr     *cpp;           /* child ptr pointer */
-       union xfs_btree_key     *kp;            /* pointer to btree key */
-       union xfs_btree_ptr     *pp;            /* pointer to block addr */
-       union xfs_btree_ptr     nptr;           /* new block addr */
-       int                     level;          /* btree level */
-       int                     error;          /* error return code */
-#ifdef DEBUG
-       int                     i;              /* loop counter */
-#endif
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_STATS_INC(cur, newroot);
-
-       ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
-
-       level = cur->bc_nlevels - 1;
-
-       block = xfs_btree_get_iroot(cur);
-       pp = xfs_btree_ptr_addr(cur, 1, block);
-
-       /* Allocate the new block. If we can't do it, we're toast. Give up. */
-       error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
-       if (error)
-               goto error0;
-       if (*stat == 0) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               return 0;
-       }
-       XFS_BTREE_STATS_INC(cur, alloc);
-
-       /* Copy the root into a real block. */
-       error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
-       if (error)
-               goto error0;
-
-       /*
-        * We can't just memcpy() the root in for CRC enabled btree blocks.
-        * In that case we also have to ensure the blkno remains correct.
-        */
-       memcpy(cblock, block, xfs_btree_block_len(cur));
-       if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
-               if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-                       cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
-               else
-                       cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
-       }
-
-       be16_add_cpu(&block->bb_level, 1);
-       xfs_btree_set_numrecs(block, 1);
-       cur->bc_nlevels++;
-       cur->bc_ptrs[level + 1] = 1;
-
-       kp = xfs_btree_key_addr(cur, 1, block);
-       ckp = xfs_btree_key_addr(cur, 1, cblock);
-       xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
-
-       cpp = xfs_btree_ptr_addr(cur, 1, cblock);
-#ifdef DEBUG
-       for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
-               error = xfs_btree_check_ptr(cur, pp, i, level);
-               if (error)
-                       goto error0;
-       }
-#endif
-       xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
-
-#ifdef DEBUG
-       error = xfs_btree_check_ptr(cur, &nptr, 0, level);
-       if (error)
-               goto error0;
-#endif
-       xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
-
-       xfs_iroot_realloc(cur->bc_private.b.ip,
-                         1 - xfs_btree_get_numrecs(cblock),
-                         cur->bc_private.b.whichfork);
-
-       xfs_btree_setbuf(cur, level, cbp);
-
-       /*
-        * Do all this logging at the end so that
-        * the root is at the right level.
-        */
-       xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
-       xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
-       xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
-
-       *logflags |=
-               XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
-       *stat = 1;
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
-
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int                             /* error */
-xfs_btree_new_root(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       int                     *stat)  /* success/failure */
-{
-       struct xfs_btree_block  *block; /* one half of the old root block */
-       struct xfs_buf          *bp;    /* buffer containing block */
-       int                     error;  /* error return value */
-       struct xfs_buf          *lbp;   /* left buffer pointer */
-       struct xfs_btree_block  *left;  /* left btree block */
-       struct xfs_buf          *nbp;   /* new (root) buffer */
-       struct xfs_btree_block  *new;   /* new (root) btree block */
-       int                     nptr;   /* new value for key index, 1 or 2 */
-       struct xfs_buf          *rbp;   /* right buffer pointer */
-       struct xfs_btree_block  *right; /* right btree block */
-       union xfs_btree_ptr     rptr;
-       union xfs_btree_ptr     lptr;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_STATS_INC(cur, newroot);
-
-       /* initialise our start point from the cursor */
-       cur->bc_ops->init_ptr_from_cur(cur, &rptr);
-
-       /* Allocate the new block. If we can't do it, we're toast. Give up. */
-       error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
-       if (error)
-               goto error0;
-       if (*stat == 0)
-               goto out0;
-       XFS_BTREE_STATS_INC(cur, alloc);
-
-       /* Set up the new block. */
-       error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
-       if (error)
-               goto error0;
-
-       /* Set the root in the holding structure, increasing the level by 1. */
-       cur->bc_ops->set_root(cur, &lptr, 1);
-
-       /*
-        * At the previous root level there are now two blocks: the old root,
-        * and the new block generated when it was split.  We don't know which
-        * one the cursor is pointing at, so we set up variables "left" and
-        * "right" for each case.
-        */
-       block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
-       if (error)
-               goto error0;
-#endif
-
-       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
-       if (!xfs_btree_ptr_is_null(cur, &rptr)) {
-               /* Our block is left, pick up the right block. */
-               lbp = bp;
-               xfs_btree_buf_to_ptr(cur, lbp, &lptr);
-               left = block;
-               error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
-               if (error)
-                       goto error0;
-               bp = rbp;
-               nptr = 1;
-       } else {
-               /* Our block is right, pick up the left block. */
-               rbp = bp;
-               xfs_btree_buf_to_ptr(cur, rbp, &rptr);
-               right = block;
-               xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-               error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
-               if (error)
-                       goto error0;
-               bp = lbp;
-               nptr = 2;
-       }
-       /* Fill in the new block's btree header and log it. */
-       xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
-       xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
-       ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
-                       !xfs_btree_ptr_is_null(cur, &rptr));
-
-       /* Fill in the key data in the new root. */
-       if (xfs_btree_get_level(left) > 0) {
-               xfs_btree_copy_keys(cur,
-                               xfs_btree_key_addr(cur, 1, new),
-                               xfs_btree_key_addr(cur, 1, left), 1);
-               xfs_btree_copy_keys(cur,
-                               xfs_btree_key_addr(cur, 2, new),
-                               xfs_btree_key_addr(cur, 1, right), 1);
-       } else {
-               cur->bc_ops->init_key_from_rec(
-                               xfs_btree_key_addr(cur, 1, new),
-                               xfs_btree_rec_addr(cur, 1, left));
-               cur->bc_ops->init_key_from_rec(
-                               xfs_btree_key_addr(cur, 2, new),
-                               xfs_btree_rec_addr(cur, 1, right));
-       }
-       xfs_btree_log_keys(cur, nbp, 1, 2);
-
-       /* Fill in the pointer data in the new root. */
-       xfs_btree_copy_ptrs(cur,
-               xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
-       xfs_btree_copy_ptrs(cur,
-               xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
-       xfs_btree_log_ptrs(cur, nbp, 1, 2);
-
-       /* Fix up the cursor. */
-       xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
-       cur->bc_ptrs[cur->bc_nlevels] = nptr;
-       cur->bc_nlevels++;
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 0;
-       return 0;
-}
-
-STATIC int
-xfs_btree_make_block_unfull(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       int                     level,  /* btree level */
-       int                     numrecs,/* # of recs in block */
-       int                     *oindex,/* old tree index */
-       int                     *index, /* new tree index */
-       union xfs_btree_ptr     *nptr,  /* new btree ptr */
-       struct xfs_btree_cur    **ncur, /* new btree cursor */
-       union xfs_btree_rec     *nrec,  /* new record */
-       int                     *stat)
-{
-       union xfs_btree_key     key;    /* new btree key value */
-       int                     error = 0;
-
-       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           level == cur->bc_nlevels - 1) {
-               struct xfs_inode *ip = cur->bc_private.b.ip;
-
-               if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
-                       /* A root block that can be made bigger. */
-                       xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
-               } else {
-                       /* A root block that needs replacing */
-                       int     logflags = 0;
-
-                       error = xfs_btree_new_iroot(cur, &logflags, stat);
-                       if (error || *stat == 0)
-                               return error;
-
-                       xfs_trans_log_inode(cur->bc_tp, ip, logflags);
-               }
-
-               return 0;
-       }
-
-       /* First, try shifting an entry to the right neighbor. */
-       error = xfs_btree_rshift(cur, level, stat);
-       if (error || *stat)
-               return error;
-
-       /* Next, try shifting an entry to the left neighbor. */
-       error = xfs_btree_lshift(cur, level, stat);
-       if (error)
-               return error;
-
-       if (*stat) {
-               *oindex = *index = cur->bc_ptrs[level];
-               return 0;
-       }
-
-       /*
-        * Next, try splitting the current block in half.
-        *
-        * If this works we have to re-set our variables because we
-        * could be in a different block now.
-        */
-       error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
-       if (error || *stat == 0)
-               return error;
-
-
-       *index = cur->bc_ptrs[level];
-       cur->bc_ops->init_rec_from_key(&key, nrec);
-       return 0;
-}
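
For blocks other than an inode root, the function above tries the cheap options before paying for a split. A trivial sketch of that fallback order with stubbed-out strategies; try_rshift, try_lshift and try_split are placeholders, not XFS calls.

#include <stdio.h>
#include <stdbool.h>

static bool try_rshift(void) { return false; }	/* right sibling full */
static bool try_lshift(void) { return false; }	/* left sibling full */
static bool try_split(void)  { return true; }	/* works if we can allocate */

int main(void)
{
	/*
	 * Same fallback order as the code above: shift a record right,
	 * then left, and only split the block as a last resort.
	 */
	if (try_rshift())
		puts("made room by shifting a record right");
	else if (try_lshift())
		puts("made room by shifting a record left");
	else if (try_split())
		puts("split the block in half");
	return 0;
}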
-
-/*
- * Insert one record/level.  Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int
-xfs_btree_insrec(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       int                     level,  /* level to insert record at */
-       union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
-       union xfs_btree_rec     *recp,  /* i/o: record data inserted */
-       struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
-       int                     *stat)  /* success/failure */
-{
-       struct xfs_btree_block  *block; /* btree block */
-       struct xfs_buf          *bp;    /* buffer for block */
-       union xfs_btree_key     key;    /* btree key */
-       union xfs_btree_ptr     nptr;   /* new block ptr */
-       struct xfs_btree_cur    *ncur;  /* new btree cursor */
-       union xfs_btree_rec     nrec;   /* new record */
-       int                     optr;   /* old key/record index */
-       int                     ptr;    /* key/record index */
-       int                     numrecs;/* number of records */
-       int                     error;  /* error return value */
-#ifdef DEBUG
-       int                     i;
-#endif
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
-
-       ncur = NULL;
-
-       /*
-        * If we have an external root pointer, and we've made it to the
-        * root level, allocate a new root block and we're done.
-        */
-       if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
-           (level >= cur->bc_nlevels)) {
-               error = xfs_btree_new_root(cur, stat);
-               xfs_btree_set_ptr_null(cur, ptrp);
-
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               return error;
-       }
-
-       /* If we're off the left edge, return failure. */
-       ptr = cur->bc_ptrs[level];
-       if (ptr == 0) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-
-       /* Make a key out of the record data to be inserted, and save it. */
-       cur->bc_ops->init_key_from_rec(&key, recp);
-
-       optr = ptr;
-
-       XFS_BTREE_STATS_INC(cur, insrec);
-
-       /* Get pointers to the btree buffer and block. */
-       block = xfs_btree_get_block(cur, level, &bp);
-       numrecs = xfs_btree_get_numrecs(block);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, level, bp);
-       if (error)
-               goto error0;
-
-       /* Check that the new entry is being inserted in the right place. */
-       if (ptr <= numrecs) {
-               if (level == 0) {
-                       ASSERT(cur->bc_ops->recs_inorder(cur, recp,
-                               xfs_btree_rec_addr(cur, ptr, block)));
-               } else {
-                       ASSERT(cur->bc_ops->keys_inorder(cur, &key,
-                               xfs_btree_key_addr(cur, ptr, block)));
-               }
-       }
-#endif
-
-       /*
-        * If the block is full, we can't insert the new entry until we
-        * make the block un-full.
-        */
-       xfs_btree_set_ptr_null(cur, &nptr);
-       if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
-               error = xfs_btree_make_block_unfull(cur, level, numrecs,
-                                       &optr, &ptr, &nptr, &ncur, &nrec, stat);
-               if (error || *stat == 0)
-                       goto error0;
-       }
-
-       /*
-        * The current block may have changed if the block was
-        * previously full and we have just made space in it.
-        */
-       block = xfs_btree_get_block(cur, level, &bp);
-       numrecs = xfs_btree_get_numrecs(block);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, level, bp);
-       if (error)
-               return error;
-#endif
-
-       /*
-        * At this point we know there's room for our new entry in the block
-        * we're pointing at.
-        */
-       XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
-
-       if (level > 0) {
-               /* It's a nonleaf. make a hole in the keys and ptrs */
-               union xfs_btree_key     *kp;
-               union xfs_btree_ptr     *pp;
-
-               kp = xfs_btree_key_addr(cur, ptr, block);
-               pp = xfs_btree_ptr_addr(cur, ptr, block);
-
-#ifdef DEBUG
-               for (i = numrecs - ptr; i >= 0; i--) {
-                       error = xfs_btree_check_ptr(cur, pp, i, level);
-                       if (error)
-                               return error;
-               }
-#endif
-
-               xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
-               xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
-
-#ifdef DEBUG
-               error = xfs_btree_check_ptr(cur, ptrp, 0, level);
-               if (error)
-                       goto error0;
-#endif
-
-               /* Now put the new data in, bump numrecs and log it. */
-               xfs_btree_copy_keys(cur, kp, &key, 1);
-               xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
-               numrecs++;
-               xfs_btree_set_numrecs(block, numrecs);
-               xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
-               xfs_btree_log_keys(cur, bp, ptr, numrecs);
-#ifdef DEBUG
-               if (ptr < numrecs) {
-                       ASSERT(cur->bc_ops->keys_inorder(cur, kp,
-                               xfs_btree_key_addr(cur, ptr + 1, block)));
-               }
-#endif
-       } else {
-               /* It's a leaf. make a hole in the records */
-               union xfs_btree_rec             *rp;
-
-               rp = xfs_btree_rec_addr(cur, ptr, block);
-
-               xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
-
-               /* Now put the new data in, bump numrecs and log it. */
-               xfs_btree_copy_recs(cur, rp, recp, 1);
-               xfs_btree_set_numrecs(block, ++numrecs);
-               xfs_btree_log_recs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
-               if (ptr < numrecs) {
-                       ASSERT(cur->bc_ops->recs_inorder(cur, rp,
-                               xfs_btree_rec_addr(cur, ptr + 1, block)));
-               }
-#endif
-       }
-
-       /* Log the new number of records in the btree header. */
-       xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
-
-       /* If we inserted at the start of a block, update the parents' keys. */
-       if (optr == 1) {
-               error = xfs_btree_updkey(cur, &key, level + 1);
-               if (error)
-                       goto error0;
-       }
-
-       /*
-        * If we are tracking the last record in the tree and
-        * we are at the far right edge of the tree, update it.
-        */
-       if (xfs_btree_is_lastrec(cur, block, level)) {
-               cur->bc_ops->update_lastrec(cur, block, recp,
-                                           ptr, LASTREC_INSREC);
-       }
-
-       /*
-        * Return the new block number, if any.
-        * If there is one, give back a record value and a cursor too.
-        */
-       *ptrp = nptr;
-       if (!xfs_btree_ptr_is_null(cur, &nptr)) {
-               *recp = nrec;
-               *curp = ncur;
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
-
-/*
- * Insert the record at the point referenced by cur.
- *
- * A multi-level split of the tree on insert will invalidate the original
- * cursor.  All callers of this function should assume that the cursor is
- * no longer valid and revalidate it.
- */
-int
-xfs_btree_insert(
-       struct xfs_btree_cur    *cur,
-       int                     *stat)
-{
-       int                     error;  /* error return value */
-       int                     i;      /* result value, 0 for failure */
-       int                     level;  /* current level number in btree */
-       union xfs_btree_ptr     nptr;   /* new block number (split result) */
-       struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
-       struct xfs_btree_cur    *pcur;  /* previous level's cursor */
-       union xfs_btree_rec     rec;    /* record to insert */
-
-       level = 0;
-       ncur = NULL;
-       pcur = cur;
-
-       xfs_btree_set_ptr_null(cur, &nptr);
-       cur->bc_ops->init_rec_from_cur(cur, &rec);
-
-       /*
-        * Loop going up the tree, starting at the leaf level.
-        * Stop when we don't get a split block; that must mean the
-        * insert is finished at this level.
-        */
-       do {
-               /*
-                * Insert nrec/nptr into this level of the tree.
-                * Note if we fail, nptr will be null.
-                */
-               error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
-               if (error) {
-                       if (pcur != cur)
-                               xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
-                       goto error0;
-               }
-
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               level++;
-
-               /*
-                * See if the cursor we just used is trash.
-                * Can't trash the caller's cursor, but otherwise we should
-                * if ncur is a new cursor or we're about to be done.
-                */
-               if (pcur != cur &&
-                   (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
-                       /* Save the state from the cursor before we trash it */
-                       if (cur->bc_ops->update_cursor)
-                               cur->bc_ops->update_cursor(pcur, cur);
-                       cur->bc_nlevels = pcur->bc_nlevels;
-                       xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
-               }
-               /* If we got a new cursor, switch to it. */
-               if (ncur) {
-                       pcur = ncur;
-                       ncur = NULL;
-               }
-       } while (!xfs_btree_ptr_is_null(cur, &nptr));
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = i;
-       return 0;
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
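
The loop above works bottom-up: every split hands a new key/pointer to the parent, and the loop only stops at a level that absorbs the insert without splitting. A toy model of that propagation with plain per-level record counts; MAXRECS and the counts are arbitrary, while the real code threads nptr/nrec/ncur through xfs_btree_insrec.

#include <stdio.h>
#include <stdbool.h>

#define MAXRECS 4

int main(void)
{
	int nrecs[] = { 4, 4, 2 };	/* leaf..root: leaf and middle full */
	int nlevels = 3;
	bool have_new_ptr = true;	/* pending insert for this level */

	for (int level = 0; level < nlevels && have_new_ptr; level++) {
		if (nrecs[level] == MAXRECS) {
			/*
			 * Split: each half keeps roughly half the records,
			 * the new entry lands in one of them, and the parent
			 * must now take a pointer to the new block.
			 */
			nrecs[level] = MAXRECS / 2 + 1;
			printf("level %d split\n", level);
			have_new_ptr = true;
		} else {
			nrecs[level]++;
			printf("level %d absorbed the insert\n", level);
			have_new_ptr = false;
		}
	}
	return 0;
}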
-
-/*
- * Try to merge a non-leaf block back into the inode root.
- *
- * Note: the killroot name comes from the fact that we're effectively
- * killing the old root block.  But because we can't just delete the
- * inode, we have to copy the single block it was pointing to into the
- * inode.
- */
-STATIC int
-xfs_btree_kill_iroot(
-       struct xfs_btree_cur    *cur)
-{
-       int                     whichfork = cur->bc_private.b.whichfork;
-       struct xfs_inode        *ip = cur->bc_private.b.ip;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
-       struct xfs_btree_block  *block;
-       struct xfs_btree_block  *cblock;
-       union xfs_btree_key     *kp;
-       union xfs_btree_key     *ckp;
-       union xfs_btree_ptr     *pp;
-       union xfs_btree_ptr     *cpp;
-       struct xfs_buf          *cbp;
-       int                     level;
-       int                     index;
-       int                     numrecs;
-#ifdef DEBUG
-       union xfs_btree_ptr     ptr;
-       int                     i;
-#endif
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
-       ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
-       ASSERT(cur->bc_nlevels > 1);
-
-       /*
-        * Don't deal with the "root block needs to be a leaf" case.
-        * We're just going to turn the thing back into extents anyway.
-        */
-       level = cur->bc_nlevels - 1;
-       if (level == 1)
-               goto out0;
-
-       /*
-        * Give up if the root has multiple children.
-        */
-       block = xfs_btree_get_iroot(cur);
-       if (xfs_btree_get_numrecs(block) != 1)
-               goto out0;
-
-       cblock = xfs_btree_get_block(cur, level - 1, &cbp);
-       numrecs = xfs_btree_get_numrecs(cblock);
-
-       /*
-        * Only do this if the next level down will fit in the inode root.
-        * Then the data must be copied up into the inode; instead of
-        * freeing the root, you free the next level down.
-        */
-       if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
-               goto out0;
-
-       XFS_BTREE_STATS_INC(cur, killroot);
-
-#ifdef DEBUG
-       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
-       ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
-       xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
-       ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
-#endif
-
-       index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
-       if (index) {
-               xfs_iroot_realloc(cur->bc_private.b.ip, index,
-                                 cur->bc_private.b.whichfork);
-               block = ifp->if_broot;
-       }
-
-       be16_add_cpu(&block->bb_numrecs, index);
-       ASSERT(block->bb_numrecs == cblock->bb_numrecs);
-
-       kp = xfs_btree_key_addr(cur, 1, block);
-       ckp = xfs_btree_key_addr(cur, 1, cblock);
-       xfs_btree_copy_keys(cur, kp, ckp, numrecs);
-
-       pp = xfs_btree_ptr_addr(cur, 1, block);
-       cpp = xfs_btree_ptr_addr(cur, 1, cblock);
-#ifdef DEBUG
-       for (i = 0; i < numrecs; i++) {
-               int             error;
-
-               error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
-               if (error) {
-                       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-                       return error;
-               }
-       }
-#endif
-       xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
-
-       cur->bc_ops->free_block(cur, cbp);
-       XFS_BTREE_STATS_INC(cur, free);
-
-       cur->bc_bufs[level - 1] = NULL;
-       be16_add_cpu(&block->bb_level, -1);
-       xfs_trans_log_inode(cur->bc_tp, ip,
-               XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
-       cur->bc_nlevels--;
-out0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-}
-
-/*
- * Kill the current root node, and replace it with its only child node.
- */
-STATIC int
-xfs_btree_kill_root(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     level,
-       union xfs_btree_ptr     *newroot)
-{
-       int                     error;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_STATS_INC(cur, killroot);
-
-       /*
-        * Update the root pointer, decreasing the level by 1 and then
-        * free the old root.
-        */
-       cur->bc_ops->set_root(cur, newroot, -1);
-
-       error = cur->bc_ops->free_block(cur, bp);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-               return error;
-       }
-
-       XFS_BTREE_STATS_INC(cur, free);
-
-       cur->bc_bufs[level] = NULL;
-       cur->bc_ra[level] = 0;
-       cur->bc_nlevels--;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-}
-
-STATIC int
-xfs_btree_dec_cursor(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       int                     *stat)
-{
-       int                     error;
-       int                     i;
-
-       if (level > 0) {
-               error = xfs_btree_decrement(cur, level, &i);
-               if (error)
-                       return error;
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = 1;
-       return 0;
-}
-
-/*
- * Single level of the btree record deletion routine.
- * Delete record pointed to by cur/level.
- * Remove the record from its block then rebalance the tree.
- * Return 0 for error, 1 for done, 2 to go on to the next level.
- */
-STATIC int                                     /* error */
-xfs_btree_delrec(
-       struct xfs_btree_cur    *cur,           /* btree cursor */
-       int                     level,          /* level removing record from */
-       int                     *stat)          /* fail/done/go-on */
-{
-       struct xfs_btree_block  *block;         /* btree block */
-       union xfs_btree_ptr     cptr;           /* current block ptr */
-       struct xfs_buf          *bp;            /* buffer for block */
-       int                     error;          /* error return value */
-       int                     i;              /* loop counter */
-       union xfs_btree_key     key;            /* storage for keyp */
-       union xfs_btree_key     *keyp = &key;   /* passed to the next level */
-       union xfs_btree_ptr     lptr;           /* left sibling block ptr */
-       struct xfs_buf          *lbp;           /* left buffer pointer */
-       struct xfs_btree_block  *left;          /* left btree block */
-       int                     lrecs = 0;      /* left record count */
-       int                     ptr;            /* key/record index */
-       union xfs_btree_ptr     rptr;           /* right sibling block ptr */
-       struct xfs_buf          *rbp;           /* right buffer pointer */
-       struct xfs_btree_block  *right;         /* right btree block */
-       struct xfs_btree_block  *rrblock;       /* right-right btree block */
-       struct xfs_buf          *rrbp;          /* right-right buffer pointer */
-       int                     rrecs = 0;      /* right record count */
-       struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
-       int                     numrecs;        /* temporary numrec count */
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_TRACE_ARGI(cur, level);
-
-       tcur = NULL;
-
-       /* Get the index of the entry being deleted, check for nothing there. */
-       ptr = cur->bc_ptrs[level];
-       if (ptr == 0) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-
-       /* Get the buffer & block containing the record or key/ptr. */
-       block = xfs_btree_get_block(cur, level, &bp);
-       numrecs = xfs_btree_get_numrecs(block);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, level, bp);
-       if (error)
-               goto error0;
-#endif
-
-       /* Fail if we're off the end of the block. */
-       if (ptr > numrecs) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-
-       XFS_BTREE_STATS_INC(cur, delrec);
-       XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
-
-       /* Excise the entries being deleted. */
-       if (level > 0) {
-               /* It's a non-leaf; operate on keys and ptrs. */
-               union xfs_btree_key     *lkp;
-               union xfs_btree_ptr     *lpp;
-
-               lkp = xfs_btree_key_addr(cur, ptr + 1, block);
-               lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
-
-#ifdef DEBUG
-               for (i = 0; i < numrecs - ptr; i++) {
-                       error = xfs_btree_check_ptr(cur, lpp, i, level);
-                       if (error)
-                               goto error0;
-               }
-#endif
-
-               if (ptr < numrecs) {
-                       xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
-                       xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
-                       xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
-                       xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
-               }
-
-               /*
-                * If it's the first record in the block, we'll need to pass a
-                * key up to the next level (updkey).
-                */
-               if (ptr == 1)
-                       keyp = xfs_btree_key_addr(cur, 1, block);
-       } else {
-               /* It's a leaf; operate on records. */
-               if (ptr < numrecs) {
-                       xfs_btree_shift_recs(cur,
-                               xfs_btree_rec_addr(cur, ptr + 1, block),
-                               -1, numrecs - ptr);
-                       xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
-               }
-
-               /*
-                * If it's the first record in the block, we'll need a key
-                * structure to pass up to the next level (updkey).
-                */
-               if (ptr == 1) {
-                       cur->bc_ops->init_key_from_rec(&key,
-                                       xfs_btree_rec_addr(cur, 1, block));
-                       keyp = &key;
-               }
-       }
-
-       /*
-        * Decrement and log the number of entries in the block.
-        */
-       xfs_btree_set_numrecs(block, --numrecs);
-       xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
-
-       /*
-        * If we are tracking the last record in the tree and
-        * we are at the far right edge of the tree, update it.
-        */
-       if (xfs_btree_is_lastrec(cur, block, level)) {
-               cur->bc_ops->update_lastrec(cur, block, NULL,
-                                           ptr, LASTREC_DELREC);
-       }
-
-       /*
-        * If we're at the root level, first shrink the root block in-memory,
-        * then try to get rid of the next level down.  If we can't, there's
-        * nothing left to do.
-        */
-       if (level == cur->bc_nlevels - 1) {
-               if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
-                       xfs_iroot_realloc(cur->bc_private.b.ip, -1,
-                                         cur->bc_private.b.whichfork);
-
-                       error = xfs_btree_kill_iroot(cur);
-                       if (error)
-                               goto error0;
-
-                       error = xfs_btree_dec_cursor(cur, level, stat);
-                       if (error)
-                               goto error0;
-                       *stat = 1;
-                       return 0;
-               }
-
-               /*
-                * If this is the root level, and there's only one entry left,
-                * and it's NOT the leaf level, then we can get rid of this
-                * level.
-                */
-               if (numrecs == 1 && level > 0) {
-                       union xfs_btree_ptr     *pp;
-                       /*
-                        * pp is still set to the first pointer in the block.
-                        * Make it the new root of the btree.
-                        */
-                       pp = xfs_btree_ptr_addr(cur, 1, block);
-                       error = xfs_btree_kill_root(cur, bp, level, pp);
-                       if (error)
-                               goto error0;
-               } else if (level > 0) {
-                       error = xfs_btree_dec_cursor(cur, level, stat);
-                       if (error)
-                               goto error0;
-               }
-               *stat = 1;
-               return 0;
-       }
-
-       /*
-        * If we deleted the leftmost entry in the block, update the
-        * key values above us in the tree.
-        */
-       if (ptr == 1) {
-               error = xfs_btree_updkey(cur, keyp, level + 1);
-               if (error)
-                       goto error0;
-       }
-
-       /*
-        * If the number of records remaining in the block is at least
-        * the minimum, we're done.
-        */
-       if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
-               error = xfs_btree_dec_cursor(cur, level, stat);
-               if (error)
-                       goto error0;
-               return 0;
-       }
-
-       /*
-        * Otherwise, we have to move some records around to keep the
-        * tree balanced.  Look at the left and right sibling blocks to
-        * see if we can re-balance by moving only one record.
-        */
-       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
-       xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
-
-       if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
-               /*
-                * One child of root, need to get a chance to copy its contents
-                * into the root and delete it. Can't go up to next level,
-                * there's nothing to delete there.
-                */
-               if (xfs_btree_ptr_is_null(cur, &rptr) &&
-                   xfs_btree_ptr_is_null(cur, &lptr) &&
-                   level == cur->bc_nlevels - 2) {
-                       error = xfs_btree_kill_iroot(cur);
-                       if (!error)
-                               error = xfs_btree_dec_cursor(cur, level, stat);
-                       if (error)
-                               goto error0;
-                       return 0;
-               }
-       }
-
-       ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
-              !xfs_btree_ptr_is_null(cur, &lptr));
-
-       /*
-        * Duplicate the cursor so our btree manipulations here won't
-        * disrupt the next level up.
-        */
-       error = xfs_btree_dup_cursor(cur, &tcur);
-       if (error)
-               goto error0;
-
-       /*
-        * If there's a right sibling, see if it's ok to shift an entry
-        * out of it.
-        */
-       if (!xfs_btree_ptr_is_null(cur, &rptr)) {
-               /*
-                * Move the temp cursor to the last entry in the next block.
-                * Actually any entry but the first would suffice.
-                */
-               i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               error = xfs_btree_increment(tcur, level, &i);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               /* Grab a pointer to the block. */
-               right = xfs_btree_get_block(tcur, level, &rbp);
-#ifdef DEBUG
-               error = xfs_btree_check_block(tcur, right, level, rbp);
-               if (error)
-                       goto error0;
-#endif
-               /* Grab the current block number, for future use. */
-               xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
-
-               /*
-                * If right block is full enough so that removing one entry
-                * won't make it too empty, and left-shifting an entry out
-                * of right to us works, we're done.
-                */
-               if (xfs_btree_get_numrecs(right) - 1 >=
-                   cur->bc_ops->get_minrecs(tcur, level)) {
-                       error = xfs_btree_lshift(tcur, level, &i);
-                       if (error)
-                               goto error0;
-                       if (i) {
-                               ASSERT(xfs_btree_get_numrecs(block) >=
-                                      cur->bc_ops->get_minrecs(tcur, level));
-
-                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-                               tcur = NULL;
-
-                               error = xfs_btree_dec_cursor(cur, level, stat);
-                               if (error)
-                                       goto error0;
-                               return 0;
-                       }
-               }
-
-               /*
-                * Otherwise, grab the number of records in right for
-                * future reference, and fix up the temp cursor to point
-                * to our block again (last record).
-                */
-               rrecs = xfs_btree_get_numrecs(right);
-               if (!xfs_btree_ptr_is_null(cur, &lptr)) {
-                       i = xfs_btree_firstrec(tcur, level);
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-                       error = xfs_btree_decrement(tcur, level, &i);
-                       if (error)
-                               goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               }
-       }
-
-       /*
-        * If there's a left sibling, see if it's ok to shift an entry
-        * out of it.
-        */
-       if (!xfs_btree_ptr_is_null(cur, &lptr)) {
-               /*
-                * Move the temp cursor to the first entry in the
-                * previous block.
-                */
-               i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               error = xfs_btree_decrement(tcur, level, &i);
-               if (error)
-                       goto error0;
-               i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               /* Grab a pointer to the block. */
-               left = xfs_btree_get_block(tcur, level, &lbp);
-#ifdef DEBUG
-               error = xfs_btree_check_block(cur, left, level, lbp);
-               if (error)
-                       goto error0;
-#endif
-               /* Grab the current block number, for future use. */
-               xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
-
-               /*
-                * If left block is full enough so that removing one entry
-                * won't make it too empty, and right-shifting an entry out
-                * of left to us works, we're done.
-                */
-               if (xfs_btree_get_numrecs(left) - 1 >=
-                   cur->bc_ops->get_minrecs(tcur, level)) {
-                       error = xfs_btree_rshift(tcur, level, &i);
-                       if (error)
-                               goto error0;
-                       if (i) {
-                               ASSERT(xfs_btree_get_numrecs(block) >=
-                                      cur->bc_ops->get_minrecs(tcur, level));
-                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-                               tcur = NULL;
-                               if (level == 0)
-                                       cur->bc_ptrs[0]++;
-                               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-                               *stat = 1;
-                               return 0;
-                       }
-               }
-
-               /*
-                * Otherwise, grab the number of records in left for
-                * future reference.
-                */
-               lrecs = xfs_btree_get_numrecs(left);
-       }
-
-       /* Delete the temp cursor, we're done with it. */
-       xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-       tcur = NULL;
-
-       /* If here, we need to do a join to keep the tree balanced. */
-       ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
-
-       if (!xfs_btree_ptr_is_null(cur, &lptr) &&
-           lrecs + xfs_btree_get_numrecs(block) <=
-                       cur->bc_ops->get_maxrecs(cur, level)) {
-               /*
-                * Set "right" to be the starting block,
-                * "left" to be the left neighbor.
-                */
-               rptr = cptr;
-               right = block;
-               rbp = bp;
-               error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
-               if (error)
-                       goto error0;
-
-       /*
-        * If that won't work, see if we can join with the right neighbor block.
-        */
-       } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
-                  rrecs + xfs_btree_get_numrecs(block) <=
-                       cur->bc_ops->get_maxrecs(cur, level)) {
-               /*
-                * Set "left" to be the starting block,
-                * "right" to be the right neighbor.
-                */
-               lptr = cptr;
-               left = block;
-               lbp = bp;
-               error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
-               if (error)
-                       goto error0;
-
-       /*
-        * Otherwise, we can't fix the imbalance.
-        * Just return.  This is probably a logic error, but it's not fatal.
-        */
-       } else {
-               error = xfs_btree_dec_cursor(cur, level, stat);
-               if (error)
-                       goto error0;
-               return 0;
-       }
-
-       rrecs = xfs_btree_get_numrecs(right);
-       lrecs = xfs_btree_get_numrecs(left);
-
-       /*
-        * We're now going to join "left" and "right" by moving all the stuff
-        * in "right" to "left" and deleting "right".
-        */
-       XFS_BTREE_STATS_ADD(cur, moves, rrecs);
-       if (level > 0) {
-               /* It's a non-leaf.  Move keys and pointers. */
-               union xfs_btree_key     *lkp;   /* left btree key */
-               union xfs_btree_ptr     *lpp;   /* left address pointer */
-               union xfs_btree_key     *rkp;   /* right btree key */
-               union xfs_btree_ptr     *rpp;   /* right address pointer */
-
-               lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
-               lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
-               rkp = xfs_btree_key_addr(cur, 1, right);
-               rpp = xfs_btree_ptr_addr(cur, 1, right);
-#ifdef DEBUG
-               for (i = 1; i < rrecs; i++) {
-                       error = xfs_btree_check_ptr(cur, rpp, i, level);
-                       if (error)
-                               goto error0;
-               }
-#endif
-               xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
-               xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
-
-               xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
-               xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
-       } else {
-               /* It's a leaf.  Move records.  */
-               union xfs_btree_rec     *lrp;   /* left record pointer */
-               union xfs_btree_rec     *rrp;   /* right record pointer */
-
-               lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
-               rrp = xfs_btree_rec_addr(cur, 1, right);
-
-               xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
-               xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
-       }
-
-       XFS_BTREE_STATS_INC(cur, join);
-
-       /*
-        * Fix up the number of records and right block pointer in the
-        * surviving block, and log it.
-        */
-       xfs_btree_set_numrecs(left, lrecs + rrecs);
-       xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
-       xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
-       xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
-
-       /* If there is a right sibling, point it to the remaining block. */
-       xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
-       if (!xfs_btree_ptr_is_null(cur, &cptr)) {
-               error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
-               if (error)
-                       goto error0;
-               xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
-               xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
-       }
-
-       /* Free the deleted block. */
-       error = cur->bc_ops->free_block(cur, rbp);
-       if (error)
-               goto error0;
-       XFS_BTREE_STATS_INC(cur, free);
-
-       /*
-        * If we joined with the left neighbor, set the buffer in the
-        * cursor to the left block, and fix up the index.
-        */
-       if (bp != lbp) {
-               cur->bc_bufs[level] = lbp;
-               cur->bc_ptrs[level] += lrecs;
-               cur->bc_ra[level] = 0;
-       }
-       /*
-        * If we joined with the right neighbor and there's a level above
-        * us, increment the cursor at that level.
-        */
-       else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
-                  (level + 1 < cur->bc_nlevels)) {
-               error = xfs_btree_increment(cur, level + 1, &i);
-               if (error)
-                       goto error0;
-       }
-
-       /*
-        * Readjust the ptr at this level if it's not a leaf, since it's
-        * still pointing at the deletion point, which makes the cursor
-        * inconsistent.  If this makes the ptr 0, the caller fixes it up.
-        * We can't use decrement because it would change the next level up.
-        */
-       if (level > 0)
-               cur->bc_ptrs[level]--;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       /* Return value means the next level up has something to do. */
-       *stat = 2;
-       return 0;
-
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       if (tcur)
-               xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int                                    /* error */
-xfs_btree_delete(
-       struct xfs_btree_cur    *cur,
-       int                     *stat)  /* success/failure */
-{
-       int                     error;  /* error return value */
-       int                     level;
-       int                     i;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
-       /*
-        * Go up the tree, starting at leaf level.
-        *
-        * If 2 is returned then a join was done; go to the next level.
-        * Otherwise we are done.
-        */
-       for (level = 0, i = 2; i == 2; level++) {
-               error = xfs_btree_delrec(cur, level, &i);
-               if (error)
-                       goto error0;
-       }
-
-       if (i == 0) {
-               for (level = 1; level < cur->bc_nlevels; level++) {
-                       if (cur->bc_ptrs[level] == 0) {
-                               error = xfs_btree_decrement(cur, level, &i);
-                               if (error)
-                                       goto error0;
-                               break;
-                       }
-               }
-       }
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       *stat = i;
-       return 0;
-error0:
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-       return error;
-}
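
For context, the usual calling pattern for xfs_btree_delete() is to position the
cursor with a lookup first and tear the cursor down afterwards. A minimal sketch,
assuming the caller has already allocated "cur" and primed cur->bc_rec with the
key of the record to remove:

	int	stat;
	int	error;

	/* position the cursor at the record to delete */
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
	if (!error && stat == 1) {
		/* stat comes back 1 if the record was removed */
		error = xfs_btree_delete(cur, &stat);
	}
	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
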
-
-/*
- * Get the data from the pointed-to record.
- */
-int                                    /* error */
-xfs_btree_get_rec(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       union xfs_btree_rec     **recp, /* output: btree record */
-       int                     *stat)  /* output: success/failure */
-{
-       struct xfs_btree_block  *block; /* btree block */
-       struct xfs_buf          *bp;    /* buffer pointer */
-       int                     ptr;    /* record number */
-#ifdef DEBUG
-       int                     error;  /* error return value */
-#endif
-
-       ptr = cur->bc_ptrs[0];
-       block = xfs_btree_get_block(cur, 0, &bp);
-
-#ifdef DEBUG
-       error = xfs_btree_check_block(cur, block, 0, bp);
-       if (error)
-               return error;
-#endif
-
-       /*
-        * Off the right end or left end, return failure.
-        */
-       if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
-               *stat = 0;
-               return 0;
-       }
-
-       /*
-        * Point to the record and extract its data.
-        */
-       *recp = xfs_btree_rec_addr(cur, ptr, block);
-       *stat = 1;
-       return 0;
-}
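
xfs_btree_get_rec() is typically paired with xfs_btree_increment() to walk leaf
records in order. A minimal sketch, assuming "cur" is an existing cursor whose
bc_rec has been primed for a >= lookup and that decoding of the returned record
is done by btree-specific helpers:

	union xfs_btree_rec	*rec;
	int			stat;
	int			error;

	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);
	while (!error && stat == 1) {
		error = xfs_btree_get_rec(cur, &rec, &stat);
		if (error || stat == 0)
			break;
		/* decode *rec with the btree-specific record helpers here */
		error = xfs_btree_increment(cur, 0, &stat);
	}
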
-
-/*
- * Change the owner of a btree.
- *
- * The mechanism we use here is ordered buffer logging. Because we don't know
- * how many buffers we are going to need to modify, we don't really want to
- * have to make transaction reservations for the worst case of every buffer in a
- * full size btree, as that may be more space than we can fit in the log....
- *
- * We do the btree walk in the most efficient manner possible - we have sibling
- * pointers so we can just walk all the blocks on each level from left to right
- * in a single pass, and then move to the next level and do the same. We can
- * also do readahead on the sibling pointers to get IO moving more quickly,
- * though for slow disks this is unlikely to make much difference to performance
- * as the amount of CPU work we have to do before moving to the next block is
- * relatively small.
- *
- * For each btree block that we load, modify the owner, set the buffer up as an
- * ordered buffer and log the owner change. We need to ensure that
- * we mark the region we change dirty so that if the buffer is relogged in
- * a subsequent transaction the changes we make here as an ordered buffer are
- * correctly relogged in that transaction.  If we are in recovery context, then
- * just queue the modified buffer as delayed write buffer so the transaction
- * recovery completion writes the changes to disk.
- */
-static int
-xfs_btree_block_change_owner(
-       struct xfs_btree_cur    *cur,
-       int                     level,
-       __uint64_t              new_owner,
-       struct list_head        *buffer_list)
-{
-       struct xfs_btree_block  *block;
-       struct xfs_buf          *bp;
-       union xfs_btree_ptr     rptr;
-
-       /* do right sibling readahead */
-       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
-
-       /* modify the owner */
-       block = xfs_btree_get_block(cur, level, &bp);
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
-       else
-               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
-
-       /*
-        * If the block is a root block hosted in an inode, we might not have a
-        * buffer pointer here and we shouldn't attempt to log the change as the
-        * information is already held in the inode and discarded when the root
-        * block is formatted into the on-disk inode fork. We still change it,
-        * though, so everything is consistent in memory.
-        */
-       if (bp) {
-               if (cur->bc_tp) {
-                       xfs_trans_ordered_buf(cur->bc_tp, bp);
-                       xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
-               } else {
-                       xfs_buf_delwri_queue(bp, buffer_list);
-               }
-       } else {
-               ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
-               ASSERT(level == cur->bc_nlevels - 1);
-       }
-
-       /* now read the right-hand sibling block for the next iteration */
-       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
-       if (xfs_btree_ptr_is_null(cur, &rptr))
-               return ENOENT;
-
-       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
-}
-
-int
-xfs_btree_change_owner(
-       struct xfs_btree_cur    *cur,
-       __uint64_t              new_owner,
-       struct list_head        *buffer_list)
-{
-       union xfs_btree_ptr     lptr;
-       int                     level;
-       struct xfs_btree_block  *block = NULL;
-       int                     error = 0;
-
-       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
-
-       /* for each level */
-       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
-               /* grab the left hand block */
-               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
-               if (error)
-                       return error;
-
-               /* readahead the left most block for the next level down */
-               if (level > 0) {
-                       union xfs_btree_ptr     *ptr;
-
-                       ptr = xfs_btree_ptr_addr(cur, 1, block);
-                       xfs_btree_readahead_ptr(cur, ptr, 1);
-
-                       /* save for the next iteration of the loop */
-                       lptr = *ptr;
-               }
-
-               /* for each buffer in the level */
-               do {
-                       error = xfs_btree_block_change_owner(cur, level,
-                                                            new_owner,
-                                                            buffer_list);
-               } while (!error);
-
-               if (error != ENOENT)
-                       return error;
-       }
-
-       return 0;
-}
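
When xfs_btree_change_owner() is called without a transaction (the recovery case
described above), the modified buffers are queued on the caller's delwri list and
must be submitted once the walk completes. A hedged sketch of that calling
pattern, assuming "cur" and "new_owner" are supplied by the caller:

	LIST_HEAD(buffer_list);
	int	error;

	error = xfs_btree_change_owner(cur, new_owner, &buffer_list);
	if (!error)
		error = xfs_buf_delwri_submit(&buffer_list);
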
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
deleted file mode 100644 (file)
index a1a4e3e..0000000
+++ /dev/null
@@ -1,2665 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
-
-/*
- * xfs_da_btree.c
- *
- * Routines to implement directories as Btrees of hashed names.
- */
-
-/*========================================================================
- * Function prototypes for the kernel.
- *========================================================================*/
-
-/*
- * Routines used for growing the Btree.
- */
-STATIC int xfs_da3_root_split(xfs_da_state_t *state,
-                                           xfs_da_state_blk_t *existing_root,
-                                           xfs_da_state_blk_t *new_child);
-STATIC int xfs_da3_node_split(xfs_da_state_t *state,
-                                           xfs_da_state_blk_t *existing_blk,
-                                           xfs_da_state_blk_t *split_blk,
-                                           xfs_da_state_blk_t *blk_to_add,
-                                           int treelevel,
-                                           int *result);
-STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
-                                        xfs_da_state_blk_t *node_blk_1,
-                                        xfs_da_state_blk_t *node_blk_2);
-STATIC void xfs_da3_node_add(xfs_da_state_t *state,
-                                  xfs_da_state_blk_t *old_node_blk,
-                                  xfs_da_state_blk_t *new_node_blk);
-
-/*
- * Routines used for shrinking the Btree.
- */
-STATIC int xfs_da3_root_join(xfs_da_state_t *state,
-                                          xfs_da_state_blk_t *root_blk);
-STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
-STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
-                                             xfs_da_state_blk_t *drop_blk);
-STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
-                                        xfs_da_state_blk_t *src_node_blk,
-                                        xfs_da_state_blk_t *dst_node_blk);
-
-/*
- * Utility routines.
- */
-STATIC int     xfs_da3_blk_unlink(xfs_da_state_t *state,
-                                 xfs_da_state_blk_t *drop_blk,
-                                 xfs_da_state_blk_t *save_blk);
-
-
-kmem_zone_t *xfs_da_state_zone;        /* anchor for state struct zone */
-
-/*
- * Allocate a dir-state structure.
- * We don't put them on the stack since they're large.
- */
-xfs_da_state_t *
-xfs_da_state_alloc(void)
-{
-       return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
-}
-
-/*
- * Kill the altpath contents of a da-state structure.
- */
-STATIC void
-xfs_da_state_kill_altpath(xfs_da_state_t *state)
-{
-       int     i;
-
-       for (i = 0; i < state->altpath.active; i++)
-               state->altpath.blk[i].bp = NULL;
-       state->altpath.active = 0;
-}
-
-/*
- * Free a da-state structure.
- */
-void
-xfs_da_state_free(xfs_da_state_t *state)
-{
-       xfs_da_state_kill_altpath(state);
-#ifdef DEBUG
-       memset((char *)state, 0, sizeof(*state));
-#endif /* DEBUG */
-       kmem_zone_free(xfs_da_state_zone, state);
-}
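
Every da btree operation brackets its work with this allocate/free pair. A
minimal sketch of the lifecycle, assuming "args" is an initialised
struct xfs_da_args supplied by the caller:

	struct xfs_da_state	*state;

	state = xfs_da_state_alloc();
	state->args = args;
	state->mp = args->dp->i_mount;

	/* ... node lookup/add/remove operations using "state" ... */

	xfs_da_state_free(state);
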
-
-static bool
-xfs_da3_node_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_da_intnode   *hdr = bp->b_addr;
-       struct xfs_da3_icnode_hdr ichdr;
-       const struct xfs_dir_ops *ops;
-
-       ops = xfs_dir_get_ops(mp, NULL);
-
-       ops->node_hdr_from_disk(&ichdr, hdr);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
-
-               if (ichdr.magic != XFS_DA3_NODE_MAGIC)
-                       return false;
-
-               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (ichdr.magic != XFS_DA_NODE_MAGIC)
-                       return false;
-       }
-       if (ichdr.level == 0)
-               return false;
-       if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
-               return false;
-       if (ichdr.count == 0)
-               return false;
-
-       /*
-        * we don't know if the node is for an attribute or directory tree,
-        * so only fail if the count is outside both bounds
-        */
-       if (ichdr.count > mp->m_dir_geo->node_ents &&
-           ichdr.count > mp->m_attr_geo->node_ents)
-               return false;
-
-       /* XXX: hash order check? */
-
-       return true;
-}
-
-static void
-xfs_da3_node_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_da3_node_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
-}
-
-/*
- * leaf/node format detection on trees is sketchy, so a node read can be done on
- * leaf level blocks when detection incorrectly identifies the tree as node
- * format. In this case, we need to swap the verifier to match the correct
- * format of the block being read.
- */
-static void
-xfs_da3_node_read_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_da_blkinfo   *info = bp->b_addr;
-
-       switch (be16_to_cpu(info->magic)) {
-               case XFS_DA3_NODE_MAGIC:
-                       if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
-                               xfs_buf_ioerror(bp, EFSBADCRC);
-                               break;
-                       }
-                       /* fall through */
-               case XFS_DA_NODE_MAGIC:
-                       if (!xfs_da3_node_verify(bp)) {
-                               xfs_buf_ioerror(bp, EFSCORRUPTED);
-                               break;
-                       }
-                       return;
-               case XFS_ATTR_LEAF_MAGIC:
-               case XFS_ATTR3_LEAF_MAGIC:
-                       bp->b_ops = &xfs_attr3_leaf_buf_ops;
-                       bp->b_ops->verify_read(bp);
-                       return;
-               case XFS_DIR2_LEAFN_MAGIC:
-               case XFS_DIR3_LEAFN_MAGIC:
-                       bp->b_ops = &xfs_dir3_leafn_buf_ops;
-                       bp->b_ops->verify_read(bp);
-                       return;
-               default:
-                       break;
-       }
-
-       /* corrupt block */
-       xfs_verifier_error(bp);
-}
-
-const struct xfs_buf_ops xfs_da3_node_buf_ops = {
-       .verify_read = xfs_da3_node_read_verify,
-       .verify_write = xfs_da3_node_write_verify,
-};
-
-int
-xfs_da3_node_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp,
-       int                     which_fork)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
-                                       which_fork, &xfs_da3_node_buf_ops);
-       if (!err && tp) {
-               struct xfs_da_blkinfo   *info = (*bpp)->b_addr;
-               int                     type;
-
-               switch (be16_to_cpu(info->magic)) {
-               case XFS_DA_NODE_MAGIC:
-               case XFS_DA3_NODE_MAGIC:
-                       type = XFS_BLFT_DA_NODE_BUF;
-                       break;
-               case XFS_ATTR_LEAF_MAGIC:
-               case XFS_ATTR3_LEAF_MAGIC:
-                       type = XFS_BLFT_ATTR_LEAF_BUF;
-                       break;
-               case XFS_DIR2_LEAFN_MAGIC:
-               case XFS_DIR3_LEAFN_MAGIC:
-                       type = XFS_BLFT_DIR_LEAFN_BUF;
-                       break;
-               default:
-                       type = 0;
-                       ASSERT(0);
-                       break;
-               }
-               xfs_trans_buf_set_type(tp, *bpp, type);
-       }
-       return err;
-}
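
A typical read through this helper passes a mappedbno of -1 so that
xfs_da_read_buf() performs the block mapping itself. A minimal sketch, assuming
"tp", "dp" and "blkno" come from the caller's context:

	struct xfs_buf	*bp;
	int		error;

	error = xfs_da3_node_read(tp, dp, blkno, -1, &bp, XFS_DATA_FORK);
	if (error)
		return error;
	/* ... interpret bp->b_addr as a node or leaf block ... */
	xfs_trans_brelse(tp, bp);
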
-
-/*========================================================================
- * Routines used for growing the Btree.
- *========================================================================*/
-
-/*
- * Create the initial contents of an intermediate node.
- */
-int
-xfs_da3_node_create(
-       struct xfs_da_args      *args,
-       xfs_dablk_t             blkno,
-       int                     level,
-       struct xfs_buf          **bpp,
-       int                     whichfork)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_trans        *tp = args->trans;
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_da3_icnode_hdr ichdr = {0};
-       struct xfs_buf          *bp;
-       int                     error;
-       struct xfs_inode        *dp = args->dp;
-
-       trace_xfs_da_node_create(args);
-       ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
-
-       error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
-       if (error)
-               return error;
-       bp->b_ops = &xfs_da3_node_buf_ops;
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
-       node = bp->b_addr;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
-
-               ichdr.magic = XFS_DA3_NODE_MAGIC;
-               hdr3->info.blkno = cpu_to_be64(bp->b_bn);
-               hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
-               uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid);
-       } else {
-               ichdr.magic = XFS_DA_NODE_MAGIC;
-       }
-       ichdr.level = level;
-
-       dp->d_ops->node_hdr_to_disk(node, &ichdr);
-       xfs_trans_log_buf(tp, bp,
-               XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
-
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Split a leaf node, rebalance, then possibly split
- * intermediate nodes, rebalance, etc.
- */
-int                                                    /* error */
-xfs_da3_split(
-       struct xfs_da_state     *state)
-{
-       struct xfs_da_state_blk *oldblk;
-       struct xfs_da_state_blk *newblk;
-       struct xfs_da_state_blk *addblk;
-       struct xfs_da_intnode   *node;
-       struct xfs_buf          *bp;
-       int                     max;
-       int                     action = 0;
-       int                     error;
-       int                     i;
-
-       trace_xfs_da_split(state->args);
-
-       /*
-        * Walk back up the tree splitting/inserting/adjusting as necessary.
-        * If we need to insert and there isn't room, split the node, then
-        * decide which fragment to insert the new block from below into.
-        * Note that we may split the root this way, but we need more fixup.
-        */
-       max = state->path.active - 1;
-       ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
-       ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
-              state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
-
-       addblk = &state->path.blk[max];         /* initial dummy value */
-       for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
-               oldblk = &state->path.blk[i];
-               newblk = &state->altpath.blk[i];
-
-               /*
-                * If a leaf node then
-                *     Allocate a new leaf node, then rebalance across them.
-                * else if an intermediate node then
-                *     We split on the last layer, must we split the node?
-                */
-               switch (oldblk->magic) {
-               case XFS_ATTR_LEAF_MAGIC:
-                       error = xfs_attr3_leaf_split(state, oldblk, newblk);
-                       if ((error != 0) && (error != ENOSPC)) {
-                               return error;   /* GROT: attr is inconsistent */
-                       }
-                       if (!error) {
-                               addblk = newblk;
-                               break;
-                       }
-                       /*
-                        * Entry wouldn't fit, split the leaf again.
-                        */
-                       state->extravalid = 1;
-                       if (state->inleaf) {
-                               state->extraafter = 0;  /* before newblk */
-                               trace_xfs_attr_leaf_split_before(state->args);
-                               error = xfs_attr3_leaf_split(state, oldblk,
-                                                           &state->extrablk);
-                       } else {
-                               state->extraafter = 1;  /* after newblk */
-                               trace_xfs_attr_leaf_split_after(state->args);
-                               error = xfs_attr3_leaf_split(state, newblk,
-                                                           &state->extrablk);
-                       }
-                       if (error)
-                               return error;   /* GROT: attr inconsistent */
-                       addblk = newblk;
-                       break;
-               case XFS_DIR2_LEAFN_MAGIC:
-                       error = xfs_dir2_leafn_split(state, oldblk, newblk);
-                       if (error)
-                               return error;
-                       addblk = newblk;
-                       break;
-               case XFS_DA_NODE_MAGIC:
-                       error = xfs_da3_node_split(state, oldblk, newblk, addblk,
-                                                        max - i, &action);
-                       addblk->bp = NULL;
-                       if (error)
-                               return error;   /* GROT: dir is inconsistent */
-                       /*
-                        * Record the newly split block for the next time thru?
-                        */
-                       if (action)
-                               addblk = newblk;
-                       else
-                               addblk = NULL;
-                       break;
-               }
-
-               /*
-                * Update the btree to show the new hashval for this child.
-                */
-               xfs_da3_fixhashpath(state, &state->path);
-       }
-       if (!addblk)
-               return 0;
-
-       /*
-        * Split the root node.
-        */
-       ASSERT(state->path.active == 0);
-       oldblk = &state->path.blk[0];
-       error = xfs_da3_root_split(state, oldblk, addblk);
-       if (error) {
-               addblk->bp = NULL;
-               return error;   /* GROT: dir is inconsistent */
-       }
-
-       /*
-        * Update pointers to the node which used to be block 0 and
-        * just got bumped because of the addition of a new root node.
-        * There might be three blocks involved if a double split occurred,
-        * and the original block 0 could be at any position in the list.
-        *
-        * Note: the magic numbers and sibling pointers are in the same
-        * physical place for both v2 and v3 headers (by design). Hence it
-        * doesn't matter which version of the xfs_da_intnode structure we use
-        * here as the result will be the same using either structure.
-        */
-       node = oldblk->bp->b_addr;
-       if (node->hdr.info.forw) {
-               if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
-                       bp = addblk->bp;
-               } else {
-                       ASSERT(state->extravalid);
-                       bp = state->extrablk.bp;
-               }
-               node = bp->b_addr;
-               node->hdr.info.back = cpu_to_be32(oldblk->blkno);
-               xfs_trans_log_buf(state->args->trans, bp,
-                   XFS_DA_LOGRANGE(node, &node->hdr.info,
-                   sizeof(node->hdr.info)));
-       }
-       node = oldblk->bp->b_addr;
-       if (node->hdr.info.back) {
-               if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
-                       bp = addblk->bp;
-               } else {
-                       ASSERT(state->extravalid);
-                       bp = state->extrablk.bp;
-               }
-               node = bp->b_addr;
-               node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
-               xfs_trans_log_buf(state->args->trans, bp,
-                   XFS_DA_LOGRANGE(node, &node->hdr.info,
-                   sizeof(node->hdr.info)));
-       }
-       addblk->bp = NULL;
-       return 0;
-}
-
-/*
- * Split the root.  We have to create a new root and point to the two
- * parts (the split old root) that we just created.  Copy block zero to
- * the EOF, extending the inode in process.
- */
-STATIC int                                             /* error */
-xfs_da3_root_split(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *blk1,
-       struct xfs_da_state_blk *blk2)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_da_intnode   *oldroot;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr nodehdr;
-       struct xfs_da_args      *args;
-       struct xfs_buf          *bp;
-       struct xfs_inode        *dp;
-       struct xfs_trans        *tp;
-       struct xfs_mount        *mp;
-       struct xfs_dir2_leaf    *leaf;
-       xfs_dablk_t             blkno;
-       int                     level;
-       int                     error;
-       int                     size;
-
-       trace_xfs_da_root_split(state->args);
-
-       /*
-        * Copy the existing (incorrect) block from the root node position
-        * to a free space somewhere.
-        */
-       args = state->args;
-       error = xfs_da_grow_inode(args, &blkno);
-       if (error)
-               return error;
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = state->mp;
-       error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
-       if (error)
-               return error;
-       node = bp->b_addr;
-       oldroot = blk1->bp->b_addr;
-       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
-           oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
-               struct xfs_da3_icnode_hdr nodehdr;
-
-               dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
-               btree = dp->d_ops->node_tree_p(oldroot);
-               size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
-               level = nodehdr.level;
-
-               /*
-                * we are about to copy oldroot to bp, so set up the type
-                * of bp while we know exactly what it will be.
-                */
-               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
-       } else {
-               struct xfs_dir3_icleaf_hdr leafhdr;
-               struct xfs_dir2_leaf_entry *ents;
-
-               leaf = (xfs_dir2_leaf_t *)oldroot;
-               dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-               ents = dp->d_ops->leaf_ents_p(leaf);
-
-               ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
-                      leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
-               size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
-               level = 0;
-
-               /*
-                * we are about to copy oldroot to bp, so set up the type
-                * of bp while we know exactly what it will be.
-                */
-               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
-       }
-
-       /*
-        * we can copy most of the information in the node from one block to
-        * another, but for CRC enabled headers we have to make sure that the
-        * block specific identifiers are kept intact. We update the buffer
-        * directly for this.
-        */
-       memcpy(node, oldroot, size);
-       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
-           oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
-               struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
-
-               node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
-       }
-       xfs_trans_log_buf(tp, bp, 0, size - 1);
-
-       bp->b_ops = blk1->bp->b_ops;
-       xfs_trans_buf_copy_type(bp, blk1->bp);
-       blk1->bp = bp;
-       blk1->blkno = blkno;
-
-       /*
-        * Set up the new root node.
-        */
-       error = xfs_da3_node_create(args,
-               (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
-               level + 1, &bp, args->whichfork);
-       if (error)
-               return error;
-
-       node = bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       btree = dp->d_ops->node_tree_p(node);
-       btree[0].hashval = cpu_to_be32(blk1->hashval);
-       btree[0].before = cpu_to_be32(blk1->blkno);
-       btree[1].hashval = cpu_to_be32(blk2->hashval);
-       btree[1].before = cpu_to_be32(blk2->blkno);
-       nodehdr.count = 2;
-       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
-
-#ifdef DEBUG
-       if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-           oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
-               ASSERT(blk1->blkno >= args->geo->leafblk &&
-                      blk1->blkno < args->geo->freeblk);
-               ASSERT(blk2->blkno >= args->geo->leafblk &&
-                      blk2->blkno < args->geo->freeblk);
-       }
-#endif
-
-       /* Header is already logged by xfs_da_node_create */
-       xfs_trans_log_buf(tp, bp,
-               XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
-
-       return 0;
-}
-
-/*
- * Split the node, rebalance, then add the new entry.
- */
-STATIC int                                             /* error */
-xfs_da3_node_split(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *oldblk,
-       struct xfs_da_state_blk *newblk,
-       struct xfs_da_state_blk *addblk,
-       int                     treelevel,
-       int                     *result)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_da3_icnode_hdr nodehdr;
-       xfs_dablk_t             blkno;
-       int                     newcount;
-       int                     error;
-       int                     useextra;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_split(state->args);
-
-       node = oldblk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-
-       /*
-        * With V2 dirs the extra block is data or freespace.
-        */
-       useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
-       newcount = 1 + useextra;
-       /*
-        * Do we have to split the node?
-        */
-       if (nodehdr.count + newcount > state->args->geo->node_ents) {
-               /*
-                * Allocate a new node, add to the doubly linked chain of
-                * nodes, then move some of our excess entries into it.
-                */
-               error = xfs_da_grow_inode(state->args, &blkno);
-               if (error)
-                       return error;   /* GROT: dir is inconsistent */
-
-               error = xfs_da3_node_create(state->args, blkno, treelevel,
-                                          &newblk->bp, state->args->whichfork);
-               if (error)
-                       return error;   /* GROT: dir is inconsistent */
-               newblk->blkno = blkno;
-               newblk->magic = XFS_DA_NODE_MAGIC;
-               xfs_da3_node_rebalance(state, oldblk, newblk);
-               error = xfs_da3_blk_link(state, oldblk, newblk);
-               if (error)
-                       return error;
-               *result = 1;
-       } else {
-               *result = 0;
-       }
-
-       /*
-        * Insert the new entry(s) into the correct block
-        * (updating last hashval in the process).
-        *
-        * xfs_da3_node_add() inserts BEFORE the given index,
-        * and as a result of using node_lookup_int() we always
-        * point to a valid entry (not after one), but a split
-        * operation always results in a new block whose hashvals
-        * FOLLOW the current block.
-        *
-        * If we had a double-split op below us, then add the extra block too.
-        */
-       node = oldblk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       if (oldblk->index <= nodehdr.count) {
-               oldblk->index++;
-               xfs_da3_node_add(state, oldblk, addblk);
-               if (useextra) {
-                       if (state->extraafter)
-                               oldblk->index++;
-                       xfs_da3_node_add(state, oldblk, &state->extrablk);
-                       state->extravalid = 0;
-               }
-       } else {
-               newblk->index++;
-               xfs_da3_node_add(state, newblk, addblk);
-               if (useextra) {
-                       if (state->extraafter)
-                               newblk->index++;
-                       xfs_da3_node_add(state, newblk, &state->extrablk);
-                       state->extravalid = 0;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * Balance the btree elements between two intermediate nodes,
- * usually one full and one empty.
- *
- * NOTE: if blk2 is empty, then it will get the upper half of blk1.
- */
-STATIC void
-xfs_da3_node_rebalance(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *blk1,
-       struct xfs_da_state_blk *blk2)
-{
-       struct xfs_da_intnode   *node1;
-       struct xfs_da_intnode   *node2;
-       struct xfs_da_intnode   *tmpnode;
-       struct xfs_da_node_entry *btree1;
-       struct xfs_da_node_entry *btree2;
-       struct xfs_da_node_entry *btree_s;
-       struct xfs_da_node_entry *btree_d;
-       struct xfs_da3_icnode_hdr nodehdr1;
-       struct xfs_da3_icnode_hdr nodehdr2;
-       struct xfs_trans        *tp;
-       int                     count;
-       int                     tmp;
-       int                     swap = 0;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_rebalance(state->args);
-
-       node1 = blk1->bp->b_addr;
-       node2 = blk2->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
-       dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
-       btree1 = dp->d_ops->node_tree_p(node1);
-       btree2 = dp->d_ops->node_tree_p(node2);
-
-       /*
-        * Figure out how many entries need to move, and in which direction.
-        * Swap the nodes around if that makes it simpler.
-        */
-       if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
-           ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
-            (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
-                       be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
-               tmpnode = node1;
-               node1 = node2;
-               node2 = tmpnode;
-               dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
-               dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
-               btree1 = dp->d_ops->node_tree_p(node1);
-               btree2 = dp->d_ops->node_tree_p(node2);
-               swap = 1;
-       }
-
-       count = (nodehdr1.count - nodehdr2.count) / 2;
-       if (count == 0)
-               return;
-       tp = state->args->trans;
-       /*
-        * Two cases: high-to-low and low-to-high.
-        */
-       if (count > 0) {
-               /*
-                * Move elements in node2 up to make a hole.
-                */
-               tmp = nodehdr2.count;
-               if (tmp > 0) {
-                       tmp *= (uint)sizeof(xfs_da_node_entry_t);
-                       btree_s = &btree2[0];
-                       btree_d = &btree2[count];
-                       memmove(btree_d, btree_s, tmp);
-               }
-
-               /*
-                * Move the req'd B-tree elements from high in node1 to
-                * low in node2.
-                */
-               nodehdr2.count += count;
-               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
-               btree_s = &btree1[nodehdr1.count - count];
-               btree_d = &btree2[0];
-               memcpy(btree_d, btree_s, tmp);
-               nodehdr1.count -= count;
-       } else {
-               /*
-                * Move the req'd B-tree elements from low in node2 to
-                * high in node1.
-                */
-               count = -count;
-               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
-               btree_s = &btree2[0];
-               btree_d = &btree1[nodehdr1.count];
-               memcpy(btree_d, btree_s, tmp);
-               nodehdr1.count += count;
-
-               xfs_trans_log_buf(tp, blk1->bp,
-                       XFS_DA_LOGRANGE(node1, btree_d, tmp));
-
-               /*
-                * Move elements in node2 down to fill the hole.
-                */
-               tmp  = nodehdr2.count - count;
-               tmp *= (uint)sizeof(xfs_da_node_entry_t);
-               btree_s = &btree2[count];
-               btree_d = &btree2[0];
-               memmove(btree_d, btree_s, tmp);
-               nodehdr2.count -= count;
-       }
-
-       /*
-        * Log header of node 1 and all current bits of node 2.
-        */
-       dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
-       xfs_trans_log_buf(tp, blk1->bp,
-               XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
-
-       dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
-       xfs_trans_log_buf(tp, blk2->bp,
-               XFS_DA_LOGRANGE(node2, &node2->hdr,
-                               dp->d_ops->node_hdr_size +
-                               (sizeof(btree2[0]) * nodehdr2.count)));
-
-       /*
-        * Record the last hashval from each block for upward propagation.
-        * (note: don't use the swapped node pointers)
-        */
-       if (swap) {
-               node1 = blk1->bp->b_addr;
-               node2 = blk2->bp->b_addr;
-               dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
-               dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
-               btree1 = dp->d_ops->node_tree_p(node1);
-               btree2 = dp->d_ops->node_tree_p(node2);
-       }
-       blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
-       blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
-
-       /*
-        * Adjust the expected index for insertion.
-        */
-       if (blk1->index >= nodehdr1.count) {
-               blk2->index = blk1->index - nodehdr1.count;
-               blk1->index = nodehdr1.count + 1;       /* make it invalid */
-       }
-}
-
-/*
- * Add a new entry to an intermediate node.
- */
-STATIC void
-xfs_da3_node_add(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *oldblk,
-       struct xfs_da_state_blk *newblk)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_da3_icnode_hdr nodehdr;
-       struct xfs_da_node_entry *btree;
-       int                     tmp;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_add(state->args);
-
-       node = oldblk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       btree = dp->d_ops->node_tree_p(node);
-
-       ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
-       ASSERT(newblk->blkno != 0);
-       if (state->args->whichfork == XFS_DATA_FORK)
-               ASSERT(newblk->blkno >= state->args->geo->leafblk &&
-                      newblk->blkno < state->args->geo->freeblk);
-
-       /*
-        * We may need to make some room before we insert the new node.
-        */
-       tmp = 0;
-       if (oldblk->index < nodehdr.count) {
-               tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
-               memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
-       }
-       btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
-       btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
-       xfs_trans_log_buf(state->args->trans, oldblk->bp,
-               XFS_DA_LOGRANGE(node, &btree[oldblk->index],
-                               tmp + sizeof(*btree)));
-
-       nodehdr.count += 1;
-       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
-       xfs_trans_log_buf(state->args->trans, oldblk->bp,
-               XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
-
-       /*
-        * Copy the last hash value from the oldblk to propagate upwards.
-        */
-       oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
-}
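
The room-making step in xfs_da3_node_add() is a plain array insert: slide the tail of the entry array up one slot with memmove(), write the new (hashval, before) pair into the hole, then bump the count and log both ranges. A minimal user-space sketch of that pattern follows; the demo_* names and the fixed-size array are invented for illustration and are not libxfs types.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative sketch only; not a libxfs interface. */
struct demo_entry {
        uint32_t hashval;       /* stand-in for xfs_da_node_entry.hashval */
        uint32_t before;        /* stand-in for the child block number */
};

/* Insert @ent at @index in an array currently holding @count of @cap entries. */
static int demo_node_insert(struct demo_entry *btree, int *count, int cap,
                            int index, struct demo_entry ent)
{
        if (*count >= cap || index < 0 || index > *count)
                return -1;
        if (index < *count)             /* make a hole for the new entry */
                memmove(&btree[index + 1], &btree[index],
                        (*count - index) * sizeof(*btree));
        btree[index] = ent;
        (*count)++;
        return 0;
}

int main(void)
{
        struct demo_entry e[4] = { { 10, 1 }, { 30, 2 } };
        int count = 2;

        demo_node_insert(e, &count, 4, 1, (struct demo_entry){ 20, 3 });
        for (int i = 0; i < count; i++)
                printf("%u ", (unsigned)e[i].hashval);  /* 10 20 30 */
        printf("\n");
        return 0;
}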
-
-/*========================================================================
- * Routines used for shrinking the Btree.
- *========================================================================*/
-
-/*
- * Deallocate an empty leaf node, remove it from its parent,
- * possibly deallocating that block, etc...
- */
-int
-xfs_da3_join(
-       struct xfs_da_state     *state)
-{
-       struct xfs_da_state_blk *drop_blk;
-       struct xfs_da_state_blk *save_blk;
-       int                     action = 0;
-       int                     error;
-
-       trace_xfs_da_join(state->args);
-
-       drop_blk = &state->path.blk[ state->path.active-1 ];
-       save_blk = &state->altpath.blk[ state->path.active-1 ];
-       ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
-       ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
-              drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
-
-       /*
-        * Walk back up the tree joining/deallocating as necessary.
-        * When we stop dropping blocks, break out.
-        */
-       for (  ; state->path.active >= 2; drop_blk--, save_blk--,
-                state->path.active--) {
-               /*
-                * See if we can combine the block with a neighbor.
-                *   (action == 0) => no options, just leave
-                *   (action == 1) => coalesce, then unlink
-                *   (action == 2) => block empty, unlink it
-                */
-               switch (drop_blk->magic) {
-               case XFS_ATTR_LEAF_MAGIC:
-                       error = xfs_attr3_leaf_toosmall(state, &action);
-                       if (error)
-                               return error;
-                       if (action == 0)
-                               return 0;
-                       xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
-                       break;
-               case XFS_DIR2_LEAFN_MAGIC:
-                       error = xfs_dir2_leafn_toosmall(state, &action);
-                       if (error)
-                               return error;
-                       if (action == 0)
-                               return 0;
-                       xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
-                       break;
-               case XFS_DA_NODE_MAGIC:
-                       /*
-                        * Remove the offending node, fixup hashvals,
-                        * check for a toosmall neighbor.
-                        */
-                       xfs_da3_node_remove(state, drop_blk);
-                       xfs_da3_fixhashpath(state, &state->path);
-                       error = xfs_da3_node_toosmall(state, &action);
-                       if (error)
-                               return error;
-                       if (action == 0)
-                               return 0;
-                       xfs_da3_node_unbalance(state, drop_blk, save_blk);
-                       break;
-               }
-               xfs_da3_fixhashpath(state, &state->altpath);
-               error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
-               xfs_da_state_kill_altpath(state);
-               if (error)
-                       return error;
-               error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
-                                                        drop_blk->bp);
-               drop_blk->bp = NULL;
-               if (error)
-                       return error;
-       }
-       /*
-        * We joined all the way to the top.  If it turns out that
-        * we only have one entry in the root, make the child block
-        * the new root.
-        */
-       xfs_da3_node_remove(state, drop_blk);
-       xfs_da3_fixhashpath(state, &state->path);
-       error = xfs_da3_root_join(state, &state->path.blk[0]);
-       return error;
-}
-
-#ifdef DEBUG
-static void
-xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
-{
-       __be16  magic = blkinfo->magic;
-
-       if (level == 1) {
-               ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-                      magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
-                      magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
-                      magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
-       } else {
-               ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
-                      magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
-       }
-       ASSERT(!blkinfo->forw);
-       ASSERT(!blkinfo->back);
-}
-#else  /* !DEBUG */
-#define        xfs_da_blkinfo_onlychild_validate(blkinfo, level)
-#endif /* !DEBUG */
-
-/*
- * We have only one entry in the root.  Copy the only remaining child of
- * the old root to block 0 as the new root node.
- */
-STATIC int
-xfs_da3_root_join(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *root_blk)
-{
-       struct xfs_da_intnode   *oldroot;
-       struct xfs_da_args      *args;
-       xfs_dablk_t             child;
-       struct xfs_buf          *bp;
-       struct xfs_da3_icnode_hdr oldroothdr;
-       struct xfs_da_node_entry *btree;
-       int                     error;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_root_join(state->args);
-
-       ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
-
-       args = state->args;
-       oldroot = root_blk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
-       ASSERT(oldroothdr.forw == 0);
-       ASSERT(oldroothdr.back == 0);
-
-       /*
-        * If the root has more than one child, then don't do anything.
-        */
-       if (oldroothdr.count > 1)
-               return 0;
-
-       /*
-        * Read in the (only) child block, then copy those bytes into
-        * the root block's buffer and free the original child block.
-        */
-       btree = dp->d_ops->node_tree_p(oldroot);
-       child = be32_to_cpu(btree[0].before);
-       ASSERT(child != 0);
-       error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
-                                            args->whichfork);
-       if (error)
-               return error;
-       xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
-
-       /*
-        * This could be copying a leaf back into the root block in the case of
-        * there only being a single leaf block left in the tree. Hence we have
-        * to update the b_ops pointer as well to match the buffer type change
-        * that could occur. For dir3 blocks we also need to update the block
-        * number in the buffer header.
-        */
-       memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
-       root_blk->bp->b_ops = bp->b_ops;
-       xfs_trans_buf_copy_type(root_blk->bp, bp);
-       if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
-               struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
-               da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
-       }
-       xfs_trans_log_buf(args->trans, root_blk->bp, 0,
-                         args->geo->blksize - 1);
-       error = xfs_da_shrink_inode(args, child, bp);
-       return error;
-}
-
-/*
- * Check a node block and its neighbors to see if the block should be
- * collapsed into one or the other neighbor.  Always keep the block
- * with the smaller block number.
- * If the current block is over 50% full, don't try to join it (set *action to 0).
- * If the block is empty, fill in the state structure and set *action to 2.
- * If it can be collapsed, fill in the state structure and set *action to 1.
- * If nothing can be done, set *action to 0.  The return value is 0 or an error.
- */
-STATIC int
-xfs_da3_node_toosmall(
-       struct xfs_da_state     *state,
-       int                     *action)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_da_state_blk *blk;
-       struct xfs_da_blkinfo   *info;
-       xfs_dablk_t             blkno;
-       struct xfs_buf          *bp;
-       struct xfs_da3_icnode_hdr nodehdr;
-       int                     count;
-       int                     forward;
-       int                     error;
-       int                     retval;
-       int                     i;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_toosmall(state->args);
-
-       /*
-        * Check for the degenerate case of the block being over 50% full.
-        * If so, it's not worth even looking to see if we might be able
-        * to coalesce with a sibling.
-        */
-       blk = &state->path.blk[ state->path.active-1 ];
-       info = blk->bp->b_addr;
-       node = (xfs_da_intnode_t *)info;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
-               *action = 0;    /* blk over 50%, don't try to join */
-               return 0;
-       }
-
-       /*
-        * Check for the degenerate case of the block being empty.
-        * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (arbitrarily)
-        * to merge with the forward block unless it is NULL.
-        */
-       if (nodehdr.count == 0) {
-               /*
-                * Make altpath point to the block we want to keep and
-                * path point to the block we want to drop (this one).
-                */
-               forward = (info->forw != 0);
-               memcpy(&state->altpath, &state->path, sizeof(state->path));
-               error = xfs_da3_path_shift(state, &state->altpath, forward,
-                                                0, &retval);
-               if (error)
-                       return error;
-               if (retval) {
-                       *action = 0;
-               } else {
-                       *action = 2;
-               }
-               return 0;
-       }
-
-       /*
-        * Examine each sibling block to see if we can coalesce with
-        * at least 25% free space to spare.  We need to figure out
-        * whether to merge with the forward or the backward block.
-        * We prefer coalescing with the lower numbered sibling so as
-        * to shrink a directory over time.
-        */
-       count  = state->args->geo->node_ents;
-       count -= state->args->geo->node_ents >> 2;
-       count -= nodehdr.count;
-
-       /* start with smaller blk num */
-       forward = nodehdr.forw < nodehdr.back;
-       for (i = 0; i < 2; forward = !forward, i++) {
-               struct xfs_da3_icnode_hdr thdr;
-               if (forward)
-                       blkno = nodehdr.forw;
-               else
-                       blkno = nodehdr.back;
-               if (blkno == 0)
-                       continue;
-               error = xfs_da3_node_read(state->args->trans, dp,
-                                       blkno, -1, &bp, state->args->whichfork);
-               if (error)
-                       return error;
-
-               node = bp->b_addr;
-               dp->d_ops->node_hdr_from_disk(&thdr, node);
-               xfs_trans_brelse(state->args->trans, bp);
-
-               if (count - thdr.count >= 0)
-                       break;  /* fits with at least 25% to spare */
-       }
-       if (i >= 2) {
-               *action = 0;
-               return 0;
-       }
-
-       /*
-        * Make altpath point to the block we want to keep (the lower
-        * numbered block) and path point to the block we want to drop.
-        */
-       memcpy(&state->altpath, &state->path, sizeof(state->path));
-       if (blkno < blk->blkno) {
-               error = xfs_da3_path_shift(state, &state->altpath, forward,
-                                                0, &retval);
-       } else {
-               error = xfs_da3_path_shift(state, &state->path, forward,
-                                                0, &retval);
-       }
-       if (error)
-               return error;
-       if (retval) {
-               *action = 0;
-               return 0;
-       }
-       *action = 1;
-       return 0;
-}
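
The "25% to spare" test above reduces to arithmetic on entry counts: a sibling qualifies for a merge only if its entries fit in the space left after reserving a quarter of the block. A small stand-alone illustration, where node_ents is an arbitrary example capacity rather than a value read from the geometry:

#include <stdio.h>

/*
 * Illustrative sketch only.  A merge is worth doing when the combined
 * block still keeps at least a quarter of its slots free:
 *
 *      our_count + sib_count <= node_ents - node_ents / 4
 */
static int demo_can_merge(int node_ents, int our_count, int sib_count)
{
        int room = node_ents - (node_ents >> 2) - our_count;

        return room - sib_count >= 0;
}

int main(void)
{
        int node_ents = 64;     /* example capacity of one node block */

        printf("%d\n", demo_can_merge(node_ents, 20, 20));      /* 1: fits */
        printf("%d\n", demo_can_merge(node_ents, 30, 25));      /* 0: too full */
        return 0;
}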
-
-/*
- * Pick up the last hashvalue from an intermediate node.
- */
-STATIC uint
-xfs_da3_node_lasthash(
-       struct xfs_inode        *dp,
-       struct xfs_buf          *bp,
-       int                     *count)
-{
-       struct xfs_da_intnode    *node;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr nodehdr;
-
-       node = bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       if (count)
-               *count = nodehdr.count;
-       if (!nodehdr.count)
-               return 0;
-       btree = dp->d_ops->node_tree_p(node);
-       return be32_to_cpu(btree[nodehdr.count - 1].hashval);
-}
-
-/*
- * Walk back up the tree adjusting hash values as necessary,
- * when we stop making changes, return.
- */
-void
-xfs_da3_fixhashpath(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_path *path)
-{
-       struct xfs_da_state_blk *blk;
-       struct xfs_da_intnode   *node;
-       struct xfs_da_node_entry *btree;
-       xfs_dahash_t            lasthash=0;
-       int                     level;
-       int                     count;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_fixhashpath(state->args);
-
-       level = path->active-1;
-       blk = &path->blk[ level ];
-       switch (blk->magic) {
-       case XFS_ATTR_LEAF_MAGIC:
-               lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
-               if (count == 0)
-                       return;
-               break;
-       case XFS_DIR2_LEAFN_MAGIC:
-               lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
-               if (count == 0)
-                       return;
-               break;
-       case XFS_DA_NODE_MAGIC:
-               lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
-               if (count == 0)
-                       return;
-               break;
-       }
-       for (blk--, level--; level >= 0; blk--, level--) {
-               struct xfs_da3_icnode_hdr nodehdr;
-
-               node = blk->bp->b_addr;
-               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-               btree = dp->d_ops->node_tree_p(node);
-               if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
-                       break;
-               blk->hashval = lasthash;
-               btree[blk->index].hashval = cpu_to_be32(lasthash);
-               xfs_trans_log_buf(state->args->trans, blk->bp,
-                                 XFS_DA_LOGRANGE(node, &btree[blk->index],
-                                                 sizeof(*btree)));
-
-               lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
-       }
-}
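
The propagation loop only touches parents whose entry is stale: once a level already records the child's last hashval, nothing above it can have changed either. A toy, array-backed sketch of the same upward walk; the demo_* types are invented for this example and ignore buffers, logging and leaf-versus-node details.

#include <stdint.h>
#include <stdio.h>

#define DEMO_MAXENTS    8

/*
 * Illustrative sketch only.  One interior level of a toy path:
 * entries[index] is the slot we descended through, entries[count - 1]
 * is the block's last (largest) hashval.
 */
struct demo_level {
        uint32_t entries[DEMO_MAXENTS];
        int count;
        int index;
};

/* Push a changed last-hashval up the path, stopping at the first level
 * that already records the right value. */
static void demo_fixhashpath(struct demo_level *path, int active,
                             uint32_t lasthash)
{
        for (int level = active - 1; level >= 0; level--) {
                struct demo_level *lv = &path[level];

                if (lv->entries[lv->index] == lasthash)
                        break;
                lv->entries[lv->index] = lasthash;
                lasthash = lv->entries[lv->count - 1];
        }
}

int main(void)
{
        struct demo_level path[2] = {
                { { 40, 90 }, 2, 1 },           /* root */
                { { 50, 70, 85 }, 3, 2 },       /* child node */
        };

        demo_fixhashpath(path, 2, 95);          /* the leaf's last hash grew */
        printf("child %u root %u\n", (unsigned)path[1].entries[2],
               (unsigned)path[0].entries[1]);   /* child 95 root 95 */
        return 0;
}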
-
-/*
- * Remove an entry from an intermediate node.
- */
-STATIC void
-xfs_da3_node_remove(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *drop_blk)
-{
-       struct xfs_da_intnode   *node;
-       struct xfs_da3_icnode_hdr nodehdr;
-       struct xfs_da_node_entry *btree;
-       int                     index;
-       int                     tmp;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_remove(state->args);
-
-       node = drop_blk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-       ASSERT(drop_blk->index < nodehdr.count);
-       ASSERT(drop_blk->index >= 0);
-
-       /*
-        * Copy over the offending entry, or just zero it out.
-        */
-       index = drop_blk->index;
-       btree = dp->d_ops->node_tree_p(node);
-       if (index < nodehdr.count - 1) {
-               tmp  = nodehdr.count - index - 1;
-               tmp *= (uint)sizeof(xfs_da_node_entry_t);
-               memmove(&btree[index], &btree[index + 1], tmp);
-               xfs_trans_log_buf(state->args->trans, drop_blk->bp,
-                   XFS_DA_LOGRANGE(node, &btree[index], tmp));
-               index = nodehdr.count - 1;
-       }
-       memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
-       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
-           XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
-       nodehdr.count -= 1;
-       dp->d_ops->node_hdr_to_disk(node, &nodehdr);
-       xfs_trans_log_buf(state->args->trans, drop_blk->bp,
-           XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
-
-       /*
-        * Copy the last hash value from the block to propagate upwards.
-        */
-       drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
-}
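
Entry removal is the mirror image of the insert in xfs_da3_node_add(): slide the tail down over the victim, clear the now-unused last slot, and decrement the count. A stand-alone sketch of just that array manipulation (demo_* names are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative sketch only: remove entry @index from an array of @count
 * entries by sliding the tail down and clearing the freed last slot. */
static void demo_node_remove(uint32_t *btree, int *count, int index)
{
        int last = *count - 1;

        if (index < last)
                memmove(&btree[index], &btree[index + 1],
                        (last - index) * sizeof(*btree));
        btree[last] = 0;
        (*count)--;
}

int main(void)
{
        uint32_t e[4] = { 10, 20, 30, 40 };
        int count = 4;

        demo_node_remove(e, &count, 1);
        for (int i = 0; i < count; i++)
                printf("%u ", (unsigned)e[i]);  /* 10 30 40 */
        printf("\n");
        return 0;
}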
-
-/*
- * Unbalance the elements between two intermediate nodes:
- * move all Btree elements from one node into the other.
- */
-STATIC void
-xfs_da3_node_unbalance(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *drop_blk,
-       struct xfs_da_state_blk *save_blk)
-{
-       struct xfs_da_intnode   *drop_node;
-       struct xfs_da_intnode   *save_node;
-       struct xfs_da_node_entry *drop_btree;
-       struct xfs_da_node_entry *save_btree;
-       struct xfs_da3_icnode_hdr drop_hdr;
-       struct xfs_da3_icnode_hdr save_hdr;
-       struct xfs_trans        *tp;
-       int                     sindex;
-       int                     tmp;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_node_unbalance(state->args);
-
-       drop_node = drop_blk->bp->b_addr;
-       save_node = save_blk->bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
-       dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
-       drop_btree = dp->d_ops->node_tree_p(drop_node);
-       save_btree = dp->d_ops->node_tree_p(save_node);
-       tp = state->args->trans;
-
-       /*
-        * If the dying block has lower hashvals, then move all the
-        * elements in the remaining block up to make a hole.
-        */
-       if ((be32_to_cpu(drop_btree[0].hashval) <
-                       be32_to_cpu(save_btree[0].hashval)) ||
-           (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
-                       be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
-               /* XXX: check this - is memmove dst correct? */
-               tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
-               memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);
-
-               sindex = 0;
-               xfs_trans_log_buf(tp, save_blk->bp,
-                       XFS_DA_LOGRANGE(save_node, &save_btree[0],
-                               (save_hdr.count + drop_hdr.count) *
-                                               sizeof(xfs_da_node_entry_t)));
-       } else {
-               sindex = save_hdr.count;
-               xfs_trans_log_buf(tp, save_blk->bp,
-                       XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
-                               drop_hdr.count * sizeof(xfs_da_node_entry_t)));
-       }
-
-       /*
-        * Move all the B-tree elements from drop_blk to save_blk.
-        */
-       tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
-       memcpy(&save_btree[sindex], &drop_btree[0], tmp);
-       save_hdr.count += drop_hdr.count;
-
-       dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
-       xfs_trans_log_buf(tp, save_blk->bp,
-               XFS_DA_LOGRANGE(save_node, &save_node->hdr,
-                               dp->d_ops->node_hdr_size));
-
-       /*
-        * Save the last hashval in the remaining block for upward propagation.
-        */
-       save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
-}
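
Unbalancing comes down to one whole-array move: if the dying block sorts below the survivor, shift the survivor's entries up to make room at the front, otherwise append at the end. The sketch below captures only that ordering decision, using bare uint32_t hashvals in place of node entries (demo_* names are invented for the example):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_CAP        8

/* Illustrative sketch only: move every entry from @drop into @save,
 * keeping hash order: prepend if the dying block sorts lower,
 * otherwise append. */
static void demo_unbalance(uint32_t *save, int *save_count,
                           const uint32_t *drop, int drop_count)
{
        int sindex;

        if (drop_count && (*save_count == 0 || drop[0] < save[0])) {
                /* make a hole at the front of the surviving block */
                memmove(&save[drop_count], &save[0],
                        *save_count * sizeof(*save));
                sindex = 0;
        } else {
                sindex = *save_count;
        }
        memcpy(&save[sindex], drop, drop_count * sizeof(*drop));
        *save_count += drop_count;
}

int main(void)
{
        uint32_t save[DEMO_CAP] = { 50, 60 };
        uint32_t drop[DEMO_CAP] = { 10, 20 };
        int save_count = 2;

        demo_unbalance(save, &save_count, drop, 2);
        for (int i = 0; i < save_count; i++)
                printf("%u ", (unsigned)save[i]);       /* 10 20 50 60 */
        printf("\n");
        return 0;
}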
-
-/*========================================================================
- * Routines used for finding things in the Btree.
- *========================================================================*/
-
-/*
- * Walk down the Btree looking for a particular filename, filling
- * in the state structure as we go.
- *
- * We will set the state structure to point to each of the elements
- * in each of the nodes where either the hashval is or should be.
- *
- * We support duplicate hashvals, so for each entry in the current
- * node that could contain the desired hashval, descend.  This is a
- * pruned depth-first tree search.
- */
-int                                                    /* error */
-xfs_da3_node_lookup_int(
-       struct xfs_da_state     *state,
-       int                     *result)
-{
-       struct xfs_da_state_blk *blk;
-       struct xfs_da_blkinfo   *curr;
-       struct xfs_da_intnode   *node;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr nodehdr;
-       struct xfs_da_args      *args;
-       xfs_dablk_t             blkno;
-       xfs_dahash_t            hashval;
-       xfs_dahash_t            btreehashval;
-       int                     probe;
-       int                     span;
-       int                     max;
-       int                     error;
-       int                     retval;
-       struct xfs_inode        *dp = state->args->dp;
-
-       args = state->args;
-
-       /*
-        * Descend thru the B-tree searching each level for the right
-        * node to use, until the right hashval is found.
-        */
-       blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
-       for (blk = &state->path.blk[0], state->path.active = 1;
-                        state->path.active <= XFS_DA_NODE_MAXDEPTH;
-                        blk++, state->path.active++) {
-               /*
-                * Read the next node down in the tree.
-                */
-               blk->blkno = blkno;
-               error = xfs_da3_node_read(args->trans, args->dp, blkno,
-                                       -1, &blk->bp, args->whichfork);
-               if (error) {
-                       blk->blkno = 0;
-                       state->path.active--;
-                       return error;
-               }
-               curr = blk->bp->b_addr;
-               blk->magic = be16_to_cpu(curr->magic);
-
-               if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
-                   blk->magic == XFS_ATTR3_LEAF_MAGIC) {
-                       blk->magic = XFS_ATTR_LEAF_MAGIC;
-                       blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
-                       break;
-               }
-
-               if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
-                   blk->magic == XFS_DIR3_LEAFN_MAGIC) {
-                       blk->magic = XFS_DIR2_LEAFN_MAGIC;
-                       blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-                                                              blk->bp, NULL);
-                       break;
-               }
-
-               blk->magic = XFS_DA_NODE_MAGIC;
-
-               /*
-                * Search an intermediate node for a match.
-                */
-               node = blk->bp->b_addr;
-               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-               btree = dp->d_ops->node_tree_p(node);
-
-               max = nodehdr.count;
-               blk->hashval = be32_to_cpu(btree[max - 1].hashval);
-
-               /*
-                * Binary search.  (note: small blocks will skip loop)
-                */
-               probe = span = max / 2;
-               hashval = args->hashval;
-               while (span > 4) {
-                       span /= 2;
-                       btreehashval = be32_to_cpu(btree[probe].hashval);
-                       if (btreehashval < hashval)
-                               probe += span;
-                       else if (btreehashval > hashval)
-                               probe -= span;
-                       else
-                               break;
-               }
-               ASSERT((probe >= 0) && (probe < max));
-               ASSERT((span <= 4) ||
-                       (be32_to_cpu(btree[probe].hashval) == hashval));
-
-               /*
-                * Since we may have duplicate hashvals, find the first
-                * matching hashval in the node.
-                */
-               while (probe > 0 &&
-                      be32_to_cpu(btree[probe].hashval) >= hashval) {
-                       probe--;
-               }
-               while (probe < max &&
-                      be32_to_cpu(btree[probe].hashval) < hashval) {
-                       probe++;
-               }
-
-               /*
-                * Pick the right block to descend on.
-                */
-               if (probe == max) {
-                       blk->index = max - 1;
-                       blkno = be32_to_cpu(btree[max - 1].before);
-               } else {
-                       blk->index = probe;
-                       blkno = be32_to_cpu(btree[probe].before);
-               }
-       }
-
-       /*
-        * A leaf block that ends in the hashval that we are interested in
-        * (final hashval == search hashval) means that the next block may
-        * contain more entries with the same hashval; shift forward to the
-        * next leaf and keep searching.
-        */
-       for (;;) {
-               if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
-                       retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
-                                                       &blk->index, state);
-               } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
-                       retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
-                       blk->index = args->index;
-                       args->blkno = blk->blkno;
-               } else {
-                       ASSERT(0);
-                       return EFSCORRUPTED;
-               }
-               if (((retval == ENOENT) || (retval == ENOATTR)) &&
-                   (blk->hashval == args->hashval)) {
-                       error = xfs_da3_path_shift(state, &state->path, 1, 1,
-                                                        &retval);
-                       if (error)
-                               return error;
-                       if (retval == 0) {
-                               continue;
-                       } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
-                               /* path_shift() gives ENOENT */
-                               retval = ENOATTR;
-                       }
-               }
-               break;
-       }
-       *result = retval;
-       return 0;
-}
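
The per-node descent step is a coarse binary search that narrows the span to a handful of entries, then scans linearly to the first entry whose hashval is greater than or equal to the search value, so duplicate hashvals are always entered at their first occurrence. A user-space sketch of just that probe; demo_pick_child() is invented for illustration and works on a bare sorted array:

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch only: return the index of the first entry whose
 * hashval is >= @want, or n - 1 if every entry is smaller (descend
 * through the last child, as the kernel code does). */
static int demo_pick_child(const uint32_t *hashvals, int n, uint32_t want)
{
        int probe = n / 2;
        int span = n / 2;

        /* Coarse binary search, same shape as the kernel loop. */
        while (span > 4) {
                span /= 2;
                if (hashvals[probe] < want)
                        probe += span;
                else if (hashvals[probe] > want)
                        probe -= span;
                else
                        break;
        }
        /* Back up to the first duplicate, then forward past smaller keys. */
        while (probe > 0 && hashvals[probe] >= want)
                probe--;
        while (probe < n && hashvals[probe] < want)
                probe++;
        return probe == n ? n - 1 : probe;
}

int main(void)
{
        uint32_t h[] = { 10, 20, 20, 20, 35, 50, 80, 90 };

        printf("descend at index %d\n", demo_pick_child(h, 8, 20));     /* 1 */
        return 0;
}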
-
-/*========================================================================
- * Utility routines.
- *========================================================================*/
-
-/*
- * Compare two intermediate nodes for "order".
- */
-STATIC int
-xfs_da3_node_order(
-       struct xfs_inode *dp,
-       struct xfs_buf  *node1_bp,
-       struct xfs_buf  *node2_bp)
-{
-       struct xfs_da_intnode   *node1;
-       struct xfs_da_intnode   *node2;
-       struct xfs_da_node_entry *btree1;
-       struct xfs_da_node_entry *btree2;
-       struct xfs_da3_icnode_hdr node1hdr;
-       struct xfs_da3_icnode_hdr node2hdr;
-
-       node1 = node1_bp->b_addr;
-       node2 = node2_bp->b_addr;
-       dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
-       dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
-       btree1 = dp->d_ops->node_tree_p(node1);
-       btree2 = dp->d_ops->node_tree_p(node2);
-
-       if (node1hdr.count > 0 && node2hdr.count > 0 &&
-           ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
-            (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
-             be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
-               return 1;
-       }
-       return 0;
-}
-
-/*
- * Link a new block into a doubly linked list of blocks (of whatever type).
- */
-int                                                    /* error */
-xfs_da3_blk_link(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *old_blk,
-       struct xfs_da_state_blk *new_blk)
-{
-       struct xfs_da_blkinfo   *old_info;
-       struct xfs_da_blkinfo   *new_info;
-       struct xfs_da_blkinfo   *tmp_info;
-       struct xfs_da_args      *args;
-       struct xfs_buf          *bp;
-       int                     before = 0;
-       int                     error;
-       struct xfs_inode        *dp = state->args->dp;
-
-       /*
-        * Set up environment.
-        */
-       args = state->args;
-       ASSERT(args != NULL);
-       old_info = old_blk->bp->b_addr;
-       new_info = new_blk->bp->b_addr;
-       ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
-              old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
-              old_blk->magic == XFS_ATTR_LEAF_MAGIC);
-
-       switch (old_blk->magic) {
-       case XFS_ATTR_LEAF_MAGIC:
-               before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
-               break;
-       case XFS_DIR2_LEAFN_MAGIC:
-               before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
-               break;
-       case XFS_DA_NODE_MAGIC:
-               before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
-               break;
-       }
-
-       /*
-        * Link blocks in appropriate order.
-        */
-       if (before) {
-               /*
-                * Link new block in before existing block.
-                */
-               trace_xfs_da_link_before(args);
-               new_info->forw = cpu_to_be32(old_blk->blkno);
-               new_info->back = old_info->back;
-               if (old_info->back) {
-                       error = xfs_da3_node_read(args->trans, dp,
-                                               be32_to_cpu(old_info->back),
-                                               -1, &bp, args->whichfork);
-                       if (error)
-                               return error;
-                       ASSERT(bp != NULL);
-                       tmp_info = bp->b_addr;
-                       ASSERT(tmp_info->magic == old_info->magic);
-                       ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
-                       tmp_info->forw = cpu_to_be32(new_blk->blkno);
-                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-               }
-               old_info->back = cpu_to_be32(new_blk->blkno);
-       } else {
-               /*
-                * Link new block in after existing block.
-                */
-               trace_xfs_da_link_after(args);
-               new_info->forw = old_info->forw;
-               new_info->back = cpu_to_be32(old_blk->blkno);
-               if (old_info->forw) {
-                       error = xfs_da3_node_read(args->trans, dp,
-                                               be32_to_cpu(old_info->forw),
-                                               -1, &bp, args->whichfork);
-                       if (error)
-                               return error;
-                       ASSERT(bp != NULL);
-                       tmp_info = bp->b_addr;
-                       ASSERT(tmp_info->magic == old_info->magic);
-                       ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
-                       tmp_info->back = cpu_to_be32(new_blk->blkno);
-                       xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
-               }
-               old_info->forw = cpu_to_be32(new_blk->blkno);
-       }
-
-       xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
-       xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
-       return 0;
-}
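
On disk the sibling pointers form an ordinary doubly linked list, and the only decision is whether the new block sorts before or after the existing one. An in-memory toy of the same insertion, where struct demo_blk and demo_blk_link() are invented and pointers stand in for the forw/back block numbers:

#include <stddef.h>
#include <stdio.h>

/* Illustrative sketch only: pointers stand in for the on-disk forw/back
 * block numbers, and maxhash for the block's last hashval. */
struct demo_blk {
        struct demo_blk *forw;
        struct demo_blk *back;
        unsigned int maxhash;
};

/* Link @new_blk into the sibling list before or after @old_blk,
 * depending on hash order, as xfs_da3_blk_link() decides via the
 * *_order() helpers. */
static void demo_blk_link(struct demo_blk *old_blk, struct demo_blk *new_blk)
{
        int before = new_blk->maxhash < old_blk->maxhash;

        if (before) {
                new_blk->forw = old_blk;
                new_blk->back = old_blk->back;
                if (old_blk->back)
                        old_blk->back->forw = new_blk;
                old_blk->back = new_blk;
        } else {
                new_blk->back = old_blk;
                new_blk->forw = old_blk->forw;
                if (old_blk->forw)
                        old_blk->forw->back = new_blk;
                old_blk->forw = new_blk;
        }
}

int main(void)
{
        struct demo_blk a = { NULL, NULL, 100 };
        struct demo_blk b = { NULL, NULL, 50 };

        demo_blk_link(&a, &b);          /* b sorts lower, so it goes before a */
        printf("a.back == &b: %d\n", a.back == &b);     /* 1 */
        return 0;
}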
-
-/*
- * Unlink a block from a doubly linked list of blocks.
- */
-STATIC int                                             /* error */
-xfs_da3_blk_unlink(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_blk *drop_blk,
-       struct xfs_da_state_blk *save_blk)
-{
-       struct xfs_da_blkinfo   *drop_info;
-       struct xfs_da_blkinfo   *save_info;
-       struct xfs_da_blkinfo   *tmp_info;
-       struct xfs_da_args      *args;
-       struct xfs_buf          *bp;
-       int                     error;
-
-       /*
-        * Set up environment.
-        */
-       args = state->args;
-       ASSERT(args != NULL);
-       save_info = save_blk->bp->b_addr;
-       drop_info = drop_blk->bp->b_addr;
-       ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
-              save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
-              save_blk->magic == XFS_ATTR_LEAF_MAGIC);
-       ASSERT(save_blk->magic == drop_blk->magic);
-       ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
-              (be32_to_cpu(save_info->back) == drop_blk->blkno));
-       ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
-              (be32_to_cpu(drop_info->back) == save_blk->blkno));
-
-       /*
-        * Unlink the leaf block from the doubly linked chain of leaves.
-        */
-       if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
-               trace_xfs_da_unlink_back(args);
-               save_info->back = drop_info->back;
-               if (drop_info->back) {
-                       error = xfs_da3_node_read(args->trans, args->dp,
-                                               be32_to_cpu(drop_info->back),
-                                               -1, &bp, args->whichfork);
-                       if (error)
-                               return error;
-                       ASSERT(bp != NULL);
-                       tmp_info = bp->b_addr;
-                       ASSERT(tmp_info->magic == save_info->magic);
-                       ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
-                       tmp_info->forw = cpu_to_be32(save_blk->blkno);
-                       xfs_trans_log_buf(args->trans, bp, 0,
-                                                   sizeof(*tmp_info) - 1);
-               }
-       } else {
-               trace_xfs_da_unlink_forward(args);
-               save_info->forw = drop_info->forw;
-               if (drop_info->forw) {
-                       error = xfs_da3_node_read(args->trans, args->dp,
-                                               be32_to_cpu(drop_info->forw),
-                                               -1, &bp, args->whichfork);
-                       if (error)
-                               return error;
-                       ASSERT(bp != NULL);
-                       tmp_info = bp->b_addr;
-                       ASSERT(tmp_info->magic == save_info->magic);
-                       ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
-                       tmp_info->back = cpu_to_be32(save_blk->blkno);
-                       xfs_trans_log_buf(args->trans, bp, 0,
-                                                   sizeof(*tmp_info) - 1);
-               }
-       }
-
-       xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
-       return 0;
-}
-
-/*
- * Move a path "forward" or "!forward" one block at the current level.
- *
- * This routine will adjust a "path" to point to the next block
- * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
- * Btree, including updating pointers to the intermediate nodes between
- * the new bottom and the root.
- */
-int                                                    /* error */
-xfs_da3_path_shift(
-       struct xfs_da_state     *state,
-       struct xfs_da_state_path *path,
-       int                     forward,
-       int                     release,
-       int                     *result)
-{
-       struct xfs_da_state_blk *blk;
-       struct xfs_da_blkinfo   *info;
-       struct xfs_da_intnode   *node;
-       struct xfs_da_args      *args;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr nodehdr;
-       xfs_dablk_t             blkno = 0;
-       int                     level;
-       int                     error;
-       struct xfs_inode        *dp = state->args->dp;
-
-       trace_xfs_da_path_shift(state->args);
-
-       /*
-        * Roll up the Btree looking for the first block where our
-        * current index is not at the edge of the block.  Note that
-        * we skip the bottom layer because we want the sibling block.
-        */
-       args = state->args;
-       ASSERT(args != NULL);
-       ASSERT(path != NULL);
-       ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
-       level = (path->active-1) - 1;   /* skip bottom layer in path */
-       for (blk = &path->blk[level]; level >= 0; blk--, level--) {
-               node = blk->bp->b_addr;
-               dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-               btree = dp->d_ops->node_tree_p(node);
-
-               if (forward && (blk->index < nodehdr.count - 1)) {
-                       blk->index++;
-                       blkno = be32_to_cpu(btree[blk->index].before);
-                       break;
-               } else if (!forward && (blk->index > 0)) {
-                       blk->index--;
-                       blkno = be32_to_cpu(btree[blk->index].before);
-                       break;
-               }
-       }
-       if (level < 0) {
-               *result = ENOENT;       /* we're out of our tree */
-               ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-               return 0;
-       }
-
-       /*
-        * Roll down the edge of the subtree until we reach the
-        * same depth we were at originally.
-        */
-       for (blk++, level++; level < path->active; blk++, level++) {
-               /*
-                * Release the old block.
-                * (if it's dirty, trans won't actually let go)
-                */
-               if (release)
-                       xfs_trans_brelse(args->trans, blk->bp);
-
-               /*
-                * Read the next child block.
-                */
-               blk->blkno = blkno;
-               error = xfs_da3_node_read(args->trans, dp, blkno, -1,
-                                       &blk->bp, args->whichfork);
-               if (error)
-                       return error;
-               info = blk->bp->b_addr;
-               ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
-                      info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
-                      info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-                      info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
-                      info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
-                      info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
-
-               /*
-                * Note: we flatten the magic number to a single type so we
-                * don't have to compare against crc/non-crc types elsewhere.
-                */
-               switch (be16_to_cpu(info->magic)) {
-               case XFS_DA_NODE_MAGIC:
-               case XFS_DA3_NODE_MAGIC:
-                       blk->magic = XFS_DA_NODE_MAGIC;
-                       node = (xfs_da_intnode_t *)info;
-                       dp->d_ops->node_hdr_from_disk(&nodehdr, node);
-                       btree = dp->d_ops->node_tree_p(node);
-                       blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
-                       if (forward)
-                               blk->index = 0;
-                       else
-                               blk->index = nodehdr.count - 1;
-                       blkno = be32_to_cpu(btree[blk->index].before);
-                       break;
-               case XFS_ATTR_LEAF_MAGIC:
-               case XFS_ATTR3_LEAF_MAGIC:
-                       blk->magic = XFS_ATTR_LEAF_MAGIC;
-                       ASSERT(level == path->active-1);
-                       blk->index = 0;
-                       blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
-                       break;
-               case XFS_DIR2_LEAFN_MAGIC:
-               case XFS_DIR3_LEAFN_MAGIC:
-                       blk->magic = XFS_DIR2_LEAFN_MAGIC;
-                       ASSERT(level == path->active-1);
-                       blk->index = 0;
-                       blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-                                                              blk->bp, NULL);
-                       break;
-               default:
-                       ASSERT(0);
-                       break;
-               }
-       }
-       *result = 0;
-       return 0;
-}
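
Conceptually the shift is: walk up the path until some level's index can move one step in the requested direction, bump it, then walk back down resetting every lower level to the near edge of its new block. A small array-based sketch of that control flow; the demo_* names are invented, and the real code also re-reads and re-types each block on the way down:

#include <stdio.h>

#define DEMO_MAXDEPTH   5

/* Illustrative sketch only: a path is just per-level entry counts and
 * the index we descended through at each level. */
struct demo_path {
        int active;
        int count[DEMO_MAXDEPTH];
        int index[DEMO_MAXDEPTH];
};

/* Shift the path one block sideways.  Returns 0 on success, -1 if we
 * ran off the edge of the tree (the kernel reports that as ENOENT via
 * *result). */
static int demo_path_shift(struct demo_path *p, int forward)
{
        int level;

        /* Walk up until some level's index is not at the edge. */
        for (level = p->active - 2; level >= 0; level--) {
                if (forward && p->index[level] < p->count[level] - 1) {
                        p->index[level]++;
                        break;
                }
                if (!forward && p->index[level] > 0) {
                        p->index[level]--;
                        break;
                }
        }
        if (level < 0)
                return -1;

        /* Walk back down the near edge of the new subtree. */
        for (level++; level < p->active; level++)
                p->index[level] = forward ? 0 : p->count[level] - 1;
        return 0;
}

int main(void)
{
        struct demo_path p = { 3, { 4, 4, 4 }, { 1, 3, 3 } };

        demo_path_shift(&p, 1);         /* shift forward */
        printf("%d %d %d\n", p.index[0], p.index[1], p.index[2]);       /* 2 0 0 */
        return 0;
}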
-
-
-/*========================================================================
- * Utility routines.
- *========================================================================*/
-
-/*
- * Implement a simple hash on a character string.
- * Rotate the hash value by 7 bits, then XOR each character in.
- * This is implemented with some source-level loop unrolling.
- */
-xfs_dahash_t
-xfs_da_hashname(const __uint8_t *name, int namelen)
-{
-       xfs_dahash_t hash;
-
-       /*
-        * Do four characters at a time as long as we can.
-        */
-       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
-               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
-                      (name[3] << 0) ^ rol32(hash, 7 * 4);
-
-       /*
-        * Now do the rest of the characters.
-        */
-       switch (namelen) {
-       case 3:
-               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
-                      rol32(hash, 7 * 3);
-       case 2:
-               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
-       case 1:
-               return (name[0] << 0) ^ rol32(hash, 7 * 1);
-       default: /* case 0: */
-               return hash;
-       }
-}
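
The same hash is easy to reproduce in user space, for example to predict which leaf block a name should land in. A stand-alone sketch follows, assuming only the standard C headers; demo_rol32() is a local helper mirroring the kernel's rol32() macro and is not a libxfs interface:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local stand-in for the kernel's rol32(). */
static uint32_t demo_rol32(uint32_t word, unsigned int shift)
{
        return (word << shift) | (word >> (32 - shift));
}

/* Same rotate-and-XOR scheme as above, four bytes per iteration. */
static uint32_t demo_da_hashname(const uint8_t *name, int namelen)
{
        uint32_t hash;

        for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
                hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
                       name[3] ^ demo_rol32(hash, 7 * 4);

        switch (namelen) {
        case 3:
                return (name[0] << 14) ^ (name[1] << 7) ^ name[2] ^
                       demo_rol32(hash, 7 * 3);
        case 2:
                return (name[0] << 7) ^ name[1] ^ demo_rol32(hash, 7 * 2);
        case 1:
                return name[0] ^ demo_rol32(hash, 7);
        default:
                return hash;
        }
}

int main(void)
{
        const char *name = "lost+found";

        printf("hash(%s) = 0x%08x\n", name,
               (unsigned)demo_da_hashname((const uint8_t *)name,
                                          (int)strlen(name)));
        return 0;
}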
-
-enum xfs_dacmp
-xfs_da_compname(
-       struct xfs_da_args *args,
-       const unsigned char *name,
-       int             len)
-{
-       return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
-                                       XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
-}
-
-static xfs_dahash_t
-xfs_default_hashname(
-       struct xfs_name *name)
-{
-       return xfs_da_hashname(name->name, name->len);
-}
-
-const struct xfs_nameops xfs_default_nameops = {
-       .hashname       = xfs_default_hashname,
-       .compname       = xfs_da_compname
-};
-
-int
-xfs_da_grow_inode_int(
-       struct xfs_da_args      *args,
-       xfs_fileoff_t           *bno,
-       int                     count)
-{
-       struct xfs_trans        *tp = args->trans;
-       struct xfs_inode        *dp = args->dp;
-       int                     w = args->whichfork;
-       xfs_drfsbno_t           nblks = dp->i_d.di_nblocks;
-       struct xfs_bmbt_irec    map, *mapp;
-       int                     nmap, error, got, i, mapi;
-
-       /*
-        * Find a spot in the file space to put the new block.
-        */
-       error = xfs_bmap_first_unused(tp, dp, count, bno, w);
-       if (error)
-               return error;
-
-       /*
-        * Try mapping it in one filesystem block.
-        */
-       nmap = 1;
-       ASSERT(args->firstblock != NULL);
-       error = xfs_bmapi_write(tp, dp, *bno, count,
-                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
-                       args->firstblock, args->total, &map, &nmap,
-                       args->flist);
-       if (error)
-               return error;
-
-       ASSERT(nmap <= 1);
-       if (nmap == 1) {
-               mapp = &map;
-               mapi = 1;
-       } else if (nmap == 0 && count > 1) {
-               xfs_fileoff_t           b;
-               int                     c;
-
-               /*
-                * If we didn't get it and the block might work if fragmented,
-                * try without the CONTIG flag.  Loop until we get it all.
-                */
-               mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
-               for (b = *bno, mapi = 0; b < *bno + count; ) {
-                       nmap = MIN(XFS_BMAP_MAX_NMAP, count);
-                       c = (int)(*bno + count - b);
-                       error = xfs_bmapi_write(tp, dp, b, c,
-                                       xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
-                                       args->firstblock, args->total,
-                                       &mapp[mapi], &nmap, args->flist);
-                       if (error)
-                               goto out_free_map;
-                       if (nmap < 1)
-                               break;
-                       mapi += nmap;
-                       b = mapp[mapi - 1].br_startoff +
-                           mapp[mapi - 1].br_blockcount;
-               }
-       } else {
-               mapi = 0;
-               mapp = NULL;
-       }
-
-       /*
-        * Count the blocks we got, make sure it matches the total.
-        */
-       for (i = 0, got = 0; i < mapi; i++)
-               got += mapp[i].br_blockcount;
-       if (got != count || mapp[0].br_startoff != *bno ||
-           mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
-           *bno + count) {
-               error = ENOSPC;
-               goto out_free_map;
-       }
-
-       /* account for newly allocated blocks in reserved blocks total */
-       args->total -= dp->i_d.di_nblocks - nblks;
-
-out_free_map:
-       if (mapp != &map)
-               kmem_free(mapp);
-       return error;
-}
-
-/*
- * Add a block to the btree ahead of the file.
- * Return the new block number to the caller.
- */
-int
-xfs_da_grow_inode(
-       struct xfs_da_args      *args,
-       xfs_dablk_t             *new_blkno)
-{
-       xfs_fileoff_t           bno;
-       int                     error;
-
-       trace_xfs_da_grow_inode(args);
-
-       bno = args->geo->leafblk;
-       error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
-       if (!error)
-               *new_blkno = (xfs_dablk_t)bno;
-       return error;
-}
-
-/*
- * Ick.  We need to always be able to remove a btree block, even
- * if there's no space reservation because the filesystem is full.
- * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
- * It swaps the target block with the last block in the file.  The
- * last block in the file can always be removed, since removing it
- * cannot cause a bmap btree split.
- */
-STATIC int
-xfs_da3_swap_lastblock(
-       struct xfs_da_args      *args,
-       xfs_dablk_t             *dead_blknop,
-       struct xfs_buf          **dead_bufp)
-{
-       struct xfs_da_blkinfo   *dead_info;
-       struct xfs_da_blkinfo   *sib_info;
-       struct xfs_da_intnode   *par_node;
-       struct xfs_da_intnode   *dead_node;
-       struct xfs_dir2_leaf    *dead_leaf2;
-       struct xfs_da_node_entry *btree;
-       struct xfs_da3_icnode_hdr par_hdr;
-       struct xfs_inode        *dp;
-       struct xfs_trans        *tp;
-       struct xfs_mount        *mp;
-       struct xfs_buf          *dead_buf;
-       struct xfs_buf          *last_buf;
-       struct xfs_buf          *sib_buf;
-       struct xfs_buf          *par_buf;
-       xfs_dahash_t            dead_hash;
-       xfs_fileoff_t           lastoff;
-       xfs_dablk_t             dead_blkno;
-       xfs_dablk_t             last_blkno;
-       xfs_dablk_t             sib_blkno;
-       xfs_dablk_t             par_blkno;
-       int                     error;
-       int                     w;
-       int                     entno;
-       int                     level;
-       int                     dead_level;
-
-       trace_xfs_da_swap_lastblock(args);
-
-       dead_buf = *dead_bufp;
-       dead_blkno = *dead_blknop;
-       tp = args->trans;
-       dp = args->dp;
-       w = args->whichfork;
-       ASSERT(w == XFS_DATA_FORK);
-       mp = dp->i_mount;
-       lastoff = args->geo->freeblk;
-       error = xfs_bmap_last_before(tp, dp, &lastoff, w);
-       if (error)
-               return error;
-       if (unlikely(lastoff == 0)) {
-               XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
-                                mp);
-               return EFSCORRUPTED;
-       }
-       /*
-        * Read the last block in the btree space.
-        */
-       last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
-       error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
-       if (error)
-               return error;
-       /*
-        * Copy the last block into the dead buffer and log it.
-        */
-       memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
-       xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
-       dead_info = dead_buf->b_addr;
-       /*
-        * Get values from the moved block.
-        */
-       if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-           dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
-               struct xfs_dir3_icleaf_hdr leafhdr;
-               struct xfs_dir2_leaf_entry *ents;
-
-               dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
-               dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
-               ents = dp->d_ops->leaf_ents_p(dead_leaf2);
-               dead_level = 0;
-               dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
-       } else {
-               struct xfs_da3_icnode_hdr deadhdr;
-
-               dead_node = (xfs_da_intnode_t *)dead_info;
-               dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
-               btree = dp->d_ops->node_tree_p(dead_node);
-               dead_level = deadhdr.level;
-               dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
-       }
-       sib_buf = par_buf = NULL;
-       /*
-        * If the moved block has a left sibling, fix up the pointers.
-        */
-       if ((sib_blkno = be32_to_cpu(dead_info->back))) {
-               error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
-               if (error)
-                       goto done;
-               sib_info = sib_buf->b_addr;
-               if (unlikely(
-                   be32_to_cpu(sib_info->forw) != last_blkno ||
-                   sib_info->magic != dead_info->magic)) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               sib_info->forw = cpu_to_be32(dead_blkno);
-               xfs_trans_log_buf(tp, sib_buf,
-                       XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
-                                       sizeof(sib_info->forw)));
-               sib_buf = NULL;
-       }
-       /*
-        * If the moved block has a right sibling, fix up the pointers.
-        */
-       if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
-               error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
-               if (error)
-                       goto done;
-               sib_info = sib_buf->b_addr;
-               if (unlikely(
-                      be32_to_cpu(sib_info->back) != last_blkno ||
-                      sib_info->magic != dead_info->magic)) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               sib_info->back = cpu_to_be32(dead_blkno);
-               xfs_trans_log_buf(tp, sib_buf,
-                       XFS_DA_LOGRANGE(sib_info, &sib_info->back,
-                                       sizeof(sib_info->back)));
-               sib_buf = NULL;
-       }
-       par_blkno = args->geo->leafblk;
-       level = -1;
-       /*
-        * Walk down the tree looking for the parent of the moved block.
-        */
-       for (;;) {
-               error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
-               if (error)
-                       goto done;
-               par_node = par_buf->b_addr;
-               dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
-               if (level >= 0 && level != par_hdr.level + 1) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               level = par_hdr.level;
-               btree = dp->d_ops->node_tree_p(par_node);
-               for (entno = 0;
-                    entno < par_hdr.count &&
-                    be32_to_cpu(btree[entno].hashval) < dead_hash;
-                    entno++)
-                       continue;
-               if (entno == par_hdr.count) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               par_blkno = be32_to_cpu(btree[entno].before);
-               if (level == dead_level + 1)
-                       break;
-               xfs_trans_brelse(tp, par_buf);
-               par_buf = NULL;
-       }
-       /*
-        * We're in the right parent block.
-        * Look for the right entry.
-        */
-       for (;;) {
-               for (;
-                    entno < par_hdr.count &&
-                    be32_to_cpu(btree[entno].before) != last_blkno;
-                    entno++)
-                       continue;
-               if (entno < par_hdr.count)
-                       break;
-               par_blkno = par_hdr.forw;
-               xfs_trans_brelse(tp, par_buf);
-               par_buf = NULL;
-               if (unlikely(par_blkno == 0)) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
-               if (error)
-                       goto done;
-               par_node = par_buf->b_addr;
-               dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
-               if (par_hdr.level != level) {
-                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
-                                        XFS_ERRLEVEL_LOW, mp);
-                       error = EFSCORRUPTED;
-                       goto done;
-               }
-               btree = dp->d_ops->node_tree_p(par_node);
-               entno = 0;
-       }
-       /*
-        * Update the parent entry pointing to the moved block.
-        */
-       btree[entno].before = cpu_to_be32(dead_blkno);
-       xfs_trans_log_buf(tp, par_buf,
-               XFS_DA_LOGRANGE(par_node, &btree[entno].before,
-                               sizeof(btree[entno].before)));
-       *dead_blknop = last_blkno;
-       *dead_bufp = last_buf;
-       return 0;
-done:
-       if (par_buf)
-               xfs_trans_brelse(tp, par_buf);
-       if (sib_buf)
-               xfs_trans_brelse(tp, sib_buf);
-       xfs_trans_brelse(tp, last_buf);
-       return error;
-}
-
-/*
- * Remove a btree block from a directory or attribute.
- */
-int
-xfs_da_shrink_inode(
-       xfs_da_args_t   *args,
-       xfs_dablk_t     dead_blkno,
-       struct xfs_buf  *dead_buf)
-{
-       xfs_inode_t *dp;
-       int done, error, w, count;
-       xfs_trans_t *tp;
-       xfs_mount_t *mp;
-
-       trace_xfs_da_shrink_inode(args);
-
-       dp = args->dp;
-       w = args->whichfork;
-       tp = args->trans;
-       mp = dp->i_mount;
-       count = args->geo->fsbcount;
-       for (;;) {
-               /*
-                * Remove extents.  If we get ENOSPC for a dir we have to move
-                * the last block to the place we want to kill.
-                */
-               error = xfs_bunmapi(tp, dp, dead_blkno, count,
-                                   xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
-                                   0, args->firstblock, args->flist, &done);
-               if (error == ENOSPC) {
-                       if (w != XFS_DATA_FORK)
-                               break;
-                       error = xfs_da3_swap_lastblock(args, &dead_blkno,
-                                                     &dead_buf);
-                       if (error)
-                               break;
-               } else {
-                       break;
-               }
-       }
-       xfs_trans_binval(tp, dead_buf);
-       return error;
-}
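
Editor's note: the retry loop above is the subtle part. xfs_bunmapi() can return ENOSPC when punching out a directory block would need more bmap btree space, so for the data fork the code swaps the file's last da-block into the slot being removed and retries the unmap. A minimal standalone sketch of that control flow, with unmap_block() and swap_last_block() as illustrative stand-ins rather than the real XFS calls:

#include <errno.h>
#include <stdio.h>

/* Illustrative stand-in for xfs_bunmapi(): first attempt "fails" with ENOSPC. */
static int unmap_block(int blkno, int *attempts)
{
        return (*attempts)++ == 0 ? ENOSPC : 0;
}

/* Illustrative stand-in for xfs_da3_swap_lastblock(): always succeeds here. */
static int swap_last_block(void)
{
        return 0;
}

static int shrink(int dead_blkno, int is_data_fork)
{
        int attempts = 0;
        int error;

        for (;;) {
                error = unmap_block(dead_blkno, &attempts);
                if (error != ENOSPC)
                        break;                  /* success or a hard error */
                if (!is_data_fork)
                        break;                  /* only directories can swap */
                error = swap_last_block();      /* move last block into place */
                if (error)
                        break;
        }
        return error;
}

int main(void)
{
        printf("shrink returned %d\n", shrink(42, 1));  /* expect 0 */
        return 0;
}
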
-
-/*
- * See if the mapping(s) for this btree block are valid, i.e.
- * don't contain holes, are logically contiguous, and cover the whole range.
- */
-STATIC int
-xfs_da_map_covers_blocks(
-       int             nmap,
-       xfs_bmbt_irec_t *mapp,
-       xfs_dablk_t     bno,
-       int             count)
-{
-       int             i;
-       xfs_fileoff_t   off;
-
-       for (i = 0, off = bno; i < nmap; i++) {
-               if (mapp[i].br_startblock == HOLESTARTBLOCK ||
-                   mapp[i].br_startblock == DELAYSTARTBLOCK) {
-                       return 0;
-               }
-               if (off != mapp[i].br_startoff) {
-                       return 0;
-               }
-               off += mapp[i].br_blockcount;
-       }
-       return off == bno + count;
-}
-
-/*
- * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
- *
- * For the single map case, it is assumed that the caller has provided a pointer
- * to a valid xfs_buf_map.  For the multiple map case, this function will
- * allocate the xfs_buf_map to hold all the maps and replace the caller's single
- * map pointer with the allocated map.
- */
-static int
-xfs_buf_map_from_irec(
-       struct xfs_mount        *mp,
-       struct xfs_buf_map      **mapp,
-       int                     *nmaps,
-       struct xfs_bmbt_irec    *irecs,
-       int                     nirecs)
-{
-       struct xfs_buf_map      *map;
-       int                     i;
-
-       ASSERT(*nmaps == 1);
-       ASSERT(nirecs >= 1);
-
-       if (nirecs > 1) {
-               map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
-                                 KM_SLEEP | KM_NOFS);
-               if (!map)
-                       return ENOMEM;
-               *mapp = map;
-       }
-
-       *nmaps = nirecs;
-       map = *mapp;
-       for (i = 0; i < *nmaps; i++) {
-               ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
-                      irecs[i].br_startblock != HOLESTARTBLOCK);
-               map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
-               map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
-       }
-       return 0;
-}
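
Editor's note: the conversion above is largely unit arithmetic, turning filesystem block numbers and counts into 512-byte basic-block disk addresses and lengths for the buffer layer. A tiny illustration of the shift involved, assuming a hypothetical 4096-byte block size and ignoring the allocation-group decomposition that the real XFS_FSB_TO_DADDR() also performs:

#include <stdio.h>

#define BBSHIFT         9       /* 512-byte basic blocks */
#define BLOCKLOG        12      /* assumed 4096-byte filesystem blocks */

/* Filesystem blocks -> 512-byte basic blocks (what bm_len is measured in). */
static long long fsb_to_bb(long long fsbcount)
{
        return fsbcount << (BLOCKLOG - BBSHIFT);
}

int main(void)
{
        /* An 8-block directory extent covers 64 sectors worth of buffer. */
        printf("8 fs blocks = %lld basic blocks\n", fsb_to_bb(8));
        return 0;
}
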
-
-/*
- * Map the block we are given, ready for reading. There are three possible return
- * values:
- *     -1 - will be returned if we land in a hole and mappedbno == -2 so the
- *          caller knows not to execute a subsequent read.
- *      0 - if we mapped the block successfully
- *     >0 - positive error number if there was an error.
- */
-static int
-xfs_dabuf_map(
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       int                     whichfork,
-       struct xfs_buf_map      **map,
-       int                     *nmaps)
-{
-       struct xfs_mount        *mp = dp->i_mount;
-       int                     nfsb;
-       int                     error = 0;
-       struct xfs_bmbt_irec    irec;
-       struct xfs_bmbt_irec    *irecs = &irec;
-       int                     nirecs;
-
-       ASSERT(map && *map);
-       ASSERT(*nmaps == 1);
-
-       if (whichfork == XFS_DATA_FORK)
-               nfsb = mp->m_dir_geo->fsbcount;
-       else
-               nfsb = mp->m_attr_geo->fsbcount;
-
-       /*
-        * Caller doesn't have a mapping.  -2 means don't complain
-        * if we land in a hole.
-        */
-       if (mappedbno == -1 || mappedbno == -2) {
-               /*
-                * Optimize the one-block case.
-                */
-               if (nfsb != 1)
-                       irecs = kmem_zalloc(sizeof(irec) * nfsb,
-                                           KM_SLEEP | KM_NOFS);
-
-               nirecs = nfsb;
-               error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
-                                      &nirecs, xfs_bmapi_aflag(whichfork));
-               if (error)
-                       goto out;
-       } else {
-               irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
-               irecs->br_startoff = (xfs_fileoff_t)bno;
-               irecs->br_blockcount = nfsb;
-               irecs->br_state = 0;
-               nirecs = 1;
-       }
-
-       if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
-               error = mappedbno == -2 ? -1 : EFSCORRUPTED;
-               if (unlikely(error == EFSCORRUPTED)) {
-                       if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
-                               int i;
-                               xfs_alert(mp, "%s: bno %lld dir: inode %lld",
-                                       __func__, (long long)bno,
-                                       (long long)dp->i_ino);
-                               for (i = 0; i < nirecs; i++) {
-                                       xfs_alert(mp,
-"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
-                                               i,
-                                               (long long)irecs[i].br_startoff,
-                                               (long long)irecs[i].br_startblock,
-                                               (long long)irecs[i].br_blockcount,
-                                               irecs[i].br_state);
-                               }
-                       }
-                       XFS_ERROR_REPORT("xfs_da_do_buf(1)",
-                                        XFS_ERRLEVEL_LOW, mp);
-               }
-               goto out;
-       }
-       error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
-out:
-       if (irecs != &irec)
-               kmem_free(irecs);
-       return error;
-}
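
Editor's note: the mappedbno convention used by this helper and its callers is worth restating: -1 means map the block and treat a hole as corruption, -2 means map it but quietly report a hole, and any other value is an already-known disk address that skips the bmap lookup. A hedged sketch of how the tri-state return is consumed, with map_dabuf() as a stand-in for xfs_dabuf_map():

#include <stdio.h>

#define EIO     5       /* stand-in error number; the real code uses EFSCORRUPTED */

/* -1: hole and the caller said "don't complain"; 0: mapped; >0: error number. */
static int map_dabuf(long long mappedbno, int block_is_hole)
{
        if (block_is_hole)
                return mappedbno == -2 ? -1 : EIO;
        return 0;
}

static int get_buf(long long mappedbno, int block_is_hole)
{
        int error = map_dabuf(mappedbno, block_is_hole);

        if (error) {
                /* Mapping a hole is not an error, but we stop here. */
                if (error == -1)
                        error = 0;
                return error;
        }
        /* ... actually get/read the buffer here ... */
        return 0;
}

int main(void)
{
        printf("hole, mappedbno=-2: %d\n", get_buf(-2, 1));     /* 0, no buffer */
        printf("hole, mappedbno=-1: %d\n", get_buf(-1, 1));     /* 5, corruption */
        return 0;
}
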
-
-/*
- * Get a buffer for the dir/attr block.
- */
-int
-xfs_da_get_buf(
-       struct xfs_trans        *trans,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp,
-       int                     whichfork)
-{
-       struct xfs_buf          *bp;
-       struct xfs_buf_map      map;
-       struct xfs_buf_map      *mapp;
-       int                     nmap;
-       int                     error;
-
-       *bpp = NULL;
-       mapp = &map;
-       nmap = 1;
-       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
-                               &mapp, &nmap);
-       if (error) {
-               /* mapping a hole is not an error, but we don't continue */
-               if (error == -1)
-                       error = 0;
-               goto out_free;
-       }
-
-       bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
-                                   mapp, nmap, 0);
-       error = bp ? bp->b_error : EIO;
-       if (error) {
-               xfs_trans_brelse(trans, bp);
-               goto out_free;
-       }
-
-       *bpp = bp;
-
-out_free:
-       if (mapp != &map)
-               kmem_free(mapp);
-
-       return error;
-}
-
-/*
- * Get a buffer for the dir/attr block, fill in the contents.
- */
-int
-xfs_da_read_buf(
-       struct xfs_trans        *trans,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp,
-       int                     whichfork,
-       const struct xfs_buf_ops *ops)
-{
-       struct xfs_buf          *bp;
-       struct xfs_buf_map      map;
-       struct xfs_buf_map      *mapp;
-       int                     nmap;
-       int                     error;
-
-       *bpp = NULL;
-       mapp = &map;
-       nmap = 1;
-       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
-                               &mapp, &nmap);
-       if (error) {
-               /* mapping a hole is not an error, but we don't continue */
-               if (error == -1)
-                       error = 0;
-               goto out_free;
-       }
-
-       error = xfs_trans_read_buf_map(dp->i_mount, trans,
-                                       dp->i_mount->m_ddev_targp,
-                                       mapp, nmap, 0, &bp, ops);
-       if (error)
-               goto out_free;
-
-       if (whichfork == XFS_ATTR_FORK)
-               xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
-       else
-               xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-       *bpp = bp;
-out_free:
-       if (mapp != &map)
-               kmem_free(mapp);
-
-       return error;
-}
-
-/*
- * Readahead the dir/attr block.
- */
-xfs_daddr_t
-xfs_da_reada_buf(
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mappedbno,
-       int                     whichfork,
-       const struct xfs_buf_ops *ops)
-{
-       struct xfs_buf_map      map;
-       struct xfs_buf_map      *mapp;
-       int                     nmap;
-       int                     error;
-
-       mapp = &map;
-       nmap = 1;
-       error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
-                               &mapp, &nmap);
-       if (error) {
-               /* mapping a hole is not an error, but we don't continue */
-               if (error == -1)
-                       error = 0;
-               goto out_free;
-       }
-
-       mappedbno = mapp[0].bm_bn;
-       xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
-
-out_free:
-       if (mapp != &map)
-               kmem_free(mapp);
-
-       if (error)
-               return -1;
-       return mappedbno;
-}
diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c
deleted file mode 100644 (file)
index c9aee52..0000000
+++ /dev/null
@@ -1,911 +0,0 @@
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-
-/*
- * Shortform directory ops
- */
-static int
-xfs_dir2_sf_entsize(
-       struct xfs_dir2_sf_hdr  *hdr,
-       int                     len)
-{
-       int count = sizeof(struct xfs_dir2_sf_entry);   /* namelen + offset */
-
-       count += len;                                   /* name */
-       count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
-                               sizeof(xfs_dir2_ino4_t); /* ino # */
-       return count;
-}
-
-static int
-xfs_dir3_sf_entsize(
-       struct xfs_dir2_sf_hdr  *hdr,
-       int                     len)
-{
-       return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir2_sf_nextentry(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep)
-{
-       return (struct xfs_dir2_sf_entry *)
-               ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir3_sf_nextentry(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep)
-{
-       return (struct xfs_dir2_sf_entry *)
-               ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen));
-}
-
-
-/*
- * For filetype enabled shortform directories, the file type field is stored at
- * the end of the name.  Because it's only a single byte, endian conversion is
- * not necessary. For non-filetype enabled directories, the type is always
- * unknown and we never store the value.
- */
-static __uint8_t
-xfs_dir2_sfe_get_ftype(
-       struct xfs_dir2_sf_entry *sfep)
-{
-       return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_sfe_put_ftype(
-       struct xfs_dir2_sf_entry *sfep,
-       __uint8_t               ftype)
-{
-       ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static __uint8_t
-xfs_dir3_sfe_get_ftype(
-       struct xfs_dir2_sf_entry *sfep)
-{
-       __uint8_t       ftype;
-
-       ftype = sfep->name[sfep->namelen];
-       if (ftype >= XFS_DIR3_FT_MAX)
-               return XFS_DIR3_FT_UNKNOWN;
-       return ftype;
-}
-
-static void
-xfs_dir3_sfe_put_ftype(
-       struct xfs_dir2_sf_entry *sfep,
-       __uint8_t               ftype)
-{
-       ASSERT(ftype < XFS_DIR3_FT_MAX);
-
-       sfep->name[sfep->namelen] = ftype;
-}
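
Editor's note: pulling the shortform helpers together, an entry is a 1-byte namelen, a 2-byte offset, the name itself, an optional 1-byte file type (ftype-enabled directories only) and finally a 4- or 8-byte inode number. A small worked sketch of that arithmetic; the 3-byte fixed header below is a simplified stand-in for sizeof(struct xfs_dir2_sf_entry), not a quote of the on-disk headers:

#include <stdio.h>

#define SF_ENTRY_FIXED  3       /* assumed: 1-byte namelen + 2-byte offset */

static int sf_entsize(int namelen, int i8count, int has_ftype)
{
        int size = SF_ENTRY_FIXED;

        size += namelen;                /* the name itself */
        if (has_ftype)
                size += 1;              /* file type byte stored after the name */
        size += i8count ? 8 : 4;        /* inode number, 8 or 4 bytes wide */
        return size;
}

int main(void)
{
        /* "foo" without ftype and 4-byte inodes: 3 + 3 + 4 = 10 bytes. */
        printf("no ftype : %d\n", sf_entsize(3, 0, 0));
        /* Same name with ftype and 8-byte inodes: 3 + 3 + 1 + 8 = 15 bytes. */
        printf("ftype+i8 : %d\n", sf_entsize(3, 1, 1));
        return 0;
}
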
-
-/*
- * Inode numbers in short-form directories can come in two versions,
- * either 4 bytes or 8 bytes wide.  These helpers deal with the
- * two forms transparently by looking at the header's i8count field.
- *
- * For 64-bit inode numbers the most significant byte must be zero.
- */
-static xfs_ino_t
-xfs_dir2_sf_get_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       xfs_dir2_inou_t         *from)
-{
-       if (hdr->i8count)
-               return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
-       else
-               return get_unaligned_be32(&from->i4.i);
-}
-
-static void
-xfs_dir2_sf_put_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       xfs_dir2_inou_t         *to,
-       xfs_ino_t               ino)
-{
-       ASSERT((ino & 0xff00000000000000ULL) == 0);
-
-       if (hdr->i8count)
-               put_unaligned_be64(ino, &to->i8.i);
-       else
-               put_unaligned_be32(ino, &to->i4.i);
-}
-
-static xfs_ino_t
-xfs_dir2_sf_get_parent_ino(
-       struct xfs_dir2_sf_hdr  *hdr)
-{
-       return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
-}
-
-static void
-xfs_dir2_sf_put_parent_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       xfs_ino_t               ino)
-{
-       xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
-}
-
-/*
- * In short-form directory entries the inode numbers are stored at a variable
- * offset after the entry name. If the entry stores a filetype value, then it
- * sits between the name and the inode number. Hence the inode numbers may only
- * be accessed through the helpers below.
- */
-static xfs_ino_t
-xfs_dir2_sfe_get_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep)
-{
-       return xfs_dir2_sf_get_ino(hdr,
-                               (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]);
-}
-
-static void
-xfs_dir2_sfe_put_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep,
-       xfs_ino_t               ino)
-{
-       xfs_dir2_sf_put_ino(hdr,
-                           (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino);
-}
-
-static xfs_ino_t
-xfs_dir3_sfe_get_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep)
-{
-       return xfs_dir2_sf_get_ino(hdr,
-                       (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]);
-}
-
-static void
-xfs_dir3_sfe_put_ino(
-       struct xfs_dir2_sf_hdr  *hdr,
-       struct xfs_dir2_sf_entry *sfep,
-       xfs_ino_t               ino)
-{
-       xfs_dir2_sf_put_ino(hdr,
-                       (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino);
-}
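
Editor's note: the packing behind those helpers, written out longhand. Inode numbers are stored big-endian in either 4 or 8 bytes depending on i8count, and in the 8-byte form the top byte must be zero, which is why the getter masks with 0x00ffffffffffffff. A standalone sketch using plain byte loops instead of the kernel's {get,put}_unaligned_be{32,64}() helpers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void put_ino(uint8_t *dst, uint64_t ino, int i8count)
{
        assert((ino & 0xff00000000000000ULL) == 0);     /* top byte must be clear */
        if (i8count) {
                for (int i = 0; i < 8; i++)
                        dst[i] = (uint8_t)(ino >> (56 - 8 * i));
        } else {
                for (int i = 0; i < 4; i++)
                        dst[i] = (uint8_t)(ino >> (24 - 8 * i));
        }
}

static uint64_t get_ino(const uint8_t *src, int i8count)
{
        int nbytes = i8count ? 8 : 4;
        uint64_t ino = 0;

        for (int i = 0; i < nbytes; i++)
                ino = (ino << 8) | src[i];
        return i8count ? ino & 0x00ffffffffffffffULL : ino;
}

int main(void)
{
        uint8_t buf[8];
        uint64_t ino = 0x123456789abcULL;

        put_ino(buf, ino, 1);
        printf("8-byte roundtrip ok: %d\n", get_ino(buf, 1) == ino);
        put_ino(buf, ino & 0xffffffffULL, 0);
        printf("4-byte roundtrip ok: %d\n",
               get_ino(buf, 0) == (ino & 0xffffffffULL));
        return 0;
}
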
-
-
-/*
- * Directory data block operations
- */
-
-/*
- * In some special situations the dirent size is fixed because we always know
- * what the size of the entry is. That's true for "." and "..", so their sizes
- * and offsets are constant, as is the offset of the first entry.
- *
- * Hence, this calculation is written as a macro so it can be evaluated at
- * compile time and certain offsets can be calculated directly in the
- * structure initialiser via the macro. There are two macros - one for dirents
- * with ftype and one without, so there are no unresolvable conditionals in the
- * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
- * of 2 and the compiler doesn't reject it (unlike roundup()).
- */
-#define XFS_DIR2_DATA_ENTSIZE(n)                                       \
-       round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
-                sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
-
-#define XFS_DIR3_DATA_ENTSIZE(n)                                       \
-       round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
-                sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),      \
-               XFS_DIR2_DATA_ALIGN)
-
-static int
-xfs_dir2_data_entsize(
-       int                     n)
-{
-       return XFS_DIR2_DATA_ENTSIZE(n);
-}
-
-static int
-xfs_dir3_data_entsize(
-       int                     n)
-{
-       return XFS_DIR3_DATA_ENTSIZE(n);
-}
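
Editor's note: the data-block entsize macros do the same kind of arithmetic with an alignment twist: the 8-byte inode number, the 1-byte namelen, the name, an optional ftype byte and the trailing 2-byte tag are rounded up to the 8-byte XFS_DIR2_DATA_ALIGN boundary. A worked sketch with those sizes written out as assumed constants rather than taken from the real headers:

#include <stdio.h>

#define DATA_ALIGN      8       /* assumed XFS_DIR2_DATA_ALIGN */
#define ENTRY_HEADER    9       /* 8-byte inode number + 1-byte namelen */
#define TAG_SIZE        2       /* trailing xfs_dir2_data_off_t tag */

static int round_up_pow2(int n, int align)
{
        return (n + align - 1) & ~(align - 1);
}

static int data_entsize(int namelen, int has_ftype)
{
        return round_up_pow2(ENTRY_HEADER + namelen + (has_ftype ? 1 : 0) +
                             TAG_SIZE, DATA_ALIGN);
}

int main(void)
{
        printf("\"foo\"        : %d bytes\n", data_entsize(3, 0));      /* 16 */
        printf("\"foo\"+ftype  : %d bytes\n", data_entsize(3, 1));      /* 16 */
        printf("\"hello\"+ftype: %d bytes\n", data_entsize(5, 1));      /* 24 */
        return 0;
}
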
-
-static __uint8_t
-xfs_dir2_data_get_ftype(
-       struct xfs_dir2_data_entry *dep)
-{
-       return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_data_put_ftype(
-       struct xfs_dir2_data_entry *dep,
-       __uint8_t               ftype)
-{
-       ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static __uint8_t
-xfs_dir3_data_get_ftype(
-       struct xfs_dir2_data_entry *dep)
-{
-       __uint8_t       ftype = dep->name[dep->namelen];
-
-       ASSERT(ftype < XFS_DIR3_FT_MAX);
-       if (ftype >= XFS_DIR3_FT_MAX)
-               return XFS_DIR3_FT_UNKNOWN;
-       return ftype;
-}
-
-static void
-xfs_dir3_data_put_ftype(
-       struct xfs_dir2_data_entry *dep,
-       __uint8_t               type)
-{
-       ASSERT(type < XFS_DIR3_FT_MAX);
-       ASSERT(dep->namelen != 0);
-
-       dep->name[dep->namelen] = type;
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static __be16 *
-xfs_dir2_data_entry_tag_p(
-       struct xfs_dir2_data_entry *dep)
-{
-       return (__be16 *)((char *)dep +
-               xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-static __be16 *
-xfs_dir3_data_entry_tag_p(
-       struct xfs_dir2_data_entry *dep)
-{
-       return (__be16 *)((char *)dep +
-               xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * Location of "." and ".." in data space (always block 0)
- */
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dot_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dotdot_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR2_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_first_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR2_DATA_ENTSIZE(1) +
-                               XFS_DIR2_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_dotdot_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_first_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1) +
-                               XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dot_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dotdot_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_first_entry_p(
-       struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1) +
-                               XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return hdr->bestfree;
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_unused *)
-               ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
-       return (struct xfs_dir2_data_unused *)
-               ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-
-/*
- * Directory Leaf block operations
- */
-static int
-xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
-{
-       return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
-               (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
-       return lp->__ents;
-}
-
-static int
-xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
-{
-       return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
-               (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
-       return ((struct xfs_dir3_leaf *)lp)->__ents;
-}
-
-static void
-xfs_dir2_leaf_hdr_from_disk(
-       struct xfs_dir3_icleaf_hdr      *to,
-       struct xfs_dir2_leaf            *from)
-{
-       to->forw = be32_to_cpu(from->hdr.info.forw);
-       to->back = be32_to_cpu(from->hdr.info.back);
-       to->magic = be16_to_cpu(from->hdr.info.magic);
-       to->count = be16_to_cpu(from->hdr.count);
-       to->stale = be16_to_cpu(from->hdr.stale);
-
-       ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
-              to->magic == XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir2_leaf_hdr_to_disk(
-       struct xfs_dir2_leaf            *to,
-       struct xfs_dir3_icleaf_hdr      *from)
-{
-       ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
-              from->magic == XFS_DIR2_LEAFN_MAGIC);
-
-       to->hdr.info.forw = cpu_to_be32(from->forw);
-       to->hdr.info.back = cpu_to_be32(from->back);
-       to->hdr.info.magic = cpu_to_be16(from->magic);
-       to->hdr.count = cpu_to_be16(from->count);
-       to->hdr.stale = cpu_to_be16(from->stale);
-}
-
-static void
-xfs_dir3_leaf_hdr_from_disk(
-       struct xfs_dir3_icleaf_hdr      *to,
-       struct xfs_dir2_leaf            *from)
-{
-       struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
-
-       to->forw = be32_to_cpu(hdr3->info.hdr.forw);
-       to->back = be32_to_cpu(hdr3->info.hdr.back);
-       to->magic = be16_to_cpu(hdr3->info.hdr.magic);
-       to->count = be16_to_cpu(hdr3->count);
-       to->stale = be16_to_cpu(hdr3->stale);
-
-       ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
-              to->magic == XFS_DIR3_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leaf_hdr_to_disk(
-       struct xfs_dir2_leaf            *to,
-       struct xfs_dir3_icleaf_hdr      *from)
-{
-       struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
-
-       ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
-              from->magic == XFS_DIR3_LEAFN_MAGIC);
-
-       hdr3->info.hdr.forw = cpu_to_be32(from->forw);
-       hdr3->info.hdr.back = cpu_to_be32(from->back);
-       hdr3->info.hdr.magic = cpu_to_be16(from->magic);
-       hdr3->count = cpu_to_be16(from->count);
-       hdr3->stale = cpu_to_be16(from->stale);
-}
-
-
-/*
- * Directory/Attribute Node block operations
- */
-static struct xfs_da_node_entry *
-xfs_da2_node_tree_p(struct xfs_da_intnode *dap)
-{
-       return dap->__btree;
-}
-
-static struct xfs_da_node_entry *
-xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
-{
-       return ((struct xfs_da3_intnode *)dap)->__btree;
-}
-
-static void
-xfs_da2_node_hdr_from_disk(
-       struct xfs_da3_icnode_hdr       *to,
-       struct xfs_da_intnode           *from)
-{
-       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
-       to->forw = be32_to_cpu(from->hdr.info.forw);
-       to->back = be32_to_cpu(from->hdr.info.back);
-       to->magic = be16_to_cpu(from->hdr.info.magic);
-       to->count = be16_to_cpu(from->hdr.__count);
-       to->level = be16_to_cpu(from->hdr.__level);
-}
-
-static void
-xfs_da2_node_hdr_to_disk(
-       struct xfs_da_intnode           *to,
-       struct xfs_da3_icnode_hdr       *from)
-{
-       ASSERT(from->magic == XFS_DA_NODE_MAGIC);
-       to->hdr.info.forw = cpu_to_be32(from->forw);
-       to->hdr.info.back = cpu_to_be32(from->back);
-       to->hdr.info.magic = cpu_to_be16(from->magic);
-       to->hdr.__count = cpu_to_be16(from->count);
-       to->hdr.__level = cpu_to_be16(from->level);
-}
-
-static void
-xfs_da3_node_hdr_from_disk(
-       struct xfs_da3_icnode_hdr       *to,
-       struct xfs_da_intnode           *from)
-{
-       struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
-
-       ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
-       to->forw = be32_to_cpu(hdr3->info.hdr.forw);
-       to->back = be32_to_cpu(hdr3->info.hdr.back);
-       to->magic = be16_to_cpu(hdr3->info.hdr.magic);
-       to->count = be16_to_cpu(hdr3->__count);
-       to->level = be16_to_cpu(hdr3->__level);
-}
-
-static void
-xfs_da3_node_hdr_to_disk(
-       struct xfs_da_intnode           *to,
-       struct xfs_da3_icnode_hdr       *from)
-{
-       struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
-
-       ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
-       hdr3->info.hdr.forw = cpu_to_be32(from->forw);
-       hdr3->info.hdr.back = cpu_to_be32(from->back);
-       hdr3->info.hdr.magic = cpu_to_be16(from->magic);
-       hdr3->__count = cpu_to_be16(from->count);
-       hdr3->__level = cpu_to_be16(from->level);
-}
-
-
-/*
- * Directory free space block operations
- */
-static int
-xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
-{
-       return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
-               sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
-{
-       return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
-       return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
-                       (db / xfs_dir2_free_max_bests(geo));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
-       return db % xfs_dir2_free_max_bests(geo);
-}
-
-static int
-xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
-{
-       return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
-               sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
-{
-       return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
-       return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
-                       (db / xfs_dir3_free_max_bests(geo));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
-       return db % xfs_dir3_free_max_bests(geo);
-}
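
Editor's note: in other words, each free block holds a fixed number of 2-byte "best free" entries, and a data block number indexes into that array linearly. A small worked example, assuming 4k directory blocks and a 16-byte v4 free-block header, and leaving out the XFS_DIR2_FREE_OFFSET base that the real xfs_dir2_db_to_fdb() adds to make the result an absolute block number:

#include <stdio.h>

#define BLKSIZE         4096    /* assume 4k directory blocks */
#define FREE_HDR_SIZE   16      /* assumed v4 xfs_dir2_free_hdr: 4 x __be32 */
#define BESTS_PER_BLOCK ((BLKSIZE - FREE_HDR_SIZE) / 2) /* 2-byte entries */

/* Which free block (relative to the FREE segment) covers data block db? */
static int db_to_fdb(int db)
{
        return db / BESTS_PER_BLOCK;
}

/* And at which index inside that free block? */
static int db_to_fdindex(int db)
{
        return db % BESTS_PER_BLOCK;
}

int main(void)
{
        int db = 5000;

        printf("data block %d -> free block %d, index %d\n",
               db, db_to_fdb(db), db_to_fdindex(db));   /* free block 2, index 920 */
        return 0;
}
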
-
-static void
-xfs_dir2_free_hdr_from_disk(
-       struct xfs_dir3_icfree_hdr      *to,
-       struct xfs_dir2_free            *from)
-{
-       to->magic = be32_to_cpu(from->hdr.magic);
-       to->firstdb = be32_to_cpu(from->hdr.firstdb);
-       to->nvalid = be32_to_cpu(from->hdr.nvalid);
-       to->nused = be32_to_cpu(from->hdr.nused);
-       ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
-}
-
-static void
-xfs_dir2_free_hdr_to_disk(
-       struct xfs_dir2_free            *to,
-       struct xfs_dir3_icfree_hdr      *from)
-{
-       ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
-
-       to->hdr.magic = cpu_to_be32(from->magic);
-       to->hdr.firstdb = cpu_to_be32(from->firstdb);
-       to->hdr.nvalid = cpu_to_be32(from->nvalid);
-       to->hdr.nused = cpu_to_be32(from->nused);
-}
-
-static void
-xfs_dir3_free_hdr_from_disk(
-       struct xfs_dir3_icfree_hdr      *to,
-       struct xfs_dir2_free            *from)
-{
-       struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
-
-       to->magic = be32_to_cpu(hdr3->hdr.magic);
-       to->firstdb = be32_to_cpu(hdr3->firstdb);
-       to->nvalid = be32_to_cpu(hdr3->nvalid);
-       to->nused = be32_to_cpu(hdr3->nused);
-
-       ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
-}
-
-static void
-xfs_dir3_free_hdr_to_disk(
-       struct xfs_dir2_free            *to,
-       struct xfs_dir3_icfree_hdr      *from)
-{
-       struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
-
-       ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
-
-       hdr3->hdr.magic = cpu_to_be32(from->magic);
-       hdr3->firstdb = cpu_to_be32(from->firstdb);
-       hdr3->nvalid = cpu_to_be32(from->nvalid);
-       hdr3->nused = cpu_to_be32(from->nused);
-}
-
-static const struct xfs_dir_ops xfs_dir2_ops = {
-       .sf_entsize = xfs_dir2_sf_entsize,
-       .sf_nextentry = xfs_dir2_sf_nextentry,
-       .sf_get_ftype = xfs_dir2_sfe_get_ftype,
-       .sf_put_ftype = xfs_dir2_sfe_put_ftype,
-       .sf_get_ino = xfs_dir2_sfe_get_ino,
-       .sf_put_ino = xfs_dir2_sfe_put_ino,
-       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
-       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
-       .data_entsize = xfs_dir2_data_entsize,
-       .data_get_ftype = xfs_dir2_data_get_ftype,
-       .data_put_ftype = xfs_dir2_data_put_ftype,
-       .data_entry_tag_p = xfs_dir2_data_entry_tag_p,
-       .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
-       .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
-       .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR2_DATA_ENTSIZE(1),
-       .data_first_offset =  sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR2_DATA_ENTSIZE(1) +
-                               XFS_DIR2_DATA_ENTSIZE(2),
-       .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
-       .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
-       .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
-       .data_first_entry_p = xfs_dir2_data_first_entry_p,
-       .data_entry_p = xfs_dir2_data_entry_p,
-       .data_unused_p = xfs_dir2_data_unused_p,
-
-       .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
-       .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
-       .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
-       .leaf_max_ents = xfs_dir2_max_leaf_ents,
-       .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
-       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
-       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
-       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
-       .node_tree_p = xfs_da2_node_tree_p,
-
-       .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
-       .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
-       .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
-       .free_max_bests = xfs_dir2_free_max_bests,
-       .free_bests_p = xfs_dir2_free_bests_p,
-       .db_to_fdb = xfs_dir2_db_to_fdb,
-       .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
-       .sf_entsize = xfs_dir3_sf_entsize,
-       .sf_nextentry = xfs_dir3_sf_nextentry,
-       .sf_get_ftype = xfs_dir3_sfe_get_ftype,
-       .sf_put_ftype = xfs_dir3_sfe_put_ftype,
-       .sf_get_ino = xfs_dir3_sfe_get_ino,
-       .sf_put_ino = xfs_dir3_sfe_put_ino,
-       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
-       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
-       .data_entsize = xfs_dir3_data_entsize,
-       .data_get_ftype = xfs_dir3_data_get_ftype,
-       .data_put_ftype = xfs_dir3_data_put_ftype,
-       .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
-       .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
-       .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
-       .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1),
-       .data_first_offset =  sizeof(struct xfs_dir2_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1) +
-                               XFS_DIR3_DATA_ENTSIZE(2),
-       .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
-       .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
-       .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
-       .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
-       .data_entry_p = xfs_dir2_data_entry_p,
-       .data_unused_p = xfs_dir2_data_unused_p,
-
-       .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
-       .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
-       .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
-       .leaf_max_ents = xfs_dir2_max_leaf_ents,
-       .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
-       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
-       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
-       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
-       .node_tree_p = xfs_da2_node_tree_p,
-
-       .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
-       .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
-       .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
-       .free_max_bests = xfs_dir2_free_max_bests,
-       .free_bests_p = xfs_dir2_free_bests_p,
-       .db_to_fdb = xfs_dir2_db_to_fdb,
-       .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir3_ops = {
-       .sf_entsize = xfs_dir3_sf_entsize,
-       .sf_nextentry = xfs_dir3_sf_nextentry,
-       .sf_get_ftype = xfs_dir3_sfe_get_ftype,
-       .sf_put_ftype = xfs_dir3_sfe_put_ftype,
-       .sf_get_ino = xfs_dir3_sfe_get_ino,
-       .sf_put_ino = xfs_dir3_sfe_put_ino,
-       .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
-       .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
-       .data_entsize = xfs_dir3_data_entsize,
-       .data_get_ftype = xfs_dir3_data_get_ftype,
-       .data_put_ftype = xfs_dir3_data_put_ftype,
-       .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
-       .data_bestfree_p = xfs_dir3_data_bestfree_p,
-
-       .data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
-       .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1),
-       .data_first_offset =  sizeof(struct xfs_dir3_data_hdr) +
-                               XFS_DIR3_DATA_ENTSIZE(1) +
-                               XFS_DIR3_DATA_ENTSIZE(2),
-       .data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
-
-       .data_dot_entry_p = xfs_dir3_data_dot_entry_p,
-       .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
-       .data_first_entry_p = xfs_dir3_data_first_entry_p,
-       .data_entry_p = xfs_dir3_data_entry_p,
-       .data_unused_p = xfs_dir3_data_unused_p,
-
-       .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
-       .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
-       .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
-       .leaf_max_ents = xfs_dir3_max_leaf_ents,
-       .leaf_ents_p = xfs_dir3_leaf_ents_p,
-
-       .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
-       .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
-       .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
-       .node_tree_p = xfs_da3_node_tree_p,
-
-       .free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
-       .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
-       .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
-       .free_max_bests = xfs_dir3_free_max_bests,
-       .free_bests_p = xfs_dir3_free_bests_p,
-       .db_to_fdb = xfs_dir3_db_to_fdb,
-       .db_to_fdindex = xfs_dir3_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
-       .node_hdr_size = sizeof(struct xfs_da_node_hdr),
-       .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
-       .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
-       .node_tree_p = xfs_da2_node_tree_p,
-};
-
-static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
-       .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
-       .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
-       .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
-       .node_tree_p = xfs_da3_node_tree_p,
-};
-
-/*
- * Return the ops structure according to the current config.  If we are passed
- * an inode, then that overrides the default config we use, which is based on
- * feature bits.
- */
-const struct xfs_dir_ops *
-xfs_dir_get_ops(
-       struct xfs_mount        *mp,
-       struct xfs_inode        *dp)
-{
-       if (dp)
-               return dp->d_ops;
-       if (mp->m_dir_inode_ops)
-               return mp->m_dir_inode_ops;
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return &xfs_dir3_ops;
-       if (xfs_sb_version_hasftype(&mp->m_sb))
-               return &xfs_dir2_ftype_ops;
-       return &xfs_dir2_ops;
-}
-
-const struct xfs_dir_ops *
-xfs_nondir_get_ops(
-       struct xfs_mount        *mp,
-       struct xfs_inode        *dp)
-{
-       if (dp)
-               return dp->d_ops;
-       if (mp->m_nondir_inode_ops)
-               return mp->m_nondir_inode_ops;
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return &xfs_dir3_nondir_ops;
-       return &xfs_dir2_nondir_ops;
-}
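
Editor's note: the precedence in these two selectors is: an inode's cached d_ops wins, then the mount's cached ops pointer, then the superblock feature bits decide between the ops tables. A sketch of just the feature-bit step, with boolean flags standing in for the real xfs_sb_version_hascrc()/hasftype() checks:

#include <stdio.h>

/* Illustrative feature flags; the real checks are sb version bit helpers. */
struct mount { int has_crc; int has_ftype; };

static const char *pick_dir_ops(const struct mount *mp)
{
        if (mp->has_crc)
                return "xfs_dir3_ops";          /* v5: CRCs imply ftype too */
        if (mp->has_ftype)
                return "xfs_dir2_ftype_ops";    /* v4 + ftype feature bit */
        return "xfs_dir2_ops";                  /* plain v4 */
}

int main(void)
{
        struct mount v4 = { 0, 0 }, v4ft = { 0, 1 }, v5 = { 1, 1 };

        printf("%s / %s / %s\n", pick_dir_ops(&v4), pick_dir_ops(&v4ft),
               pick_dir_ops(&v5));
        return 0;
}
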
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
deleted file mode 100644 (file)
index a0aca73..0000000
+++ /dev/null
@@ -1,762 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_dinode.h"
-
-struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
-
-
-/*
- * ASCII case-insensitive (ie. A-Z) support for directories that was
- * used in IRIX.
- */
-STATIC xfs_dahash_t
-xfs_ascii_ci_hashname(
-       struct xfs_name *name)
-{
-       xfs_dahash_t    hash;
-       int             i;
-
-       for (i = 0, hash = 0; i < name->len; i++)
-               hash = tolower(name->name[i]) ^ rol32(hash, 7);
-
-       return hash;
-}
-
-STATIC enum xfs_dacmp
-xfs_ascii_ci_compname(
-       struct xfs_da_args *args,
-       const unsigned char *name,
-       int             len)
-{
-       enum xfs_dacmp  result;
-       int             i;
-
-       if (args->namelen != len)
-               return XFS_CMP_DIFFERENT;
-
-       result = XFS_CMP_EXACT;
-       for (i = 0; i < len; i++) {
-               if (args->name[i] == name[i])
-                       continue;
-               if (tolower(args->name[i]) != tolower(name[i]))
-                       return XFS_CMP_DIFFERENT;
-               result = XFS_CMP_CASE;
-       }
-
-       return result;
-}
-
-static struct xfs_nameops xfs_ascii_ci_nameops = {
-       .hashname       = xfs_ascii_ci_hashname,
-       .compname       = xfs_ascii_ci_compname,
-};
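
Editor's note: the hash above is what makes case-insensitive lookup work: every byte is lower-cased before being folded into a rotate-and-xor hash, so case variants of a name land in the same hash bucket and the comparison function can then classify the match as exact or case-only. A standalone re-implementation of the same shape (rol32() is written out by hand here):

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t rol32(uint32_t x, int n)
{
        return (x << n) | (x >> (32 - n));
}

/* Same shape as the hash above: fold each lower-cased byte into a rotated hash. */
static uint32_t ci_hash(const char *name)
{
        uint32_t hash = 0;
        size_t len = strlen(name);

        for (size_t i = 0; i < len; i++)
                hash = (uint32_t)tolower((unsigned char)name[i]) ^ rol32(hash, 7);
        return hash;
}

int main(void)
{
        /* Case variants hash to the same bucket, so lookups can find them. */
        printf("equal: %d\n", ci_hash("Makefile") == ci_hash("MAKEFILE"));
        return 0;
}
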
-
-int
-xfs_da_mount(
-       struct xfs_mount        *mp)
-{
-       struct xfs_da_geometry  *dageo;
-       int                     nodehdr_size;
-
-
-       ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
-       ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
-              XFS_MAX_BLOCKSIZE);
-
-       mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
-       mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
-
-       nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
-       mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
-                                   KM_SLEEP | KM_MAYFAIL);
-       mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
-                                    KM_SLEEP | KM_MAYFAIL);
-       if (!mp->m_dir_geo || !mp->m_attr_geo) {
-               kmem_free(mp->m_dir_geo);
-               kmem_free(mp->m_attr_geo);
-               return ENOMEM;
-       }
-
-       /* set up directory geometry */
-       dageo = mp->m_dir_geo;
-       dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
-       dageo->fsblog = mp->m_sb.sb_blocklog;
-       dageo->blksize = 1 << dageo->blklog;
-       dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
-
-       /*
-        * Now that we've set up the block conversion variables, we can calculate
-        * the segment block constants using the geometry structure.
-        */
-       dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
-       dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
-       dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
-       dageo->node_ents = (dageo->blksize - nodehdr_size) /
-                               (uint)sizeof(xfs_da_node_entry_t);
-       dageo->magicpct = (dageo->blksize * 37) / 100;
-
-       /* set up attribute geometry - single fsb only */
-       dageo = mp->m_attr_geo;
-       dageo->blklog = mp->m_sb.sb_blocklog;
-       dageo->fsblog = mp->m_sb.sb_blocklog;
-       dageo->blksize = 1 << dageo->blklog;
-       dageo->fsbcount = 1;
-       dageo->node_ents = (dageo->blksize - nodehdr_size) /
-                               (uint)sizeof(xfs_da_node_entry_t);
-       dageo->magicpct = (dageo->blksize * 37) / 100;
-
-       if (xfs_sb_version_hasasciici(&mp->m_sb))
-               mp->m_dirnameops = &xfs_ascii_ci_nameops;
-       else
-               mp->m_dirnameops = &xfs_default_nameops;
-
-       return 0;
-}
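
Editor's note: the directory geometry is pure shift arithmetic: a directory block spans 2^sb_dirblklog filesystem blocks, so its log2 size is sb_blocklog + sb_dirblklog. A worked example with assumed values (4k filesystem blocks, 16k directory blocks, 16-byte node header) rather than numbers read from any particular superblock:

#include <stdio.h>

int main(void)
{
        int sb_blocklog = 12;           /* assume 4096-byte filesystem blocks */
        int sb_dirblklog = 2;           /* assume 16k multi-fsb directory blocks */
        int nodehdr_size = 16;          /* illustrative v4 node header size */
        int node_entry_size = 8;        /* hashval + before, two __be32s */

        int blklog = sb_blocklog + sb_dirblklog;
        int blksize = 1 << blklog;
        int fsbcount = 1 << sb_dirblklog;
        int node_ents = (blksize - nodehdr_size) / node_entry_size;

        printf("dir block: %d bytes (%d fs blocks), %d node entries\n",
               blksize, fsbcount, node_ents);   /* 16384 bytes, 4 fsbs, 2046 */
        return 0;
}
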
-
-void
-xfs_da_unmount(
-       struct xfs_mount        *mp)
-{
-       kmem_free(mp->m_dir_geo);
-       kmem_free(mp->m_attr_geo);
-}
-
-/*
- * Return 1 if directory contains only "." and "..".
- */
-int
-xfs_dir_isempty(
-       xfs_inode_t     *dp)
-{
-       xfs_dir2_sf_hdr_t       *sfp;
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-       if (dp->i_d.di_size == 0)       /* might happen during shutdown. */
-               return 1;
-       if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
-               return 0;
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       return !sfp->count;
-}
-
-/*
- * Validate a given inode number.
- */
-int
-xfs_dir_ino_validate(
-       xfs_mount_t     *mp,
-       xfs_ino_t       ino)
-{
-       xfs_agblock_t   agblkno;
-       xfs_agino_t     agino;
-       xfs_agnumber_t  agno;
-       int             ino_ok;
-       int             ioff;
-
-       agno = XFS_INO_TO_AGNO(mp, ino);
-       agblkno = XFS_INO_TO_AGBNO(mp, ino);
-       ioff = XFS_INO_TO_OFFSET(mp, ino);
-       agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
-       ino_ok =
-               agno < mp->m_sb.sb_agcount &&
-               agblkno < mp->m_sb.sb_agblocks &&
-               agblkno != 0 &&
-               ioff < (1 << mp->m_sb.sb_inopblog) &&
-               XFS_AGINO_TO_INO(mp, agno, agino) == ino;
-       if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
-                       XFS_RANDOM_DIR_INO_VALIDATE))) {
-               xfs_warn(mp, "Invalid inode number 0x%Lx",
-                               (unsigned long long) ino);
-               XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-       return 0;
-}
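
Editor's note: the checks above all follow from how an inode number packs the AG number, AG block and in-block offset into one integer. A made-up geometry makes that packing concrete; the field widths below are illustrative, not taken from a real superblock:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical geometry: 8 inodes per block, 1024 blocks per AG. */
#define INOPBLOG        3
#define AGBLKLOG        10
#define AGINOLOG        (INOPBLOG + AGBLKLOG)

static uint64_t make_ino(uint64_t agno, uint64_t agbno, uint64_t off)
{
        return (agno << AGINOLOG) | (agbno << INOPBLOG) | off;
}

int main(void)
{
        uint64_t ino = make_ino(2, 17, 5);

        /* Pull the pieces back out, the same way the validation does. */
        uint64_t agno  = ino >> AGINOLOG;
        uint64_t agbno = (ino >> INOPBLOG) & ((1 << AGBLKLOG) - 1);
        uint64_t off   = ino & ((1 << INOPBLOG) - 1);

        printf("ino %llu -> ag %llu, block %llu, offset %llu\n",
               (unsigned long long)ino, (unsigned long long)agno,
               (unsigned long long)agbno, (unsigned long long)off);
        return 0;
}
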
-
-/*
- * Initialize a directory with its "." and ".." entries.
- */
-int
-xfs_dir_init(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *dp,
-       xfs_inode_t     *pdp)
-{
-       struct xfs_da_args *args;
-       int             error;
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-       error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
-       if (error)
-               return error;
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->dp = dp;
-       args->trans = tp;
-       error = xfs_dir2_sf_create(args, pdp->i_ino);
-       kmem_free(args);
-       return error;
-}
-
-/*
- * Enter a name in a directory.
- */
-int
-xfs_dir_createname(
-       xfs_trans_t             *tp,
-       xfs_inode_t             *dp,
-       struct xfs_name         *name,
-       xfs_ino_t               inum,           /* new entry inode number */
-       xfs_fsblock_t           *first,         /* bmap's firstblock */
-       xfs_bmap_free_t         *flist,         /* bmap's freeblock list */
-       xfs_extlen_t            total)          /* bmap's total block count */
-{
-       struct xfs_da_args      *args;
-       int                     rval;
-       int                     v;              /* type-checking value */
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
-       if (rval)
-               return rval;
-       XFS_STATS_INC(xs_dir_create);
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->inumber = inum;
-       args->dp = dp;
-       args->firstblock = first;
-       args->flist = flist;
-       args->total = total;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-       args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_addname(args);
-       else
-               rval = xfs_dir2_node_addname(args);
-
-out_free:
-       kmem_free(args);
-       return rval;
-}
-
-/*
- * If doing a CI lookup and case-insensitive match, dup actual name into
- * args.value. Return EEXIST for success (ie. name found) or an error.
- */
-int
-xfs_dir_cilookup_result(
-       struct xfs_da_args *args,
-       const unsigned char *name,
-       int             len)
-{
-       if (args->cmpresult == XFS_CMP_DIFFERENT)
-               return ENOENT;
-       if (args->cmpresult != XFS_CMP_CASE ||
-                                       !(args->op_flags & XFS_DA_OP_CILOOKUP))
-               return EEXIST;
-
-       args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
-       if (!args->value)
-               return ENOMEM;
-
-       memcpy(args->value, name, len);
-       args->valuelen = len;
-       return EEXIST;
-}
-
-/*
- * Lookup a name in a directory, give back the inode number.
- * If ci_name is not NULL, returns the actual name in ci_name if it differs
- * from name, or ci_name->name is set to NULL for an exact match.
- */
-
-int
-xfs_dir_lookup(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *dp,
-       struct xfs_name *name,
-       xfs_ino_t       *inum,          /* out: inode number */
-       struct xfs_name *ci_name)       /* out: actual name if CI match */
-{
-       struct xfs_da_args *args;
-       int             rval;
-       int             v;              /* type-checking value */
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-       XFS_STATS_INC(xs_dir_lookup);
-
-       /*
-        * We need to use KM_NOFS here so that lockdep will not throw false
-        * positive deadlock warnings on a non-transactional lookup path. It is
-        * safe to recurse into inode reclaim in that case, but lockdep can't
-        * easily be taught about it. Hence using KM_NOFS here avoids having to
-        * add a bunch of lockdep class annotations into the reclaim path for
-        * the ilock.
-        */
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->dp = dp;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-       args->op_flags = XFS_DA_OP_OKNOENT;
-       if (ci_name)
-               args->op_flags |= XFS_DA_OP_CILOOKUP;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_lookup(args);
-               goto out_check_rval;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_lookup(args);
-               goto out_check_rval;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_lookup(args);
-       else
-               rval = xfs_dir2_node_lookup(args);
-
-out_check_rval:
-       if (rval == EEXIST)
-               rval = 0;
-       if (!rval) {
-               *inum = args->inumber;
-               if (ci_name) {
-                       ci_name->name = args->value;
-                       ci_name->len = args->valuelen;
-               }
-       }
-out_free:
-       kmem_free(args);
-       return rval;
-}
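
Editor's note: xfs_dir_createname(), xfs_dir_lookup(), xfs_dir_removename(), xfs_dir_replace() and xfs_dir_canenter() all share the same dispatch shape: shortform if the fork is local, otherwise block, then leaf, then node format. A compact sketch of that decision ladder with stand-in predicates (the real code calls xfs_dir2_isblock() and xfs_dir2_isleaf(), which inspect the data fork's size and extent map):

#include <stdio.h>

enum dir_fmt { FMT_SF, FMT_BLOCK, FMT_LEAF, FMT_NODE };

/* Stand-in predicates; not the real format checks. */
static int is_local(enum dir_fmt f)  { return f == FMT_SF; }
static int is_block(enum dir_fmt f)  { return f == FMT_BLOCK; }
static int is_leaf(enum dir_fmt f)   { return f == FMT_LEAF; }

static const char *dispatch(enum dir_fmt f)
{
        if (is_local(f))
                return "xfs_dir2_sf_*";
        if (is_block(f))
                return "xfs_dir2_block_*";
        if (is_leaf(f))
                return "xfs_dir2_leaf_*";
        return "xfs_dir2_node_*";
}

int main(void)
{
        for (int f = FMT_SF; f <= FMT_NODE; f++)
                printf("format %d -> %s\n", f, dispatch((enum dir_fmt)f));
        return 0;
}
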
-
-/*
- * Remove an entry from a directory.
- */
-int
-xfs_dir_removename(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *dp,
-       struct xfs_name *name,
-       xfs_ino_t       ino,
-       xfs_fsblock_t   *first,         /* bmap's firstblock */
-       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
-       xfs_extlen_t    total)          /* bmap's total block count */
-{
-       struct xfs_da_args *args;
-       int             rval;
-       int             v;              /* type-checking value */
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-       XFS_STATS_INC(xs_dir_remove);
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->inumber = ino;
-       args->dp = dp;
-       args->firstblock = first;
-       args->flist = flist;
-       args->total = total;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_removename(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_removename(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_removename(args);
-       else
-               rval = xfs_dir2_node_removename(args);
-out_free:
-       kmem_free(args);
-       return rval;
-}
-
-/*
- * Replace the inode number of a directory entry.
- */
-int
-xfs_dir_replace(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *dp,
-       struct xfs_name *name,          /* name of entry to replace */
-       xfs_ino_t       inum,           /* new inode number */
-       xfs_fsblock_t   *first,         /* bmap's firstblock */
-       xfs_bmap_free_t *flist,         /* bmap's freeblock list */
-       xfs_extlen_t    total)          /* bmap's total block count */
-{
-       struct xfs_da_args *args;
-       int             rval;
-       int             v;              /* type-checking value */
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-
-       rval = xfs_dir_ino_validate(tp->t_mountp, inum);
-       if (rval)
-               return rval;
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->inumber = inum;
-       args->dp = dp;
-       args->firstblock = first;
-       args->flist = flist;
-       args->total = total;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_replace(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_replace(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_replace(args);
-       else
-               rval = xfs_dir2_node_replace(args);
-out_free:
-       kmem_free(args);
-       return rval;
-}
-
-/*
- * See if this entry can be added to the directory without allocating space.
- * The check is only needed when the caller has no space reservation
- * (resblks == 0); with a reservation the add can always proceed.
- */
-int
-xfs_dir_canenter(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *dp,
-       struct xfs_name *name,          /* name of entry to add */
-       uint            resblks)
-{
-       struct xfs_da_args *args;
-       int             rval;
-       int             v;              /* type-checking value */
-
-       if (resblks)
-               return 0;
-
-       ASSERT(S_ISDIR(dp->i_d.di_mode));
-
-       args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
-       if (!args)
-               return ENOMEM;
-
-       args->geo = dp->i_mount->m_dir_geo;
-       args->name = name->name;
-       args->namelen = name->len;
-       args->filetype = name->type;
-       args->hashval = dp->i_mount->m_dirnameops->hashname(name);
-       args->dp = dp;
-       args->whichfork = XFS_DATA_FORK;
-       args->trans = tp;
-       args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
-                                                       XFS_DA_OP_OKNOENT;
-
-       if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
-               rval = xfs_dir2_sf_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isblock(args, &v);
-       if (rval)
-               goto out_free;
-       if (v) {
-               rval = xfs_dir2_block_addname(args);
-               goto out_free;
-       }
-
-       rval = xfs_dir2_isleaf(args, &v);
-       if (rval)
-               goto out_free;
-       if (v)
-               rval = xfs_dir2_leaf_addname(args);
-       else
-               rval = xfs_dir2_node_addname(args);
-out_free:
-       kmem_free(args);
-       return rval;
-}
-
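
The directory entry operations above (lookup, removename, replace, canenter) all follow the same shape: populate a struct xfs_da_args, then dispatch on the directory's current storage format (shortform, single block, leaf, or node). A minimal standalone sketch of that dispatch pattern, assuming hypothetical names (dir_format, dispatch_op) and stubbed-out per-format handlers:

#include <stdio.h>

enum dir_format { FMT_LOCAL, FMT_BLOCK, FMT_LEAF, FMT_NODE };

static int op_local(void) { return 0; }	/* stands in for xfs_dir2_sf_*    */
static int op_block(void) { return 0; }	/* stands in for xfs_dir2_block_* */
static int op_leaf(void)  { return 0; }	/* stands in for xfs_dir2_leaf_*  */
static int op_node(void)  { return 0; }	/* stands in for xfs_dir2_node_*  */

/* Pick the implementation based on how the directory is currently stored. */
static int dispatch_op(enum dir_format fmt)
{
	switch (fmt) {
	case FMT_LOCAL:	return op_local();
	case FMT_BLOCK:	return op_block();
	case FMT_LEAF:	return op_leaf();
	default:	return op_node();
	}
}

int main(void)
{
	printf("rval = %d\n", dispatch_op(FMT_BLOCK));
	return 0;
}
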
-/*
- * Utility routines.
- */
-
-/*
- * Add a block to the directory.
- *
- * This routine is for data and free blocks, not leaf/node blocks which are
- * handled by xfs_da_grow_inode.
- */
-int
-xfs_dir2_grow_inode(
-       struct xfs_da_args      *args,
-       int                     space,  /* v2 dir's space XFS_DIR2_xxx_SPACE */
-       xfs_dir2_db_t           *dbp)   /* out: block number added */
-{
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_mount        *mp = dp->i_mount;
-       xfs_fileoff_t           bno;    /* directory offset of new block */
-       int                     count;  /* count of filesystem blocks */
-       int                     error;
-
-       trace_xfs_dir2_grow_inode(args, space);
-
-       /*
-        * Set lowest possible block in the space requested.
-        */
-       bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
-       count = args->geo->fsbcount;
-
-       error = xfs_da_grow_inode_int(args, &bno, count);
-       if (error)
-               return error;
-
-       *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);
-
-       /*
-        * Update file's size if this is the data space and it grew.
-        */
-       if (space == XFS_DIR2_DATA_SPACE) {
-               xfs_fsize_t     size;           /* directory file (data) size */
-
-               size = XFS_FSB_TO_B(mp, bno + count);
-               if (size > dp->i_d.di_size) {
-                       dp->i_d.di_size = size;
-                       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
-               }
-       }
-       return 0;
-}
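
A rough sketch of the size update at the end of xfs_dir2_grow_inode() above: when data space grows, the inode size is bumped to the byte offset just past the new block if that is larger than the current size. The 4k block size and the grow_size() helper below are assumptions for illustration, not kernel API.

#include <stdio.h>

#define BLOCKLOG 12	/* assume 4k filesystem blocks for the example */

static unsigned long long grow_size(unsigned long long cur_size,
				    unsigned long long bno, int count)
{
	/* Byte offset just past the newly added block(s). */
	unsigned long long new_size = (bno + count) << BLOCKLOG;

	return new_size > cur_size ? new_size : cur_size;
}

int main(void)
{
	/* A block added at directory offset 1 grows a 4k directory to 8k. */
	printf("size = %llu\n", grow_size(4096, 1, 1));
	return 0;
}
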
-
-/*
- * See if the directory is a single-block form directory.
- */
-int
-xfs_dir2_isblock(
-       struct xfs_da_args      *args,
-       int                     *vp)    /* out: 1 is block, 0 is not block */
-{
-       xfs_fileoff_t           last;   /* last file offset */
-       int                     rval;
-
-       if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
-               return rval;
-       rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
-       ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);
-       *vp = rval;
-       return 0;
-}
-
-/*
- * See if the directory is a single-leaf form directory.
- */
-int
-xfs_dir2_isleaf(
-       struct xfs_da_args      *args,
-       int                     *vp)    /* out: 1 is leaf, 0 is not leaf */
-{
-       xfs_fileoff_t           last;   /* last file offset */
-       int                     rval;
-
-       if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
-               return rval;
-       *vp = last == args->geo->leafblk + args->geo->fsbcount;
-       return 0;
-}
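
Both format checks above reduce to comparing the directory's last mapped offset against the geometry: exactly one directory block means block form, data plus exactly one leaf block at the leaf offset means leaf form. A self-contained sketch, with struct dir_geom and the two helpers as illustrative stand-ins (the geometry values are made up):

#include <stdbool.h>
#include <stdio.h>

struct dir_geom {
	unsigned long long leafblk;	/* first leaf-space block (assumed) */
	unsigned int       fsbcount;	/* fs blocks per directory block */
};

/* Block form: exactly one directory block of data exists. */
static bool is_block_form(unsigned long long last_fsb, const struct dir_geom *g)
{
	return last_fsb == g->fsbcount;
}

/* Leaf form: data blocks plus exactly one leaf block at leafblk. */
static bool is_leaf_form(unsigned long long last_fsb, const struct dir_geom *g)
{
	return last_fsb == g->leafblk + g->fsbcount;
}

int main(void)
{
	struct dir_geom g = { .leafblk = 8388608ULL, .fsbcount = 1 };

	printf("block? %d leaf? %d\n",
	       is_block_form(1, &g), is_leaf_form(g.leafblk + 1, &g));
	return 0;
}
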
-
-/*
- * Remove the given block from the directory.
- * This routine is used for data and free blocks, leaf/node are done
- * by xfs_da_shrink_inode.
- */
-int
-xfs_dir2_shrink_inode(
-       xfs_da_args_t   *args,
-       xfs_dir2_db_t   db,
-       struct xfs_buf  *bp)
-{
-       xfs_fileoff_t   bno;            /* directory file offset */
-       xfs_dablk_t     da;             /* directory file offset */
-       int             done;           /* bunmap is finished */
-       xfs_inode_t     *dp;
-       int             error;
-       xfs_mount_t     *mp;
-       xfs_trans_t     *tp;
-
-       trace_xfs_dir2_shrink_inode(args, db);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       da = xfs_dir2_db_to_da(args->geo, db);
-       /*
-        * Unmap the fsblock(s).
-        */
-       if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
-                       XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
-                       &done))) {
-               /*
-                * ENOSPC actually can happen if we're in a removename with
-                * no space reservation, and the resulting block removal
-                * would cause a bmap btree split or conversion from extents
-                * to btree.  This can only happen for un-fragmented
-                * directory blocks, since you need to be punching out
-                * the middle of an extent.
-                * In this case we need to leave the block in the file,
-                * and not binval it.
-                * So the block has to be in a consistent empty state
-                * and appropriately logged.
-                * We don't free up the buffer, the caller can tell it
-                * hasn't happened since it got an error back.
-                */
-               return error;
-       }
-       ASSERT(done);
-       /*
-        * Invalidate the buffer from the transaction.
-        */
-       xfs_trans_binval(tp, bp);
-       /*
-        * If it's not a data block, we're done.
-        */
-       if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))
-               return 0;
-       /*
-        * If the block isn't the last one in the directory, we're done.
-        */
-       if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))
-               return 0;
-       bno = da;
-       if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) {
-               /*
-                * This can't really happen unless there's kernel corruption.
-                */
-               return error;
-       }
-       if (db == args->geo->datablk)
-               ASSERT(bno == 0);
-       else
-               ASSERT(bno > 0);
-       /*
-        * Set the size to the new last block.
-        */
-       dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
-       xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-       return 0;
-}
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
deleted file mode 100644 (file)
index ab0bffc..0000000
+++ /dev/null
@@ -1,1265 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_dinode.h"
-
-/*
- * Local function prototypes.
- */
-static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp,
-                                   int first, int last);
-static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp);
-static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp,
-                                    int *entno);
-static int xfs_dir2_block_sort(const void *a, const void *b);
-
-static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot;
-
-/*
- * One-time startup routine called from xfs_init().
- */
-void
-xfs_dir_startup(void)
-{
-       xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1);
-       xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
-}
-
-static bool
-xfs_dir3_block_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
-                       return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
-                       return false;
-       }
-       if (__xfs_dir3_data_check(NULL, bp))
-               return false;
-       return true;
-}
-
-static void
-xfs_dir3_block_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_dir3_block_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_dir3_block_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_dir3_block_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
-       .verify_read = xfs_dir3_block_read_verify,
-       .verify_write = xfs_dir3_block_write_verify,
-};
-
-int
-xfs_dir3_block_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_mount        *mp = dp->i_mount;
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
-                               XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
-       if (!err && tp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
-       return err;
-}
-
-static void
-xfs_dir3_block_init(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_buf          *bp,
-       struct xfs_inode        *dp)
-{
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       bp->b_ops = &xfs_dir3_block_buf_ops;
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               memset(hdr3, 0, sizeof(*hdr3));
-               hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-               hdr3->blkno = cpu_to_be64(bp->b_bn);
-               hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
-               return;
-       }
-       hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
-}
-
-static void
-xfs_dir2_block_need_space(
-       struct xfs_inode                *dp,
-       struct xfs_dir2_data_hdr        *hdr,
-       struct xfs_dir2_block_tail      *btp,
-       struct xfs_dir2_leaf_entry      *blp,
-       __be16                          **tagpp,
-       struct xfs_dir2_data_unused     **dupp,
-       struct xfs_dir2_data_unused     **enddupp,
-       int                             *compact,
-       int                             len)
-{
-       struct xfs_dir2_data_free       *bf;
-       __be16                          *tagp = NULL;
-       struct xfs_dir2_data_unused     *dup = NULL;
-       struct xfs_dir2_data_unused     *enddup = NULL;
-
-       *compact = 0;
-       bf = dp->d_ops->data_bestfree_p(hdr);
-
-       /*
-        * If there are stale entries we'll use one for the leaf.
-        */
-       if (btp->stale) {
-               if (be16_to_cpu(bf[0].length) >= len) {
-                       /*
-                        * The biggest entry is big enough to avoid compaction.
-                        */
-                       dup = (xfs_dir2_data_unused_t *)
-                             ((char *)hdr + be16_to_cpu(bf[0].offset));
-                       goto out;
-               }
-
-               /*
-                * Will need to compact to make this work.
-                * Tag just before the first leaf entry.
-                */
-               *compact = 1;
-               tagp = (__be16 *)blp - 1;
-
-               /* Data object just before the first leaf entry.  */
-               dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
-
-               /*
-                * If it's not free then the data will go where the
-                * leaf data starts now, if it works at all.
-                */
-               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
-                           (uint)sizeof(*blp) < len)
-                               dup = NULL;
-               } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
-                       dup = NULL;
-               else
-                       dup = (xfs_dir2_data_unused_t *)blp;
-               goto out;
-       }
-
-       /*
-        * No stale entries, so just use free space.
-        * Tag just before the first leaf entry.
-        */
-       tagp = (__be16 *)blp - 1;
-
-       /* Data object just before the first leaf entry.  */
-       enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
-
-       /*
-        * If it's not free then can't do this add without cleaning up:
-        * the space before the first leaf entry needs to be free so it
-        * can be expanded to hold the pointer to the new entry.
-        */
-       if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-               /*
-                * Check out the biggest freespace and see if it's the same one.
-                */
-               dup = (xfs_dir2_data_unused_t *)
-                     ((char *)hdr + be16_to_cpu(bf[0].offset));
-               if (dup != enddup) {
-                       /*
-                        * Not the same free entry, just check its length.
-                        */
-                       if (be16_to_cpu(dup->length) < len)
-                               dup = NULL;
-                       goto out;
-               }
-
-               /*
-                * It is the biggest freespace, can it hold the leaf too?
-                */
-               if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
-                       /*
-                        * No, so fall back to the second-largest entry if that works.
-                        */
-                       if (be16_to_cpu(bf[1].length) >= len)
-                               dup = (xfs_dir2_data_unused_t *)
-                                     ((char *)hdr + be16_to_cpu(bf[1].offset));
-                       else
-                               dup = NULL;
-               }
-       }
-out:
-       *tagpp = tagp;
-       *dupp = dup;
-       *enddupp = enddup;
-}
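
The pointer arithmetic used throughout this file via xfs_dir2_block_tail_p() and xfs_dir2_block_leaf_p() puts the block tail in the last bytes of the single directory block and the leaf entry array immediately before it. A standalone sketch of that layout, using simplified stand-in structs and an assumed 4k block:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct block_tail { uint32_t count; uint32_t stale; };		/* stand-in tail */
struct leaf_ent   { uint32_t hashval; uint32_t address; };	/* stand-in leaf */

/* Tail sits in the last bytes of the block. */
static struct block_tail *tail_p(void *blk, size_t blksize)
{
	return (struct block_tail *)((char *)blk + blksize) - 1;
}

/* Leaf entry array ends where the tail begins. */
static struct leaf_ent *leaf_p(struct block_tail *btp)
{
	return (struct leaf_ent *)btp - btp->count;
}

int main(void)
{
	size_t blksize = 4096;
	char *blk = calloc(1, blksize);
	struct block_tail *btp;

	if (!blk)
		return 1;
	btp = tail_p(blk, blksize);
	btp->count = 2;			/* e.g. just "." and ".." */
	printf("leaf array starts %td bytes into the block\n",
	       (char *)leaf_p(btp) - blk);
	free(blk);
	return 0;
}
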
-
-/*
- * Compact the leaf entries.
- * Leave the highest-numbered stale entry stale.
- * XXX should be the one closest to mid but mid is not yet computed.
- */
-static void
-xfs_dir2_block_compact(
-       struct xfs_da_args              *args,
-       struct xfs_buf                  *bp,
-       struct xfs_dir2_data_hdr        *hdr,
-       struct xfs_dir2_block_tail      *btp,
-       struct xfs_dir2_leaf_entry      *blp,
-       int                             *needlog,
-       int                             *lfloghigh,
-       int                             *lfloglow)
-{
-       int                     fromidx;        /* source leaf index */
-       int                     toidx;          /* target leaf index */
-       int                     needscan = 0;
-       int                     highstale;      /* high stale index */
-
-       fromidx = toidx = be32_to_cpu(btp->count) - 1;
-       highstale = *lfloghigh = -1;
-       for (; fromidx >= 0; fromidx--) {
-               if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
-                       if (highstale == -1)
-                               highstale = toidx;
-                       else {
-                               if (*lfloghigh == -1)
-                                       *lfloghigh = toidx;
-                               continue;
-                       }
-               }
-               if (fromidx < toidx)
-                       blp[toidx] = blp[fromidx];
-               toidx--;
-       }
-       *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
-       *lfloghigh -= be32_to_cpu(btp->stale) - 1;
-       be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
-       xfs_dir2_data_make_free(args, bp,
-               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
-               (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
-               needlog, &needscan);
-       btp->stale = cpu_to_be32(1);
-       /*
-        * If we now need to rebuild the bestfree map, do so.
-        * This needs to happen before the next call to use_free.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(args->dp, hdr, needlog);
-}
-
-/*
- * Add an entry to a block directory.
- */
-int                                            /* error */
-xfs_dir2_block_addname(
-       xfs_da_args_t           *args)          /* directory op arguments */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       struct xfs_buf          *bp;            /* buffer for block */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       int                     compact;        /* need to compact leaf ents */
-       xfs_dir2_data_entry_t   *dep;           /* block data entry */
-       xfs_inode_t             *dp;            /* directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* block unused entry */
-       int                     error;          /* error return value */
-       xfs_dir2_data_unused_t  *enddup=NULL;   /* unused at end of data */
-       xfs_dahash_t            hash;           /* hash value of found entry */
-       int                     high;           /* high index for binary srch */
-       int                     highstale;      /* high stale index */
-       int                     lfloghigh=0;    /* last final leaf to log */
-       int                     lfloglow=0;     /* first final leaf to log */
-       int                     len;            /* length of the new entry */
-       int                     low;            /* low index for binary srch */
-       int                     lowstale;       /* low stale index */
-       int                     mid=0;          /* midpoint for binary srch */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log header */
-       int                     needscan;       /* need to rescan freespace */
-       __be16                  *tagp;          /* pointer to tag value */
-       xfs_trans_t             *tp;            /* transaction structure */
-
-       trace_xfs_dir2_block_addname(args);
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-
-       /* Read the (one and only) directory block into bp. */
-       error = xfs_dir3_block_read(tp, dp, &bp);
-       if (error)
-               return error;
-
-       len = dp->d_ops->data_entsize(args->namelen);
-
-       /*
-        * Set up pointers to parts of the block.
-        */
-       hdr = bp->b_addr;
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-
-       /*
-        * Find out if we can reuse stale entries or whether we need extra
-        * space for entry and new leaf.
-        */
-       xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup,
-                                 &enddup, &compact, len);
-
-       /*
-        * Done everything we need for a space check now.
-        */
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-               xfs_trans_brelse(tp, bp);
-               if (!dup)
-                       return ENOSPC;
-               return 0;
-       }
-
-       /*
-        * If we don't have space for the new entry & leaf ...
-        */
-       if (!dup) {
-               /* Don't have a space reservation: return no-space.  */
-               if (args->total == 0)
-                       return ENOSPC;
-               /*
-                * Convert to the next larger format.
-                * Then add the new entry in that format.
-                */
-               error = xfs_dir2_block_to_leaf(args, bp);
-               if (error)
-                       return error;
-               return xfs_dir2_leaf_addname(args);
-       }
-
-       needlog = needscan = 0;
-
-       /*
-        * If need to compact the leaf entries, do it now.
-        */
-       if (compact) {
-               xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,
-                                     &lfloghigh, &lfloglow);
-               /* recalculate blp post-compaction */
-               blp = xfs_dir2_block_leaf_p(btp);
-       } else if (btp->stale) {
-               /*
-                * Set leaf logging boundaries to impossible state.
-                * For the no-stale case they're set explicitly.
-                */
-               lfloglow = be32_to_cpu(btp->count);
-               lfloghigh = -1;
-       }
-
-       /*
-        * Find the last slot with a hash value lower than ours, or -1 if none.
-        */
-       for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
-               mid = (low + high) >> 1;
-               if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
-                       break;
-               if (hash < args->hashval)
-                       low = mid + 1;
-               else
-                       high = mid - 1;
-       }
-       while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) {
-               mid--;
-       }
-       /*
-        * No stale entries, will use enddup space to hold new leaf.
-        */
-       if (!btp->stale) {
-               /*
-                * Mark the space needed for the new leaf entry, now in use.
-                */
-               xfs_dir2_data_use_free(args, bp, enddup,
-                       (xfs_dir2_data_aoff_t)
-                       ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
-                        sizeof(*blp)),
-                       (xfs_dir2_data_aoff_t)sizeof(*blp),
-                       &needlog, &needscan);
-               /*
-                * Update the tail (entry count).
-                */
-               be32_add_cpu(&btp->count, 1);
-               /*
-                * If we now need to rebuild the bestfree map, do so.
-                * This needs to happen before the next call to use_free.
-                */
-               if (needscan) {
-                       xfs_dir2_data_freescan(dp, hdr, &needlog);
-                       needscan = 0;
-               }
-               /*
-                * Adjust pointer to the first leaf entry, we're about to move
-                * the table up one to open up space for the new leaf entry.
-                * Then adjust our index to match.
-                */
-               blp--;
-               mid++;
-               if (mid)
-                       memmove(blp, &blp[1], mid * sizeof(*blp));
-               lfloglow = 0;
-               lfloghigh = mid;
-       }
-       /*
-        * Use a stale leaf for our new entry.
-        */
-       else {
-               for (lowstale = mid;
-                    lowstale >= 0 &&
-                       blp[lowstale].address !=
-                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-                    lowstale--)
-                       continue;
-               for (highstale = mid + 1;
-                    highstale < be32_to_cpu(btp->count) &&
-                       blp[highstale].address !=
-                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
-                       (lowstale < 0 || mid - lowstale > highstale - mid);
-                    highstale++)
-                       continue;
-               /*
-                * Move entries toward the low-numbered stale entry.
-                */
-               if (lowstale >= 0 &&
-                   (highstale == be32_to_cpu(btp->count) ||
-                    mid - lowstale <= highstale - mid)) {
-                       if (mid - lowstale)
-                               memmove(&blp[lowstale], &blp[lowstale + 1],
-                                       (mid - lowstale) * sizeof(*blp));
-                       lfloglow = MIN(lowstale, lfloglow);
-                       lfloghigh = MAX(mid, lfloghigh);
-               }
-               /*
-                * Move entries toward the high-numbered stale entry.
-                */
-               else {
-                       ASSERT(highstale < be32_to_cpu(btp->count));
-                       mid++;
-                       if (highstale - mid)
-                               memmove(&blp[mid + 1], &blp[mid],
-                                       (highstale - mid) * sizeof(*blp));
-                       lfloglow = MIN(mid, lfloglow);
-                       lfloghigh = MAX(highstale, lfloghigh);
-               }
-               be32_add_cpu(&btp->stale, -1);
-       }
-       /*
-        * Point to the new data entry.
-        */
-       dep = (xfs_dir2_data_entry_t *)dup;
-       /*
-        * Fill in the leaf entry.
-        */
-       blp[mid].hashval = cpu_to_be32(args->hashval);
-       blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
-                               (char *)dep - (char *)hdr));
-       xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
-       /*
-        * Mark space for the data entry used.
-        */
-       xfs_dir2_data_use_free(args, bp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
-               (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
-       /*
-        * Create the new data entry.
-        */
-       dep->inumber = cpu_to_be64(args->inumber);
-       dep->namelen = args->namelen;
-       memcpy(dep->name, args->name, args->namelen);
-       dp->d_ops->data_put_ftype(dep, args->filetype);
-       tagp = dp->d_ops->data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-       /*
-        * Clean up the bestfree array and log the header, tail, and entry.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       if (needlog)
-               xfs_dir2_data_log_header(args, bp);
-       xfs_dir2_block_log_tail(tp, bp);
-       xfs_dir2_data_log_entry(args, bp, dep);
-       xfs_dir3_data_check(dp, bp);
-       return 0;
-}
-
-/*
- * Log leaf entries from the block.
- */
-static void
-xfs_dir2_block_log_leaf(
-       xfs_trans_t             *tp,            /* transaction structure */
-       struct xfs_buf          *bp,            /* block buffer */
-       int                     first,          /* index of first logged leaf */
-       int                     last)           /* index of last logged leaf */
-{
-       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
-       xfs_dir2_leaf_entry_t   *blp;
-       xfs_dir2_block_tail_t   *btp;
-
-       btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
-               (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
-}
-
-/*
- * Log the block tail.
- */
-static void
-xfs_dir2_block_log_tail(
-       xfs_trans_t             *tp,            /* transaction structure */
-       struct xfs_buf          *bp)            /* block buffer */
-{
-       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
-       xfs_dir2_block_tail_t   *btp;
-
-       btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);
-       xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
-               (uint)((char *)(btp + 1) - (char *)hdr - 1));
-}
-
-/*
- * Look up an entry in the block.  This is the external routine,
- * xfs_dir2_block_lookup_int does the real work.
- */
-int                                            /* error */
-xfs_dir2_block_lookup(
-       xfs_da_args_t           *args)          /* dir lookup arguments */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_dir2_data_entry_t   *dep;           /* block data entry */
-       xfs_inode_t             *dp;            /* incore inode */
-       int                     ent;            /* entry index */
-       int                     error;          /* error return value */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-
-       trace_xfs_dir2_block_lookup(args);
-
-       /*
-        * Get the buffer, look up the entry.
-        * If not found (ENOENT) then return, have no buffer.
-        */
-       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent)))
-               return error;
-       dp = args->dp;
-       mp = dp->i_mount;
-       hdr = bp->b_addr;
-       xfs_dir3_data_check(dp, bp);
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       /*
-        * Get the offset from the leaf entry, to point to the data.
-        */
-       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-                       xfs_dir2_dataptr_to_off(args->geo,
-                                               be32_to_cpu(blp[ent].address)));
-       /*
-        * Fill in inode number, CI name if appropriate, release the block.
-        */
-       args->inumber = be64_to_cpu(dep->inumber);
-       args->filetype = dp->d_ops->data_get_ftype(dep);
-       error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-       xfs_trans_brelse(args->trans, bp);
-       return error;
-}
-
-/*
- * Internal block lookup routine.
- */
-static int                                     /* error */
-xfs_dir2_block_lookup_int(
-       xfs_da_args_t           *args,          /* dir lookup arguments */
-       struct xfs_buf          **bpp,          /* returned block buffer */
-       int                     *entno)         /* returned entry number */
-{
-       xfs_dir2_dataptr_t      addr;           /* data entry address */
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_dir2_data_entry_t   *dep;           /* block data entry */
-       xfs_inode_t             *dp;            /* incore inode */
-       int                     error;          /* error return value */
-       xfs_dahash_t            hash;           /* found hash value */
-       int                     high;           /* binary search high index */
-       int                     low;            /* binary search low index */
-       int                     mid;            /* binary search current idx */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       enum xfs_dacmp          cmp;            /* comparison result */
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-
-       error = xfs_dir3_block_read(tp, dp, &bp);
-       if (error)
-               return error;
-
-       hdr = bp->b_addr;
-       xfs_dir3_data_check(dp, bp);
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       /*
-        * Loop doing a binary search for our hash value.
-        * Find our entry, ENOENT if it's not there.
-        */
-       for (low = 0, high = be32_to_cpu(btp->count) - 1; ; ) {
-               ASSERT(low <= high);
-               mid = (low + high) >> 1;
-               if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
-                       break;
-               if (hash < args->hashval)
-                       low = mid + 1;
-               else
-                       high = mid - 1;
-               if (low > high) {
-                       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-                       xfs_trans_brelse(tp, bp);
-                       return ENOENT;
-               }
-       }
-       /*
-        * Back up to the first one with the right hash value.
-        */
-       while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) {
-               mid--;
-       }
-       /*
-        * Now loop forward through all the entries with the
-        * right hash value looking for our name.
-        */
-       do {
-               if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               /*
-                * Get pointer to the entry from the leaf.
-                */
-               dep = (xfs_dir2_data_entry_t *)
-                       ((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));
-               /*
-                * Compare name and if it's an exact match, return the index
-                * and buffer. If it's the first case-insensitive match, store
-                * the index and buffer and continue looking for an exact match.
-                */
-               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
-               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                       args->cmpresult = cmp;
-                       *bpp = bp;
-                       *entno = mid;
-                       if (cmp == XFS_CMP_EXACT)
-                               return 0;
-               }
-       } while (++mid < be32_to_cpu(btp->count) &&
-                       be32_to_cpu(blp[mid].hashval) == hash);
-
-       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-       /*
-        * Here, we can only be doing a lookup (not a rename or replace).
-        * If a case-insensitive match was found earlier, return success.
-        */
-       if (args->cmpresult == XFS_CMP_CASE)
-               return 0;
-       /*
-        * No match, release the buffer and return ENOENT.
-        */
-       xfs_trans_brelse(tp, bp);
-       return ENOENT;
-}
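
The lookup above follows a common pattern for hash-ordered leaf entries: binary search for the hash value, back up to the first entry carrying that hash, then walk forward through the duplicates comparing names. A minimal standalone sketch of the search step over plain integers (find_first() is illustrative, not kernel code):

#include <stdio.h>

static int find_first(const unsigned *hashes, int count, unsigned want)
{
	int low = 0, high = count - 1, mid = -1;

	while (low <= high) {
		mid = (low + high) >> 1;
		if (hashes[mid] == want)
			break;
		if (hashes[mid] < want)
			low = mid + 1;
		else
			high = mid - 1;
	}
	if (low > high)
		return -1;			/* hash not present */
	while (mid > 0 && hashes[mid - 1] == want)
		mid--;				/* back up to first duplicate */
	return mid;
}

int main(void)
{
	unsigned h[] = { 3, 7, 7, 7, 9, 12 };

	printf("first 7 at index %d\n", find_first(h, 6, 7));
	return 0;
}
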
-
-/*
- * Remove an entry from a block format directory.
- * If that makes the block small enough to fit in shortform, transform it.
- */
-int                                            /* error */
-xfs_dir2_block_removename(
-       xfs_da_args_t           *args)          /* directory operation args */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf pointer */
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_dir2_data_entry_t   *dep;           /* block data entry */
-       xfs_inode_t             *dp;            /* incore inode */
-       int                     ent;            /* block leaf entry index */
-       int                     error;          /* error return value */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log block header */
-       int                     needscan;       /* need to fixup bestfree */
-       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
-       int                     size;           /* shortform size */
-       xfs_trans_t             *tp;            /* transaction pointer */
-
-       trace_xfs_dir2_block_removename(args);
-
-       /*
-        * Look up the entry in the block.  Gets the buffer and entry index.
-        * It will always be there, the vnodeops level does a lookup first.
-        */
-       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) {
-               return error;
-       }
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       hdr = bp->b_addr;
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       /*
-        * Point to the data entry using the leaf entry.
-        */
-       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-                       xfs_dir2_dataptr_to_off(args->geo,
-                                               be32_to_cpu(blp[ent].address)));
-       /*
-        * Mark the data entry's space free.
-        */
-       needlog = needscan = 0;
-       xfs_dir2_data_make_free(args, bp,
-               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
-               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
-       /*
-        * Fix up the block tail.
-        */
-       be32_add_cpu(&btp->stale, 1);
-       xfs_dir2_block_log_tail(tp, bp);
-       /*
-        * Remove the leaf entry by marking it stale.
-        */
-       blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-       xfs_dir2_block_log_leaf(tp, bp, ent, ent);
-       /*
-        * Fix up bestfree, log the header if necessary.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       if (needlog)
-               xfs_dir2_data_log_header(args, bp);
-       xfs_dir3_data_check(dp, bp);
-       /*
-        * See if the size as a shortform is good enough.
-        */
-       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-       if (size > XFS_IFORK_DSIZE(dp))
-               return 0;
-
-       /*
-        * If it works, do the conversion.
-        */
-       return xfs_dir2_block_to_sf(args, bp, size, &sfh);
-}
-
-/*
- * Replace an entry in a V2 block directory.
- * Change the inode number to the new value.
- */
-int                                            /* error */
-xfs_dir2_block_replace(
-       xfs_da_args_t           *args)          /* directory operation args */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_dir2_data_entry_t   *dep;           /* block data entry */
-       xfs_inode_t             *dp;            /* incore inode */
-       int                     ent;            /* leaf entry index */
-       int                     error;          /* error return value */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-
-       trace_xfs_dir2_block_replace(args);
-
-       /*
-        * Lookup the entry in the directory.  Get buffer and entry index.
-        * This will always succeed since the caller has already done a lookup.
-        */
-       if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) {
-               return error;
-       }
-       dp = args->dp;
-       mp = dp->i_mount;
-       hdr = bp->b_addr;
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       /*
-        * Point to the data entry we need to change.
-        */
-       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-                       xfs_dir2_dataptr_to_off(args->geo,
-                                               be32_to_cpu(blp[ent].address)));
-       ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
-       /*
-        * Change the inode number to the new value.
-        */
-       dep->inumber = cpu_to_be64(args->inumber);
-       dp->d_ops->data_put_ftype(dep, args->filetype);
-       xfs_dir2_data_log_entry(args, bp, dep);
-       xfs_dir3_data_check(dp, bp);
-       return 0;
-}
-
-/*
- * Qsort comparison routine for the block leaf entries.
- */
-static int                                     /* sort order */
-xfs_dir2_block_sort(
-       const void                      *a,     /* first leaf entry */
-       const void                      *b)     /* second leaf entry */
-{
-       const xfs_dir2_leaf_entry_t     *la;    /* first leaf entry */
-       const xfs_dir2_leaf_entry_t     *lb;    /* second leaf entry */
-
-       la = a;
-       lb = b;
-       return be32_to_cpu(la->hashval) < be32_to_cpu(lb->hashval) ? -1 :
-               (be32_to_cpu(la->hashval) > be32_to_cpu(lb->hashval) ? 1 : 0);
-}
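
For reference, the same comparator shape in plain userspace C, handed to the standard qsort(); struct leaf_ent is a simplified stand-in for the on-disk leaf entry, and this only sketches the sort step that xfs_dir2_sf_to_block() below performs via xfs_sort():

#include <stdio.h>
#include <stdlib.h>

struct leaf_ent {
	unsigned hashval;
	unsigned address;
};

/* Order leaf entries by ascending hash value, like xfs_dir2_block_sort(). */
static int ent_cmp(const void *a, const void *b)
{
	const struct leaf_ent *la = a, *lb = b;

	if (la->hashval < lb->hashval)
		return -1;
	return la->hashval > lb->hashval ? 1 : 0;
}

int main(void)
{
	struct leaf_ent ents[] = { { 9, 0 }, { 3, 1 }, { 7, 2 } };

	qsort(ents, 3, sizeof(ents[0]), ent_cmp);
	printf("%u %u %u\n", ents[0].hashval, ents[1].hashval, ents[2].hashval);
	return 0;
}
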
-
-/*
- * Convert a V2 leaf directory to a V2 block directory if possible.
- */
-int                                            /* error */
-xfs_dir2_leaf_to_block(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *lbp,           /* leaf buffer */
-       struct xfs_buf          *dbp)           /* data buffer */
-{
-       __be16                  *bestsp;        /* leaf bests table */
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
-       int                     error;          /* error return value */
-       int                     from;           /* leaf from index */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-       xfs_mount_t             *mp;            /* file system mount point */
-       int                     needlog;        /* need to log data header */
-       int                     needscan;       /* need to scan for bestfree */
-       xfs_dir2_sf_hdr_t       sfh;            /* shortform header */
-       int                     size;           /* bytes used */
-       __be16                  *tagp;          /* end of entry (tag) */
-       int                     to;             /* block/leaf to index */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       trace_xfs_dir2_leaf_to_block(args);
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       leaf = lbp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-
-       ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
-              leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
-       /*
-        * If there are data blocks other than the first one, take this
-        * opportunity to remove trailing empty data blocks that may have
-        * been left behind during no-space-reservation operations.
-        * These will show up in the leaf bests table.
-        */
-       while (dp->i_d.di_size > args->geo->blksize) {
-               int hdrsz;
-
-               hdrsz = dp->d_ops->data_entry_offset;
-               bestsp = xfs_dir2_leaf_bests_p(ltp);
-               if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
-                                           args->geo->blksize - hdrsz) {
-                       if ((error =
-                           xfs_dir2_leaf_trim_data(args, lbp,
-                                   (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
-                               return error;
-               } else
-                       return 0;
-       }
-       /*
-        * Read the data block if we don't already have it, give up if it fails.
-        */
-       if (!dbp) {
-               error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);
-               if (error)
-                       return error;
-       }
-       hdr = dbp->b_addr;
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
-
-       /*
-        * Size of the "leaf" area in the block.
-        */
-       size = (uint)sizeof(xfs_dir2_block_tail_t) +
-              (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale);
-       /*
-        * Look at the last data entry.
-        */
-       tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1;
-       dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
-       /*
-        * If it's not free or is too short we can't do it.
-        */
-       if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
-           be16_to_cpu(dup->length) < size)
-               return 0;
-
-       /*
-        * Start converting it to block form.
-        */
-       xfs_dir3_block_init(mp, tp, dbp, dp);
-
-       needlog = 1;
-       needscan = 0;
-       /*
-        * Use up the space at the end of the block (blp/btp).
-        */
-       xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
-               &needlog, &needscan);
-       /*
-        * Initialize the block tail.
-        */
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
-       btp->stale = 0;
-       xfs_dir2_block_log_tail(tp, dbp);
-       /*
-        * Initialize the block leaf area.  We compact out stale entries.
-        */
-       lep = xfs_dir2_block_leaf_p(btp);
-       for (from = to = 0; from < leafhdr.count; from++) {
-               if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                       continue;
-               lep[to++] = ents[from];
-       }
-       ASSERT(to == be32_to_cpu(btp->count));
-       xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
-       /*
-        * Scan the bestfree if we need it and log the data block header.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       /*
-        * Pitch the old leaf block.
-        */
-       error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp);
-       if (error)
-               return error;
-
-       /*
-        * Now see if the resulting block can be shrunken to shortform.
-        */
-       size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
-       if (size > XFS_IFORK_DSIZE(dp))
-               return 0;
-
-       return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
-}
-
-/*
- * Convert the shortform directory to block form.
- */
-int                                            /* error */
-xfs_dir2_sf_to_block(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_dir2_db_t           blkno;          /* dir-relative block # (0) */
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block leaf entries */
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
-       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     dummy;          /* trash */
-       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
-       int                     endoffset;      /* end of data objects */
-       int                     error;          /* error return value */
-       int                     i;              /* index */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log block header */
-       int                     needscan;       /* need to scan block freespc */
-       int                     newoffset;      /* offset from current entry */
-       int                     offset;         /* target block offset */
-       xfs_dir2_sf_entry_t     *sfep;          /* sf entry pointer */
-       xfs_dir2_sf_hdr_t       *oldsfp;        /* old shortform header  */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform header  */
-       __be16                  *tagp;          /* end of data entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_name         name;
-       struct xfs_ifork        *ifp;
-
-       trace_xfs_dir2_sf_to_block(args);
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
-       ASSERT(ifp->if_flags & XFS_IFINLINE);
-       /*
-        * Bomb out if the shortform directory is way too short.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(mp));
-               return EIO;
-       }
-
-       oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
-
-       ASSERT(ifp->if_bytes == dp->i_d.di_size);
-       ASSERT(ifp->if_u1.if_data != NULL);
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
-       ASSERT(dp->i_d.di_nextents == 0);
-
-       /*
-        * Copy the directory into a temporary buffer.
-        * Then pitch the incore inode data so we can make extents.
-        */
-       sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP);
-       memcpy(sfp, oldsfp, ifp->if_bytes);
-
-       xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
-       xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
-       dp->i_d.di_size = 0;
-
-       /*
-        * Add block 0 to the inode.
-        */
-       error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
-       if (error) {
-               kmem_free(sfp);
-               return error;
-       }
-       /*
-        * Initialize the data block, then convert it to block format.
-        */
-       error = xfs_dir3_data_init(args, blkno, &bp);
-       if (error) {
-               kmem_free(sfp);
-               return error;
-       }
-       xfs_dir3_block_init(mp, tp, bp, dp);
-       hdr = bp->b_addr;
-
-       /*
-        * Compute size of block "tail" area.
-        */
-       i = (uint)sizeof(*btp) +
-           (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
-       /*
-        * The whole thing is initialized to free by the init routine.
-        * Say we're using the leaf and tail area.
-        */
-       dup = dp->d_ops->data_unused_p(hdr);
-       needlog = needscan = 0;
-       xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
-                              i, &needlog, &needscan);
-       ASSERT(needscan == 0);
-       /*
-        * Fill in the tail.
-        */
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       btp->count = cpu_to_be32(sfp->count + 2);       /* ., .. */
-       btp->stale = 0;
-       blp = xfs_dir2_block_leaf_p(btp);
-       endoffset = (uint)((char *)blp - (char *)hdr);
-       /*
-        * Remove the freespace, we'll manage it.
-        */
-       xfs_dir2_data_use_free(args, bp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
-               be16_to_cpu(dup->length), &needlog, &needscan);
-       /*
-        * Create entry for .
-        */
-       dep = dp->d_ops->data_dot_entry_p(hdr);
-       dep->inumber = cpu_to_be64(dp->i_ino);
-       dep->namelen = 1;
-       dep->name[0] = '.';
-       dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
-       tagp = dp->d_ops->data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-       xfs_dir2_data_log_entry(args, bp, dep);
-       blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
-       blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
-                               (char *)dep - (char *)hdr));
-       /*
-        * Create entry for ..
-        */
-       dep = dp->d_ops->data_dotdot_entry_p(hdr);
-       dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));
-       dep->namelen = 2;
-       dep->name[0] = dep->name[1] = '.';
-       dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
-       tagp = dp->d_ops->data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-       xfs_dir2_data_log_entry(args, bp, dep);
-       blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
-       blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
-                               (char *)dep - (char *)hdr));
-       offset = dp->d_ops->data_first_offset;
-       /*
-        * Loop over existing entries, stuff them in.
-        */
-       i = 0;
-       if (!sfp->count)
-               sfep = NULL;
-       else
-               sfep = xfs_dir2_sf_firstentry(sfp);
-       /*
-        * Need to preserve the existing offset values in the sf directory.
-        * Insert holes (unused entries) where necessary.
-        */
-       while (offset < endoffset) {
-               /*
-                * sfep is null when we reach the end of the list.
-                */
-               if (sfep == NULL)
-                       newoffset = endoffset;
-               else
-                       newoffset = xfs_dir2_sf_get_offset(sfep);
-               /*
-                * There should be a hole here, make one.
-                */
-               if (offset < newoffset) {
-                       dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
-                       dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-                       dup->length = cpu_to_be16(newoffset - offset);
-                       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
-                               ((char *)dup - (char *)hdr));
-                       xfs_dir2_data_log_unused(args, bp, dup);
-                       xfs_dir2_data_freeinsert(hdr,
-                                                dp->d_ops->data_bestfree_p(hdr),
-                                                dup, &dummy);
-                       offset += be16_to_cpu(dup->length);
-                       continue;
-               }
-               /*
-                * Copy a real entry.
-                */
-               dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
-               dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));
-               dep->namelen = sfep->namelen;
-               dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));
-               memcpy(dep->name, sfep->name, dep->namelen);
-               tagp = dp->d_ops->data_entry_tag_p(dep);
-               *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-               xfs_dir2_data_log_entry(args, bp, dep);
-               name.name = sfep->name;
-               name.len = sfep->namelen;
-               blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
-                                                       hashname(&name));
-               blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
-                                                (char *)dep - (char *)hdr));
-               offset = (int)((char *)(tagp + 1) - (char *)hdr);
-               if (++i == sfp->count)
-                       sfep = NULL;
-               else
-                       sfep = dp->d_ops->sf_nextentry(sfp, sfep);
-       }
-       /* Done with the temporary buffer */
-       kmem_free(sfp);
-       /*
-        * Sort the leaf entries by hash value.
-        */
-       xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp), xfs_dir2_block_sort);
-       /*
-        * Log the leaf entry area and tail.
-        * Already logged the header in data_init, ignore needlog.
-        */
-       ASSERT(needscan == 0);
-       xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
-       xfs_dir2_block_log_tail(tp, bp);
-       xfs_dir3_data_check(dp, bp);
-       return 0;
-}
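
The next deleted file, xfs_dir2_data.c, maintains the per-block "bestfree" table: the three largest unused regions in a directory data block, kept sorted by length in descending order in the block header. As a minimal sketch of that policy (plain host-endian integers and illustrative names standing in for the on-disk __be16 fields), the insert-at-slot-0/1/2-or-not-at-all behaviour of xfs_dir2_data_freeinsert() in the hunk below looks roughly like this:

#include <stdio.h>

/* Simplified stand-in for struct xfs_dir2_data_free: host-endian, illustrative only. */
struct bestfree {
	unsigned short length;
	unsigned short offset;
};

/*
 * Keep the three largest free regions sorted by descending length,
 * mirroring the slot 0/1/2 insertion cases of xfs_dir2_data_freeinsert().
 * Returns the slot used, or NULL if the region is too small to track.
 */
static struct bestfree *
bestfree_insert(struct bestfree bf[3], unsigned short offset, unsigned short length)
{
	struct bestfree new = { .length = length, .offset = offset };

	if (new.length > bf[0].length) {
		bf[2] = bf[1];
		bf[1] = bf[0];
		bf[0] = new;
		return &bf[0];
	}
	if (new.length > bf[1].length) {
		bf[2] = bf[1];
		bf[1] = new;
		return &bf[1];
	}
	if (new.length > bf[2].length) {
		bf[2] = new;
		return &bf[2];
	}
	return NULL;	/* smaller than all three tracked regions */
}

int main(void)
{
	struct bestfree bf[3] = { { 0 } };

	bestfree_insert(bf, 64, 48);
	bestfree_insert(bf, 256, 120);
	bestfree_insert(bf, 512, 16);
	bestfree_insert(bf, 768, 200);	/* pushes the 16-byte region out of the table */

	for (int i = 0; i < 3; i++)
		printf("slot %d: offset %u length %u\n", i, bf[i].offset, bf[i].length);
	return 0;
}

Because only three regions are tracked, a displaced entry is simply forgotten; that is why the kernel code frequently sets needscan and rebuilds the table with xfs_dir2_data_freescan() when it can no longer be sure the table still describes the largest free space.
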
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
deleted file mode 100644 (file)
index 8c2f642..0000000
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-
-/*
- * Check the consistency of the data block.
- * The input can also be a block-format directory.
- * Return 0 if the buffer is good, otherwise an error.
- */
-int
-__xfs_dir3_data_check(
-       struct xfs_inode        *dp,            /* incore inode pointer */
-       struct xfs_buf          *bp)            /* data block's buffer */
-{
-       xfs_dir2_dataptr_t      addr;           /* addr for leaf lookup */
-       xfs_dir2_data_free_t    *bf;            /* bestfree table */
-       xfs_dir2_block_tail_t   *btp=NULL;      /* block tail */
-       int                     count;          /* count of entries found */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_data_entry_t   *dep;           /* data entry */
-       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
-       xfs_dir2_data_unused_t  *dup;           /* unused entry */
-       char                    *endp;          /* end of useful data */
-       int                     freeseen;       /* mask of bestfrees seen */
-       xfs_dahash_t            hash;           /* hash of current name */
-       int                     i;              /* leaf index */
-       int                     lastfree;       /* last entry was unused */
-       xfs_dir2_leaf_entry_t   *lep=NULL;      /* block leaf entries */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       char                    *p;             /* current data position */
-       int                     stale;          /* count of stale leaves */
-       struct xfs_name         name;
-       const struct xfs_dir_ops *ops;
-       struct xfs_da_geometry  *geo;
-
-       mp = bp->b_target->bt_mount;
-       geo = mp->m_dir_geo;
-
-       /*
-        * We can be passed a null dp here from a verifier, so we need to go the
-        * hard way to get them.
-        */
-       ops = xfs_dir_get_ops(mp, dp);
-
-       hdr = bp->b_addr;
-       p = (char *)ops->data_entry_p(hdr);
-
-       switch (hdr->magic) {
-       case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
-       case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
-               btp = xfs_dir2_block_tail_p(geo, hdr);
-               lep = xfs_dir2_block_leaf_p(btp);
-               endp = (char *)lep;
-
-               /*
-                * The number of leaf entries is limited by the size of the
-                * block and the amount of space used by the data entries.
-                * We don't know how much space is used by the data entries yet,
-                * so just ensure that the count falls somewhere inside the
-                * block right now.
-                */
-               XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
-                       ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
-               break;
-       case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-       case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
-               endp = (char *)hdr + geo->blksize;
-               break;
-       default:
-               XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
-               return EFSCORRUPTED;
-       }
-
-       /*
-        * Account for zero bestfree entries.
-        */
-       bf = ops->data_bestfree_p(hdr);
-       count = lastfree = freeseen = 0;
-       if (!bf[0].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
-               freeseen |= 1 << 0;
-       }
-       if (!bf[1].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
-               freeseen |= 1 << 1;
-       }
-       if (!bf[2].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
-               freeseen |= 1 << 2;
-       }
-
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
-                                               be16_to_cpu(bf[1].length));
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
-                                               be16_to_cpu(bf[2].length));
-       /*
-        * Loop over the data/unused entries.
-        */
-       while (p < endp) {
-               dup = (xfs_dir2_data_unused_t *)p;
-               /*
-                * If it's unused, look for the space in the bestfree table.
-                * If we find it, account for that, else make sure it
-                * doesn't need to be there.
-                */
-               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
-                       XFS_WANT_CORRUPTED_RETURN(
-                               be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
-                                              (char *)dup - (char *)hdr);
-                       dfp = xfs_dir2_data_freefind(hdr, bf, dup);
-                       if (dfp) {
-                               i = (int)(dfp - bf);
-                               XFS_WANT_CORRUPTED_RETURN(
-                                       (freeseen & (1 << i)) == 0);
-                               freeseen |= 1 << i;
-                       } else {
-                               XFS_WANT_CORRUPTED_RETURN(
-                                       be16_to_cpu(dup->length) <=
-                                               be16_to_cpu(bf[2].length));
-                       }
-                       p += be16_to_cpu(dup->length);
-                       lastfree = 1;
-                       continue;
-               }
-               /*
-                * It's a real entry.  Validate the fields.
-                * If this is a block directory then make sure it's
-                * in the leaf section of the block.
-                * The linear search is crude but this is DEBUG code.
-                */
-               dep = (xfs_dir2_data_entry_t *)p;
-               XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
-               XFS_WANT_CORRUPTED_RETURN(
-                       !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
-               XFS_WANT_CORRUPTED_RETURN(
-                       be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
-                                              (char *)dep - (char *)hdr);
-               XFS_WANT_CORRUPTED_RETURN(
-                               ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
-               count++;
-               lastfree = 0;
-               if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-                   hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-                       addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
-                                               (xfs_dir2_data_aoff_t)
-                                               ((char *)dep - (char *)hdr));
-                       name.name = dep->name;
-                       name.len = dep->namelen;
-                       hash = mp->m_dirnameops->hashname(&name);
-                       for (i = 0; i < be32_to_cpu(btp->count); i++) {
-                               if (be32_to_cpu(lep[i].address) == addr &&
-                                   be32_to_cpu(lep[i].hashval) == hash)
-                                       break;
-                       }
-                       XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
-               }
-               p += ops->data_entsize(dep->namelen);
-       }
-       /*
-        * Need to have seen all the entries and all the bestfree slots.
-        */
-       XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-           hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-               for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
-                       if (lep[i].address ==
-                           cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                               stale++;
-                       if (i > 0)
-                               XFS_WANT_CORRUPTED_RETURN(
-                                       be32_to_cpu(lep[i].hashval) >=
-                                               be32_to_cpu(lep[i - 1].hashval));
-               }
-               XFS_WANT_CORRUPTED_RETURN(count ==
-                       be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
-               XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
-       }
-       return 0;
-}
-
-static bool
-xfs_dir3_data_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
-                       return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
-                       return false;
-       }
-       if (__xfs_dir3_data_check(NULL, bp))
-               return false;
-       return true;
-}
-
-/*
- * Readahead of the first block of the directory when it is opened is completely
- * oblivious to the format of the directory. Hence we can either get a block
- * format buffer or a data format buffer on readahead.
- */
-static void
-xfs_dir3_data_reada_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
-
-       switch (hdr->magic) {
-       case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
-       case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
-               bp->b_ops = &xfs_dir3_block_buf_ops;
-               bp->b_ops->verify_read(bp);
-               return;
-       case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
-       case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-               xfs_dir3_data_verify(bp);
-               return;
-       default:
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               break;
-       }
-}
-
-static void
-xfs_dir3_data_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
-                xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_dir3_data_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_dir3_data_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_dir3_data_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
-       .verify_read = xfs_dir3_data_read_verify,
-       .verify_write = xfs_dir3_data_write_verify,
-};
-
-static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
-       .verify_read = xfs_dir3_data_reada_verify,
-       .verify_write = xfs_dir3_data_write_verify,
-};
-
-
-int
-xfs_dir3_data_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mapped_bno,
-       struct xfs_buf          **bpp)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
-                               XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
-       if (!err && tp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
-       return err;
-}
-
-int
-xfs_dir3_data_readahead(
-       struct xfs_inode        *dp,
-       xfs_dablk_t             bno,
-       xfs_daddr_t             mapped_bno)
-{
-       return xfs_da_reada_buf(dp, bno, mapped_bno,
-                               XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
-}
-
-/*
- * Given a data block and an unused entry from that block,
- * return the bestfree entry if any that corresponds to it.
- */
-xfs_dir2_data_free_t *
-xfs_dir2_data_freefind(
-       struct xfs_dir2_data_hdr *hdr,          /* data block header */
-       struct xfs_dir2_data_free *bf,          /* bestfree table pointer */
-       struct xfs_dir2_data_unused *dup)       /* unused space */
-{
-       xfs_dir2_data_free_t    *dfp;           /* bestfree entry */
-       xfs_dir2_data_aoff_t    off;            /* offset value needed */
-#ifdef DEBUG
-       int                     matched;        /* matched the value */
-       int                     seenzero;       /* saw a 0 bestfree entry */
-#endif
-
-       off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
-
-#ifdef DEBUG
-       /*
-        * Validate some consistency in the bestfree table.
-        * Check order, non-overlapping entries, and if we find the
-        * one we're looking for it has to be exact.
-        */
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-       for (dfp = &bf[0], seenzero = matched = 0;
-            dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
-            dfp++) {
-               if (!dfp->offset) {
-                       ASSERT(!dfp->length);
-                       seenzero = 1;
-                       continue;
-               }
-               ASSERT(seenzero == 0);
-               if (be16_to_cpu(dfp->offset) == off) {
-                       matched = 1;
-                       ASSERT(dfp->length == dup->length);
-               } else if (off < be16_to_cpu(dfp->offset))
-                       ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset));
-               else
-                       ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
-               ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
-               if (dfp > &bf[0])
-                       ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
-       }
-#endif
-       /*
-        * If this is smaller than the smallest bestfree entry,
-        * it can't be there since they're sorted.
-        */
-       if (be16_to_cpu(dup->length) <
-           be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
-               return NULL;
-       /*
-        * Look at the three bestfree entries for our guy.
-        */
-       for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
-               if (!dfp->offset)
-                       return NULL;
-               if (be16_to_cpu(dfp->offset) == off)
-                       return dfp;
-       }
-       /*
-        * Didn't find it.  This only happens if there are duplicate lengths.
-        */
-       return NULL;
-}
-
-/*
- * Insert an unused-space entry into the bestfree table.
- */
-xfs_dir2_data_free_t *                         /* entry inserted */
-xfs_dir2_data_freeinsert(
-       struct xfs_dir2_data_hdr *hdr,          /* data block pointer */
-       struct xfs_dir2_data_free *dfp,         /* bestfree table pointer */
-       struct xfs_dir2_data_unused *dup,       /* unused space */
-       int                     *loghead)       /* log the data header (out) */
-{
-       xfs_dir2_data_free_t    new;            /* new bestfree entry */
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-
-       new.length = dup->length;
-       new.offset = cpu_to_be16((char *)dup - (char *)hdr);
-
-       /*
-        * Insert at position 0, 1, or 2; or not at all.
-        */
-       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
-               dfp[2] = dfp[1];
-               dfp[1] = dfp[0];
-               dfp[0] = new;
-               *loghead = 1;
-               return &dfp[0];
-       }
-       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
-               dfp[2] = dfp[1];
-               dfp[1] = new;
-               *loghead = 1;
-               return &dfp[1];
-       }
-       if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
-               dfp[2] = new;
-               *loghead = 1;
-               return &dfp[2];
-       }
-       return NULL;
-}
-
-/*
- * Remove a bestfree entry from the table.
- */
-STATIC void
-xfs_dir2_data_freeremove(
-       struct xfs_dir2_data_hdr *hdr,          /* data block header */
-       struct xfs_dir2_data_free *bf,          /* bestfree table pointer */
-       struct xfs_dir2_data_free *dfp,         /* bestfree entry pointer */
-       int                     *loghead)       /* out: log data header */
-{
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-
-       /*
-        * It's the first entry, slide the next 2 up.
-        */
-       if (dfp == &bf[0]) {
-               bf[0] = bf[1];
-               bf[1] = bf[2];
-       }
-       /*
-        * It's the second entry, slide the 3rd entry up.
-        */
-       else if (dfp == &bf[1])
-               bf[1] = bf[2];
-       /*
-        * Must be the last entry.
-        */
-       else
-               ASSERT(dfp == &bf[2]);
-       /*
-        * Clear the 3rd entry, must be zero now.
-        */
-       bf[2].length = 0;
-       bf[2].offset = 0;
-       *loghead = 1;
-}
-
-/*
- * Given a data block, reconstruct its bestfree map.
- */
-void
-xfs_dir2_data_freescan(
-       struct xfs_inode        *dp,
-       struct xfs_dir2_data_hdr *hdr,
-       int                     *loghead)
-{
-       xfs_dir2_block_tail_t   *btp;           /* block tail */
-       xfs_dir2_data_entry_t   *dep;           /* active data entry */
-       xfs_dir2_data_unused_t  *dup;           /* unused data entry */
-       struct xfs_dir2_data_free *bf;
-       char                    *endp;          /* end of block's data */
-       char                    *p;             /* current entry pointer */
-       struct xfs_da_geometry  *geo = dp->i_mount->m_dir_geo;
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-
-       /*
-        * Start by clearing the table.
-        */
-       bf = dp->d_ops->data_bestfree_p(hdr);
-       memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
-       *loghead = 1;
-       /*
-        * Set up pointers.
-        */
-       p = (char *)dp->d_ops->data_entry_p(hdr);
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-           hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
-               btp = xfs_dir2_block_tail_p(geo, hdr);
-               endp = (char *)xfs_dir2_block_leaf_p(btp);
-       } else
-               endp = (char *)hdr + geo->blksize;
-       /*
-        * Loop over the block's entries.
-        */
-       while (p < endp) {
-               dup = (xfs_dir2_data_unused_t *)p;
-               /*
-                * If it's a free entry, insert it.
-                */
-               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       ASSERT((char *)dup - (char *)hdr ==
-                              be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
-                       xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
-                       p += be16_to_cpu(dup->length);
-               }
-               /*
-                * For active entries, check their tags and skip them.
-                */
-               else {
-                       dep = (xfs_dir2_data_entry_t *)p;
-                       ASSERT((char *)dep - (char *)hdr ==
-                              be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
-                       p += dp->d_ops->data_entsize(dep->namelen);
-               }
-       }
-}
-
-/*
- * Initialize a data block at the given block number in the directory.
- * Give back the buffer for the created block.
- */
-int                                            /* error */
-xfs_dir3_data_init(
-       xfs_da_args_t           *args,          /* directory operation args */
-       xfs_dir2_db_t           blkno,          /* logical dir block number */
-       struct xfs_buf          **bpp)          /* output block buffer */
-{
-       struct xfs_buf          *bp;            /* block buffer */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* unused entry pointer */
-       struct xfs_dir2_data_free *bf;
-       int                     error;          /* error return value */
-       int                     i;              /* bestfree index */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       int                     t;              /* temp */
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       /*
-        * Get the buffer set up for the block.
-        */
-       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
-                              -1, &bp, XFS_DATA_FORK);
-       if (error)
-               return error;
-       bp->b_ops = &xfs_dir3_data_buf_ops;
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
-
-       /*
-        * Initialize the header.
-        */
-       hdr = bp->b_addr;
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-               memset(hdr3, 0, sizeof(*hdr3));
-               hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
-               hdr3->blkno = cpu_to_be64(bp->b_bn);
-               hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
-
-       } else
-               hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
-
-       bf = dp->d_ops->data_bestfree_p(hdr);
-       bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
-       for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
-               bf[i].length = 0;
-               bf[i].offset = 0;
-       }
-
-       /*
-        * Set up an unused entry for the block's body.
-        */
-       dup = dp->d_ops->data_unused_p(hdr);
-       dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-
-       t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
-       bf[0].length = cpu_to_be16(t);
-       dup->length = cpu_to_be16(t);
-       *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
-       /*
-        * Log it and return it.
-        */
-       xfs_dir2_data_log_header(args, bp);
-       xfs_dir2_data_log_unused(args, bp, dup);
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Log an active data entry from the block.
- */
-void
-xfs_dir2_data_log_entry(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       xfs_dir2_data_entry_t   *dep)           /* data entry pointer */
-{
-       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-
-       xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
-               (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
-                      (char *)hdr - 1));
-}
-
-/*
- * Log a data block header.
- */
-void
-xfs_dir2_data_log_header(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp)
-{
-#ifdef DEBUG
-       struct xfs_dir2_data_hdr *hdr = bp->b_addr;
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-#endif
-
-       xfs_trans_log_buf(args->trans, bp, 0,
-                         args->dp->d_ops->data_entry_offset - 1);
-}
-
-/*
- * Log a data unused entry.
- */
-void
-xfs_dir2_data_log_unused(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       xfs_dir2_data_unused_t  *dup)           /* data unused pointer */
-{
-       xfs_dir2_data_hdr_t     *hdr = bp->b_addr;
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-
-       /*
-        * Log the first part of the unused entry.
-        */
-       xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
-               (uint)((char *)&dup->length + sizeof(dup->length) -
-                      1 - (char *)hdr));
-       /*
-        * Log the end (tag) of the unused entry.
-        */
-       xfs_trans_log_buf(args->trans, bp,
-               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
-               (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
-                      sizeof(xfs_dir2_data_off_t) - 1));
-}
-
-/*
- * Make a byte range in the data block unused.
- * Its current contents are unimportant.
- */
-void
-xfs_dir2_data_make_free(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       xfs_dir2_data_aoff_t    offset,         /* starting byte offset */
-       xfs_dir2_data_aoff_t    len,            /* length in bytes */
-       int                     *needlogp,      /* out: log header */
-       int                     *needscanp)     /* out: regen bestfree */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* data block pointer */
-       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
-       char                    *endptr;        /* end of data area */
-       int                     needscan;       /* need to regen bestfree */
-       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
-       xfs_dir2_data_unused_t  *postdup;       /* unused entry after us */
-       xfs_dir2_data_unused_t  *prevdup;       /* unused entry before us */
-       struct xfs_dir2_data_free *bf;
-
-       hdr = bp->b_addr;
-
-       /*
-        * Figure out where the end of the data area is.
-        */
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-           hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
-               endptr = (char *)hdr + args->geo->blksize;
-       else {
-               xfs_dir2_block_tail_t   *btp;   /* block tail */
-
-               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-                       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-               btp = xfs_dir2_block_tail_p(args->geo, hdr);
-               endptr = (char *)xfs_dir2_block_leaf_p(btp);
-       }
-       /*
-        * If this isn't the start of the block, then back up to
-        * the previous entry and see if it's free.
-        */
-       if (offset > args->dp->d_ops->data_entry_offset) {
-               __be16                  *tagp;  /* tag just before us */
-
-               tagp = (__be16 *)((char *)hdr + offset) - 1;
-               prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
-               if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
-                       prevdup = NULL;
-       } else
-               prevdup = NULL;
-       /*
-        * If this isn't the end of the block, see if the entry after
-        * us is free.
-        */
-       if ((char *)hdr + offset + len < endptr) {
-               postdup =
-                       (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
-               if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
-                       postdup = NULL;
-       } else
-               postdup = NULL;
-       ASSERT(*needscanp == 0);
-       needscan = 0;
-       /*
-        * Previous and following entries are both free,
-        * merge everything into a single free entry.
-        */
-       bf = args->dp->d_ops->data_bestfree_p(hdr);
-       if (prevdup && postdup) {
-               xfs_dir2_data_free_t    *dfp2;  /* another bestfree pointer */
-
-               /*
-                * See if prevdup and/or postdup are in bestfree table.
-                */
-               dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
-               dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
-               /*
-                * We need a rescan unless there are exactly 2 free entries
-                * namely our two.  Then we know what's happening, otherwise
-                * since the third bestfree is there, there might be more
-                * entries.
-                */
-               needscan = (bf[2].length != 0);
-               /*
-                * Fix up the new big freespace.
-                */
-               be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
-               *xfs_dir2_data_unused_tag_p(prevdup) =
-                       cpu_to_be16((char *)prevdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, prevdup);
-               if (!needscan) {
-                       /*
-                        * Has to be the case that entries 0 and 1 are
-                        * dfp and dfp2 (don't know which is which), and
-                        * entry 2 is empty.
-                        * Remove entry 1 first then entry 0.
-                        */
-                       ASSERT(dfp && dfp2);
-                       if (dfp == &bf[1]) {
-                               dfp = &bf[0];
-                               ASSERT(dfp2 == dfp);
-                               dfp2 = &bf[1];
-                       }
-                       xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
-                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
-                       /*
-                        * Now insert the new entry.
-                        */
-                       dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
-                                                      needlogp);
-                       ASSERT(dfp == &bf[0]);
-                       ASSERT(dfp->length == prevdup->length);
-                       ASSERT(!dfp[1].length);
-                       ASSERT(!dfp[2].length);
-               }
-       }
-       /*
-        * The entry before us is free, merge with it.
-        */
-       else if (prevdup) {
-               dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
-               be16_add_cpu(&prevdup->length, len);
-               *xfs_dir2_data_unused_tag_p(prevdup) =
-                       cpu_to_be16((char *)prevdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, prevdup);
-               /*
-                * If the previous entry was in the table, the new entry
-                * is longer, so it will be in the table too.  Remove
-                * the old one and add the new one.
-                */
-               if (dfp) {
-                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
-                       xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
-               }
-               /*
-                * Otherwise we need a scan if the new entry is big enough.
-                */
-               else {
-                       needscan = be16_to_cpu(prevdup->length) >
-                                  be16_to_cpu(bf[2].length);
-               }
-       }
-       /*
-        * The following entry is free, merge with it.
-        */
-       else if (postdup) {
-               dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
-               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
-               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-               newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
-               *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup);
-               /*
-                * If the following entry was in the table, the new entry
-                * is longer, so it will be in the table too.  Remove
-                * the old one and add the new one.
-                */
-               if (dfp) {
-                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
-                       xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
-               }
-               /*
-                * Otherwise we need a scan if the new entry is big enough.
-                */
-               else {
-                       needscan = be16_to_cpu(newdup->length) >
-                                  be16_to_cpu(bf[2].length);
-               }
-       }
-       /*
-        * Neither neighbor is free.  Make a new entry.
-        */
-       else {
-               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
-               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-               newdup->length = cpu_to_be16(len);
-               *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup);
-               xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
-       }
-       *needscanp = needscan;
-}
-
-/*
- * Take a byte range out of an existing unused space and make it un-free.
- */
-void
-xfs_dir2_data_use_free(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       xfs_dir2_data_unused_t  *dup,           /* unused entry */
-       xfs_dir2_data_aoff_t    offset,         /* starting offset to use */
-       xfs_dir2_data_aoff_t    len,            /* length to use */
-       int                     *needlogp,      /* out: need to log header */
-       int                     *needscanp)     /* out: need regen bestfree */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_data_free_t    *dfp;           /* bestfree pointer */
-       int                     matchback;      /* matches end of freespace */
-       int                     matchfront;     /* matches start of freespace */
-       int                     needscan;       /* need to regen bestfree */
-       xfs_dir2_data_unused_t  *newdup;        /* new unused entry */
-       xfs_dir2_data_unused_t  *newdup2;       /* another new unused entry */
-       int                     oldlen;         /* old unused entry's length */
-       struct xfs_dir2_data_free *bf;
-
-       hdr = bp->b_addr;
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-       ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
-       ASSERT(offset >= (char *)dup - (char *)hdr);
-       ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
-       ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
-       /*
-        * Look up the entry in the bestfree table.
-        */
-       oldlen = be16_to_cpu(dup->length);
-       bf = args->dp->d_ops->data_bestfree_p(hdr);
-       dfp = xfs_dir2_data_freefind(hdr, bf, dup);
-       ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
-       /*
-        * Check for alignment with front and back of the entry.
-        */
-       matchfront = (char *)dup - (char *)hdr == offset;
-       matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
-       ASSERT(*needscanp == 0);
-       needscan = 0;
-       /*
-        * If we matched it exactly we just need to get rid of it from
-        * the bestfree table.
-        */
-       if (matchfront && matchback) {
-               if (dfp) {
-                       needscan = (bf[2].offset != 0);
-                       if (!needscan)
-                               xfs_dir2_data_freeremove(hdr, bf, dfp,
-                                                        needlogp);
-               }
-       }
-       /*
-        * We match the first part of the entry.
-        * Make a new entry with the remaining freespace.
-        */
-       else if (matchfront) {
-               newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
-               newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-               newdup->length = cpu_to_be16(oldlen - len);
-               *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup);
-               /*
-                * If it was in the table, remove it and add the new one.
-                */
-               if (dfp) {
-                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
-                       dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
-                                                      needlogp);
-                       ASSERT(dfp != NULL);
-                       ASSERT(dfp->length == newdup->length);
-                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
-                       /*
-                        * If we got inserted at the last slot,
-                        * that means we don't know if there was a better
-                        * choice for the last slot, or not.  Rescan.
-                        */
-                       needscan = dfp == &bf[2];
-               }
-       }
-       /*
-        * We match the last part of the entry.
-        * Trim the allocated space off the tail of the entry.
-        */
-       else if (matchback) {
-               newdup = dup;
-               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
-               *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup);
-               /*
-                * If it was in the table, remove it and add the new one.
-                */
-               if (dfp) {
-                       xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
-                       dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
-                                                      needlogp);
-                       ASSERT(dfp != NULL);
-                       ASSERT(dfp->length == newdup->length);
-                       ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
-                       /*
-                        * If we got inserted at the last slot,
-                        * that means we don't know if there was a better
-                        * choice for the last slot, or not.  Rescan.
-                        */
-                       needscan = dfp == &bf[2];
-               }
-       }
-       /*
-        * Poking out the middle of an entry.
-        * Make two new entries.
-        */
-       else {
-               newdup = dup;
-               newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
-               *xfs_dir2_data_unused_tag_p(newdup) =
-                       cpu_to_be16((char *)newdup - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup);
-               newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
-               newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-               newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
-               *xfs_dir2_data_unused_tag_p(newdup2) =
-                       cpu_to_be16((char *)newdup2 - (char *)hdr);
-               xfs_dir2_data_log_unused(args, bp, newdup2);
-               /*
-                * If the old entry was in the table, we need to scan
-                * if the 3rd entry was valid, since these entries
-                * are smaller than the old one.
-                * If we don't need to scan that means there were 1 or 2
-                * entries in the table, and removing the old and adding
-                * the 2 new will work.
-                */
-               if (dfp) {
-                       needscan = (bf[2].length != 0);
-                       if (!needscan) {
-                               xfs_dir2_data_freeremove(hdr, bf, dfp,
-                                                        needlogp);
-                               xfs_dir2_data_freeinsert(hdr, bf, newdup,
-                                                        needlogp);
-                               xfs_dir2_data_freeinsert(hdr, bf, newdup2,
-                                                        needlogp);
-                       }
-               }
-       }
-       *needscanp = needscan;
-}
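
xfs_dir2_data_use_free() above carves an allocated byte range out of an existing unused region and distinguishes four cases: the range matches the region exactly, matches its front, matches its back, or punches a hole in the middle and leaves two unused pieces. A minimal sketch of that case split, using a flat (offset, length) extent and illustrative names rather than the on-disk unused entries and bestfree bookkeeping, might look like this:

#include <assert.h>
#include <stdio.h>

/* Illustrative free-extent descriptor; the kernel operates on on-disk unused entries. */
struct extent {
	unsigned int offset;
	unsigned int length;
};

/*
 * Carve [offset, offset + len) out of the free extent *dup, mirroring the
 * matchfront/matchback case split in xfs_dir2_data_use_free().
 * Returns the number of surviving free pieces (0, 1 or 2) written to out[].
 */
static int
use_free(const struct extent *dup, unsigned int offset, unsigned int len,
	 struct extent out[2])
{
	int matchfront = (offset == dup->offset);
	int matchback = (offset + len == dup->offset + dup->length);

	assert(offset >= dup->offset);
	assert(offset + len <= dup->offset + dup->length);

	if (matchfront && matchback)
		return 0;			/* consumed the whole extent */
	if (matchfront) {
		out[0].offset = offset + len;	/* remainder after the allocation */
		out[0].length = dup->length - len;
		return 1;
	}
	if (matchback) {
		out[0].offset = dup->offset;	/* remainder before the allocation */
		out[0].length = dup->length - len;
		return 1;
	}
	/* Poked out the middle: a piece survives on each side. */
	out[0].offset = dup->offset;
	out[0].length = offset - dup->offset;
	out[1].offset = offset + len;
	out[1].length = dup->offset + dup->length - (offset + len);
	return 2;
}

int main(void)
{
	struct extent dup = { .offset = 64, .length = 256 };
	struct extent out[2];
	int n = use_free(&dup, 128, 32, out);

	printf("%d remaining piece(s)\n", n);
	for (int i = 0; i < n; i++)
		printf("  offset %u length %u\n", out[i].offset, out[i].length);
	return 0;
}

In the real function each surviving piece is rewritten as an unused entry, logged, and reinserted into the bestfree table, with a rescan forced whenever the table might no longer reflect the largest free regions.
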
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
deleted file mode 100644 (file)
index 78b411b..0000000
+++ /dev/null
@@ -1,1831 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-
-/*
- * Local function declarations.
- */
-static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
-                                   int *indexp, struct xfs_buf **dbpp);
-static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
-                                   struct xfs_buf *bp, int first, int last);
-static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
-                                  struct xfs_buf *bp);
-
-/*
- * Check the internal consistency of a leaf1 block.
- * Pop an assert if something is wrong.
- */
-#ifdef DEBUG
-#define        xfs_dir3_leaf_check(dp, bp) \
-do { \
-       if (!xfs_dir3_leaf1_check((dp), (bp))) \
-               ASSERT(0); \
-} while (0);
-
-STATIC bool
-xfs_dir3_leaf1_check(
-       struct xfs_inode        *dp,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
-               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
-       } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
-               return false;
-
-       return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
-}
-#else
-#define        xfs_dir3_leaf_check(dp, bp)
-#endif
-
-bool
-xfs_dir3_leaf_check_int(
-       struct xfs_mount        *mp,
-       struct xfs_inode        *dp,
-       struct xfs_dir3_icleaf_hdr *hdr,
-       struct xfs_dir2_leaf    *leaf)
-{
-       struct xfs_dir2_leaf_entry *ents;
-       xfs_dir2_leaf_tail_t    *ltp;
-       int                     stale;
-       int                     i;
-       const struct xfs_dir_ops *ops;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_da_geometry  *geo = mp->m_dir_geo;
-
-       /*
-        * we can be passed a null dp here from a verifier, so we need to go the
-        * hard way to get them.
-        */
-       ops = xfs_dir_get_ops(mp, dp);
-
-       if (!hdr) {
-               ops->leaf_hdr_from_disk(&leafhdr, leaf);
-               hdr = &leafhdr;
-       }
-
-       ents = ops->leaf_ents_p(leaf);
-       ltp = xfs_dir2_leaf_tail_p(geo, leaf);
-
-       /*
-        * XXX (dgc): This value is not restrictive enough.
-        * Should factor in the size of the bests table as well.
-        * We can deduce a value for that from di_size.
-        */
-       if (hdr->count > ops->leaf_max_ents(geo))
-               return false;
-
-       /* Leaves and bests don't overlap in leaf format. */
-       if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
-            hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
-           (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
-               return false;
-
-       /* Check hash value order, count stale entries.  */
-       for (i = stale = 0; i < hdr->count; i++) {
-               if (i + 1 < hdr->count) {
-                       if (be32_to_cpu(ents[i].hashval) >
-                                       be32_to_cpu(ents[i + 1].hashval))
-                               return false;
-               }
-               if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                       stale++;
-       }
-       if (hdr->stale != stale)
-               return false;
-       return true;
-}
-
-/*
- * We verify the magic numbers before decoding the leaf header so that on debug
- * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
- * to incorrect magic numbers.
- */
-static bool
-xfs_dir3_leaf_verify(
-       struct xfs_buf          *bp,
-       __uint16_t              magic)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-
-       ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-               __uint16_t              magic3;
-
-               magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
-                                                        : XFS_DIR3_LEAFN_MAGIC;
-
-               if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
-                       return false;
-               if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (leaf->hdr.info.magic != cpu_to_be16(magic))
-                       return false;
-       }
-
-       return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
-}
-
-static void
-__read_verify(
-       struct xfs_buf  *bp,
-       __uint16_t      magic)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-            !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_dir3_leaf_verify(bp, magic))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-__write_verify(
-       struct xfs_buf  *bp,
-       __uint16_t      magic)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_dir3_leaf_verify(bp, magic)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
-}
-
-static void
-xfs_dir3_leaf1_read_verify(
-       struct xfs_buf  *bp)
-{
-       __read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
-}
-
-static void
-xfs_dir3_leaf1_write_verify(
-       struct xfs_buf  *bp)
-{
-       __write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
-}
-
-static void
-xfs_dir3_leafn_read_verify(
-       struct xfs_buf  *bp)
-{
-       __read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leafn_write_verify(
-       struct xfs_buf  *bp)
-{
-       __write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
-}
-
-const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
-       .verify_read = xfs_dir3_leaf1_read_verify,
-       .verify_write = xfs_dir3_leaf1_write_verify,
-};
-
-const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
-       .verify_read = xfs_dir3_leafn_read_verify,
-       .verify_write = xfs_dir3_leafn_write_verify,
-};
-
-static int
-xfs_dir3_leaf_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             fbno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
-                               XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
-       if (!err && tp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
-       return err;
-}
-
-int
-xfs_dir3_leafn_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             fbno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
-                               XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
-       if (!err && tp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
-       return err;
-}
-
-/*
- * Initialize a new leaf block, leaf1 or leafn magic accepted.
- */
-static void
-xfs_dir3_leaf_init(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_buf          *bp,
-       xfs_ino_t               owner,
-       __uint16_t              type)
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-
-       ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-
-               memset(leaf3, 0, sizeof(*leaf3));
-
-               leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
-                                        ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
-                                        : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
-               leaf3->info.blkno = cpu_to_be64(bp->b_bn);
-               leaf3->info.owner = cpu_to_be64(owner);
-               uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid);
-       } else {
-               memset(leaf, 0, sizeof(*leaf));
-               leaf->hdr.info.magic = cpu_to_be16(type);
-       }
-
-       /*
-        * If it's a leaf-format directory initialize the tail.
-        * Caller is responsible for initialising the bests table.
-        */
-       if (type == XFS_DIR2_LEAF1_MAGIC) {
-               struct xfs_dir2_leaf_tail *ltp;
-
-               ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
-               ltp->bestcount = 0;
-               bp->b_ops = &xfs_dir3_leaf1_buf_ops;
-               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
-       } else {
-               bp->b_ops = &xfs_dir3_leafn_buf_ops;
-               xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
-       }
-}
-
-int
-xfs_dir3_leaf_get_buf(
-       xfs_da_args_t           *args,
-       xfs_dir2_db_t           bno,
-       struct xfs_buf          **bpp,
-       __uint16_t              magic)
-{
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_trans        *tp = args->trans;
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_buf          *bp;
-       int                     error;
-
-       ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
-       ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
-              bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
-
-       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
-                              -1, &bp, XFS_DATA_FORK);
-       if (error)
-               return error;
-
-       xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
-       xfs_dir3_leaf_log_header(args, bp);
-       if (magic == XFS_DIR2_LEAF1_MAGIC)
-               xfs_dir3_leaf_log_tail(args, bp);
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Convert a block form directory to a leaf form directory.
- */
-int                                            /* error */
-xfs_dir2_block_to_leaf(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *dbp)           /* input block's buffer */
-{
-       __be16                  *bestsp;        /* leaf's bestsp entries */
-       xfs_dablk_t             blkno;          /* leaf block's bno */
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_leaf_entry_t   *blp;           /* block's leaf entries */
-       xfs_dir2_block_tail_t   *btp;           /* block's tail */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       struct xfs_buf          *lbp;           /* leaf block's buffer */
-       xfs_dir2_db_t           ldb;            /* leaf block's bno */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf's tail */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log block header */
-       int                     needscan;       /* need to rescan bestfree */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_data_free *bf;
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       trace_xfs_dir2_block_to_leaf(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       /*
-        * Add the leaf block to the inode.
-        * This interface will only put blocks in the leaf/node range.
-        * Since that's empty now, we'll get the root (block 0 in range).
-        */
-       if ((error = xfs_da_grow_inode(args, &blkno))) {
-               return error;
-       }
-       ldb = xfs_dir2_da_to_db(args->geo, blkno);
-       ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
-       /*
-        * Initialize the leaf block, get a buffer for it.
-        */
-       error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
-       if (error)
-               return error;
-
-       leaf = lbp->b_addr;
-       hdr = dbp->b_addr;
-       xfs_dir3_data_check(dp, dbp);
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-       bf = dp->d_ops->data_bestfree_p(hdr);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-
-       /*
-        * Set the counts in the leaf header.
-        */
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       leafhdr.count = be32_to_cpu(btp->count);
-       leafhdr.stale = be32_to_cpu(btp->stale);
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, lbp);
-
-       /*
-        * Could compact these but I think we always do the conversion
-        * after squeezing out stale entries.
-        */
-       memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
-       xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
-       needscan = 0;
-       needlog = 1;
-       /*
-        * Make the space formerly occupied by the leaf entries and block
-        * tail be free.
-        */
-       xfs_dir2_data_make_free(args, dbp,
-               (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
-               (xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
-                                      (char *)blp),
-               &needlog, &needscan);
-       /*
-        * Fix up the block header, make it a data block.
-        */
-       dbp->b_ops = &xfs_dir3_data_buf_ops;
-       xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
-       if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
-               hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
-       else
-               hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
-
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       /*
-        * Set up leaf tail and bests table.
-        */
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       ltp->bestcount = cpu_to_be32(1);
-       bestsp = xfs_dir2_leaf_bests_p(ltp);
-       bestsp[0] = bf[0].length;
-       /*
-        * Log the data header and leaf bests table.
-        */
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       xfs_dir3_leaf_check(dp, lbp);
-       xfs_dir3_data_check(dp, dbp);
-       xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
-       return 0;
-}
-
-STATIC void
-xfs_dir3_leaf_find_stale(
-       struct xfs_dir3_icleaf_hdr *leafhdr,
-       struct xfs_dir2_leaf_entry *ents,
-       int                     index,
-       int                     *lowstale,
-       int                     *highstale)
-{
-       /*
-        * Find the first stale entry before our index, if any.
-        */
-       for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
-               if (ents[*lowstale].address ==
-                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                       break;
-       }
-
-       /*
-        * Find the first stale entry at or after our index, if any.
-        * Stop if the result would require moving more entries than using
-        * lowstale.
-        */
-       for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
-               if (ents[*highstale].address ==
-                   cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                       break;
-               if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
-                       break;
-       }
-}
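
/*
 * Illustrative sketch, not part of the original source: the same
 * lowstale/highstale scan as xfs_dir3_leaf_find_stale() above, modelled
 * on a plain array of flags.  The stale[] contents, find_stale() and
 * main() are invented for the example; a flag of 1 stands in for a leaf
 * entry whose address is XFS_DIR2_NULL_DATAPTR.
 */
#include <stdio.h>

static void
find_stale(const int *stale, int count, int index, int *lowstale, int *highstale)
{
	/* Nearest stale slot strictly before the insertion index, if any. */
	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale)
		if (stale[*lowstale])
			break;

	/*
	 * Nearest stale slot at or after the index, giving up once it
	 * would cost more entry moves than reusing lowstale.
	 */
	for (*highstale = index; *highstale < count; ++*highstale) {
		if (stale[*highstale])
			break;
		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
			break;
	}
}

int
main(void)
{
	int stale[] = { 0, 1, 0, 0, 0, 0, 1, 0 };
	int low, high;

	find_stale(stale, 8, 4, &low, &high);
	printf("low %d high %d\n", low, high);	/* prints "low 1 high 6" */
	return 0;
}
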
-
-struct xfs_dir2_leaf_entry *
-xfs_dir3_leaf_find_entry(
-       struct xfs_dir3_icleaf_hdr *leafhdr,
-       struct xfs_dir2_leaf_entry *ents,
-       int                     index,          /* leaf table position */
-       int                     compact,        /* need to compact leaves */
-       int                     lowstale,       /* index of prev stale leaf */
-       int                     highstale,      /* index of next stale leaf */
-       int                     *lfloglow,      /* low leaf logging index */
-       int                     *lfloghigh)     /* high leaf logging index */
-{
-       if (!leafhdr->stale) {
-               xfs_dir2_leaf_entry_t   *lep;   /* leaf entry table pointer */
-
-               /*
-                * Now we need to make room to insert the leaf entry.
-                *
-                * If there are no stale entries, just insert a hole at index.
-                */
-               lep = &ents[index];
-               if (index < leafhdr->count)
-                       memmove(lep + 1, lep,
-                               (leafhdr->count - index) * sizeof(*lep));
-
-               /*
-                * Record low and high logging indices for the leaf.
-                */
-               *lfloglow = index;
-               *lfloghigh = leafhdr->count++;
-               return lep;
-       }
-
-       /*
-        * There are stale entries.
-        *
-        * We will use one of them for the new entry.  It's probably not at
-        * the right location, so we'll have to shift some up or down first.
-        *
-        * If we didn't compact before, we need to find the nearest stale
-        * entries before and after our insertion point.
-        */
-       if (compact == 0)
-               xfs_dir3_leaf_find_stale(leafhdr, ents, index,
-                                        &lowstale, &highstale);
-
-       /*
-        * If the low one is better, use it.
-        */
-       if (lowstale >= 0 &&
-           (highstale == leafhdr->count ||
-            index - lowstale - 1 < highstale - index)) {
-               ASSERT(index - lowstale - 1 >= 0);
-               ASSERT(ents[lowstale].address ==
-                      cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
-
-               /*
-                * Copy entries up to cover the stale entry and make room
-                * for the new entry.
-                */
-               if (index - lowstale - 1 > 0) {
-                       memmove(&ents[lowstale], &ents[lowstale + 1],
-                               (index - lowstale - 1) *
-                                       sizeof(xfs_dir2_leaf_entry_t));
-               }
-               *lfloglow = MIN(lowstale, *lfloglow);
-               *lfloghigh = MAX(index - 1, *lfloghigh);
-               leafhdr->stale--;
-               return &ents[index - 1];
-       }
-
-       /*
-        * The high one is better, so use that one.
-        */
-       ASSERT(highstale - index >= 0);
-       ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
-
-       /*
-        * Copy entries down to cover the stale entry and make room for the
-        * new entry.
-        */
-       if (highstale - index > 0) {
-               memmove(&ents[index + 1], &ents[index],
-                       (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
-       }
-       *lfloglow = MIN(index, *lfloglow);
-       *lfloghigh = MAX(highstale, *lfloghigh);
-       leafhdr->stale--;
-       return &ents[index];
-}
-
-/*
- * Add an entry to a leaf form directory.
- */
-int                                            /* error */
-xfs_dir2_leaf_addname(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       __be16                  *bestsp;        /* freespace table in leaf */
-       int                     compact;        /* need to compact leaves */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* data unused entry */
-       int                     error;          /* error return value */
-       int                     grown;          /* allocated new data block */
-       int                     highstale;      /* index of next stale leaf */
-       int                     i;              /* temporary, index */
-       int                     index;          /* leaf table position */
-       struct xfs_buf          *lbp;           /* leaf's buffer */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       int                     length;         /* length of new entry */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry table pointer */
-       int                     lfloglow;       /* low leaf logging index */
-       int                     lfloghigh;      /* high leaf logging index */
-       int                     lowstale;       /* index of prev stale leaf */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail pointer */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needbytes;      /* leaf block bytes needed */
-       int                     needlog;        /* need to log data header */
-       int                     needscan;       /* need to rescan data free */
-       __be16                  *tagp;          /* end of data entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       xfs_dir2_db_t           use_block;      /* data block number */
-       struct xfs_dir2_data_free *bf;          /* bestfree table */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       trace_xfs_dir2_leaf_addname(args);
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-
-       error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
-       if (error)
-               return error;
-
-       /*
-        * Look up the entry by hash value and name.
-        * We know it's not there, our caller has already done a lookup.
-        * So the index is of the entry to insert in front of.
-        * But if there are dup hash values the index is of the first of those.
-        */
-       index = xfs_dir2_leaf_search_hash(args, lbp);
-       leaf = lbp->b_addr;
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       bestsp = xfs_dir2_leaf_bests_p(ltp);
-       length = dp->d_ops->data_entsize(args->namelen);
-
-       /*
-        * See if there are any entries with the same hash value
-        * and space in their block for the new entry.
-        * This is good because it puts multiple same-hash value entries
-        * in a data block, improving the lookup of those entries.
-        */
-       for (use_block = -1, lep = &ents[index];
-            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
-            index++, lep++) {
-               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
-               ASSERT(i < be32_to_cpu(ltp->bestcount));
-               ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
-               if (be16_to_cpu(bestsp[i]) >= length) {
-                       use_block = i;
-                       break;
-               }
-       }
-       /*
-        * Didn't find a block yet, so do a linear search of all the data blocks.
-        */
-       if (use_block == -1) {
-               for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
-                       /*
-                        * Remember a block we see that's missing.
-                        */
-                       if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
-                           use_block == -1)
-                               use_block = i;
-                       else if (be16_to_cpu(bestsp[i]) >= length) {
-                               use_block = i;
-                               break;
-                       }
-               }
-       }
-       /*
-        * How many bytes do we need in the leaf block?
-        */
-       needbytes = 0;
-       if (!leafhdr.stale)
-               needbytes += sizeof(xfs_dir2_leaf_entry_t);
-       if (use_block == -1)
-               needbytes += sizeof(xfs_dir2_data_off_t);
-
-       /*
-        * Now kill use_block if it refers to a missing block, so we
-        * can use it as an indication of allocation needed.
-        */
-       if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
-               use_block = -1;
-       /*
-        * If we don't have enough free bytes but we can make enough
-        * by compacting out stale entries, we'll do that.
-        */
-       if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
-           leafhdr.stale > 1)
-               compact = 1;
-
-       /*
-        * Otherwise if we don't have enough free bytes we need to
-        * convert to node form.
-        */
-       else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
-               /*
-                * Just checking or no space reservation, give up.
-                */
-               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
-                                                       args->total == 0) {
-                       xfs_trans_brelse(tp, lbp);
-                       return ENOSPC;
-               }
-               /*
-                * Convert to node form.
-                */
-               error = xfs_dir2_leaf_to_node(args, lbp);
-               if (error)
-                       return error;
-               /*
-                * Then add the new entry.
-                */
-               return xfs_dir2_node_addname(args);
-       }
-       /*
-        * Otherwise it will fit without compaction.
-        */
-       else
-               compact = 0;
-       /*
-        * If just checking, then it will fit unless we needed to allocate
-        * a new data block.
-        */
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
-               xfs_trans_brelse(tp, lbp);
-               return use_block == -1 ? ENOSPC : 0;
-       }
-       /*
-        * If no allocations are allowed, return now before we've
-        * changed anything.
-        */
-       if (args->total == 0 && use_block == -1) {
-               xfs_trans_brelse(tp, lbp);
-               return ENOSPC;
-       }
-       /*
-        * Need to compact the leaf entries, removing stale ones.
-        * Leave one stale entry behind - the one closest to our
-        * insertion index - and we'll shift that one to our insertion
-        * point later.
-        */
-       if (compact) {
-               xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
-                       &highstale, &lfloglow, &lfloghigh);
-       }
-       /*
-        * There are stale entries, so seed the log-low and log-high
-        * indices with impossibly bad values; they'll be narrowed later.
-        */
-       else if (leafhdr.stale) {
-               lfloglow = leafhdr.count;
-               lfloghigh = -1;
-       }
-       /*
-        * If there was no data block space found, we need to allocate
-        * a new one.
-        */
-       if (use_block == -1) {
-               /*
-                * Add the new data block.
-                */
-               if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
-                               &use_block))) {
-                       xfs_trans_brelse(tp, lbp);
-                       return error;
-               }
-               /*
-                * Initialize the block.
-                */
-               if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
-                       xfs_trans_brelse(tp, lbp);
-                       return error;
-               }
-               /*
-                * If we're adding a new data block on the end we need to
-                * extend the bests table.  Copy it up one entry.
-                */
-               if (use_block >= be32_to_cpu(ltp->bestcount)) {
-                       bestsp--;
-                       memmove(&bestsp[0], &bestsp[1],
-                               be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
-                       be32_add_cpu(&ltp->bestcount, 1);
-                       xfs_dir3_leaf_log_tail(args, lbp);
-                       xfs_dir3_leaf_log_bests(args, lbp, 0,
-                                               be32_to_cpu(ltp->bestcount) - 1);
-               }
-               /*
-                * If we're filling in a previously empty block just log it.
-                */
-               else
-                       xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
-               hdr = dbp->b_addr;
-               bf = dp->d_ops->data_bestfree_p(hdr);
-               bestsp[use_block] = bf[0].length;
-               grown = 1;
-       } else {
-               /*
-                * Already had space in some data block.
-                * Just read that one in.
-                */
-               error = xfs_dir3_data_read(tp, dp,
-                                  xfs_dir2_db_to_da(args->geo, use_block),
-                                  -1, &dbp);
-               if (error) {
-                       xfs_trans_brelse(tp, lbp);
-                       return error;
-               }
-               hdr = dbp->b_addr;
-               bf = dp->d_ops->data_bestfree_p(hdr);
-               grown = 0;
-       }
-       /*
-        * Point to the biggest freespace in our data block.
-        */
-       dup = (xfs_dir2_data_unused_t *)
-             ((char *)hdr + be16_to_cpu(bf[0].offset));
-       ASSERT(be16_to_cpu(dup->length) >= length);
-       needscan = needlog = 0;
-       /*
-        * Mark the initial part of our freespace in use for the new entry.
-        */
-       xfs_dir2_data_use_free(args, dbp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
-               &needlog, &needscan);
-       /*
-        * Initialize our new entry (at last).
-        */
-       dep = (xfs_dir2_data_entry_t *)dup;
-       dep->inumber = cpu_to_be64(args->inumber);
-       dep->namelen = args->namelen;
-       memcpy(dep->name, args->name, dep->namelen);
-       dp->d_ops->data_put_ftype(dep, args->filetype);
-       tagp = dp->d_ops->data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-       /*
-        * Need to scan and fix up the bestfree table.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       /*
-        * Need to log the data block's header.
-        */
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       xfs_dir2_data_log_entry(args, dbp, dep);
-       /*
-        * If the bests table needs to be changed, do it.
-        * Log the change unless we've already done that.
-        */
-       if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
-               bestsp[use_block] = bf[0].length;
-               if (!grown)
-                       xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
-       }
-
-       lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
-                                      highstale, &lfloglow, &lfloghigh);
-
-       /*
-        * Fill in the new leaf entry.
-        */
-       lep->hashval = cpu_to_be32(args->hashval);
-       lep->address = cpu_to_be32(
-                               xfs_dir2_db_off_to_dataptr(args->geo, use_block,
-                               be16_to_cpu(*tagp)));
-       /*
-        * Log the leaf fields and give up the buffers.
-        */
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, lbp);
-       xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
-       xfs_dir3_leaf_check(dp, lbp);
-       xfs_dir3_data_check(dp, dbp);
-       return 0;
-}
-
-/*
- * Compact out any stale entries in the leaf.
- * Log the header and changed leaf entries, if any.
- */
-void
-xfs_dir3_leaf_compact(
-       xfs_da_args_t   *args,          /* operation arguments */
-       struct xfs_dir3_icleaf_hdr *leafhdr,
-       struct xfs_buf  *bp)            /* leaf buffer */
-{
-       int             from;           /* source leaf index */
-       xfs_dir2_leaf_t *leaf;          /* leaf structure */
-       int             loglow;         /* first leaf entry to log */
-       int             to;             /* target leaf index */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_inode *dp = args->dp;
-
-       leaf = bp->b_addr;
-       if (!leafhdr->stale)
-               return;
-
-       /*
-        * Compress out the stale entries in place.
-        */
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
-               if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                       continue;
-               /*
-                * Only actually copy the entries that are different.
-                */
-               if (from > to) {
-                       if (loglow == -1)
-                               loglow = to;
-                       ents[to] = ents[from];
-               }
-               to++;
-       }
-       /*
-        * Update and log the header, log the leaf entries.
-        */
-       ASSERT(leafhdr->stale == from - to);
-       leafhdr->count -= leafhdr->stale;
-       leafhdr->stale = 0;
-
-       dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
-       xfs_dir3_leaf_log_header(args, bp);
-       if (loglow != -1)
-               xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
-}
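
/*
 * Illustrative sketch, not part of the original source: the same
 * copy-forward compaction as xfs_dir3_leaf_compact() above, run on a
 * plain int array where 0 marks a stale slot.  compact(), the array and
 * main() are invented for the example.
 */
#include <assert.h>

static int
compact(int *ents, int count)
{
	int from, to;

	for (from = to = 0; from < count; from++) {
		if (ents[from] == 0)		/* stale: drop it */
			continue;
		if (from > to)			/* only move entries that change place */
			ents[to] = ents[from];
		to++;
	}
	return to;				/* new live entry count */
}

int
main(void)
{
	int ents[] = { 7, 0, 9, 0, 0, 3 };

	assert(compact(ents, 6) == 3);		/* 7, 9, 3 remain, in order */
	return 0;
}
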
-
-/*
- * Compact the leaf entries, removing stale ones.
- * Leave one stale entry behind - the one closest to our
- * insertion index - and the caller will shift that one to our insertion
- * point later.
- * Return new insertion index, where the remaining stale entry is,
- * and leaf logging indices.
- */
-void
-xfs_dir3_leaf_compact_x1(
-       struct xfs_dir3_icleaf_hdr *leafhdr,
-       struct xfs_dir2_leaf_entry *ents,
-       int             *indexp,        /* insertion index */
-       int             *lowstalep,     /* out: stale entry before us */
-       int             *highstalep,    /* out: stale entry after us */
-       int             *lowlogp,       /* out: low log index */
-       int             *highlogp)      /* out: high log index */
-{
-       int             from;           /* source copy index */
-       int             highstale;      /* stale entry at/after index */
-       int             index;          /* insertion index */
-       int             keepstale;      /* source index of kept stale */
-       int             lowstale;       /* stale entry before index */
-       int             newindex=0;     /* new insertion index */
-       int             to;             /* destination copy index */
-
-       ASSERT(leafhdr->stale > 1);
-       index = *indexp;
-
-       xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
-
-       /*
-        * Pick the better of lowstale and highstale.
-        */
-       if (lowstale >= 0 &&
-           (highstale == leafhdr->count ||
-            index - lowstale <= highstale - index))
-               keepstale = lowstale;
-       else
-               keepstale = highstale;
-       /*
-        * Copy the entries in place, removing all the stale entries
-        * except keepstale.
-        */
-       for (from = to = 0; from < leafhdr->count; from++) {
-               /*
-                * Notice the new value of index.
-                */
-               if (index == from)
-                       newindex = to;
-               if (from != keepstale &&
-                   ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
-                       if (from == to)
-                               *lowlogp = to;
-                       continue;
-               }
-               /*
-                * Record the new keepstale value for the insertion.
-                */
-               if (from == keepstale)
-                       lowstale = highstale = to;
-               /*
-                * Copy only the entries that have moved.
-                */
-               if (from > to)
-                       ents[to] = ents[from];
-               to++;
-       }
-       ASSERT(from > to);
-       /*
-        * If the insertion point was past the last entry,
-        * set the new insertion point accordingly.
-        */
-       if (index == from)
-               newindex = to;
-       *indexp = newindex;
-       /*
-        * Adjust the leaf header values.
-        */
-       leafhdr->count -= from - to;
-       leafhdr->stale = 1;
-       /*
-        * Remember the low/high stale value only in the "right"
-        * direction.
-        */
-       if (lowstale >= newindex)
-               lowstale = -1;
-       else
-               highstale = leafhdr->count;
-       *highlogp = leafhdr->count - 1;
-       *lowstalep = lowstale;
-       *highstalep = highstale;
-}
-
-/*
- * Log the bests entries indicated from a leaf1 block.
- */
-static void
-xfs_dir3_leaf_log_bests(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,            /* leaf buffer */
-       int                     first,          /* first entry to log */
-       int                     last)           /* last entry to log */
-{
-       __be16                  *firstb;        /* pointer to first entry */
-       __be16                  *lastb;         /* pointer to last entry */
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-
-       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
-
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       firstb = xfs_dir2_leaf_bests_p(ltp) + first;
-       lastb = xfs_dir2_leaf_bests_p(ltp) + last;
-       xfs_trans_log_buf(args->trans, bp,
-               (uint)((char *)firstb - (char *)leaf),
-               (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
-}
-
-/*
- * Log the leaf entries indicated from a leaf1 or leafn block.
- */
-void
-xfs_dir3_leaf_log_ents(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       int                     first,
-       int                     last)
-{
-       xfs_dir2_leaf_entry_t   *firstlep;      /* pointer to first entry */
-       xfs_dir2_leaf_entry_t   *lastlep;       /* pointer to last entry */
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       struct xfs_dir2_leaf_entry *ents;
-
-       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
-
-       ents = args->dp->d_ops->leaf_ents_p(leaf);
-       firstlep = &ents[first];
-       lastlep = &ents[last];
-       xfs_trans_log_buf(args->trans, bp,
-               (uint)((char *)firstlep - (char *)leaf),
-               (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
-}
-
-/*
- * Log the header of the leaf1 or leafn block.
- */
-void
-xfs_dir3_leaf_log_header(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-
-       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
-
-       xfs_trans_log_buf(args->trans, bp,
-                         (uint)((char *)&leaf->hdr - (char *)leaf),
-                         args->dp->d_ops->leaf_hdr_size - 1);
-}
-
-/*
- * Log the tail of the leaf1 block.
- */
-STATIC void
-xfs_dir3_leaf_log_tail(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-
-       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
-              leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
-
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
-               (uint)(args->geo->blksize - 1));
-}
-
-/*
- * Look up the entry referred to by args in the leaf format directory.
- * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
- * is also used by the node-format code.
- */
-int
-xfs_dir2_leaf_lookup(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       int                     index;          /* found entry index */
-       struct xfs_buf          *lbp;           /* leaf buffer */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_leaf_entry *ents;
-
-       trace_xfs_dir2_leaf_lookup(args);
-
-       /*
-        * Look up name in the leaf block, returning both buffers and index.
-        */
-       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
-               return error;
-       }
-       tp = args->trans;
-       dp = args->dp;
-       xfs_dir3_leaf_check(dp, lbp);
-       leaf = lbp->b_addr;
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       /*
-        * Get to the leaf entry and contained data entry address.
-        */
-       lep = &ents[index];
-
-       /*
-        * Point to the data entry.
-        */
-       dep = (xfs_dir2_data_entry_t *)
-             ((char *)dbp->b_addr +
-              xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
-       /*
-        * Return the found inode number & CI name if appropriate
-        */
-       args->inumber = be64_to_cpu(dep->inumber);
-       args->filetype = dp->d_ops->data_get_ftype(dep);
-       error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-       xfs_trans_brelse(tp, dbp);
-       xfs_trans_brelse(tp, lbp);
-       return error;
-}
-
-/*
- * Look up name/hash in the leaf block.
- * Fill in indexp with the found index, and dbpp with the data buffer.
- * If not found dbpp will be NULL, and ENOENT comes back.
- * lbpp will always be filled in with the leaf buffer unless there's an error.
- */
-static int                                     /* error */
-xfs_dir2_leaf_lookup_int(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          **lbpp,         /* out: leaf buffer */
-       int                     *indexp,        /* out: index in leaf block */
-       struct xfs_buf          **dbpp)         /* out: data buffer */
-{
-       xfs_dir2_db_t           curdb = -1;     /* current data block number */
-       struct xfs_buf          *dbp = NULL;    /* data buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       int                     index;          /* index in leaf block */
-       struct xfs_buf          *lbp;           /* leaf buffer */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_dir2_db_t           newdb;          /* new data block number */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       xfs_dir2_db_t           cidb = -1;      /* case match data block no. */
-       enum xfs_dacmp          cmp;            /* name compare result */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-
-       error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
-       if (error)
-               return error;
-
-       *lbpp = lbp;
-       leaf = lbp->b_addr;
-       xfs_dir3_leaf_check(dp, lbp);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       /*
-        * Look for the first leaf entry with our hash value.
-        */
-       index = xfs_dir2_leaf_search_hash(args, lbp);
-       /*
-        * Loop over all the entries with the right hash value
-        * looking to match the name.
-        */
-       for (lep = &ents[index];
-            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
-            lep++, index++) {
-               /*
-                * Skip over stale leaf entries.
-                */
-               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               /*
-                * Get the new data block number.
-                */
-               newdb = xfs_dir2_dataptr_to_db(args->geo,
-                                              be32_to_cpu(lep->address));
-               /*
-                * If it's not the same as the old data block number,
-                * need to pitch the old one and read the new one.
-                */
-               if (newdb != curdb) {
-                       if (dbp)
-                               xfs_trans_brelse(tp, dbp);
-                       error = xfs_dir3_data_read(tp, dp,
-                                          xfs_dir2_db_to_da(args->geo, newdb),
-                                          -1, &dbp);
-                       if (error) {
-                               xfs_trans_brelse(tp, lbp);
-                               return error;
-                       }
-                       curdb = newdb;
-               }
-               /*
-                * Point to the data entry.
-                */
-               dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
-                       xfs_dir2_dataptr_to_off(args->geo,
-                                               be32_to_cpu(lep->address)));
-               /*
-                * Compare name and if it's an exact match, return the index
-                * and buffer. If it's the first case-insensitive match, store
-                * the index and buffer and continue looking for an exact match.
-                */
-               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
-               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                       args->cmpresult = cmp;
-                       *indexp = index;
-                       /* case exact match: return the current buffer. */
-                       if (cmp == XFS_CMP_EXACT) {
-                               *dbpp = dbp;
-                               return 0;
-                       }
-                       cidb = curdb;
-               }
-       }
-       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-       /*
-        * Here, we can only be doing a lookup (not a rename or remove).
-        * If a case-insensitive match was found earlier, re-read the
-        * appropriate data block if required and return it.
-        */
-       if (args->cmpresult == XFS_CMP_CASE) {
-               ASSERT(cidb != -1);
-               if (cidb != curdb) {
-                       xfs_trans_brelse(tp, dbp);
-                       error = xfs_dir3_data_read(tp, dp,
-                                          xfs_dir2_db_to_da(args->geo, cidb),
-                                          -1, &dbp);
-                       if (error) {
-                               xfs_trans_brelse(tp, lbp);
-                               return error;
-                       }
-               }
-               *dbpp = dbp;
-               return 0;
-       }
-       /*
-        * No match found, return ENOENT.
-        */
-       ASSERT(cidb == -1);
-       if (dbp)
-               xfs_trans_brelse(tp, dbp);
-       xfs_trans_brelse(tp, lbp);
-       return ENOENT;
-}
-
-/*
- * Remove an entry from a leaf format directory.
- */
-int                                            /* error */
-xfs_dir2_leaf_removename(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       __be16                  *bestsp;        /* leaf block best freespace */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_db_t           db;             /* data block number */
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data entry structure */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       xfs_dir2_db_t           i;              /* temporary data block # */
-       int                     index;          /* index into leaf entries */
-       struct xfs_buf          *lbp;           /* leaf buffer */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log data header */
-       int                     needscan;       /* need to rescan data frees */
-       xfs_dir2_data_off_t     oldbest;        /* old value of best free */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_data_free *bf;          /* bestfree table */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       trace_xfs_dir2_leaf_removename(args);
-
-       /*
-        * Lookup the leaf entry, get the leaf and data blocks read in.
-        */
-       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
-               return error;
-       }
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       leaf = lbp->b_addr;
-       hdr = dbp->b_addr;
-       xfs_dir3_data_check(dp, dbp);
-       bf = dp->d_ops->data_bestfree_p(hdr);
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       /*
-        * Point to the leaf entry, use that to point to the data entry.
-        */
-       lep = &ents[index];
-       db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
-       dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-               xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
-       needscan = needlog = 0;
-       oldbest = be16_to_cpu(bf[0].length);
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       bestsp = xfs_dir2_leaf_bests_p(ltp);
-       ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
-       /*
-        * Mark the former data entry unused.
-        */
-       xfs_dir2_data_make_free(args, dbp,
-               (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
-               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
-       /*
-        * We just mark the leaf entry stale by putting a null in it.
-        */
-       leafhdr.stale++;
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, lbp);
-
-       lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-       xfs_dir3_leaf_log_ents(args, lbp, index, index);
-
-       /*
-        * Scan the freespace in the data block again if necessary,
-        * log the data block header if necessary.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       /*
-        * If the longest freespace in the data block has changed,
-        * put the new value in the bests table and log that.
-        */
-       if (be16_to_cpu(bf[0].length) != oldbest) {
-               bestsp[db] = bf[0].length;
-               xfs_dir3_leaf_log_bests(args, lbp, db, db);
-       }
-       xfs_dir3_data_check(dp, dbp);
-       /*
-        * If the data block is now empty then get rid of the data block.
-        */
-       if (be16_to_cpu(bf[0].length) ==
-                       args->geo->blksize - dp->d_ops->data_entry_offset) {
-               ASSERT(db != args->geo->datablk);
-               if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
-                       /*
-                        * Nope, can't get rid of it because doing so
-                        * would itself need a bmap btree block allocated
-                        * and there's no space reservation for that.
-                        * Just go on, returning success, leaving the
-                        * empty block in place.
-                        */
-                       if (error == ENOSPC && args->total == 0)
-                               error = 0;
-                       xfs_dir3_leaf_check(dp, lbp);
-                       return error;
-               }
-               dbp = NULL;
-               /*
-                * If this is the last data block then compact the
-                * bests table by getting rid of entries.
-                */
-               if (db == be32_to_cpu(ltp->bestcount) - 1) {
-                       /*
-                        * Look for the last active entry (i).
-                        */
-                       for (i = db - 1; i > 0; i--) {
-                               if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
-                                       break;
-                       }
-                       /*
-                        * Copy the table down so inactive entries at the
-                        * end are removed.
-                        */
-                       memmove(&bestsp[db - i], bestsp,
-                               (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
-                       be32_add_cpu(&ltp->bestcount, -(db - i));
-                       xfs_dir3_leaf_log_tail(args, lbp);
-                       xfs_dir3_leaf_log_bests(args, lbp, 0,
-                                               be32_to_cpu(ltp->bestcount) - 1);
-               } else
-                       bestsp[db] = cpu_to_be16(NULLDATAOFF);
-       }
-       /*
-        * If the data block was not the first one, drop it.
-        */
-       else if (db != args->geo->datablk)
-               dbp = NULL;
-
-       xfs_dir3_leaf_check(dp, lbp);
-       /*
-        * See if we can convert to block form.
-        */
-       return xfs_dir2_leaf_to_block(args, lbp, dbp);
-}
-
-/*
- * Replace the inode number in a leaf format directory entry.
- */
-int                                            /* error */
-xfs_dir2_leaf_replace(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       int                     index;          /* index of leaf entry */
-       struct xfs_buf          *lbp;           /* leaf buffer */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_leaf_entry *ents;
-
-       trace_xfs_dir2_leaf_replace(args);
-
-       /*
-        * Look up the entry.
-        */
-       if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
-               return error;
-       }
-       dp = args->dp;
-       leaf = lbp->b_addr;
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       /*
-        * Point to the leaf entry, get data address from it.
-        */
-       lep = &ents[index];
-       /*
-        * Point to the data entry.
-        */
-       dep = (xfs_dir2_data_entry_t *)
-             ((char *)dbp->b_addr +
-              xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
-       ASSERT(args->inumber != be64_to_cpu(dep->inumber));
-       /*
-        * Put the new inode number in, log it.
-        */
-       dep->inumber = cpu_to_be64(args->inumber);
-       dp->d_ops->data_put_ftype(dep, args->filetype);
-       tp = args->trans;
-       xfs_dir2_data_log_entry(args, dbp, dep);
-       xfs_dir3_leaf_check(dp, lbp);
-       xfs_trans_brelse(tp, lbp);
-       return 0;
-}
-
-/*
- * Return index in the leaf block (lbp) which is either the first
- * one with this hash value, or if there are none, the insert point
- * for that hash value.
- */
-int                                            /* index value */
-xfs_dir2_leaf_search_hash(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *lbp)           /* leaf buffer */
-{
-       xfs_dahash_t            hash=0;         /* hash from this entry */
-       xfs_dahash_t            hashwant;       /* hash value looking for */
-       int                     high;           /* high leaf index */
-       int                     low;            /* low leaf index */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       int                     mid=0;          /* current leaf index */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       leaf = lbp->b_addr;
-       ents = args->dp->d_ops->leaf_ents_p(leaf);
-       args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       /*
-        * Note, the table cannot be empty, so we have to go through the loop.
-        * Binary search the leaf entries looking for our hash value.
-        */
-       for (lep = ents, low = 0, high = leafhdr.count - 1,
-               hashwant = args->hashval;
-            low <= high; ) {
-               mid = (low + high) >> 1;
-               if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
-                       break;
-               if (hash < hashwant)
-                       low = mid + 1;
-               else
-                       high = mid - 1;
-       }
-       /*
-        * Found one, back up through all the equal hash values.
-        */
-       if (hash == hashwant) {
-               while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
-                       mid--;
-               }
-       }
-       /*
-        * Need to point to an entry higher than ours.
-        */
-       else if (hash < hashwant)
-               mid++;
-       return mid;
-}
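
/*
 * Illustrative sketch, not part of the original source: the same
 * "binary search, then back up over equal keys" pattern as
 * xfs_dir2_leaf_search_hash() above, on a sorted int array.  The keys[]
 * values, search_first() and main() are invented for the example.
 */
#include <assert.h>

static int
search_first(const int *keys, int count, int want)
{
	int low = 0, high = count - 1, mid = 0, key = 0;

	while (low <= high) {
		mid = (low + high) >> 1;
		key = keys[mid];
		if (key == want)
			break;
		if (key < want)
			low = mid + 1;
		else
			high = mid - 1;
	}
	if (key == want) {
		while (mid > 0 && keys[mid - 1] == want)
			mid--;			/* back up to the first duplicate */
	} else if (key < want) {
		mid++;				/* need an entry higher than ours */
	}
	return mid;
}

int
main(void)
{
	int keys[] = { 10, 20, 20, 20, 30 };

	assert(search_first(keys, 5, 20) == 1);	/* first of the equal keys */
	assert(search_first(keys, 5, 25) == 4);	/* insertion point before 30 */
	return 0;
}
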
-
-/*
- * Trim off a trailing data block.  We know it's empty since the leaf
- * freespace table says so.
- */
-int                                            /* error */
-xfs_dir2_leaf_trim_data(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *lbp,           /* leaf buffer */
-       xfs_dir2_db_t           db)             /* data block number */
-{
-       __be16                  *bestsp;        /* leaf bests table */
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return value */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_trans_t             *tp;            /* transaction pointer */
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       /*
-        * Read the offending data block.  We need its buffer.
-        */
-       error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
-                                  -1, &dbp);
-       if (error)
-               return error;
-
-       leaf = lbp->b_addr;
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-
-#ifdef DEBUG
-{
-       struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
-       struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);
-
-       ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-              hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
-       ASSERT(be16_to_cpu(bf[0].length) ==
-              args->geo->blksize - dp->d_ops->data_entry_offset);
-       ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
-}
-#endif
-
-       /*
-        * Get rid of the data block.
-        */
-       if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
-               ASSERT(error != ENOSPC);
-               xfs_trans_brelse(tp, dbp);
-               return error;
-       }
-       /*
-        * Eliminate the last bests entry from the table.
-        */
-       bestsp = xfs_dir2_leaf_bests_p(ltp);
-       be32_add_cpu(&ltp->bestcount, -1);
-       memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
-       xfs_dir3_leaf_log_tail(args, lbp);
-       xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
-       return 0;
-}
-
-static inline size_t
-xfs_dir3_leaf_size(
-       struct xfs_dir3_icleaf_hdr      *hdr,
-       int                             counts)
-{
-       int     entries;
-       int     hdrsize;
-
-       entries = hdr->count - hdr->stale;
-       if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
-           hdr->magic == XFS_DIR2_LEAFN_MAGIC)
-               hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
-       else
-               hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
-
-       return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
-                      + counts * sizeof(xfs_dir2_data_off_t)
-                      + sizeof(xfs_dir2_leaf_tail_t);
-}
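
/*
 * Illustrative sketch, not part of the original source: the same
 * "will the leafn entries plus the bests table fit in one leaf1 block?"
 * arithmetic as xfs_dir3_leaf_size() above.  The structure sizes are
 * assumed for the v4 (non-CRC) on-disk format and a 4096-byte directory
 * block; the counts are invented for the example.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int blksize = 4096;	/* directory block size (assumed) */
	unsigned int hdrsize = 16;	/* xfs_dir2_leaf_hdr (assumed, v4) */
	unsigned int entsize = 8;	/* hashval + address per leaf entry */
	unsigned int offsize = 2;	/* one bests table entry */
	unsigned int tailsize = 4;	/* bestcount */
	unsigned int live = 400;	/* count - stale (invented) */
	unsigned int nvalid = 6;	/* bests entries to copy (invented) */
	unsigned int need;

	need = hdrsize + live * entsize + nvalid * offsize + tailsize;
	/* prints "need 3232 of 4096 bytes -> convert to leaf1" */
	printf("need %u of %u bytes -> %s\n", need, blksize,
	       need > blksize ? "stay in node form" : "convert to leaf1");
	return 0;
}
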
-
-/*
- * Convert node form directory to leaf form directory.
- * The root of the node form dir needs to already be a LEAFN block.
- * Just return if we can't do anything.
- */
-int                                            /* error */
-xfs_dir2_node_to_leaf(
-       xfs_da_state_t          *state)         /* directory operation state */
-{
-       xfs_da_args_t           *args;          /* operation arguments */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       struct xfs_buf          *fbp;           /* buffer for freespace block */
-       xfs_fileoff_t           fo;             /* freespace file offset */
-       xfs_dir2_free_t         *free;          /* freespace structure */
-       struct xfs_buf          *lbp;           /* buffer for leaf block */
-       xfs_dir2_leaf_tail_t    *ltp;           /* tail of leaf structure */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     rval;           /* successful free trim? */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_dir3_icfree_hdr freehdr;
-
-       /*
-        * There's more than a leaf level in the btree, so there must
-        * be multiple leafn blocks.  Give up.
-        */
-       if (state->path.active > 1)
-               return 0;
-       args = state->args;
-
-       trace_xfs_dir2_node_to_leaf(args);
-
-       mp = state->mp;
-       dp = args->dp;
-       tp = args->trans;
-       /*
-        * Get the last offset in the file.
-        */
-       if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
-               return error;
-       }
-       fo -= args->geo->fsbcount;
-       /*
-        * If there are freespace blocks other than the first one,
-        * take this opportunity to remove trailing empty freespace blocks
-        * that may have been left behind during no-space-reservation
-        * operations.
-        */
-       while (fo > args->geo->freeblk) {
-               if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
-                       return error;
-               }
-               if (rval)
-                       fo -= args->geo->fsbcount;
-               else
-                       return 0;
-       }
-       /*
-        * Now find the block just before the freespace block.
-        */
-       if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
-               return error;
-       }
-       /*
-        * If it's not the single leaf block, give up.
-        */
-       if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
-               return 0;
-       lbp = state->path.blk[0].bp;
-       leaf = lbp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
-              leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
-
-       /*
-        * Read the freespace block.
-        */
-       error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);
-       if (error)
-               return error;
-       free = fbp->b_addr;
-       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-       ASSERT(!freehdr.firstdb);
-
-       /*
-        * Now see if the leafn and free data will fit in a leaf1.
-        * If not, release the buffer and give up.
-        */
-       if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
-               xfs_trans_brelse(tp, fbp);
-               return 0;
-       }
-
-       /*
-        * If the leaf has any stale entries in it, compress them out.
-        */
-       if (leafhdr.stale)
-               xfs_dir3_leaf_compact(args, &leafhdr, lbp);
-
-       lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
-       xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
-       leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
-                                       ? XFS_DIR2_LEAF1_MAGIC
-                                       : XFS_DIR3_LEAF1_MAGIC;
-
-       /*
-        * Set up the leaf tail from the freespace block.
-        */
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       ltp->bestcount = cpu_to_be32(freehdr.nvalid);
-
-       /*
-        * Set up the leaf bests table.
-        */
-       memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),
-               freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
-
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, lbp);
-       xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
-       xfs_dir3_leaf_log_tail(args, lbp);
-       xfs_dir3_leaf_check(dp, lbp);
-
-       /*
-        * Get rid of the freespace block.
-        */
-       error = xfs_dir2_shrink_inode(args,
-                       xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
-                       fbp);
-       if (error) {
-               /*
-                * This can't fail here because it can only happen when
-                * punching out the middle of an extent, and this is an
-                * isolated block.
-                */
-               ASSERT(error != ENOSPC);
-               return error;
-       }
-       fbp = NULL;
-       /*
-        * Now see if we can convert the single-leaf directory
-        * down to a block form directory.
-        * This routine always kills the dabuf for the leaf, so
-        * eliminate it from the path.
-        */
-       error = xfs_dir2_leaf_to_block(args, lbp, NULL);
-       state->path.blk[0].bp = NULL;
-       return error;
-}
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
deleted file mode 100644 (file)
index 4cf8b99..0000000
+++ /dev/null
@@ -1,2284 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-
-/*
- * Function declarations.
- */
-static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
-                             int index);
-static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
-                                    xfs_da_state_blk_t *blk1,
-                                    xfs_da_state_blk_t *blk2);
-static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
-                                int index, xfs_da_state_blk_t *dblk,
-                                int *rval);
-static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
-                                    xfs_da_state_blk_t *fblk);
-
-/*
- * Check internal consistency of a leafn block.
- */
-#ifdef DEBUG
-#define        xfs_dir3_leaf_check(dp, bp) \
-do { \
-       if (!xfs_dir3_leafn_check((dp), (bp))) \
-               ASSERT(0); \
-} while (0);
-
-static bool
-xfs_dir3_leafn_check(
-       struct xfs_inode        *dp,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
-               struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-               if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
-                       return false;
-       } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
-               return false;
-
-       return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
-}
-#else
-#define        xfs_dir3_leaf_check(dp, bp)
-#endif
-
-static bool
-xfs_dir3_free_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_dir2_free_hdr *hdr = bp->b_addr;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-               if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
-                       return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
-                       return false;
-       } else {
-               if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
-                       return false;
-       }
-
-       /* XXX: should bounds check the xfs_dir3_icfree_hdr here */
-
-       return true;
-}
-
-static void
-xfs_dir3_free_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_dir3_free_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_dir3_free_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-       struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
-
-       if (!xfs_dir3_free_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-       xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
-       .verify_read = xfs_dir3_free_read_verify,
-       .verify_write = xfs_dir3_free_write_verify,
-};
-
-
-static int
-__xfs_dir3_free_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             fbno,
-       xfs_daddr_t             mappedbno,
-       struct xfs_buf          **bpp)
-{
-       int                     err;
-
-       err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
-                               XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
-
-       /* a try-read returns with no error and no *bpp if it lands in a hole */
-       if (!err && tp && *bpp)
-               xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
-       return err;
-}
-
-int
-xfs_dir2_free_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             fbno,
-       struct xfs_buf          **bpp)
-{
-       return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp);
-}
-
-static int
-xfs_dir2_free_try_read(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *dp,
-       xfs_dablk_t             fbno,
-       struct xfs_buf          **bpp)
-{
-       return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
-}
-
-static int
-xfs_dir3_free_get_buf(
-       xfs_da_args_t           *args,
-       xfs_dir2_db_t           fbno,
-       struct xfs_buf          **bpp)
-{
-       struct xfs_trans        *tp = args->trans;
-       struct xfs_inode        *dp = args->dp;
-       struct xfs_mount        *mp = dp->i_mount;
-       struct xfs_buf          *bp;
-       int                     error;
-       struct xfs_dir3_icfree_hdr hdr;
-
-       error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),
-                                  -1, &bp, XFS_DATA_FORK);
-       if (error)
-               return error;
-
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_FREE_BUF);
-       bp->b_ops = &xfs_dir3_free_buf_ops;
-
-       /*
-        * Initialize the new block to be empty, and remember
-        * its first slot as our empty slot.
-        */
-       memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
-       memset(&hdr, 0, sizeof(hdr));
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
-
-               hdr.magic = XFS_DIR3_FREE_MAGIC;
-
-               hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
-               hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
-       } else
-               hdr.magic = XFS_DIR2_FREE_MAGIC;
-       dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Log entries from a freespace block.
- */
-STATIC void
-xfs_dir2_free_log_bests(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp,
-       int                     first,          /* first entry to log */
-       int                     last)           /* last entry to log */
-{
-       xfs_dir2_free_t         *free;          /* freespace structure */
-       __be16                  *bests;
-
-       free = bp->b_addr;
-       bests = args->dp->d_ops->free_bests_p(free);
-       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
-              free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
-       xfs_trans_log_buf(args->trans, bp,
-               (uint)((char *)&bests[first] - (char *)free),
-               (uint)((char *)&bests[last] - (char *)free +
-                      sizeof(bests[0]) - 1));
-}
-
-/*
- * Log header from a freespace block.
- */
-static void
-xfs_dir2_free_log_header(
-       struct xfs_da_args      *args,
-       struct xfs_buf          *bp)
-{
-#ifdef DEBUG
-       xfs_dir2_free_t         *free;          /* freespace structure */
-
-       free = bp->b_addr;
-       ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
-              free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
-#endif
-       xfs_trans_log_buf(args->trans, bp, 0,
-                         args->dp->d_ops->free_hdr_size - 1);
-}
-
-/*
- * Convert a leaf-format directory to a node-format directory.
- * We need to change the magic number of the leaf block, and copy
- * the freespace table out of the leaf block into its own block.
- */
-int                                            /* error */
-xfs_dir2_leaf_to_node(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *lbp)           /* leaf buffer */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return value */
-       struct xfs_buf          *fbp;           /* freespace buffer */
-       xfs_dir2_db_t           fdb;            /* freespace block number */
-       xfs_dir2_free_t         *free;          /* freespace structure */
-       __be16                  *from;          /* pointer to freespace entry */
-       int                     i;              /* leaf freespace index */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_tail_t    *ltp;           /* leaf tail structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     n;              /* count of live freespc ents */
-       xfs_dir2_data_off_t     off;            /* freespace entry value */
-       __be16                  *to;            /* pointer to freespace entry */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir3_icfree_hdr freehdr;
-
-       trace_xfs_dir2_leaf_to_node(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       /*
-        * Add a freespace block to the directory.
-        */
-       if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {
-               return error;
-       }
-       ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
-       /*
-        * Get the buffer for the new freespace block.
-        */
-       error = xfs_dir3_free_get_buf(args, fdb, &fbp);
-       if (error)
-               return error;
-
-       free = fbp->b_addr;
-       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-       leaf = lbp->b_addr;
-       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-       ASSERT(be32_to_cpu(ltp->bestcount) <=
-                               (uint)dp->i_d.di_size / args->geo->blksize);
-
-       /*
-        * Copy freespace entries from the leaf block to the new block.
-        * Count active entries.
-        */
-       from = xfs_dir2_leaf_bests_p(ltp);
-       to = dp->d_ops->free_bests_p(free);
-       for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
-               if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
-                       n++;
-               *to = cpu_to_be16(off);
-       }
-
-       /*
-        * Now initialize the freespace block header.
-        */
-       freehdr.nused = n;
-       freehdr.nvalid = be32_to_cpu(ltp->bestcount);
-
-       dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
-       xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1);
-       xfs_dir2_free_log_header(args, fbp);
-
-       /*
-        * Converting the leaf to a leafnode is just a matter of changing the
-        * magic number and the ops. Do the change directly to the buffer as
-        * it's less work (and less code) than decoding the header to host
-        * format and back again.
-        */
-       if (leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC))
-               leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
-       else
-               leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
-       lbp->b_ops = &xfs_dir3_leafn_buf_ops;
-       xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF);
-       xfs_dir3_leaf_log_header(args, lbp);
-       xfs_dir3_leaf_check(dp, lbp);
-       return 0;
-}
-
-/*
- * Add a leaf entry to a leaf block in a node-form directory.
- * The other work necessary is done from the caller.
- */
-static int                                     /* error */
-xfs_dir2_leafn_add(
-       struct xfs_buf          *bp,            /* leaf buffer */
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     index)          /* insertion pt for new entry */
-{
-       int                     compact;        /* compacting stale leaves */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     highstale;      /* next stale entry */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       int                     lfloghigh;      /* high leaf entry logging */
-       int                     lfloglow;       /* low leaf entry logging */
-       int                     lowstale;       /* previous stale entry */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_dir2_leaf_entry *ents;
-
-       trace_xfs_dir2_leafn_add(args, index);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       leaf = bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-
-       /*
-        * Quick check just to make sure we are not going to index
-        * into other people's memory.
-        */
-       if (index < 0)
-               return EFSCORRUPTED;
-
-       /*
-        * If the block already holds the maximum number of leaf entries,
-        * then if there are no stale entries the new one won't fit and
-        * the caller will do a split.  If there are stale entries we'll
-        * do a compact.
-        */
-
-       if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
-               if (!leafhdr.stale)
-                       return ENOSPC;
-               compact = leafhdr.stale > 1;
-       } else
-               compact = 0;
-       ASSERT(index == 0 || be32_to_cpu(ents[index - 1].hashval) <= args->hashval);
-       ASSERT(index == leafhdr.count ||
-              be32_to_cpu(ents[index].hashval) >= args->hashval);
-
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-               return 0;
-
-       /*
-        * Compact out all but one stale leaf entry.  Leaves behind
-        * the entry closest to index.
-        */
-       if (compact)
-               xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
-                                        &highstale, &lfloglow, &lfloghigh);
-       else if (leafhdr.stale) {
-               /*
-                * Set impossible logging indices for this case.
-                */
-               lfloglow = leafhdr.count;
-               lfloghigh = -1;
-       }
-
-       /*
-        * Insert the new entry, log everything.
-        */
-       lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
-                                      highstale, &lfloglow, &lfloghigh);
-
-       lep->hashval = cpu_to_be32(args->hashval);
-       lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,
-                               args->blkno, args->index));
-
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, bp);
-       xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh);
-       xfs_dir3_leaf_check(dp, bp);
-       return 0;
-}
-
-#ifdef DEBUG
-static void
-xfs_dir2_free_hdr_check(
-       struct xfs_inode *dp,
-       struct xfs_buf  *bp,
-       xfs_dir2_db_t   db)
-{
-       struct xfs_dir3_icfree_hdr hdr;
-
-       dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
-
-       ASSERT((hdr.firstdb %
-               dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);
-       ASSERT(hdr.firstdb <= db);
-       ASSERT(db < hdr.firstdb + hdr.nvalid);
-}
-#else
-#define xfs_dir2_free_hdr_check(dp, bp, db)
-#endif /* DEBUG */
-
-/*
- * Return the last hash value in the leaf.
- * Stale entries are ok.
- */
-xfs_dahash_t                                   /* hash value */
-xfs_dir2_leafn_lasthash(
-       struct xfs_inode *dp,
-       struct xfs_buf  *bp,                    /* leaf buffer */
-       int             *count)                 /* count of entries in leaf */
-{
-       struct xfs_dir2_leaf    *leaf = bp->b_addr;
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-
-       ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
-              leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
-
-       if (count)
-               *count = leafhdr.count;
-       if (!leafhdr.count)
-               return 0;
-
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       return be32_to_cpu(ents[leafhdr.count - 1].hashval);
-}
-
-/*
- * Look up a leaf entry for space to add a name in a node-format leaf block.
- * The extrablk in state is a freespace block.
- */
-STATIC int
-xfs_dir2_leafn_lookup_for_addname(
-       struct xfs_buf          *bp,            /* leaf buffer */
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     *indexp,        /* out: leaf entry index */
-       xfs_da_state_t          *state)         /* state to fill in */
-{
-       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
-       xfs_dir2_db_t           curdb = -1;     /* current data block number */
-       xfs_dir2_db_t           curfdb = -1;    /* current free block number */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return value */
-       int                     fi;             /* free entry index */
-       xfs_dir2_free_t         *free = NULL;   /* free block structure */
-       int                     index;          /* leaf entry index */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       int                     length;         /* length of new data entry */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_dir2_db_t           newdb;          /* new data block number */
-       xfs_dir2_db_t           newfdb;         /* new free block number */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       leaf = bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-
-       xfs_dir3_leaf_check(dp, bp);
-       ASSERT(leafhdr.count > 0);
-
-       /*
-        * Look up the hash value in the leaf entries.
-        */
-       index = xfs_dir2_leaf_search_hash(args, bp);
-       /*
-        * Do we have a buffer coming in?
-        */
-       if (state->extravalid) {
-               /* If so, it's a free block buffer, get the block number. */
-               curbp = state->extrablk.bp;
-               curfdb = state->extrablk.blkno;
-               free = curbp->b_addr;
-               ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
-                      free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
-       }
-       length = dp->d_ops->data_entsize(args->namelen);
-       /*
-        * Loop over leaf entries with the right hash value.
-        */
-       for (lep = &ents[index];
-            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
-            lep++, index++) {
-               /*
-                * Skip stale leaf entries.
-                */
-               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               /*
-                * Pull the data block number from the entry.
-                */
-               newdb = xfs_dir2_dataptr_to_db(args->geo,
-                                              be32_to_cpu(lep->address));
-               /*
-                * For addname, we're looking for a place to put the new entry.
-                * We want to use a data block with an entry of equal
-                * hash value to ours if there is one with room.
-                *
-                * If this block isn't the data block we already have
-                * in hand, take a look at it.
-                */
-               if (newdb != curdb) {
-                       __be16 *bests;
-
-                       curdb = newdb;
-                       /*
-                        * Convert the data block to the free block
-                        * holding its freespace information.
-                        */
-                       newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);
-                       /*
-                        * If it's not the one we have in hand, read it in.
-                        */
-                       if (newfdb != curfdb) {
-                               /*
-                                * If we had one before, drop it.
-                                */
-                               if (curbp)
-                                       xfs_trans_brelse(tp, curbp);
-
-                               error = xfs_dir2_free_read(tp, dp,
-                                               xfs_dir2_db_to_da(args->geo,
-                                                                 newfdb),
-                                               &curbp);
-                               if (error)
-                                       return error;
-                               free = curbp->b_addr;
-
-                               xfs_dir2_free_hdr_check(dp, curbp, curdb);
-                       }
-                       /*
-                        * Get the index for our entry.
-                        */
-                       fi = dp->d_ops->db_to_fdindex(args->geo, curdb);
-                       /*
-                        * If it has room, return it.
-                        */
-                       bests = dp->d_ops->free_bests_p(free);
-                       if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
-                               XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
-                                                       XFS_ERRLEVEL_LOW, mp);
-                               if (curfdb != newfdb)
-                                       xfs_trans_brelse(tp, curbp);
-                               return EFSCORRUPTED;
-                       }
-                       curfdb = newfdb;
-                       if (be16_to_cpu(bests[fi]) >= length)
-                               goto out;
-               }
-       }
-       /* Didn't find any space */
-       fi = -1;
-out:
-       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-       if (curbp) {
-               /* Giving back a free block. */
-               state->extravalid = 1;
-               state->extrablk.bp = curbp;
-               state->extrablk.index = fi;
-               state->extrablk.blkno = curfdb;
-
-               /*
-                * Important: this magic number is not in the buffer - it's for
-                * buffer type information and therefore only the free/data type
-                * matters here, not whether CRCs are enabled or not.
-                */
-               state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
-       } else {
-               state->extravalid = 0;
-       }
-       /*
-        * Return the index; it will be the insertion point.
-        */
-       *indexp = index;
-       return ENOENT;
-}
-
-/*
- * Look up a leaf entry in a node-format leaf block.
- * The extrablk in state is a data block.
- */
-STATIC int
-xfs_dir2_leafn_lookup_for_entry(
-       struct xfs_buf          *bp,            /* leaf buffer */
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     *indexp,        /* out: leaf entry index */
-       xfs_da_state_t          *state)         /* state to fill in */
-{
-       struct xfs_buf          *curbp = NULL;  /* current data/free buffer */
-       xfs_dir2_db_t           curdb = -1;     /* current data block number */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return value */
-       int                     index;          /* leaf entry index */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_dir2_db_t           newdb;          /* new data block number */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       enum xfs_dacmp          cmp;            /* comparison result */
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_dir3_icleaf_hdr leafhdr;
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       leaf = bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-
-       xfs_dir3_leaf_check(dp, bp);
-       ASSERT(leafhdr.count > 0);
-
-       /*
-        * Look up the hash value in the leaf entries.
-        */
-       index = xfs_dir2_leaf_search_hash(args, bp);
-       /*
-        * Do we have a buffer coming in?
-        */
-       if (state->extravalid) {
-               curbp = state->extrablk.bp;
-               curdb = state->extrablk.blkno;
-       }
-       /*
-        * Loop over leaf entries with the right hash value.
-        */
-       for (lep = &ents[index];
-            index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
-            lep++, index++) {
-               /*
-                * Skip stale leaf entries.
-                */
-               if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               /*
-                * Pull the data block number from the entry.
-                */
-               newdb = xfs_dir2_dataptr_to_db(args->geo,
-                                              be32_to_cpu(lep->address));
-               /*
-                * Not adding a new entry, so we really want to find
-                * the name given to us.
-                *
-                * If it's a different data block, go get it.
-                */
-               if (newdb != curdb) {
-                       /*
-                        * If we had a block before that we aren't saving
-                        * for a CI name, drop it
-                        */
-                       if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
-                                               curdb != state->extrablk.blkno))
-                               xfs_trans_brelse(tp, curbp);
-                       /*
-                        * If we need the block that was saved for a CI match,
-                        * use it; otherwise read in the new data block.
-                        */
-                       if (args->cmpresult != XFS_CMP_DIFFERENT &&
-                                       newdb == state->extrablk.blkno) {
-                               ASSERT(state->extravalid);
-                               curbp = state->extrablk.bp;
-                       } else {
-                               error = xfs_dir3_data_read(tp, dp,
-                                               xfs_dir2_db_to_da(args->geo,
-                                                                 newdb),
-                                               -1, &curbp);
-                               if (error)
-                                       return error;
-                       }
-                       xfs_dir3_data_check(dp, curbp);
-                       curdb = newdb;
-               }
-               /*
-                * Point to the data entry.
-                */
-               dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
-                       xfs_dir2_dataptr_to_off(args->geo,
-                                               be32_to_cpu(lep->address)));
-               /*
-                * Compare the entry and if it's an exact match, return
-                * EEXIST immediately. If it's the first case-insensitive
-                * match, store the block & inode number and continue looking.
-                */
-               cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
-               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                       /* If there is a CI match block, drop it */
-                       if (args->cmpresult != XFS_CMP_DIFFERENT &&
-                                               curdb != state->extrablk.blkno)
-                               xfs_trans_brelse(tp, state->extrablk.bp);
-                       args->cmpresult = cmp;
-                       args->inumber = be64_to_cpu(dep->inumber);
-                       args->filetype = dp->d_ops->data_get_ftype(dep);
-                       *indexp = index;
-                       state->extravalid = 1;
-                       state->extrablk.bp = curbp;
-                       state->extrablk.blkno = curdb;
-                       state->extrablk.index = (int)((char *)dep -
-                                                       (char *)curbp->b_addr);
-                       state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
-                       curbp->b_ops = &xfs_dir3_data_buf_ops;
-                       xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
-                       if (cmp == XFS_CMP_EXACT)
-                               return EEXIST;
-               }
-       }
-       ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
-       if (curbp) {
-               if (args->cmpresult == XFS_CMP_DIFFERENT) {
-                       /* Giving back last used data block. */
-                       state->extravalid = 1;
-                       state->extrablk.bp = curbp;
-                       state->extrablk.index = -1;
-                       state->extrablk.blkno = curdb;
-                       state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
-                       curbp->b_ops = &xfs_dir3_data_buf_ops;
-                       xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
-               } else {
-                       /* If the curbp is not the CI match block, drop it */
-                       if (state->extrablk.bp != curbp)
-                               xfs_trans_brelse(tp, curbp);
-               }
-       } else {
-               state->extravalid = 0;
-       }
-       *indexp = index;
-       return ENOENT;
-}
-
-/*
- * Look up a leaf entry in a node-format leaf block.
- * If this is an addname then the extrablk in state is a freespace block,
- * otherwise it's a data block.
- */
-int
-xfs_dir2_leafn_lookup_int(
-       struct xfs_buf          *bp,            /* leaf buffer */
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     *indexp,        /* out: leaf entry index */
-       xfs_da_state_t          *state)         /* state to fill in */
-{
-       if (args->op_flags & XFS_DA_OP_ADDNAME)
-               return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
-                                                       state);
-       return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
-}
-
-/*
- * Move count leaf entries from source to destination leaf.
- * Log entries and headers.  Stale entries are preserved.
- */
-static void
-xfs_dir3_leafn_moveents(
-       xfs_da_args_t                   *args,  /* operation arguments */
-       struct xfs_buf                  *bp_s,  /* source */
-       struct xfs_dir3_icleaf_hdr      *shdr,
-       struct xfs_dir2_leaf_entry      *sents,
-       int                             start_s,/* source leaf index */
-       struct xfs_buf                  *bp_d,  /* destination */
-       struct xfs_dir3_icleaf_hdr      *dhdr,
-       struct xfs_dir2_leaf_entry      *dents,
-       int                             start_d,/* destination leaf index */
-       int                             count)  /* count of leaves to copy */
-{
-       int                             stale;  /* count stale leaves copied */
-
-       trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count);
-
-       /*
-        * Silently return if nothing to do.
-        */
-       if (count == 0)
-               return;
-
-       /*
-        * If the destination index is not the end of the current
-        * destination leaf entries, open up a hole in the destination
-        * to hold the new entries.
-        */
-       if (start_d < dhdr->count) {
-               memmove(&dents[start_d + count], &dents[start_d],
-                       (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
-               xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,
-                                      count + dhdr->count - 1);
-       }
-       /*
-        * If the source has stale leaves, count the ones in the copy range
-        * so we can update the header correctly.
-        */
-       if (shdr->stale) {
-               int     i;                      /* temp leaf index */
-
-               for (i = start_s, stale = 0; i < start_s + count; i++) {
-                       if (sents[i].address ==
-                                       cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
-                               stale++;
-               }
-       } else
-               stale = 0;
-       /*
-        * Copy the leaf entries from source to destination.
-        */
-       memcpy(&dents[start_d], &sents[start_s],
-               count * sizeof(xfs_dir2_leaf_entry_t));
-       xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);
-
-       /*
-        * If there are source entries after the ones we copied,
-        * delete the ones we copied by sliding the next ones down.
-        */
-       if (start_s + count < shdr->count) {
-               memmove(&sents[start_s], &sents[start_s + count],
-                       count * sizeof(xfs_dir2_leaf_entry_t));
-               xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);
-       }
-
-       /*
-        * Update the headers and log them.
-        */
-       shdr->count -= count;
-       shdr->stale -= stale;
-       dhdr->count += count;
-       dhdr->stale += stale;
-}
-
-/*
- * Determine the sort order of two leaf blocks.
- * Returns 1 if both are valid and leaf2 should be before leaf1, else 0.
- */
-int                                            /* sort order */
-xfs_dir2_leafn_order(
-       struct xfs_inode        *dp,
-       struct xfs_buf          *leaf1_bp,              /* leaf1 buffer */
-       struct xfs_buf          *leaf2_bp)              /* leaf2 buffer */
-{
-       struct xfs_dir2_leaf    *leaf1 = leaf1_bp->b_addr;
-       struct xfs_dir2_leaf    *leaf2 = leaf2_bp->b_addr;
-       struct xfs_dir2_leaf_entry *ents1;
-       struct xfs_dir2_leaf_entry *ents2;
-       struct xfs_dir3_icleaf_hdr hdr1;
-       struct xfs_dir3_icleaf_hdr hdr2;
-
-       dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
-       dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
-       ents1 = dp->d_ops->leaf_ents_p(leaf1);
-       ents2 = dp->d_ops->leaf_ents_p(leaf2);
-
-       if (hdr1.count > 0 && hdr2.count > 0 &&
-           (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
-            be32_to_cpu(ents2[hdr2.count - 1].hashval) <
-                               be32_to_cpu(ents1[hdr1.count - 1].hashval)))
-               return 1;
-       return 0;
-}
-
-/*
- * Rebalance leaf entries between two leaf blocks.
- * This is actually only called when the second block is new,
- * though the code deals with the general case.
- * A new entry will be inserted in one of the blocks, and that
- * entry is taken into account when balancing.
- */
-static void
-xfs_dir2_leafn_rebalance(
-       xfs_da_state_t          *state,         /* btree cursor */
-       xfs_da_state_blk_t      *blk1,          /* first btree block */
-       xfs_da_state_blk_t      *blk2)          /* second btree block */
-{
-       xfs_da_args_t           *args;          /* operation arguments */
-       int                     count;          /* count (& direction) leaves */
-       int                     isleft;         /* new goes in left leaf */
-       xfs_dir2_leaf_t         *leaf1;         /* first leaf structure */
-       xfs_dir2_leaf_t         *leaf2;         /* second leaf structure */
-       int                     mid;            /* midpoint leaf index */
-#if defined(DEBUG) || defined(XFS_WARN)
-       int                     oldstale;       /* old count of stale leaves */
-#endif
-       int                     oldsum;         /* old total leaf count */
-       int                     swap;           /* swapped leaf blocks */
-       struct xfs_dir2_leaf_entry *ents1;
-       struct xfs_dir2_leaf_entry *ents2;
-       struct xfs_dir3_icleaf_hdr hdr1;
-       struct xfs_dir3_icleaf_hdr hdr2;
-       struct xfs_inode        *dp = state->args->dp;
-
-       args = state->args;
-       /*
-        * If the block order is wrong, swap the arguments.
-        */
-       if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) {
-               xfs_da_state_blk_t      *tmp;   /* temp for block swap */
-
-               tmp = blk1;
-               blk1 = blk2;
-               blk2 = tmp;
-       }
-       leaf1 = blk1->bp->b_addr;
-       leaf2 = blk2->bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
-       dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
-       ents1 = dp->d_ops->leaf_ents_p(leaf1);
-       ents2 = dp->d_ops->leaf_ents_p(leaf2);
-
-       oldsum = hdr1.count + hdr2.count;
-#if defined(DEBUG) || defined(XFS_WARN)
-       oldstale = hdr1.stale + hdr2.stale;
-#endif
-       mid = oldsum >> 1;
-
-       /*
-        * If the old leaf count was odd then the new one will be even,
-        * so we need to divide the new count evenly.
-        */
-       if (oldsum & 1) {
-               xfs_dahash_t    midhash;        /* middle entry hash value */
-
-               if (mid >= hdr1.count)
-                       midhash = be32_to_cpu(ents2[mid - hdr1.count].hashval);
-               else
-                       midhash = be32_to_cpu(ents1[mid].hashval);
-               isleft = args->hashval <= midhash;
-       }
-       /*
-        * If the old count is even then the new count is odd, so there's
-        * no preferred side for the new entry.
-        * Pick the left one.
-        */
-       else
-               isleft = 1;
-       /*
-        * Calculate moved entry count.  Positive means left-to-right,
-        * negative means right-to-left.  Then move the entries.
-        */
-       count = hdr1.count - mid + (isleft == 0);
-       if (count > 0)
-               xfs_dir3_leafn_moveents(args, blk1->bp, &hdr1, ents1,
-                                       hdr1.count - count, blk2->bp,
-                                       &hdr2, ents2, 0, count);
-       else if (count < 0)
-               xfs_dir3_leafn_moveents(args, blk2->bp, &hdr2, ents2, 0,
-                                       blk1->bp, &hdr1, ents1,
-                                       hdr1.count, count);
-
-       ASSERT(hdr1.count + hdr2.count == oldsum);
-       ASSERT(hdr1.stale + hdr2.stale == oldstale);
-
-       /* log the changes made when moving the entries */
-       dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
-       dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
-       xfs_dir3_leaf_log_header(args, blk1->bp);
-       xfs_dir3_leaf_log_header(args, blk2->bp);
-
-       xfs_dir3_leaf_check(dp, blk1->bp);
-       xfs_dir3_leaf_check(dp, blk2->bp);
-
-       /*
-        * Mark whether we're inserting into the old or new leaf.
-        */
-       if (hdr1.count < hdr2.count)
-               state->inleaf = swap;
-       else if (hdr1.count > hdr2.count)
-               state->inleaf = !swap;
-       else
-               state->inleaf = swap ^ (blk1->index <= hdr1.count);
-       /*
-        * Adjust the expected index for insertion.
-        */
-       if (!state->inleaf)
-               blk2->index = blk1->index - hdr1.count;
-
-       /*
-        * Finally, sanity check just to make sure we are not returning a
-        * negative index.
-        */
-       if (blk2->index < 0) {
-               state->inleaf = 1;
-               blk2->index = 0;
-               xfs_alert(dp->i_mount,
-       "%s: picked the wrong leaf? reverting original leaf: blk1->index %d",
-                       __func__, blk1->index);
-       }
-}
-
-static int
-xfs_dir3_data_block_free(
-       xfs_da_args_t           *args,
-       struct xfs_dir2_data_hdr *hdr,
-       struct xfs_dir2_free    *free,
-       xfs_dir2_db_t           fdb,
-       int                     findex,
-       struct xfs_buf          *fbp,
-       int                     longest)
-{
-       int                     logfree = 0;
-       __be16                  *bests;
-       struct xfs_dir3_icfree_hdr freehdr;
-       struct xfs_inode        *dp = args->dp;
-
-       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-       bests = dp->d_ops->free_bests_p(free);
-       if (hdr) {
-               /*
-                * Data block is not empty, just set the free entry to the new
-                * value.
-                */
-               bests[findex] = cpu_to_be16(longest);
-               xfs_dir2_free_log_bests(args, fbp, findex, findex);
-               return 0;
-       }
-
-       /* One less used entry in the free table. */
-       freehdr.nused--;
-
-       /*
-        * If this was the last entry in the table, we can trim the table size
-        * back.  There might be other entries at the end referring to
-        * non-existent data blocks, get those too.
-        */
-       if (findex == freehdr.nvalid - 1) {
-               int     i;              /* free entry index */
-
-               for (i = findex - 1; i >= 0; i--) {
-                       if (bests[i] != cpu_to_be16(NULLDATAOFF))
-                               break;
-               }
-               freehdr.nvalid = i + 1;
-               logfree = 0;
-       } else {
-               /* Not the last entry, just punch it out.  */
-               bests[findex] = cpu_to_be16(NULLDATAOFF);
-               logfree = 1;
-       }
-
-       dp->d_ops->free_hdr_to_disk(free, &freehdr);
-       xfs_dir2_free_log_header(args, fbp);
-
-       /*
-        * If there are no useful entries left in the block, get rid of the
-        * block if we can.
-        */
-       if (!freehdr.nused) {
-               int error;
-
-               error = xfs_dir2_shrink_inode(args, fdb, fbp);
-               if (error == 0) {
-                       fbp = NULL;
-                       logfree = 0;
-               } else if (error != ENOSPC || args->total != 0)
-                       return error;
-               /*
-                * It's possible to get ENOSPC if there is no
-                * space reservation.  In this case someone
-                * else will eventually get rid of this block.
-                */
-       }
-
-       /* Log the free entry that changed, unless we got rid of it.  */
-       if (logfree)
-               xfs_dir2_free_log_bests(args, fbp, findex, findex);
-       return 0;
-}
-
-/*
- * Remove an entry from a node directory.
- * This removes the leaf entry and the data entry,
- * and updates the free block if necessary.
- */
-static int                                     /* error */
-xfs_dir2_leafn_remove(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *bp,            /* leaf buffer */
-       int                     index,          /* leaf entry index */
-       xfs_da_state_blk_t      *dblk,          /* data block */
-       int                     *rval)          /* resulting block needs join */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_db_t           db;             /* data block number */
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data block entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
-       int                     longest;        /* longest data free entry */
-       int                     off;            /* data block entry offset */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log data header */
-       int                     needscan;       /* need to rescan data frees */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir2_data_free *bf;          /* bestfree table */
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_dir2_leaf_entry *ents;
-
-       trace_xfs_dir2_leafn_remove(args, index);
-
-       dp = args->dp;
-       tp = args->trans;
-       mp = dp->i_mount;
-       leaf = bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-
-       /*
-        * Point to the entry we're removing.
-        */
-       lep = &ents[index];
-
-       /*
-        * Extract the data block and offset from the entry.
-        */
-       db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
-       ASSERT(dblk->blkno == db);
-       off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));
-       ASSERT(dblk->index == off);
-
-       /*
-        * Kill the leaf entry by marking it stale.
-        * Log the leaf block changes.
-        */
-       leafhdr.stale++;
-       dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
-       xfs_dir3_leaf_log_header(args, bp);
-
-       lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
-       xfs_dir3_leaf_log_ents(args, bp, index, index);
-
-       /*
-        * Make the data entry free.  Keep track of the longest freespace
-        * in the data block in case it changes.
-        */
-       dbp = dblk->bp;
-       hdr = dbp->b_addr;
-       dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
-       bf = dp->d_ops->data_bestfree_p(hdr);
-       longest = be16_to_cpu(bf[0].length);
-       needlog = needscan = 0;
-       xfs_dir2_data_make_free(args, dbp, off,
-               dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
-       /*
-        * Rescan the data block freespaces for bestfree.
-        * Log the data block header if needed.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       xfs_dir3_data_check(dp, dbp);
-       /*
-        * If the longest data block freespace changes, we need to update
-        * the corresponding freeblock entry.
-        */
-       if (longest < be16_to_cpu(bf[0].length)) {
-               int             error;          /* error return value */
-               struct xfs_buf  *fbp;           /* freeblock buffer */
-               xfs_dir2_db_t   fdb;            /* freeblock block number */
-               int             findex;         /* index in freeblock entries */
-               xfs_dir2_free_t *free;          /* freeblock structure */
-
-               /*
-                * Convert the data block number to a free block,
-                * read in the free block.
-                */
-               fdb = dp->d_ops->db_to_fdb(args->geo, db);
-               error = xfs_dir2_free_read(tp, dp,
-                                          xfs_dir2_db_to_da(args->geo, fdb),
-                                          &fbp);
-               if (error)
-                       return error;
-               free = fbp->b_addr;
-#ifdef DEBUG
-       {
-               struct xfs_dir3_icfree_hdr freehdr;
-               dp->d_ops->free_hdr_from_disk(&freehdr, free);
-               ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) *
-                       (fdb - xfs_dir2_byte_to_db(args->geo,
-                                                  XFS_DIR2_FREE_OFFSET)));
-       }
-#endif
-               /*
-                * Calculate which entry we need to fix.
-                */
-               findex = dp->d_ops->db_to_fdindex(args->geo, db);
-               longest = be16_to_cpu(bf[0].length);
-               /*
-                * If the data block is now empty we can get rid of it
-                * (usually).
-                */
-               if (longest == args->geo->blksize -
-                              dp->d_ops->data_entry_offset) {
-                       /*
-                        * Try to punch out the data block.
-                        */
-                       error = xfs_dir2_shrink_inode(args, db, dbp);
-                       if (error == 0) {
-                               dblk->bp = NULL;
-                               hdr = NULL;
-                       }
-                       /*
-                        * We can get ENOSPC if there's no space reservation.
-                        * In this case just drop the buffer and someone else
-                        * will eventually get rid of the empty block.
-                        */
-                       else if (!(error == ENOSPC && args->total == 0))
-                               return error;
-               }
-               /*
-                * If we got rid of the data block, we can eliminate that entry
-                * in the free block.
-                */
-               error = xfs_dir3_data_block_free(args, hdr, free,
-                                                fdb, findex, fbp, longest);
-               if (error)
-                       return error;
-       }
-
-       xfs_dir3_leaf_check(dp, bp);
-       /*
-        * Return indication of whether this leaf block is empty enough
-        * to justify trying to join it with a neighbor.
-        */
-       *rval = (dp->d_ops->leaf_hdr_size +
-                (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
-               args->geo->magicpct;
-       return 0;
-}
-
-/*
- * Split the leaf entries in the old block into old and new blocks.
- */
-int                                            /* error */
-xfs_dir2_leafn_split(
-       xfs_da_state_t          *state,         /* btree cursor */
-       xfs_da_state_blk_t      *oldblk,        /* original block */
-       xfs_da_state_blk_t      *newblk)        /* newly created block */
-{
-       xfs_da_args_t           *args;          /* operation arguments */
-       xfs_dablk_t             blkno;          /* new leaf block number */
-       int                     error;          /* error return value */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       struct xfs_inode        *dp;
-
-       /*
-        * Allocate space for a new leaf node.
-        */
-       args = state->args;
-       dp = args->dp;
-       mp = dp->i_mount;
-       ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
-       error = xfs_da_grow_inode(args, &blkno);
-       if (error) {
-               return error;
-       }
-       /*
-        * Initialize the new leaf block.
-        */
-       error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno),
-                                     &newblk->bp, XFS_DIR2_LEAFN_MAGIC);
-       if (error)
-               return error;
-
-       newblk->blkno = blkno;
-       newblk->magic = XFS_DIR2_LEAFN_MAGIC;
-       /*
-        * Rebalance the entries across the two leaves, link the new
-        * block into the leaves.
-        */
-       xfs_dir2_leafn_rebalance(state, oldblk, newblk);
-       error = xfs_da3_blk_link(state, oldblk, newblk);
-       if (error) {
-               return error;
-       }
-       /*
-        * Insert the new entry in the correct block.
-        */
-       if (state->inleaf)
-               error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index);
-       else
-               error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index);
-       /*
-        * Update last hashval in each block since we added the name.
-        */
-       oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
-       newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
-       xfs_dir3_leaf_check(dp, oldblk->bp);
-       xfs_dir3_leaf_check(dp, newblk->bp);
-       return error;
-}
-
-/*
- * Check a leaf block and its neighbors to see if the block should be
- * collapsed into one or the other neighbor.  Always keep the block
- * with the smaller block number.
- * If the current block is over 50% full, don't try to join it, return 0.
- * If the block is empty, fill in the state structure and return 2.
- * If it can be collapsed, fill in the state structure and return 1.
- * If nothing can be done, return 0.
- */
-int                                            /* error */
-xfs_dir2_leafn_toosmall(
-       xfs_da_state_t          *state,         /* btree cursor */
-       int                     *action)        /* resulting action to take */
-{
-       xfs_da_state_blk_t      *blk;           /* leaf block */
-       xfs_dablk_t             blkno;          /* leaf block number */
-       struct xfs_buf          *bp;            /* leaf buffer */
-       int                     bytes;          /* bytes in use */
-       int                     count;          /* leaf live entry count */
-       int                     error;          /* error return value */
-       int                     forward;        /* sibling block direction */
-       int                     i;              /* sibling counter */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       int                     rval;           /* result from path_shift */
-       struct xfs_dir3_icleaf_hdr leafhdr;
-       struct xfs_dir2_leaf_entry *ents;
-       struct xfs_inode        *dp = state->args->dp;
-
-       /*
-        * Check for the degenerate case of the block being over 50% full.
-        * If so, it's not worth even looking to see if we might be able
-        * to coalesce with a sibling.
-        */
-       blk = &state->path.blk[state->path.active - 1];
-       leaf = blk->bp->b_addr;
-       dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
-       ents = dp->d_ops->leaf_ents_p(leaf);
-       xfs_dir3_leaf_check(dp, blk->bp);
-
-       count = leafhdr.count - leafhdr.stale;
-       bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
-       if (bytes > (state->args->geo->blksize >> 1)) {
-               /*
-                * Blk over 50%, don't try to join.
-                */
-               *action = 0;
-               return 0;
-       }
-       /*
-        * Check for the degenerate case of the block being empty.
-        * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (arbitrarily)
-        * to merge with the forward block unless it is NULL.
-        */
-       if (count == 0) {
-               /*
-                * Make altpath point to the block we want to keep and
-                * path point to the block we want to drop (this one).
-                */
-               forward = (leafhdr.forw != 0);
-               memcpy(&state->altpath, &state->path, sizeof(state->path));
-               error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
-                       &rval);
-               if (error)
-                       return error;
-               *action = rval ? 2 : 0;
-               return 0;
-       }
-       /*
-        * Examine each sibling block to see if we can coalesce with
-        * at least 25% free space to spare.  We need to figure out
-        * whether to merge with the forward or the backward block.
-        * We prefer coalescing with the lower numbered sibling so as
-        * to shrink a directory over time.
-        */
-       forward = leafhdr.forw < leafhdr.back;
-       for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
-               struct xfs_dir3_icleaf_hdr hdr2;
-
-               blkno = forward ? leafhdr.forw : leafhdr.back;
-               if (blkno == 0)
-                       continue;
-               /*
-                * Read the sibling leaf block.
-                */
-               error = xfs_dir3_leafn_read(state->args->trans, dp,
-                                           blkno, -1, &bp);
-               if (error)
-                       return error;
-
-               /*
-                * Count bytes in the two blocks combined.
-                */
-               count = leafhdr.count - leafhdr.stale;
-               bytes = state->args->geo->blksize -
-                       (state->args->geo->blksize >> 2);
-
-               leaf = bp->b_addr;
-               dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
-               ents = dp->d_ops->leaf_ents_p(leaf);
-               count += hdr2.count - hdr2.stale;
-               bytes -= count * sizeof(ents[0]);
-
-               /*
-                * Fits with at least 25% to spare.
-                */
-               if (bytes >= 0)
-                       break;
-               xfs_trans_brelse(state->args->trans, bp);
-       }
-       /*
-        * Didn't like either block, give up.
-        */
-       if (i >= 2) {
-               *action = 0;
-               return 0;
-       }
-
-       /*
-        * Make altpath point to the block we want to keep (the lower
-        * numbered block) and path point to the block we want to drop.
-        */
-       memcpy(&state->altpath, &state->path, sizeof(state->path));
-       if (blkno < blk->blkno)
-               error = xfs_da3_path_shift(state, &state->altpath, forward, 0,
-                       &rval);
-       else
-               error = xfs_da3_path_shift(state, &state->path, forward, 0,
-                       &rval);
-       if (error) {
-               return error;
-       }
-       *action = rval ? 0 : 1;
-       return 0;
-}
-
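The two thresholds applied above reduce to simple arithmetic on live entry counts: a leaf more than half full is never a join candidate, and two siblings are coalesced only when their combined live entries fit within three quarters of a block. A minimal standalone sketch of those checks, assuming the 8-byte on-disk size of struct xfs_dir2_leaf_entry (hashval plus address); the helper names are illustrative, not part of XFS:

#include <stdbool.h>

#define LEAF_ENT_SIZE	8	/* xfs_dir2_leaf_entry: __be32 hashval + __be32 address */

/* Over 50% full: header plus live entries use more than half the block. */
static bool leaf_over_half_full(int hdr_size, int live, int blksize)
{
	return hdr_size + live * LEAF_ENT_SIZE > blksize / 2;
}

/* Coalesce only if the merged live entries leave at least 25% of a block free. */
static bool leaves_can_merge(int live_this, int live_sibling, int blksize)
{
	int budget = blksize - (blksize >> 2);	/* three quarters of the block */

	return (live_this + live_sibling) * LEAF_ENT_SIZE <= budget;
}

With 4096-byte directory blocks this budget allows roughly 384 combined live entries before a merge is refused.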
-/*
- * Move all the leaf entries from drop_blk to save_blk.
- * This is done as part of a join operation.
- */
-void
-xfs_dir2_leafn_unbalance(
-       xfs_da_state_t          *state,         /* cursor */
-       xfs_da_state_blk_t      *drop_blk,      /* dead block */
-       xfs_da_state_blk_t      *save_blk)      /* surviving block */
-{
-       xfs_da_args_t           *args;          /* operation arguments */
-       xfs_dir2_leaf_t         *drop_leaf;     /* dead leaf structure */
-       xfs_dir2_leaf_t         *save_leaf;     /* surviving leaf structure */
-       struct xfs_dir3_icleaf_hdr savehdr;
-       struct xfs_dir3_icleaf_hdr drophdr;
-       struct xfs_dir2_leaf_entry *sents;
-       struct xfs_dir2_leaf_entry *dents;
-       struct xfs_inode        *dp = state->args->dp;
-
-       args = state->args;
-       ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
-       ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
-       drop_leaf = drop_blk->bp->b_addr;
-       save_leaf = save_blk->bp->b_addr;
-
-       dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf);
-       dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf);
-       sents = dp->d_ops->leaf_ents_p(save_leaf);
-       dents = dp->d_ops->leaf_ents_p(drop_leaf);
-
-       /*
-        * If there are any stale leaf entries, take this opportunity
-        * to purge them.
-        */
-       if (drophdr.stale)
-               xfs_dir3_leaf_compact(args, &drophdr, drop_blk->bp);
-       if (savehdr.stale)
-               xfs_dir3_leaf_compact(args, &savehdr, save_blk->bp);
-
-       /*
-        * Move the entries from drop to the appropriate end of save.
-        */
-       drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval);
-       if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp))
-               xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
-                                       save_blk->bp, &savehdr, sents, 0,
-                                       drophdr.count);
-       else
-               xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,
-                                       save_blk->bp, &savehdr, sents,
-                                       savehdr.count, drophdr.count);
-       save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
-
-       /* log the changes made when moving the entries */
-       dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
-       dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
-       xfs_dir3_leaf_log_header(args, save_blk->bp);
-       xfs_dir3_leaf_log_header(args, drop_blk->bp);
-
-       xfs_dir3_leaf_check(dp, save_blk->bp);
-       xfs_dir3_leaf_check(dp, drop_blk->bp);
-}
-
-/*
- * Top-level node form directory addname routine.
- */
-int                                            /* error */
-xfs_dir2_node_addname(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_da_state_blk_t      *blk;           /* leaf block for insert */
-       int                     error;          /* error return value */
-       int                     rval;           /* sub-return value */
-       xfs_da_state_t          *state;         /* btree cursor */
-
-       trace_xfs_dir2_node_addname(args);
-
-       /*
-        * Allocate and initialize the state (btree cursor).
-        */
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = args->dp->i_mount;
-       /*
-        * Look up the name.  We're not supposed to find it, but
-        * this gives us the insertion point.
-        */
-       error = xfs_da3_node_lookup_int(state, &rval);
-       if (error)
-               rval = error;
-       if (rval != ENOENT) {
-               goto done;
-       }
-       /*
-        * Add the data entry to a data block.
-        * Extravalid is set to a freeblock found by lookup.
-        */
-       rval = xfs_dir2_node_addname_int(args,
-               state->extravalid ? &state->extrablk : NULL);
-       if (rval) {
-               goto done;
-       }
-       blk = &state->path.blk[state->path.active - 1];
-       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-       /*
-        * Add the new leaf entry.
-        */
-       rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
-       if (rval == 0) {
-               /*
-                * It worked, fix the hash values up the btree.
-                */
-               if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
-                       xfs_da3_fixhashpath(state, &state->path);
-       } else {
-               /*
-                * It didn't work, we need to split the leaf block.
-                */
-               if (args->total == 0) {
-                       ASSERT(rval == ENOSPC);
-                       goto done;
-               }
-               /*
-                * Split the leaf block and insert the new entry.
-                */
-               rval = xfs_da3_split(state);
-       }
-done:
-       xfs_da_state_free(state);
-       return rval;
-}
-
-/*
- * Add the data entry for a node-format directory name addition.
- * The leaf entry is added in xfs_dir2_leafn_add.
- * We may enter with a freespace block that the lookup found.
- */
-static int                                     /* error */
-xfs_dir2_node_addname_int(
-       xfs_da_args_t           *args,          /* operation arguments */
-       xfs_da_state_blk_t      *fblk)          /* optional freespace block */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_db_t           dbno;           /* data block number */
-       struct xfs_buf          *dbp;           /* data block buffer */
-       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* data unused entry pointer */
-       int                     error;          /* error return value */
-       xfs_dir2_db_t           fbno;           /* freespace block number */
-       struct xfs_buf          *fbp;           /* freespace buffer */
-       int                     findex;         /* freespace entry index */
-       xfs_dir2_free_t         *free=NULL;     /* freespace block structure */
-       xfs_dir2_db_t           ifbno;          /* initial freespace block no */
-       xfs_dir2_db_t           lastfbno=0;     /* highest freespace block no */
-       int                     length;         /* length of the new entry */
-       int                     logfree;        /* need to log free entry */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       int                     needlog;        /* need to log data header */
-       int                     needscan;       /* need to rescan data frees */
-       __be16                  *tagp;          /* data entry tag pointer */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       __be16                  *bests;
-       struct xfs_dir3_icfree_hdr freehdr;
-       struct xfs_dir2_data_free *bf;
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       length = dp->d_ops->data_entsize(args->namelen);
-       /*
-        * If we came in with a freespace block that means that lookup
-        * found an entry with our hash value.  This is the freespace
-        * block for that data entry.
-        */
-       if (fblk) {
-               fbp = fblk->bp;
-               /*
-                * Remember initial freespace block number.
-                */
-               ifbno = fblk->blkno;
-               free = fbp->b_addr;
-               findex = fblk->index;
-               bests = dp->d_ops->free_bests_p(free);
-               dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-               /*
-                * This means the free entry showed that the data block had
-                * space for our entry, so we remembered it.
-                * Use that data block.
-                */
-               if (findex >= 0) {
-                       ASSERT(findex < freehdr.nvalid);
-                       ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
-                       ASSERT(be16_to_cpu(bests[findex]) >= length);
-                       dbno = freehdr.firstdb + findex;
-               } else {
-                       /*
-                        * The data block looked at didn't have enough room.
-                        * We'll start at the beginning of the freespace entries.
-                        */
-                       dbno = -1;
-                       findex = 0;
-               }
-       } else {
-               /*
-                * Didn't come in with a freespace block, so no data block.
-                */
-               ifbno = dbno = -1;
-               fbp = NULL;
-               findex = 0;
-       }
-
-       /*
-        * If we don't have a data block yet, we're going to scan the
-        * freespace blocks looking for one.  Figure out what the
-        * highest freespace block number is.
-        */
-       if (dbno == -1) {
-               xfs_fileoff_t   fo;             /* freespace block number */
-
-               if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
-                       return error;
-               lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
-               fbno = ifbno;
-       }
-       /*
-        * While we haven't identified a data block, search the freeblock
-        * data for a good data block.  If we find a null freeblock entry,
-        * indicating a hole in the data blocks, remember that.
-        */
-       while (dbno == -1) {
-               /*
-                * If we don't have a freeblock in hand, get the next one.
-                */
-               if (fbp == NULL) {
-                       /*
-                        * Happens the first time through unless lookup gave
-                        * us a freespace block to start with.
-                        */
-                       if (++fbno == 0)
-                               fbno = xfs_dir2_byte_to_db(args->geo,
-                                                       XFS_DIR2_FREE_OFFSET);
-                       /*
-                        * If it's ifbno we already looked at it.
-                        */
-                       if (fbno == ifbno)
-                               fbno++;
-                       /*
-                        * If it's off the end we're done.
-                        */
-                       if (fbno >= lastfbno)
-                               break;
-                       /*
-                        * Read the block.  There can be holes in the
-                        * freespace blocks, so this might not succeed.
-                        * This should be really rare, so there's no reason
-                        * to avoid it.
-                        */
-                       error = xfs_dir2_free_try_read(tp, dp,
-                                       xfs_dir2_db_to_da(args->geo, fbno),
-                                       &fbp);
-                       if (error)
-                               return error;
-                       if (!fbp)
-                               continue;
-                       free = fbp->b_addr;
-                       findex = 0;
-               }
-               /*
-                * Look at the current free entry.  Is it good enough?
-                *
-                * The bests initialisation should be where the buffer is read in
-                * the above branch. But gcc is too stupid to realise that bests
-                * and the freehdr are actually initialised if they are placed
-                * there, so we have to do it here to avoid warnings. Blech.
-                */
-               bests = dp->d_ops->free_bests_p(free);
-               dp->d_ops->free_hdr_from_disk(&freehdr, free);
-               if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
-                   be16_to_cpu(bests[findex]) >= length)
-                       dbno = freehdr.firstdb + findex;
-               else {
-                       /*
-                        * Are we done with the freeblock?
-                        */
-                       if (++findex == freehdr.nvalid) {
-                               /*
-                                * Drop the block.
-                                */
-                               xfs_trans_brelse(tp, fbp);
-                               fbp = NULL;
-                               if (fblk && fblk->bp)
-                                       fblk->bp = NULL;
-                       }
-               }
-       }
-       /*
-        * If we don't have a data block, we need to allocate one and make
-        * the freespace entries refer to it.
-        */
-       if (unlikely(dbno == -1)) {
-               /*
-                * Not allowed to allocate, return failure.
-                */
-               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
-                       return ENOSPC;
-
-               /*
-                * Allocate and initialize the new data block.
-                */
-               if (unlikely((error = xfs_dir2_grow_inode(args,
-                                                        XFS_DIR2_DATA_SPACE,
-                                                        &dbno)) ||
-                   (error = xfs_dir3_data_init(args, dbno, &dbp))))
-                       return error;
-
-               /*
-                * If (somehow) we have a freespace block, get rid of it.
-                */
-               if (fbp)
-                       xfs_trans_brelse(tp, fbp);
-               if (fblk && fblk->bp)
-                       fblk->bp = NULL;
-
-               /*
-                * Get the freespace block corresponding to the data block
-                * that was just allocated.
-                */
-               fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
-               error = xfs_dir2_free_try_read(tp, dp,
-                                      xfs_dir2_db_to_da(args->geo, fbno),
-                                      &fbp);
-               if (error)
-                       return error;
-
-               /*
-                * If there wasn't a freespace block, the read will
-                * return a NULL fbp.  Allocate and initialize a new one.
-                */
-               if (!fbp) {
-                       error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
-                                                   &fbno);
-                       if (error)
-                               return error;
-
-                       if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
-                               xfs_alert(mp,
-                       "%s: dir ino %llu needed freesp block %lld for\n"
-                       "  data block %lld, got %lld ifbno %llu lastfbno %d",
-                                       __func__, (unsigned long long)dp->i_ino,
-                                       (long long)dp->d_ops->db_to_fdb(
-                                                               args->geo, dbno),
-                                       (long long)dbno, (long long)fbno,
-                                       (unsigned long long)ifbno, lastfbno);
-                               if (fblk) {
-                                       xfs_alert(mp,
-                               " fblk 0x%p blkno %llu index %d magic 0x%x",
-                                               fblk,
-                                               (unsigned long long)fblk->blkno,
-                                               fblk->index,
-                                               fblk->magic);
-                               } else {
-                                       xfs_alert(mp, " ... fblk is NULL");
-                               }
-                               XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
-                                                XFS_ERRLEVEL_LOW, mp);
-                               return EFSCORRUPTED;
-                       }
-
-                       /*
-                        * Get a buffer for the new block.
-                        */
-                       error = xfs_dir3_free_get_buf(args, fbno, &fbp);
-                       if (error)
-                               return error;
-                       free = fbp->b_addr;
-                       bests = dp->d_ops->free_bests_p(free);
-                       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-                       /*
-                        * Remember the first slot as our empty slot.
-                        */
-                       freehdr.firstdb =
-                               (fbno - xfs_dir2_byte_to_db(args->geo,
-                                                       XFS_DIR2_FREE_OFFSET)) *
-                                       dp->d_ops->free_max_bests(args->geo);
-               } else {
-                       free = fbp->b_addr;
-                       bests = dp->d_ops->free_bests_p(free);
-                       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-               }
-
-               /*
-                * Set the freespace block index from the data block number.
-                */
-               findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
-               /*
-                * If it's after the end of the current entries in the
-                * freespace block, extend that table.
-                */
-               if (findex >= freehdr.nvalid) {
-                       ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
-                       freehdr.nvalid = findex + 1;
-                       /*
-                        * Tag new entry so nused will go up.
-                        */
-                       bests[findex] = cpu_to_be16(NULLDATAOFF);
-               }
-               /*
-                * If this entry was for an empty data block
-                * (this should always be true) then update the header.
-                */
-               if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
-                       freehdr.nused++;
-                       dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
-                       xfs_dir2_free_log_header(args, fbp);
-               }
-               /*
-                * Update the real value in the table.
-                * We haven't allocated the data entry yet so this will
-                * change again.
-                */
-               hdr = dbp->b_addr;
-               bf = dp->d_ops->data_bestfree_p(hdr);
-               bests[findex] = bf[0].length;
-               logfree = 1;
-       }
-       /*
-        * We had a data block so we don't have to make a new one.
-        */
-       else {
-               /*
-                * If just checking, we succeeded.
-                */
-               if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-                       return 0;
-
-               /*
-                * Read the data block in.
-                */
-               error = xfs_dir3_data_read(tp, dp,
-                                          xfs_dir2_db_to_da(args->geo, dbno),
-                                          -1, &dbp);
-               if (error)
-                       return error;
-               hdr = dbp->b_addr;
-               bf = dp->d_ops->data_bestfree_p(hdr);
-               logfree = 0;
-       }
-       ASSERT(be16_to_cpu(bf[0].length) >= length);
-       /*
-        * Point to the existing unused space.
-        */
-       dup = (xfs_dir2_data_unused_t *)
-             ((char *)hdr + be16_to_cpu(bf[0].offset));
-       needscan = needlog = 0;
-       /*
-        * Mark the first part of the unused space as in use for us.
-        */
-       xfs_dir2_data_use_free(args, dbp, dup,
-               (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
-               &needlog, &needscan);
-       /*
-        * Fill in the new entry and log it.
-        */
-       dep = (xfs_dir2_data_entry_t *)dup;
-       dep->inumber = cpu_to_be64(args->inumber);
-       dep->namelen = args->namelen;
-       memcpy(dep->name, args->name, dep->namelen);
-       dp->d_ops->data_put_ftype(dep, args->filetype);
-       tagp = dp->d_ops->data_entry_tag_p(dep);
-       *tagp = cpu_to_be16((char *)dep - (char *)hdr);
-       xfs_dir2_data_log_entry(args, dbp, dep);
-       /*
-        * Rescan the block for bestfree if needed.
-        */
-       if (needscan)
-               xfs_dir2_data_freescan(dp, hdr, &needlog);
-       /*
-        * Log the data block header if needed.
-        */
-       if (needlog)
-               xfs_dir2_data_log_header(args, dbp);
-       /*
-        * If the freespace entry is now wrong, update it.
-        */
-       bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */
-       if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
-               bests[findex] = bf[0].length;
-               logfree = 1;
-       }
-       /*
-        * Log the freespace entry if needed.
-        */
-       if (logfree)
-               xfs_dir2_free_log_bests(args, fbp, findex, findex);
-       /*
-        * Return the data block and offset in args, then drop the data block.
-        */
-       args->blkno = (xfs_dablk_t)dbno;
-       args->index = be16_to_cpu(*tagp);
-       return 0;
-}
-
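The freespace scan above is driven by the bests[] index: each free block covers a contiguous run of data blocks starting at firstdb, and slot i records the longest free region in data block firstdb + i. The first slot holding at least the new entry's length selects the data block. A simplified sketch of that selection over one free block, using a plain array as a hypothetical stand-in for the on-disk structure (the real code goes through dp->d_ops accessors and be16_to_cpu conversions):

#include <stdint.h>

#define NULLDATAOFF	((uint16_t)0xffff)	/* slot records no usable free space */

/*
 * Pick the first data block covered by one freespace block that can hold an
 * entry of 'length' bytes.  Returns the data block number, or -1 if none of
 * the covered blocks has room; the caller then moves on to the next free
 * block or allocates a new data block.
 */
static int pick_data_block(const uint16_t *bests, int nvalid, int firstdb,
			   int length)
{
	int findex;

	for (findex = 0; findex < nvalid; findex++) {
		if (bests[findex] != NULLDATAOFF && bests[findex] >= length)
			return firstdb + findex;
	}
	return -1;
}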
-/*
- * Lookup an entry in a node-format directory.
- * All the real work happens in xfs_da3_node_lookup_int.
- * The only real output is the inode number of the entry.
- */
-int                                            /* error */
-xfs_dir2_node_lookup(
-       xfs_da_args_t   *args)                  /* operation arguments */
-{
-       int             error;                  /* error return value */
-       int             i;                      /* btree level */
-       int             rval;                   /* operation return value */
-       xfs_da_state_t  *state;                 /* btree cursor */
-
-       trace_xfs_dir2_node_lookup(args);
-
-       /*
-        * Allocate and initialize the btree cursor.
-        */
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = args->dp->i_mount;
-       /*
-        * Fill in the path to the entry in the cursor.
-        */
-       error = xfs_da3_node_lookup_int(state, &rval);
-       if (error)
-               rval = error;
-       else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
-               /* If a CI match, dup the actual name and return EEXIST */
-               xfs_dir2_data_entry_t   *dep;
-
-               dep = (xfs_dir2_data_entry_t *)
-                       ((char *)state->extrablk.bp->b_addr +
-                                                state->extrablk.index);
-               rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
-       }
-       /*
-        * Release the btree blocks and leaf block.
-        */
-       for (i = 0; i < state->path.active; i++) {
-               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
-               state->path.blk[i].bp = NULL;
-       }
-       /*
-        * Release the data block if we have it.
-        */
-       if (state->extravalid && state->extrablk.bp) {
-               xfs_trans_brelse(args->trans, state->extrablk.bp);
-               state->extrablk.bp = NULL;
-       }
-       xfs_da_state_free(state);
-       return rval;
-}
-
-/*
- * Remove an entry from a node-format directory.
- */
-int                                            /* error */
-xfs_dir2_node_removename(
-       struct xfs_da_args      *args)          /* operation arguments */
-{
-       struct xfs_da_state_blk *blk;           /* leaf block */
-       int                     error;          /* error return value */
-       int                     rval;           /* operation return value */
-       struct xfs_da_state     *state;         /* btree cursor */
-
-       trace_xfs_dir2_node_removename(args);
-
-       /*
-        * Allocate and initialize the btree cursor.
-        */
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = args->dp->i_mount;
-
-       /* Look up the entry we're deleting, set up the cursor. */
-       error = xfs_da3_node_lookup_int(state, &rval);
-       if (error)
-               goto out_free;
-
-       /* Didn't find it, upper layer screwed up. */
-       if (rval != EEXIST) {
-               error = rval;
-               goto out_free;
-       }
-
-       blk = &state->path.blk[state->path.active - 1];
-       ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-       ASSERT(state->extravalid);
-       /*
-        * Remove the leaf and data entries.
-        * Extrablk refers to the data block.
-        */
-       error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
-               &state->extrablk, &rval);
-       if (error)
-               goto out_free;
-       /*
-        * Fix the hash values up the btree.
-        */
-       xfs_da3_fixhashpath(state, &state->path);
-       /*
-        * If we need to join leaf blocks, do it.
-        */
-       if (rval && state->path.active > 1)
-               error = xfs_da3_join(state);
-       /*
-        * If no errors so far, try conversion to leaf format.
-        */
-       if (!error)
-               error = xfs_dir2_node_to_leaf(state);
-out_free:
-       xfs_da_state_free(state);
-       return error;
-}
-
-/*
- * Replace an entry's inode number in a node-format directory.
- */
-int                                            /* error */
-xfs_dir2_node_replace(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_da_state_blk_t      *blk;           /* leaf block */
-       xfs_dir2_data_hdr_t     *hdr;           /* data block header */
-       xfs_dir2_data_entry_t   *dep;           /* data entry changed */
-       int                     error;          /* error return value */
-       int                     i;              /* btree level */
-       xfs_ino_t               inum;           /* new inode number */
-       xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-       xfs_dir2_leaf_entry_t   *lep;           /* leaf entry being changed */
-       int                     rval;           /* internal return value */
-       xfs_da_state_t          *state;         /* btree cursor */
-
-       trace_xfs_dir2_node_replace(args);
-
-       /*
-        * Allocate and initialize the btree cursor.
-        */
-       state = xfs_da_state_alloc();
-       state->args = args;
-       state->mp = args->dp->i_mount;
-       inum = args->inumber;
-       /*
-        * Lookup the entry to change in the btree.
-        */
-       error = xfs_da3_node_lookup_int(state, &rval);
-       if (error) {
-               rval = error;
-       }
-       /*
-        * It should be found, since the vnodeops layer has looked it up
-        * and locked it.  But paranoia is good.
-        */
-       if (rval == EEXIST) {
-               struct xfs_dir2_leaf_entry *ents;
-               /*
-                * Find the leaf entry.
-                */
-               blk = &state->path.blk[state->path.active - 1];
-               ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
-               leaf = blk->bp->b_addr;
-               ents = args->dp->d_ops->leaf_ents_p(leaf);
-               lep = &ents[blk->index];
-               ASSERT(state->extravalid);
-               /*
-                * Point to the data entry.
-                */
-               hdr = state->extrablk.bp->b_addr;
-               ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-                      hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
-               dep = (xfs_dir2_data_entry_t *)
-                     ((char *)hdr +
-                      xfs_dir2_dataptr_to_off(args->geo,
-                                              be32_to_cpu(lep->address)));
-               ASSERT(inum != be64_to_cpu(dep->inumber));
-               /*
-                * Fill in the new inode number and log the entry.
-                */
-               dep->inumber = cpu_to_be64(inum);
-               args->dp->d_ops->data_put_ftype(dep, args->filetype);
-               xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
-               rval = 0;
-       }
-       /*
-        * Didn't find it, and we're holding a data block.  Drop it.
-        */
-       else if (state->extravalid) {
-               xfs_trans_brelse(args->trans, state->extrablk.bp);
-               state->extrablk.bp = NULL;
-       }
-       /*
-        * Release all the buffers in the cursor.
-        */
-       for (i = 0; i < state->path.active; i++) {
-               xfs_trans_brelse(args->trans, state->path.blk[i].bp);
-               state->path.blk[i].bp = NULL;
-       }
-       xfs_da_state_free(state);
-       return rval;
-}
-
-/*
- * Trim off a trailing empty freespace block.
- * Return (in rvalp) 1 if we did it, 0 if not.
- */
-int                                            /* error */
-xfs_dir2_node_trim_free(
-       xfs_da_args_t           *args,          /* operation arguments */
-       xfs_fileoff_t           fo,             /* free block number */
-       int                     *rvalp)         /* out: did something */
-{
-       struct xfs_buf          *bp;            /* freespace buffer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return code */
-       xfs_dir2_free_t         *free;          /* freespace structure */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_trans_t             *tp;            /* transaction pointer */
-       struct xfs_dir3_icfree_hdr freehdr;
-
-       dp = args->dp;
-       mp = dp->i_mount;
-       tp = args->trans;
-       /*
-        * Read the freespace block.
-        */
-       error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
-       if (error)
-               return error;
-       /*
-        * There can be holes in freespace.  If fo is a hole, there's
-        * nothing to do.
-        */
-       if (!bp)
-               return 0;
-       free = bp->b_addr;
-       dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
-       /*
-        * If there are used entries, there's nothing to do.
-        */
-       if (freehdr.nused > 0) {
-               xfs_trans_brelse(tp, bp);
-               *rvalp = 0;
-               return 0;
-       }
-       /*
-        * Blow the block away.
-        */
-       error = xfs_dir2_shrink_inode(args,
-                       xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp);
-       if (error) {
-               /*
-                * Can't fail with ENOSPC since that only happens with no
-                * space reservation, when breaking up an extent into two
-                * pieces.  This is the last block of an extent.
-                */
-               ASSERT(error != ENOSPC);
-               xfs_trans_brelse(tp, bp);
-               return error;
-       }
-       /*
-        * Return that we succeeded.
-        */
-       *rvalp = 1;
-       return 0;
-}
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
deleted file mode 100644 (file)
index 27ce079..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DIR2_PRIV_H__
-#define __XFS_DIR2_PRIV_H__
-
-struct dir_context;
-
-/*
- * Directory offset/block conversion functions.
- *
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp)
-{
-       return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr.  It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by)
-{
-       return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
-{
-       return (xfs_dir2_db_t)(by >> geo->blklog);
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
-{
-       return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
-{
-       return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp)
-{
-       return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
-                       xfs_dir2_data_aoff_t o)
-{
-       return ((xfs_dir2_off_t)db << geo->blklog) + o;
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
-       return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog));
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by)
-{
-       return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db,
-                          xfs_dir2_data_aoff_t o)
-{
-       return xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da)
-{
-       return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog));
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da)
-{
-       return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0);
-}
-
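These conversions are pure shifts and masks over the directory geometry. A self-contained illustration of the dataptr to (block, offset) round trip, assuming 8-byte dataptr granularity (i.e. XFS_DIR2_DATA_ALIGN_LOG == 3) and taking blklog as a plain parameter; all names below are local to the sketch:

#include <assert.h>
#include <stdint.h>

#define DATA_ALIGN_LOG	3	/* dataptrs address 8-byte aligned units */

/* dataptr -> logical directory block number */
static uint32_t dataptr_to_db(unsigned int blklog, uint32_t dp)
{
	return ((uint64_t)dp << DATA_ALIGN_LOG) >> blklog;
}

/* dataptr -> byte offset within its directory block */
static uint32_t dataptr_to_off(uint32_t blksize, uint32_t dp)
{
	return ((uint64_t)dp << DATA_ALIGN_LOG) & (blksize - 1);
}

/* (block, offset) -> dataptr, the inverse of the two helpers above */
static uint32_t db_off_to_dataptr(unsigned int blklog, uint32_t db, uint32_t off)
{
	return (((uint64_t)db << blklog) + off) >> DATA_ALIGN_LOG;
}

int main(void)
{
	unsigned int	blklog = 12;		/* 4k directory blocks */
	uint32_t	blksize = 1u << blklog;
	uint32_t	dp = db_off_to_dataptr(blklog, 7, 0x128);

	assert(dataptr_to_db(blklog, dp) == 7);
	assert(dataptr_to_off(blksize, dp) == 0x128);
	return 0;
}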
-/*
- * Directory tail pointer accessor functions. Based on block geometry.
- */
-static inline struct xfs_dir2_block_tail *
-xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr)
-{
-       return ((struct xfs_dir2_block_tail *)
-               ((char *)hdr + geo->blksize)) - 1;
-}
-
-static inline struct xfs_dir2_leaf_tail *
-xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
-{
-       return (struct xfs_dir2_leaf_tail *)
-               ((char *)lp + geo->blksize -
-                 sizeof(struct xfs_dir2_leaf_tail));
-}
-
-/* xfs_dir2.c */
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
-extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
-                               xfs_dir2_db_t *dbp);
-extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
-                               const unsigned char *name, int len);
-
-#define S_SHIFT 12
-extern const unsigned char xfs_mode_to_ftype[];
-
-extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp,
-                                       __uint8_t filetype);
-
-
-/* xfs_dir2_block.c */
-extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
-                              struct xfs_buf **bpp);
-extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_block_removename(struct xfs_da_args *args);
-extern int xfs_dir2_block_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
-               struct xfs_buf *lbp, struct xfs_buf *dbp);
-
-/* xfs_dir2_data.c */
-#ifdef DEBUG
-#define        xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
-#else
-#define        xfs_dir3_data_check(dp,bp)
-#endif
-
-extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
-extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
-               xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
-extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
-               xfs_daddr_t mapped_bno);
-
-extern struct xfs_dir2_data_free *
-xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
-               struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup,
-               int *loghead);
-extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
-               struct xfs_buf **bpp);
-
-/* xfs_dir2_leaf.c */
-extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
-               xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
-extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
-               struct xfs_buf *dbp);
-extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
-extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
-               struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
-extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
-               struct xfs_dir2_leaf_entry *ents, int *indexp,
-               int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
-extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
-               struct xfs_buf **bpp, __uint16_t magic);
-extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
-               struct xfs_buf *bp, int first, int last);
-extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
-               struct xfs_buf *bp);
-extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
-               struct xfs_buf *lbp);
-extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
-               struct xfs_buf *lbp, xfs_dir2_db_t db);
-extern struct xfs_dir2_leaf_entry *
-xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
-               struct xfs_dir2_leaf_entry *ents, int index, int compact,
-               int lowstale, int highstale, int *lfloglow, int *lfloghigh);
-extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-
-extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
-               struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
-
-/* xfs_dir2_node.c */
-extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
-               struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
-               struct xfs_buf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
-               struct xfs_da_args *args, int *indexp,
-               struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp,
-               struct xfs_buf *leaf2_bp);
-extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
-       struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
-extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
-               struct xfs_da_state_blk *drop_blk,
-               struct xfs_da_state_blk *save_blk);
-extern int xfs_dir2_node_addname(struct xfs_da_args *args);
-extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_node_removename(struct xfs_da_args *args);
-extern int xfs_dir2_node_replace(struct xfs_da_args *args);
-extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
-               int *rvalp);
-extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
-               xfs_dablk_t fbno, struct xfs_buf **bpp);
-
-/* xfs_dir2_sf.c */
-extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
-               struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
-               int size, xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
-extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-
-/* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
-                      size_t bufsize);
-
-#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
deleted file mode 100644 (file)
index ab3563b..0000000
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_trace.h"
-#include "xfs_dinode.h"
-
-/*
- * Prototypes for internal functions.
- */
-static void xfs_dir2_sf_addname_easy(xfs_da_args_t *args,
-                                    xfs_dir2_sf_entry_t *sfep,
-                                    xfs_dir2_data_aoff_t offset,
-                                    int new_isize);
-static void xfs_dir2_sf_addname_hard(xfs_da_args_t *args, int objchange,
-                                    int new_isize);
-static int xfs_dir2_sf_addname_pick(xfs_da_args_t *args, int objchange,
-                                   xfs_dir2_sf_entry_t **sfepp,
-                                   xfs_dir2_data_aoff_t *offsetp);
-#ifdef DEBUG
-static void xfs_dir2_sf_check(xfs_da_args_t *args);
-#else
-#define        xfs_dir2_sf_check(args)
-#endif /* DEBUG */
-#if XFS_BIG_INUMS
-static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
-static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
-#endif /* XFS_BIG_INUMS */
-
-/*
- * Given a block directory (dp/block), calculate its size as a shortform (sf)
- * directory and a header for the sf directory, if it will fit it the
- * space currently present in the inode.  If it won't fit, the output
- * size is too big (but not accurate).
- */
-int                                            /* size for sf form */
-xfs_dir2_block_sfsize(
-       xfs_inode_t             *dp,            /* incore inode pointer */
-       xfs_dir2_data_hdr_t     *hdr,           /* block directory data */
-       xfs_dir2_sf_hdr_t       *sfhp)          /* output: header for sf form */
-{
-       xfs_dir2_dataptr_t      addr;           /* data entry address */
-       xfs_dir2_leaf_entry_t   *blp;           /* leaf area of the block */
-       xfs_dir2_block_tail_t   *btp;           /* tail area of the block */
-       int                     count;          /* shortform entry count */
-       xfs_dir2_data_entry_t   *dep;           /* data entry in the block */
-       int                     i;              /* block entry index */
-       int                     i8count;        /* count of big-inode entries */
-       int                     isdot;          /* entry is "." */
-       int                     isdotdot;       /* entry is ".." */
-       xfs_mount_t             *mp;            /* mount structure pointer */
-       int                     namelen;        /* total name bytes */
-       xfs_ino_t               parent = 0;     /* parent inode number */
-       int                     size=0;         /* total computed size */
-       int                     has_ftype;
-       struct xfs_da_geometry  *geo;
-
-       mp = dp->i_mount;
-       geo = mp->m_dir_geo;
-
-       /*
-        * if there is a filetype field, add the extra byte to the namelen
-        * for each entry that we see.
-        */
-       has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
-
-       count = i8count = namelen = 0;
-       btp = xfs_dir2_block_tail_p(geo, hdr);
-       blp = xfs_dir2_block_leaf_p(btp);
-
-       /*
-        * Iterate over the block's data entries by using the leaf pointers.
-        */
-       for (i = 0; i < be32_to_cpu(btp->count); i++) {
-               if ((addr = be32_to_cpu(blp[i].address)) == XFS_DIR2_NULL_DATAPTR)
-                       continue;
-               /*
-                * Calculate the pointer to the entry at hand.
-                */
-               dep = (xfs_dir2_data_entry_t *)((char *)hdr +
-                               xfs_dir2_dataptr_to_off(geo, addr));
-               /*
-                * Detect . and .., so we can special-case them.
-                * . is not included in sf directories.
-                * .. is included by just the parent inode number.
-                */
-               isdot = dep->namelen == 1 && dep->name[0] == '.';
-               isdotdot =
-                       dep->namelen == 2 &&
-                       dep->name[0] == '.' && dep->name[1] == '.';
-#if XFS_BIG_INUMS
-               if (!isdot)
-                       i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
-#endif
-               /* take into account the file type field */
-               if (!isdot && !isdotdot) {
-                       count++;
-                       namelen += dep->namelen + has_ftype;
-               } else if (isdotdot)
-                       parent = be64_to_cpu(dep->inumber);
-               /*
-                * Calculate the new size, see if we should give up yet.
-                */
-               size = xfs_dir2_sf_hdr_size(i8count) +          /* header */
-                      count +                                  /* namelen */
-                      count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */
-                      namelen +                                /* name */
-                      (i8count ?                               /* inumber */
-                               (uint)sizeof(xfs_dir2_ino8_t) * count :
-                               (uint)sizeof(xfs_dir2_ino4_t) * count);
-               if (size > XFS_IFORK_DSIZE(dp))
-                       return size;            /* size value is a failure */
-       }
-       /*
-        * Create the output header, if it worked.
-        */
-       sfhp->count = count;
-       sfhp->i8count = i8count;
-       dp->d_ops->sf_put_parent_ino(sfhp, parent);
-       return size;
-}
-
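The size accumulated in the loop above is the shortform layout summed per retained entry: one namelen byte, a two-byte saved offset, the name bytes themselves (plus one file type byte per entry when the ftype feature is enabled), and a 4- or 8-byte inode number, all on top of the shortform header. A hedged restatement, with the header size left as a parameter since it stands in for xfs_dir2_sf_hdr_size(i8count):

/*
 * Illustrative restatement of the shortform sizing; 'namebytes' is the sum
 * of raw name lengths, with the optional ftype byte added separately here.
 */
static int sf_dir_size(int hdr_size, int count, int namebytes,
		       int i8count, int has_ftype)
{
	int ino_size = i8count ? 8 : 4;	/* xfs_dir2_ino8_t vs. xfs_dir2_ino4_t */

	return hdr_size +
	       count * 1 +			/* namelen byte per entry */
	       count * 2 +			/* 16-bit saved offset per entry */
	       namebytes +			/* the names themselves */
	       (has_ftype ? count : 0) +	/* one file type byte per entry */
	       count * ino_size;		/* packed inode numbers */
}

If the result exceeds the inode's data fork space (XFS_IFORK_DSIZE), the caller keeps the block form and the returned size only signals the failure.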
-/*
- * Convert a block format directory to shortform.
- * Caller has already checked that it will fit, and built us a header.
- */
-int                                            /* error */
-xfs_dir2_block_to_sf(
-       xfs_da_args_t           *args,          /* operation arguments */
-       struct xfs_buf          *bp,
-       int                     size,           /* shortform directory size */
-       xfs_dir2_sf_hdr_t       *sfhp)          /* shortform directory hdr */
-{
-       xfs_dir2_data_hdr_t     *hdr;           /* block header */
-       xfs_dir2_block_tail_t   *btp;           /* block tail pointer */
-       xfs_dir2_data_entry_t   *dep;           /* data entry pointer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_data_unused_t  *dup;           /* unused data pointer */
-       char                    *endptr;        /* end of data entries */
-       int                     error;          /* error return value */
-       int                     logflags;       /* inode logging flags */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       char                    *ptr;           /* current data pointer */
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform directory header */
-       xfs_dir2_sf_hdr_t       *dst;           /* temporary data buffer */
-
-       trace_xfs_dir2_block_to_sf(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       /*
-        * allocate a temporary destination buffer the size of the inode
-        * to format the data into. Once we have formatted the data, we
-        * can free the block and copy the formatted data into the inode literal
-        * area.
-        */
-       dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP);
-       hdr = bp->b_addr;
-
-       /*
-        * Copy the header into the newly allocated local space.
-        */
-       sfp = (xfs_dir2_sf_hdr_t *)dst;
-       memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
-
-       /*
-        * Set up to loop over the block's entries.
-        */
-       btp = xfs_dir2_block_tail_p(args->geo, hdr);
-       ptr = (char *)dp->d_ops->data_entry_p(hdr);
-       endptr = (char *)xfs_dir2_block_leaf_p(btp);
-       sfep = xfs_dir2_sf_firstentry(sfp);
-       /*
-        * Loop over the active and unused entries.
-        * Stop when we reach the leaf/tail portion of the block.
-        */
-       while (ptr < endptr) {
-               /*
-                * If it's unused, just skip over it.
-                */
-               dup = (xfs_dir2_data_unused_t *)ptr;
-               if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       ptr += be16_to_cpu(dup->length);
-                       continue;
-               }
-               dep = (xfs_dir2_data_entry_t *)ptr;
-               /*
-                * Skip .
-                */
-               if (dep->namelen == 1 && dep->name[0] == '.')
-                       ASSERT(be64_to_cpu(dep->inumber) == dp->i_ino);
-               /*
-                * Skip .., but make sure the inode number is right.
-                */
-               else if (dep->namelen == 2 &&
-                        dep->name[0] == '.' && dep->name[1] == '.')
-                       ASSERT(be64_to_cpu(dep->inumber) ==
-                              dp->d_ops->sf_get_parent_ino(sfp));
-               /*
-                * Normal entry, copy it into shortform.
-                */
-               else {
-                       sfep->namelen = dep->namelen;
-                       xfs_dir2_sf_put_offset(sfep,
-                               (xfs_dir2_data_aoff_t)
-                               ((char *)dep - (char *)hdr));
-                       memcpy(sfep->name, dep->name, dep->namelen);
-                       dp->d_ops->sf_put_ino(sfp, sfep,
-                                             be64_to_cpu(dep->inumber));
-                       dp->d_ops->sf_put_ftype(sfep,
-                                       dp->d_ops->data_get_ftype(dep));
-
-                       sfep = dp->d_ops->sf_nextentry(sfp, sfep);
-               }
-               ptr += dp->d_ops->data_entsize(dep->namelen);
-       }
-       ASSERT((char *)sfep - (char *)sfp == size);
-
-       /* now we are done with the block, we can shrink the inode */
-       logflags = XFS_ILOG_CORE;
-       error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
-       if (error) {
-               ASSERT(error != ENOSPC);
-               goto out;
-       }
-
-       /*
-        * The buffer is now unconditionally gone, whether
-        * xfs_dir2_shrink_inode worked or not.
-        *
-        * Convert the inode to local format and copy the data in.
-        */
-       dp->i_df.if_flags &= ~XFS_IFEXTENTS;
-       dp->i_df.if_flags |= XFS_IFINLINE;
-       dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
-       ASSERT(dp->i_df.if_bytes == 0);
-       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
-
-       logflags |= XFS_ILOG_DDATA;
-       memcpy(dp->i_df.if_u1.if_data, dst, size);
-       dp->i_d.di_size = size;
-       xfs_dir2_sf_check(args);
-out:
-       xfs_trans_log_inode(args->trans, dp, logflags);
-       kmem_free(dst);
-       return error;
-}
-
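/*
 * A standalone sketch, not part of the original file, of the walk in
 * the conversion above: step through a packed region of variable-length
 * records, skipping free space that is marked with a sentinel tag and
 * carries its own length.  The 4-byte record header and the 0xffff tag
 * are assumptions loosely modelled on the directory data format, not
 * the real on-disk layout.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FREE_TAG	0xffffu

/* read/write 16-bit fields in the packed buffer without alignment worries */
static uint16_t get16(const unsigned char *p)
{
	uint16_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

static unsigned char *put16(unsigned char *p, uint16_t v)
{
	memcpy(p, &v, sizeof(v));
	return p + sizeof(v);
}

static void walk(const unsigned char *ptr, const unsigned char *end)
{
	while (ptr < end) {
		uint16_t	tag = get16(ptr);
		uint16_t	len = get16(ptr + 2);

		if (tag == FREE_TAG) {		/* unused space: just skip it */
			ptr += len;
			continue;
		}
		/* used record: 4-byte header followed by len - 4 name bytes */
		printf("entry: %.*s\n", len - 4, (const char *)ptr + 4);
		ptr += len;
	}
}

int main(void)
{
	unsigned char	buf[32], *p = buf;

	/* used record "foo", a 6-byte free region, then used record "ab" */
	p = put16(p, 0); p = put16(p, 7); memcpy(p, "foo", 3); p += 3;
	p = put16(p, FREE_TAG); p = put16(p, 6); p += 2;
	p = put16(p, 0); p = put16(p, 6); memcpy(p, "ab", 2); p += 2;

	walk(buf, p);
	return 0;
}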
-/*
- * Add a name to a shortform directory.
- * There are two algorithms, "easy" and "hard" which we decide on
- * before changing anything.
- * Convert to block form if necessary, i.e. if the new entry won't fit.
- */
-int                                            /* error */
-xfs_dir2_sf_addname(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     error;          /* error return value */
-       int                     incr_isize;     /* total change in size */
-       int                     new_isize;      /* di_size after adding name */
-       int                     objchange;      /* changing to 8-byte inodes */
-       xfs_dir2_data_aoff_t    offset = 0;     /* offset for new entry */
-       int                     pick;           /* which algorithm to use */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-       xfs_dir2_sf_entry_t     *sfep = NULL;   /* shortform entry */
-
-       trace_xfs_dir2_sf_addname(args);
-
-       ASSERT(xfs_dir2_sf_lookup(args) == ENOENT);
-       dp = args->dp;
-       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Make sure the shortform value has some of its header.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return EIO;
-       }
-       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-       ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-       /*
-        * Compute entry (and change in) size.
-        */
-       incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
-       objchange = 0;
-#if XFS_BIG_INUMS
-       /*
-        * Do we have to change to 8 byte inodes?
-        */
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
-               /*
-                * Yes, adjust the inode size: each old entry plus the
-                * parent and the new entry grows by the ino8/ino4 delta.
-                */
-               incr_isize +=
-                       (sfp->count + 2) *
-                       ((uint)sizeof(xfs_dir2_ino8_t) -
-                        (uint)sizeof(xfs_dir2_ino4_t));
-               objchange = 1;
-       }
-#endif
-       new_isize = (int)dp->i_d.di_size + incr_isize;
-       /*
-        * Won't fit as shortform any more (due to size),
-        * or the pick routine says it won't (due to offset values).
-        */
-       if (new_isize > XFS_IFORK_DSIZE(dp) ||
-           (pick =
-            xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) {
-               /*
-                * Just checking or no space reservation, it doesn't fit.
-                */
-               if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
-                       return ENOSPC;
-               /*
-                * Convert to block form then add the name.
-                */
-               error = xfs_dir2_sf_to_block(args);
-               if (error)
-                       return error;
-               return xfs_dir2_block_addname(args);
-       }
-       /*
-        * Just checking, it fits.
-        */
-       if (args->op_flags & XFS_DA_OP_JUSTCHECK)
-               return 0;
-       /*
-        * Do it the easy way - just add it at the end.
-        */
-       if (pick == 1)
-               xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize);
-       /*
-        * Do it the hard way - look for a place to insert the new entry.
-        * Convert to 8 byte inode numbers first if necessary.
-        */
-       else {
-               ASSERT(pick == 2);
-#if XFS_BIG_INUMS
-               if (objchange)
-                       xfs_dir2_sf_toino8(args);
-#endif
-               xfs_dir2_sf_addname_hard(args, objchange, new_isize);
-       }
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-       return 0;
-}
-
-/*
- * Add the new entry the "easy" way.
- * This is copying the old directory and adding the new entry at the end.
- * Since it's sorted by "offset" we need room after the last offset
- * that's already there, and then room to convert to a block directory.
- * This is already checked by the pick routine.
- */
-static void
-xfs_dir2_sf_addname_easy(
-       xfs_da_args_t           *args,          /* operation arguments */
-       xfs_dir2_sf_entry_t     *sfep,          /* pointer to new entry */
-       xfs_dir2_data_aoff_t    offset,         /* offset to use for new ent */
-       int                     new_isize)      /* new directory size */
-{
-       int                     byteoff;        /* byte offset in sf dir */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-
-       dp = args->dp;
-
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       byteoff = (int)((char *)sfep - (char *)sfp);
-       /*
-        * Grow the in-inode space.
-        */
-       xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen),
-                         XFS_DATA_FORK);
-       /*
-        * Need to set up again due to realloc of the inode data.
-        */
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
-       /*
-        * Fill in the new entry.
-        */
-       sfep->namelen = args->namelen;
-       xfs_dir2_sf_put_offset(sfep, offset);
-       memcpy(sfep->name, args->name, sfep->namelen);
-       dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
-       dp->d_ops->sf_put_ftype(sfep, args->filetype);
-
-       /*
-        * Update the header and inode.
-        */
-       sfp->count++;
-#if XFS_BIG_INUMS
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
-               sfp->i8count++;
-#endif
-       dp->i_d.di_size = new_isize;
-       xfs_dir2_sf_check(args);
-}
-
-/*
- * Add the new entry the "hard" way.
- * The caller has already converted to 8 byte inode numbers if necessary,
- * in which case we need to leave the i8count at 1.
- * Find a hole that the new entry will fit into, and copy
- * the first part of the entries, the new entry, and the last part of
- * the entries.
- */
-/* ARGSUSED */
-static void
-xfs_dir2_sf_addname_hard(
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     objchange,      /* changing inode number size */
-       int                     new_isize)      /* new directory size */
-{
-       int                     add_datasize;   /* data size need for new ent */
-       char                    *buf;           /* buffer for old */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     eof;            /* reached end of old dir */
-       int                     nbytes;         /* temp for byte copies */
-       xfs_dir2_data_aoff_t    new_offset;     /* next offset value */
-       xfs_dir2_data_aoff_t    offset;         /* current offset value */
-       int                     old_isize;      /* previous di_size */
-       xfs_dir2_sf_entry_t     *oldsfep;       /* entry in original dir */
-       xfs_dir2_sf_hdr_t       *oldsfp;        /* original shortform dir */
-       xfs_dir2_sf_entry_t     *sfep;          /* entry in new dir */
-       xfs_dir2_sf_hdr_t       *sfp;           /* new shortform dir */
-       struct xfs_mount        *mp;
-
-       /*
-        * Copy the old directory to the stack buffer.
-        */
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       old_isize = (int)dp->i_d.di_size;
-       buf = kmem_alloc(old_isize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-       memcpy(oldsfp, sfp, old_isize);
-       /*
-        * Loop over the old directory finding the place we're going
-        * to insert the new entry.
-        * If it's going to end up at the end then oldsfep will point there.
-        */
-       for (offset = dp->d_ops->data_first_offset,
-             oldsfep = xfs_dir2_sf_firstentry(oldsfp),
-             add_datasize = dp->d_ops->data_entsize(args->namelen),
-             eof = (char *)oldsfep == &buf[old_isize];
-            !eof;
-            offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen),
-             oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep),
-             eof = (char *)oldsfep == &buf[old_isize]) {
-               new_offset = xfs_dir2_sf_get_offset(oldsfep);
-               if (offset + add_datasize <= new_offset)
-                       break;
-       }
-       /*
-        * Get rid of the old directory, then allocate space for
-        * the new one.  We do this so xfs_idata_realloc won't copy
-        * the data.
-        */
-       xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK);
-       xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK);
-       /*
-        * Reset the pointer since the buffer was reallocated.
-        */
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       /*
-        * Copy the first part of the directory, including the header.
-        */
-       nbytes = (int)((char *)oldsfep - (char *)oldsfp);
-       memcpy(sfp, oldsfp, nbytes);
-       sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes);
-       /*
-        * Fill in the new entry, and update the header counts.
-        */
-       sfep->namelen = args->namelen;
-       xfs_dir2_sf_put_offset(sfep, offset);
-       memcpy(sfep->name, args->name, sfep->namelen);
-       dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
-       dp->d_ops->sf_put_ftype(sfep, args->filetype);
-       sfp->count++;
-#if XFS_BIG_INUMS
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
-               sfp->i8count++;
-#endif
-       /*
-        * If there's more left to copy, do that.
-        */
-       if (!eof) {
-               sfep = dp->d_ops->sf_nextentry(sfp, sfep);
-               memcpy(sfep, oldsfep, old_isize - nbytes);
-       }
-       kmem_free(buf);
-       dp->i_d.di_size = new_isize;
-       xfs_dir2_sf_check(args);
-}
-
-/*
- * Decide if the new entry will fit at all.
- * If it will fit, pick between adding the new entry to the end (easy)
- * or somewhere else (hard).
- * Return 0 (won't fit), 1 (easy), 2 (hard).
- */
-/*ARGSUSED*/
-static int                                     /* pick result */
-xfs_dir2_sf_addname_pick(
-       xfs_da_args_t           *args,          /* operation arguments */
-       int                     objchange,      /* inode # size changes */
-       xfs_dir2_sf_entry_t     **sfepp,        /* out(1): new entry ptr */
-       xfs_dir2_data_aoff_t    *offsetp)       /* out(1): new offset */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     holefit;        /* found hole it will fit in */
-       int                     i;              /* entry number */
-       xfs_mount_t             *mp;            /* filesystem mount point */
-       xfs_dir2_data_aoff_t    offset;         /* data block offset */
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-       int                     size;           /* entry's data size */
-       int                     used;           /* data bytes used */
-
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       size = dp->d_ops->data_entsize(args->namelen);
-       offset = dp->d_ops->data_first_offset;
-       sfep = xfs_dir2_sf_firstentry(sfp);
-       holefit = 0;
-       /*
-        * Loop over sf entries.
-        * Keep track of data offset and whether we've seen a place
-        * to insert the new entry.
-        */
-       for (i = 0; i < sfp->count; i++) {
-               if (!holefit)
-                       holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
-               offset = xfs_dir2_sf_get_offset(sfep) +
-                        dp->d_ops->data_entsize(sfep->namelen);
-               sfep = dp->d_ops->sf_nextentry(sfp, sfep);
-       }
-       /*
-        * Calculate the data bytes that would be used if this were a
-        * data block (block form directory), excluding the new entry.
-        */
-       used = offset +
-              (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-              (uint)sizeof(xfs_dir2_block_tail_t);
-       /*
-        * If it won't fit in a block form then we can't insert it,
-        * we'll go back, convert to block, then try the insert and convert
-        * to leaf.
-        */
-       if (used + (holefit ? 0 : size) > args->geo->blksize)
-               return 0;
-       /*
-        * If changing the inode number size, do it the hard way.
-        */
-#if XFS_BIG_INUMS
-       if (objchange) {
-               return 2;
-       }
-#else
-       ASSERT(objchange == 0);
-#endif
-       /*
-        * If it won't fit at the end then do it the hard way (use the hole).
-        */
-       if (used + size > args->geo->blksize)
-               return 2;
-       /*
-        * Do it the easy way.
-        */
-       *sfepp = sfep;
-       *offsetp = offset;
-       return 1;
-}
-
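/*
 * A standalone sketch, not part of the original file, of the pick
 * logic above: 0 means the entry would not fit even after converting
 * to a single-block directory, 1 means it can simply be appended, and
 * 2 means it has to go into a hole (or the entries must be rewritten
 * for 8-byte inode numbers).  All byte counts are plain parameters
 * here; in the real code they come from the sf header and geometry.
 */
#include <stdio.h>

static int
addname_pick(
	unsigned int	used,		/* block-form bytes used, without new entry */
	unsigned int	size,		/* data size of the new entry */
	unsigned int	blksize,	/* directory block size */
	int		holefit,	/* a large enough hole exists */
	int		objchange)	/* converting to 8-byte inode numbers */
{
	if (used + (holefit ? 0 : size) > blksize)
		return 0;		/* won't fit: caller changes format */
	if (objchange)
		return 2;		/* must rewrite everything: hard way */
	if (used + size > blksize)
		return 2;		/* only fits in the hole: hard way */
	return 1;			/* fits at the end: easy way */
}

int
main(void)
{
	printf("pick = %d\n", addname_pick(3800, 24, 4096, 1, 0));
	return 0;
}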
-#ifdef DEBUG
-/*
- * Check consistency of shortform directory, assert if bad.
- */
-static void
-xfs_dir2_sf_check(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     i;              /* entry number */
-       int                     i8count;        /* number of big inode#s */
-       xfs_ino_t               ino;            /* entry inode number */
-       int                     offset;         /* data offset */
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform dir entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-       struct xfs_mount        *mp;
-
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       offset = dp->d_ops->data_first_offset;
-       ino = dp->d_ops->sf_get_parent_ino(sfp);
-       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
-
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-            i < sfp->count;
-            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
-               ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
-               ino = dp->d_ops->sf_get_ino(sfp, sfep);
-               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
-               offset =
-                       xfs_dir2_sf_get_offset(sfep) +
-                       dp->d_ops->data_entsize(sfep->namelen);
-               ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
-       }
-       ASSERT(i8count == sfp->i8count);
-       ASSERT(XFS_BIG_INUMS || i8count == 0);
-       ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
-       ASSERT(offset +
-              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-              (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize);
-}
-#endif /* DEBUG */
-
-/*
- * Create a new (shortform) directory.
- */
-int                                    /* error, always 0 */
-xfs_dir2_sf_create(
-       xfs_da_args_t   *args,          /* operation arguments */
-       xfs_ino_t       pino)           /* parent inode number */
-{
-       xfs_inode_t     *dp;            /* incore directory inode */
-       int             i8count;        /* parent inode is an 8-byte number */
-       xfs_dir2_sf_hdr_t *sfp;         /* shortform structure */
-       int             size;           /* directory size */
-
-       trace_xfs_dir2_sf_create(args);
-
-       dp = args->dp;
-
-       ASSERT(dp != NULL);
-       ASSERT(dp->i_d.di_size == 0);
-       /*
-        * If it's currently a zero-length extent file,
-        * convert it to local format.
-        */
-       if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
-               dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
-               dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
-               xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
-               dp->i_df.if_flags |= XFS_IFINLINE;
-       }
-       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       ASSERT(dp->i_df.if_bytes == 0);
-       i8count = pino > XFS_DIR2_MAX_SHORT_INUM;
-       size = xfs_dir2_sf_hdr_size(i8count);
-       /*
-        * Make a buffer for the data.
-        */
-       xfs_idata_realloc(dp, size, XFS_DATA_FORK);
-       /*
-        * Fill in the header.
-        */
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       sfp->i8count = i8count;
-       /*
-        * Now we can put in the inode number, since i8count is set.
-        */
-       dp->d_ops->sf_put_parent_ino(sfp, pino);
-       sfp->count = 0;
-       dp->i_d.di_size = size;
-       xfs_dir2_sf_check(args);
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-       return 0;
-}
-
-/*
- * Lookup an entry in a shortform directory.
- * Returns EEXIST if found, ENOENT if not found.
- */
-int                                            /* error */
-xfs_dir2_sf_lookup(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     i;              /* entry index */
-       int                     error;
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-       enum xfs_dacmp          cmp;            /* comparison result */
-       xfs_dir2_sf_entry_t     *ci_sfep;       /* case-insens. entry */
-
-       trace_xfs_dir2_sf_lookup(args);
-
-       xfs_dir2_sf_check(args);
-       dp = args->dp;
-
-       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Bail out if the directory is way too short.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return EIO;
-       }
-       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-       ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-       /*
-        * Special case for .
-        */
-       if (args->namelen == 1 && args->name[0] == '.') {
-               args->inumber = dp->i_ino;
-               args->cmpresult = XFS_CMP_EXACT;
-               args->filetype = XFS_DIR3_FT_DIR;
-               return EEXIST;
-       }
-       /*
-        * Special case for ..
-        */
-       if (args->namelen == 2 &&
-           args->name[0] == '.' && args->name[1] == '.') {
-               args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
-               args->cmpresult = XFS_CMP_EXACT;
-               args->filetype = XFS_DIR3_FT_DIR;
-               return EEXIST;
-       }
-       /*
-        * Loop over all the entries trying to match ours.
-        */
-       ci_sfep = NULL;
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
-            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
-               /*
-                * Compare name and if it's an exact match, return the inode
-                * number. If it's the first case-insensitive match, store the
-                * inode number and continue looking for an exact match.
-                */
-               cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
-                                                               sfep->namelen);
-               if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                       args->cmpresult = cmp;
-                       args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
-                       args->filetype = dp->d_ops->sf_get_ftype(sfep);
-                       if (cmp == XFS_CMP_EXACT)
-                               return EEXIST;
-                       ci_sfep = sfep;
-               }
-       }
-       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-       /*
-        * Here, we can only be doing a lookup (not a rename or replace).
-        * If a case-insensitive match was not found, return ENOENT.
-        */
-       if (!ci_sfep)
-               return ENOENT;
-       /* otherwise process the CI match as required by the caller */
-       error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
-       return error;
-}
-
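/*
 * A standalone sketch, not part of the original file, of the lookup
 * policy above: an exact match wins immediately, otherwise the first
 * case-insensitive match is remembered and reported only if nothing
 * better turns up.  strcasecmp() stands in for the filesystem's
 * configurable name-comparison operation.
 */
#include <stdio.h>
#include <string.h>
#include <strings.h>

static const char *
lookup(const char *want, const char * const *names, int count)
{
	const char	*ci_match = NULL;
	int		i;

	for (i = 0; i < count; i++) {
		if (strcmp(names[i], want) == 0)
			return names[i];		/* exact match */
		if (!ci_match && strcasecmp(names[i], want) == 0)
			ci_match = names[i];		/* remember first CI hit */
	}
	return ci_match;				/* may be NULL: not found */
}

int
main(void)
{
	const char * const names[] = { "README", "Makefile", "readme.txt" };
	const char *hit = lookup("readme", names, 3);

	printf("%s\n", hit ? hit : "ENOENT");
	return 0;
}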
-/*
- * Remove an entry from a shortform directory.
- */
-int                                            /* error */
-xfs_dir2_sf_removename(
-       xfs_da_args_t           *args)
-{
-       int                     byteoff;        /* offset of removed entry */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     entsize;        /* this entry's size */
-       int                     i;              /* shortform entry index */
-       int                     newsize;        /* new inode size */
-       int                     oldsize;        /* old inode size */
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-
-       trace_xfs_dir2_sf_removename(args);
-
-       dp = args->dp;
-
-       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       oldsize = (int)dp->i_d.di_size;
-       /*
-        * Bail out if the directory is way too short.
-        */
-       if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return EIO;
-       }
-       ASSERT(dp->i_df.if_bytes == oldsize);
-       ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
-       /*
-        * Loop over the old directory entries.
-        * Find the one we're deleting.
-        */
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
-            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
-               if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
-                                                               XFS_CMP_EXACT) {
-                       ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==
-                              args->inumber);
-                       break;
-               }
-       }
-       /*
-        * Didn't find it.
-        */
-       if (i == sfp->count)
-               return ENOENT;
-       /*
-        * Calculate sizes.
-        */
-       byteoff = (int)((char *)sfep - (char *)sfp);
-       entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
-       newsize = oldsize - entsize;
-       /*
-        * Copy the part, if any, after the removed entry, sliding it down.
-        */
-       if (byteoff + entsize < oldsize)
-               memmove((char *)sfp + byteoff, (char *)sfp + byteoff + entsize,
-                       oldsize - (byteoff + entsize));
-       /*
-        * Fix up the header and file size.
-        */
-       sfp->count--;
-       dp->i_d.di_size = newsize;
-       /*
-        * Reallocate, making it smaller.
-        */
-       xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-#if XFS_BIG_INUMS
-       /*
-        * Are we changing inode number size?
-        */
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
-               if (sfp->i8count == 1)
-                       xfs_dir2_sf_toino4(args);
-               else
-                       sfp->i8count--;
-       }
-#endif
-       xfs_dir2_sf_check(args);
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-       return 0;
-}
-
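/*
 * A standalone sketch, not part of the original file, of the removal
 * above: slide the tail of a packed buffer down over the deleted
 * entry with memmove(), then treat the buffer as "newsize" bytes long.
 */
#include <stdio.h>
#include <string.h>

static size_t
remove_span(char *buf, size_t oldsize, size_t byteoff, size_t entsize)
{
	size_t	newsize = oldsize - entsize;

	if (byteoff + entsize < oldsize)	/* anything after the entry? */
		memmove(buf + byteoff, buf + byteoff + entsize,
			oldsize - (byteoff + entsize));
	return newsize;
}

int
main(void)
{
	char	buf[] = "aaaBBBBcc";		/* remove the 4-byte "BBBB" */
	size_t	n = remove_span(buf, 9, 3, 4);

	printf("%.*s (%zu bytes)\n", (int)n, buf, n);	/* "aaacc (5 bytes)" */
	return 0;
}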
-/*
- * Replace the inode number of an entry in a shortform directory.
- */
-int                                            /* error */
-xfs_dir2_sf_replace(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     i;              /* entry index */
-#if XFS_BIG_INUMS || defined(DEBUG)
-       xfs_ino_t               ino=0;          /* entry old inode number */
-#endif
-#if XFS_BIG_INUMS
-       int                     i8elevated;     /* sf_toino8 set i8count=1 */
-#endif
-       xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* shortform structure */
-
-       trace_xfs_dir2_sf_replace(args);
-
-       dp = args->dp;
-
-       ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Bail out if the shortform directory is way too small.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return EIO;
-       }
-       ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-       ASSERT(dp->i_df.if_u1.if_data != NULL);
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-#if XFS_BIG_INUMS
-       /*
-        * The new inode number is large, so we need to convert to 8-byte inodes.
-        */
-       if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
-               int     error;                  /* error return value */
-               int     newsize;                /* new inode size */
-
-               newsize =
-                       dp->i_df.if_bytes +
-                       (sfp->count + 1) *
-                       ((uint)sizeof(xfs_dir2_ino8_t) -
-                        (uint)sizeof(xfs_dir2_ino4_t));
-               /*
-                * Won't fit as shortform, convert to block then do replace.
-                */
-               if (newsize > XFS_IFORK_DSIZE(dp)) {
-                       error = xfs_dir2_sf_to_block(args);
-                       if (error) {
-                               return error;
-                       }
-                       return xfs_dir2_block_replace(args);
-               }
-               /*
-                * Still fits, convert to 8-byte now.
-                */
-               xfs_dir2_sf_toino8(args);
-               i8elevated = 1;
-               sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       } else
-               i8elevated = 0;
-#endif
-       ASSERT(args->namelen != 1 || args->name[0] != '.');
-       /*
-        * Replace ..'s entry.
-        */
-       if (args->namelen == 2 &&
-           args->name[0] == '.' && args->name[1] == '.') {
-#if XFS_BIG_INUMS || defined(DEBUG)
-               ino = dp->d_ops->sf_get_parent_ino(sfp);
-               ASSERT(args->inumber != ino);
-#endif
-               dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
-       }
-       /*
-        * Normal entry, look for the name.
-        */
-       else {
-               for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
-                    i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
-                       if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
-                                                               XFS_CMP_EXACT) {
-#if XFS_BIG_INUMS || defined(DEBUG)
-                               ino = dp->d_ops->sf_get_ino(sfp, sfep);
-                               ASSERT(args->inumber != ino);
-#endif
-                               dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
-                               dp->d_ops->sf_put_ftype(sfep, args->filetype);
-                               break;
-                       }
-               }
-               /*
-                * Didn't find it.
-                */
-               if (i == sfp->count) {
-                       ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
-#if XFS_BIG_INUMS
-                       if (i8elevated)
-                               xfs_dir2_sf_toino4(args);
-#endif
-                       return ENOENT;
-               }
-       }
-#if XFS_BIG_INUMS
-       /*
-        * See if the old number was large, the new number is small.
-        */
-       if (ino > XFS_DIR2_MAX_SHORT_INUM &&
-           args->inumber <= XFS_DIR2_MAX_SHORT_INUM) {
-               /*
-                * And the old i8count was one, so we need to convert back to 4-byte inodes.
-                */
-               if (sfp->i8count == 1)
-                       xfs_dir2_sf_toino4(args);
-               else
-                       sfp->i8count--;
-       }
-       /*
-        * See if the old number was small, the new number is large.
-        */
-       if (ino <= XFS_DIR2_MAX_SHORT_INUM &&
-           args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
-               /*
-                * add to the i8count unless we just converted to 8-byte
-                * inodes (which does an implied i8count = 1)
-                */
-               ASSERT(sfp->i8count != 0);
-               if (!i8elevated)
-                       sfp->i8count++;
-       }
-#endif
-       xfs_dir2_sf_check(args);
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
-       return 0;
-}
-
-#if XFS_BIG_INUMS
-/*
- * Convert from 8-byte inode numbers to 4-byte inode numbers.
- * The last 8-byte inode number is gone, but the count is still 1.
- */
-static void
-xfs_dir2_sf_toino4(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       char                    *buf;           /* old dir's buffer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     i;              /* entry index */
-       int                     newsize;        /* new inode size */
-       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
-       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
-       int                     oldsize;        /* old inode size */
-       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
-       struct xfs_mount        *mp;
-
-       trace_xfs_dir2_sf_toino4(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       /*
-        * Copy the old directory to the buffer.
-        * Then nuke it from the inode, and add the new buffer to the inode.
-        * Don't want xfs_idata_realloc copying the data here.
-        */
-       oldsize = dp->i_df.if_bytes;
-       buf = kmem_alloc(oldsize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsfp->i8count == 1);
-       memcpy(buf, oldsfp, oldsize);
-       /*
-        * Compute the new inode size.
-        */
-       newsize =
-               oldsize -
-               (oldsfp->count + 1) *
-               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
-       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
-       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
-       /*
-        * Reset our pointers, the data has moved.
-        */
-       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       /*
-        * Fill in the new header.
-        */
-       sfp->count = oldsfp->count;
-       sfp->i8count = 0;
-       dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
-       /*
-        * Copy the entries field by field.
-        */
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
-                   oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-            i < sfp->count;
-            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
-                 oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
-               sfep->namelen = oldsfep->namelen;
-               sfep->offset = oldsfep->offset;
-               memcpy(sfep->name, oldsfep->name, sfep->namelen);
-               dp->d_ops->sf_put_ino(sfp, sfep,
-                                     dp->d_ops->sf_get_ino(oldsfp, oldsfep));
-               dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
-       }
-       /*
-        * Clean up the inode.
-        */
-       kmem_free(buf);
-       dp->i_d.di_size = newsize;
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-}
-
-/*
- * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers.
- * The new entry with an 8-byte inode number is not there yet; we leave with
- * i8count set to 1, but no corresponding 8-byte entry.
- */
-static void
-xfs_dir2_sf_toino8(
-       xfs_da_args_t           *args)          /* operation arguments */
-{
-       char                    *buf;           /* old dir's buffer */
-       xfs_inode_t             *dp;            /* incore directory inode */
-       int                     i;              /* entry index */
-       int                     newsize;        /* new inode size */
-       xfs_dir2_sf_entry_t     *oldsfep;       /* old sf entry */
-       xfs_dir2_sf_hdr_t       *oldsfp;        /* old sf directory */
-       int                     oldsize;        /* old inode size */
-       xfs_dir2_sf_entry_t     *sfep;          /* new sf entry */
-       xfs_dir2_sf_hdr_t       *sfp;           /* new sf directory */
-       struct xfs_mount        *mp;
-
-       trace_xfs_dir2_sf_toino8(args);
-
-       dp = args->dp;
-       mp = dp->i_mount;
-
-       /*
-        * Copy the old directory to the buffer.
-        * Then nuke it from the inode, and add the new buffer to the inode.
-        * Don't want xfs_idata_realloc copying the data here.
-        */
-       oldsize = dp->i_df.if_bytes;
-       buf = kmem_alloc(oldsize, KM_SLEEP);
-       oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       ASSERT(oldsfp->i8count == 0);
-       memcpy(buf, oldsfp, oldsize);
-       /*
-        * Compute the new inode size (nb: entry count + 1 for parent)
-        */
-       newsize =
-               oldsize +
-               (oldsfp->count + 1) *
-               ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
-       xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
-       xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
-       /*
-        * Reset our pointers, the data has moved.
-        */
-       oldsfp = (xfs_dir2_sf_hdr_t *)buf;
-       sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       /*
-        * Fill in the new header.
-        */
-       sfp->count = oldsfp->count;
-       sfp->i8count = 1;
-       dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
-       /*
-        * Copy the entries field by field.
-        */
-       for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
-                   oldsfep = xfs_dir2_sf_firstentry(oldsfp);
-            i < sfp->count;
-            i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
-                 oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
-               sfep->namelen = oldsfep->namelen;
-               sfep->offset = oldsfep->offset;
-               memcpy(sfep->name, oldsfep->name, sfep->namelen);
-               dp->d_ops->sf_put_ino(sfp, sfep,
-                                     dp->d_ops->sf_get_ino(oldsfp, oldsfep));
-               dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
-       }
-       /*
-        * Clean up the inode.
-        */
-       kmem_free(buf);
-       dp->i_d.di_size = newsize;
-       xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
-}
-#endif /* XFS_BIG_INUMS */
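/*
 * A standalone sketch, not part of the original file, of the resize
 * arithmetic used by the two conversions above: every entry plus the
 * parent pointer grows or shrinks by the difference between an 8-byte
 * and a 4-byte inode number.
 */
#include <stdio.h>

static int
resized(int oldsize, int count, int to8)
{
	int	delta = (count + 1) * (8 - 4);	/* entries + parent */

	return to8 ? oldsize + delta : oldsize - delta;
}

int
main(void)
{
	/* a 60-byte shortform dir with 5 entries grows to 84 bytes */
	printf("%d\n", resized(60, 5, 1));
	return 0;
}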
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
deleted file mode 100644 (file)
index c2ac0c6..0000000
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_qm.h"
-#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
-
-int
-xfs_calc_dquots_per_chunk(
-       unsigned int            nbblks) /* basic block units */
-{
-       unsigned int    ndquots;
-
-       ASSERT(nbblks > 0);
-       ndquots = BBTOB(nbblks);
-       do_div(ndquots, sizeof(xfs_dqblk_t));
-
-       return ndquots;
-}
-
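/*
 * A standalone sketch, not part of the original file, of the calculation
 * above: a chunk of "nbblks" 512-byte basic blocks holds as many dquots
 * as whole on-disk dquot records fit in it.  The record size below is a
 * made-up placeholder; the real value is sizeof(xfs_dqblk_t).
 */
#include <stdio.h>

#define BASIC_BLOCK_SIZE	512u	/* the XFS "BB" unit */
#define DQBLK_SIZE		136u	/* placeholder for sizeof(xfs_dqblk_t) */

static unsigned int
dquots_per_chunk(unsigned int nbblks)
{
	return (nbblks * BASIC_BLOCK_SIZE) / DQBLK_SIZE;
}

int
main(void)
{
	printf("%u dquots per 8-block chunk\n", dquots_per_chunk(8));
	return 0;
}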
-/*
- * Do some primitive error checking on ondisk dquot data structures.
- */
-int
-xfs_dqcheck(
-       struct xfs_mount *mp,
-       xfs_disk_dquot_t *ddq,
-       xfs_dqid_t       id,
-       uint             type,    /* used only when IO_dorepair is true */
-       uint             flags,
-       char             *str)
-{
-       xfs_dqblk_t      *d = (xfs_dqblk_t *)ddq;
-       int             errs = 0;
-
-       /*
-        * We can encounter an uninitialized dquot buffer for 2 reasons:
-        * 1. If we crash while deleting the quotainode(s), and those blks got
-        *    used for user data. This is because we take the path of regular
-        *    file deletion; however, the size field of quotainodes is never
-        *    updated, so all the tricks that we play in itruncate_finish
-        *    don't quite matter.
-        *
-        * 2. We don't replay the quota buffers when there's a quotaoff logitem.
-        *    But the allocation will be replayed so we'll end up with an
-        *    uninitialized quota block.
-        *
-        * This is all fine; things are still consistent, and we haven't lost
-        * any quota information. Just don't complain about bad dquot blks.
-        */
-       if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
-                       str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
-               errs++;
-       }
-       if (ddq->d_version != XFS_DQUOT_VERSION) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
-                       str, id, ddq->d_version, XFS_DQUOT_VERSION);
-               errs++;
-       }
-
-       if (ddq->d_flags != XFS_DQ_USER &&
-           ddq->d_flags != XFS_DQ_PROJ &&
-           ddq->d_flags != XFS_DQ_GROUP) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
-                       str, id, ddq->d_flags);
-               errs++;
-       }
-
-       if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
-               if (flags & XFS_QMOPT_DOWARN)
-                       xfs_alert(mp,
-                       "%s : ondisk-dquot 0x%p, ID mismatch: "
-                       "0x%x expected, found id 0x%x",
-                       str, ddq, id, be32_to_cpu(ddq->d_id));
-               errs++;
-       }
-
-       if (!errs && ddq->d_id) {
-               if (ddq->d_blk_softlimit &&
-                   be64_to_cpu(ddq->d_bcount) >
-                               be64_to_cpu(ddq->d_blk_softlimit)) {
-                       if (!ddq->d_btimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-               if (ddq->d_ino_softlimit &&
-                   be64_to_cpu(ddq->d_icount) >
-                               be64_to_cpu(ddq->d_ino_softlimit)) {
-                       if (!ddq->d_itimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-               if (ddq->d_rtb_softlimit &&
-                   be64_to_cpu(ddq->d_rtbcount) >
-                               be64_to_cpu(ddq->d_rtb_softlimit)) {
-                       if (!ddq->d_rtbtimer) {
-                               if (flags & XFS_QMOPT_DOWARN)
-                                       xfs_alert(mp,
-                       "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
-                                       str, (int)be32_to_cpu(ddq->d_id), ddq);
-                               errs++;
-                       }
-               }
-       }
-
-       if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
-               return errs;
-
-       if (flags & XFS_QMOPT_DOWARN)
-               xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
-
-       /*
-        * Typically, a repair is only requested by quotacheck.
-        */
-       ASSERT(id != -1);
-       ASSERT(flags & XFS_QMOPT_DQREPAIR);
-       memset(d, 0, sizeof(xfs_dqblk_t));
-
-       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-       d->dd_diskdq.d_flags = type;
-       d->dd_diskdq.d_id = cpu_to_be32(id);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
-               xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
-                                XFS_DQUOT_CRC_OFF);
-       }
-
-       return errs;
-}
-
-STATIC bool
-xfs_dquot_buf_verify_crc(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
-       int                     ndquots;
-       int                     i;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return true;
-
-       /*
-        * if we are in log recovery, the quota subsystem has not been
-        * initialised so we have no quotainfo structure. In that case, we need
-        * to manually calculate the number of dquots in the buffer.
-        */
-       if (mp->m_quotainfo)
-               ndquots = mp->m_quotainfo->qi_dqperchunk;
-       else
-               ndquots = xfs_calc_dquots_per_chunk(
-                                       XFS_BB_TO_FSB(mp, bp->b_length));
-
-       for (i = 0; i < ndquots; i++, d++) {
-               if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
-                                XFS_DQUOT_CRC_OFF))
-                       return false;
-               if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-       }
-       return true;
-}
-
-STATIC bool
-xfs_dquot_buf_verify(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
-       xfs_dqid_t              id = 0;
-       int                     ndquots;
-       int                     i;
-
-       /*
-        * if we are in log recovery, the quota subsystem has not been
-        * initialised so we have no quotainfo structure. In that case, we need
-        * to manually calculate the number of dquots in the buffer.
-        */
-       if (mp->m_quotainfo)
-               ndquots = mp->m_quotainfo->qi_dqperchunk;
-       else
-               ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
-
-       /*
-        * On the first read of the buffer, verify that each dquot is valid.
-        * We don't know what the id of the dquot is supposed to be, just that
-        * they should be increasing monotonically within the buffer. If the
-        * first id is corrupt, then it will fail on the second dquot in the
-        * buffer so corruptions could point to the wrong dquot in this case.
-        */
-       for (i = 0; i < ndquots; i++) {
-               struct xfs_disk_dquot   *ddq;
-               int                     error;
-
-               ddq = &d[i].dd_diskdq;
-
-               if (i == 0)
-                       id = be32_to_cpu(ddq->d_id);
-
-               error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
-                                      "xfs_dquot_buf_verify");
-               if (error)
-                       return false;
-       }
-       return true;
-}
-
-static void
-xfs_dquot_buf_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (!xfs_dquot_buf_verify_crc(mp, bp))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_dquot_buf_verify(mp, bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-/*
- * we don't calculate the CRC here as that is done when the dquot is flushed to
- * the buffer after the update is done. This ensures that the dquot in the
- * buffer always has an up-to-date CRC value.
- */
-static void
-xfs_dquot_buf_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-
-       if (!xfs_dquot_buf_verify(mp, bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-}
-
-const struct xfs_buf_ops xfs_dquot_buf_ops = {
-       .verify_read = xfs_dquot_buf_read_verify,
-       .verify_write = xfs_dquot_buf_write_verify,
-};
-
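/*
 * A standalone sketch, not part of the original file, of the verifier
 * pattern registered above: a buffer type carries a read and a write
 * callback, the read side checks the checksum before the contents, and
 * the write side checks only contents because the checksum is stamped
 * later, when the dquot is flushed.  The buffer struct and error codes
 * here are invented stand-ins, not the real XFS interfaces.
 */
#include <stdio.h>

struct buf {
	const char	*data;
	int		error;
};

struct buf_ops {
	void		(*verify_read)(struct buf *);
	void		(*verify_write)(struct buf *);
};

static int crc_ok(const struct buf *bp)      { return bp->data[0] == 'C'; }
static int contents_ok(const struct buf *bp) { return bp->data[1] == 'D'; }

static void read_verify(struct buf *bp)
{
	if (!crc_ok(bp))
		bp->error = 1;			/* bad checksum */
	else if (!contents_ok(bp))
		bp->error = 2;			/* corrupt contents */
}

static void write_verify(struct buf *bp)
{
	if (!contents_ok(bp))
		bp->error = 2;			/* checksum is added at flush time */
}

static const struct buf_ops sketch_buf_ops = {
	.verify_read	= read_verify,
	.verify_write	= write_verify,
};

int main(void)
{
	struct buf bp = { .data = "CX", .error = 0 };

	sketch_buf_ops.verify_read(&bp);
	printf("read verify error = %d\n", bp.error);	/* 2: contents bad */
	return 0;
}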
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
deleted file mode 100644 (file)
index 16fb63a..0000000
+++ /dev/null
@@ -1,2189 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_cksum.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-#include "xfs_icreate_item.h"
-#include "xfs_icache.h"
-#include "xfs_dinode.h"
-#include "xfs_trace.h"
-
-
-/*
- * Allocation group level functions.
- */
-static inline int
-xfs_ialloc_cluster_alignment(
-       xfs_alloc_arg_t *args)
-{
-       if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
-           args->mp->m_sb.sb_inoalignmt >=
-            XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size))
-               return args->mp->m_sb.sb_inoalignmt;
-       return 1;
-}
-
-/*
- * Lookup a record by ino in the btree given by cur.
- */
-int                                    /* error */
-xfs_inobt_lookup(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_agino_t             ino,    /* starting inode of chunk */
-       xfs_lookup_t            dir,    /* <=, >=, == */
-       int                     *stat)  /* success/failure */
-{
-       cur->bc_rec.i.ir_startino = ino;
-       cur->bc_rec.i.ir_freecount = 0;
-       cur->bc_rec.i.ir_free = 0;
-       return xfs_btree_lookup(cur, dir, stat);
-}
-
-/*
- * Update the record referred to by cur to the value given.
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-STATIC int                             /* error */
-xfs_inobt_update(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_inobt_rec_incore_t  *irec)  /* btree record */
-{
-       union xfs_btree_rec     rec;
-
-       rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
-       rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
-       rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
-       return xfs_btree_update(cur, &rec);
-}
-
-/*
- * Get the data from the pointed-to record.
- */
-int                                    /* error */
-xfs_inobt_get_rec(
-       struct xfs_btree_cur    *cur,   /* btree cursor */
-       xfs_inobt_rec_incore_t  *irec,  /* btree record */
-       int                     *stat)  /* output: success/failure */
-{
-       union xfs_btree_rec     *rec;
-       int                     error;
-
-       error = xfs_btree_get_rec(cur, &rec, stat);
-       if (!error && *stat == 1) {
-               irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-               irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
-               irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
-       }
-       return error;
-}
-
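/*
 * A standalone sketch, not part of the original file, of the pattern in
 * the two record helpers above: the incore record uses host-endian
 * integers and the on-disk record is explicitly big-endian, so every
 * get/update pair converts field by field.  htobe32()/be64toh() from
 * glibc's <endian.h> stand in for the kernel's cpu_to_be32()/be64_to_cpu().
 */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct rec_incore {			/* host-endian, used in memory */
	uint32_t	startino;
	uint32_t	freecount;
	uint64_t	free;
};

struct rec_ondisk {			/* big-endian, as stored on disk */
	uint32_t	startino;
	uint32_t	freecount;
	uint64_t	free;
};

static void pack(struct rec_ondisk *d, const struct rec_incore *s)
{
	d->startino = htobe32(s->startino);
	d->freecount = htobe32(s->freecount);
	d->free = htobe64(s->free);
}

static void unpack(struct rec_incore *d, const struct rec_ondisk *s)
{
	d->startino = be32toh(s->startino);
	d->freecount = be32toh(s->freecount);
	d->free = be64toh(s->free);
}

int main(void)
{
	struct rec_incore in = { 64, 61, ~3ULL }, out;
	struct rec_ondisk disk;

	pack(&disk, &in);
	unpack(&out, &disk);
	printf("round trip: %u %u %#llx\n", out.startino, out.freecount,
	       (unsigned long long)out.free);
	return 0;
}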
-/*
- * Insert a single inobt record. Cursor must already point to desired location.
- */
-STATIC int
-xfs_inobt_insert_rec(
-       struct xfs_btree_cur    *cur,
-       __int32_t               freecount,
-       xfs_inofree_t           free,
-       int                     *stat)
-{
-       cur->bc_rec.i.ir_freecount = freecount;
-       cur->bc_rec.i.ir_free = free;
-       return xfs_btree_insert(cur, stat);
-}
-
-/*
- * Insert records describing a newly allocated inode chunk into the inobt.
- */
-STATIC int
-xfs_inobt_insert(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       xfs_agino_t             newino,
-       xfs_agino_t             newlen,
-       xfs_btnum_t             btnum)
-{
-       struct xfs_btree_cur    *cur;
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
-       xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
-       xfs_agino_t             thisino;
-       int                     i;
-       int                     error;
-
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
-
-       for (thisino = newino;
-            thisino < newino + newlen;
-            thisino += XFS_INODES_PER_CHUNK) {
-               error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
-               if (error) {
-                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-                       return error;
-               }
-               ASSERT(i == 0);
-
-               error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
-                                            XFS_INOBT_ALL_FREE, &i);
-               if (error) {
-                       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-                       return error;
-               }
-               ASSERT(i == 1);
-       }
-
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-
-       return 0;
-}
-
-/*
- * Verify that the number of free inodes in the AGI is correct.
- */
-#ifdef DEBUG
-STATIC int
-xfs_check_agi_freecount(
-       struct xfs_btree_cur    *cur,
-       struct xfs_agi          *agi)
-{
-       if (cur->bc_nlevels == 1) {
-               xfs_inobt_rec_incore_t rec;
-               int             freecount = 0;
-               int             error;
-               int             i;
-
-               error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
-               if (error)
-                       return error;
-
-               do {
-                       error = xfs_inobt_get_rec(cur, &rec, &i);
-                       if (error)
-                               return error;
-
-                       if (i) {
-                               freecount += rec.ir_freecount;
-                               error = xfs_btree_increment(cur, 0, &i);
-                               if (error)
-                                       return error;
-                       }
-               } while (i == 1);
-
-               if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
-                       ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
-       }
-       return 0;
-}
-#else
-#define xfs_check_agi_freecount(cur, agi)      0
-#endif
-
-/*
- * Initialise a new set of inodes. When called without a transaction context
- * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
- * than logging them (which in a transaction context puts them into the AIL
- * for writeback rather than the xfsbufd queue).
- */
-int
-xfs_ialloc_inode_init(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct list_head        *buffer_list,
-       xfs_agnumber_t          agno,
-       xfs_agblock_t           agbno,
-       xfs_agblock_t           length,
-       unsigned int            gen)
-{
-       struct xfs_buf          *fbuf;
-       struct xfs_dinode       *free;
-       int                     nbufs, blks_per_cluster, inodes_per_cluster;
-       int                     version;
-       int                     i, j;
-       xfs_daddr_t             d;
-       xfs_ino_t               ino = 0;
-
-       /*
-        * Loop over the new block(s), filling in the inodes.  For small block
-        * sizes, manipulate the inodes in buffers which are multiples of the
-        * block size.
-        */
-       blks_per_cluster = xfs_icluster_size_fsb(mp);
-       inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
-       nbufs = length / blks_per_cluster;
-
-       /*
-        * Figure out what version number to use in the inodes we create.  If
-        * the superblock version has caught up to the one that supports the new
-        * inode format, then use the new inode version.  Otherwise use the old
-        * version so that old kernels will continue to be able to use the file
-        * system.
-        *
-        * For v3 inodes, we also need to write the inode number into the inode,
-        * so calculate the first inode number of the chunk here as
-        * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
-        * across multiple filesystem blocks (such as a cluster) and so cannot
-        * be used in the cluster buffer loop below.
-        *
-        * Further, because we are writing the inode directly into the buffer
-        * and calculating a CRC on the entire inode, we have to log the entire
-        * inode so that the entire range the CRC covers is present in the log.
-        * That means for v3 inodes we log the entire buffer rather than just
-        * the inode cores.
-        */
-       if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               version = 3;
-               ino = XFS_AGINO_TO_INO(mp, agno,
-                                      XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
-
-               /*
-                * Log the initialisation that is about to take place as a
-                * logical operation. This means the transaction does not
-                * need to log the physical changes to the inode buffers as log
-                * recovery will know what initialisation is actually needed.
-                * Hence we only need to log the buffers as "ordered" buffers so
-                * they track in the AIL as if they were physically logged.
-                */
-               if (tp)
-                       xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
-                                       mp->m_sb.sb_inodesize, length, gen);
-       } else
-               version = 2;
-
-       for (j = 0; j < nbufs; j++) {
-               /*
-                * Get the block.
-                */
-               d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
-               fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
-                                        mp->m_bsize * blks_per_cluster,
-                                        XBF_UNMAPPED);
-               if (!fbuf)
-                       return ENOMEM;
-
-               /* Initialize the inode buffers and log them appropriately. */
-               fbuf->b_ops = &xfs_inode_buf_ops;
-               xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
-               for (i = 0; i < inodes_per_cluster; i++) {
-                       int     ioffset = i << mp->m_sb.sb_inodelog;
-                       uint    isize = xfs_dinode_size(version);
-
-                       free = xfs_make_iptr(mp, fbuf, i);
-                       free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
-                       free->di_version = version;
-                       free->di_gen = cpu_to_be32(gen);
-                       free->di_next_unlinked = cpu_to_be32(NULLAGINO);
-
-                       if (version == 3) {
-                               free->di_ino = cpu_to_be64(ino);
-                               ino++;
-                               uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
-                               xfs_dinode_calc_crc(mp, free);
-                       } else if (tp) {
-                               /* just log the inode core */
-                               xfs_trans_log_buf(tp, fbuf, ioffset,
-                                                 ioffset + isize - 1);
-                       }
-               }
-
-               if (tp) {
-                       /*
-                        * Mark the buffer as an inode allocation buffer so it
-                        * sticks in the AIL at the point of this allocation
-                        * transaction. This ensures that it is on disk before
-                        * the tail of the log can be moved past this
-                        * transaction (i.e. by preventing relogging from moving
-                        * it forward in the log).
-                        */
-                       xfs_trans_inode_alloc_buf(tp, fbuf);
-                       if (version == 3) {
-                               /*
-                                * Mark the buffer as ordered so that it is
-                                * not physically logged in the transaction but
-                                * is still tracked in the AIL as part of the
-                                * transaction and pins the log appropriately.
-                                */
-                               xfs_trans_ordered_buf(tp, fbuf);
-                               xfs_trans_log_buf(tp, fbuf, 0,
-                                                 BBTOB(fbuf->b_length) - 1);
-                       }
-               } else {
-                       fbuf->b_flags |= XBF_DONE;
-                       xfs_buf_delwri_queue(fbuf, buffer_list);
-                       xfs_buf_relse(fbuf);
-               }
-       }
-       return 0;
-}
-
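As a rough illustration of the two calling contexts described in the comment above xfs_ialloc_inode_init() (transactional, and recovery-time delayed write), here is a hypothetical pair of callers. The wrapper names are invented for this sketch; xfs_ialloc_inode_init(), xfs_buf_delwri_submit(), prandom_u32() and LIST_HEAD() are existing interfaces, and the usual XFS headers are assumed.

    /* Transactional context: tp is supplied, the buffer list is not used. */
    static int example_init_chunk_in_trans(struct xfs_mount *mp,
                                           struct xfs_trans *tp,
                                           xfs_agnumber_t agno,
                                           xfs_agblock_t agbno,
                                           xfs_agblock_t len)
    {
            return xfs_ialloc_inode_init(mp, tp, NULL, agno, agbno, len,
                                         prandom_u32());
    }

    /* Recovery context: no transaction, buffers go onto a delwri list. */
    static int example_init_chunk_in_recovery(struct xfs_mount *mp,
                                              xfs_agnumber_t agno,
                                              xfs_agblock_t agbno,
                                              xfs_agblock_t len,
                                              unsigned int gen)
    {
            LIST_HEAD(buffer_list);
            int error;

            error = xfs_ialloc_inode_init(mp, NULL, &buffer_list, agno, agbno,
                                          len, gen);
            if (!error)
                    error = xfs_buf_delwri_submit(&buffer_list);
            return error;
    }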
-/*
- * Allocate new inodes in the allocation group specified by agbp.
- * Return 0 for success, else error code.
- */
-STATIC int                             /* error code or 0 */
-xfs_ialloc_ag_alloc(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *agbp,          /* alloc group buffer */
-       int             *alloc)
-{
-       xfs_agi_t       *agi;           /* allocation group header */
-       xfs_alloc_arg_t args;           /* allocation argument structure */
-       xfs_agnumber_t  agno;
-       int             error;
-       xfs_agino_t     newino;         /* new first inode's number */
-       xfs_agino_t     newlen;         /* new number of inodes */
-       int             isaligned = 0;  /* inode allocation at stripe unit */
-                                       /* boundary */
-       struct xfs_perag *pag;
-
-       memset(&args, 0, sizeof(args));
-       args.tp = tp;
-       args.mp = tp->t_mountp;
-
-       /*
-        * Locking will ensure that we don't have two callers in here
-        * at one time.
-        */
-       newlen = args.mp->m_ialloc_inos;
-       if (args.mp->m_maxicount &&
-           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
-               return ENOSPC;
-       args.minlen = args.maxlen = args.mp->m_ialloc_blks;
-       /*
-        * First try to allocate inodes contiguous with the last-allocated
-        * chunk of inodes.  If the filesystem is striped, this will fill
-        * an entire stripe unit with inodes.
-        */
-       agi = XFS_BUF_TO_AGI(agbp);
-       newino = be32_to_cpu(agi->agi_newino);
-       agno = be32_to_cpu(agi->agi_seqno);
-       args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
-                    args.mp->m_ialloc_blks;
-       if (likely(newino != NULLAGINO &&
-                 (args.agbno < be32_to_cpu(agi->agi_length)))) {
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-               args.type = XFS_ALLOCTYPE_THIS_BNO;
-               args.prod = 1;
-
-               /*
-                * We need to take into account alignment here to ensure that
-                * we don't modify the free list if we fail to have an exact
-                * block. If we don't have an exact match, and every other
-                * allocation attempt fails, we'll end up cancelling
-                * a dirty transaction and shutting down.
-                *
-                * For an exact allocation, alignment must be 1; however, we
-                * need to take cluster alignment into account when
-                * fixing up the freelist. Use the minalignslop field to
-                * indicate that extra blocks might be required for alignment,
-                * but not to use them in the actual exact allocation.
-                */
-               args.alignment = 1;
-               args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
-
-               /* Allow space for the inode btree to split. */
-               args.minleft = args.mp->m_in_maxlevels - 1;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-
-               /*
-                * This request might have dirtied the transaction if the AG can
-                * satisfy the request, but the exact block was not available.
-                * If the allocation did fail, subsequent requests will relax
-                * the exact agbno requirement and increase the alignment
-                * instead. It is critical that the total size of the request
-                * (len + alignment + slop) does not increase from this point
-                * on, so reset minalignslop to ensure it is not included in
-                * subsequent requests.
-                */
-               args.minalignslop = 0;
-       } else
-               args.fsbno = NULLFSBLOCK;
-
-       if (unlikely(args.fsbno == NULLFSBLOCK)) {
-               /*
-                * Set the alignment for the allocation.
-                * If stripe alignment is turned on then align at stripe unit
-                * boundary.
-                * If the cluster size is smaller than a filesystem block
-                * then we're doing I/O for inodes in filesystem block size
-                * pieces, so we don't need alignment anyway.
-                */
-               isaligned = 0;
-               if (args.mp->m_sinoalign) {
-                       ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
-                       args.alignment = args.mp->m_dalign;
-                       isaligned = 1;
-               } else
-                       args.alignment = xfs_ialloc_cluster_alignment(&args);
-               /*
-                * Need to figure out where to allocate the inode blocks.
-                * Ideally they should be spaced out through the a.g.
-                * For now, just allocate blocks up front.
-                */
-               args.agbno = be32_to_cpu(agi->agi_root);
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-               /*
-                * Allocate a fixed-size extent of inodes.
-                */
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.prod = 1;
-               /*
-                * Allow space for the inode btree to split.
-                */
-               args.minleft = args.mp->m_in_maxlevels - 1;
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-
-       /*
-        * If stripe alignment is turned on, then try again with cluster
-        * alignment.
-        */
-       if (isaligned && args.fsbno == NULLFSBLOCK) {
-               args.type = XFS_ALLOCTYPE_NEAR_BNO;
-               args.agbno = be32_to_cpu(agi->agi_root);
-               args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
-               args.alignment = xfs_ialloc_cluster_alignment(&args);
-               if ((error = xfs_alloc_vextent(&args)))
-                       return error;
-       }
-
-       if (args.fsbno == NULLFSBLOCK) {
-               *alloc = 0;
-               return 0;
-       }
-       ASSERT(args.len == args.minlen);
-
-       /*
-        * Stamp and write the inode buffers.
-        *
-        * Seed the new inode cluster with a random generation number. This
-        * prevents short-term reuse of generation numbers if a chunk is
-        * freed and then immediately reallocated. We use random numbers
-        * rather than a linear progression to prevent the next generation
-        * number from being easily guessable.
-        */
-       error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
-                       args.len, prandom_u32());
-
-       if (error)
-               return error;
-       /*
-        * Convert the results.
-        */
-       newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
-       be32_add_cpu(&agi->agi_count, newlen);
-       be32_add_cpu(&agi->agi_freecount, newlen);
-       pag = xfs_perag_get(args.mp, agno);
-       pag->pagi_freecount += newlen;
-       xfs_perag_put(pag);
-       agi->agi_newino = cpu_to_be32(newino);
-
-       /*
-        * Insert records describing the new inode chunk into the btrees.
-        */
-       error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
-                                XFS_BTNUM_INO);
-       if (error)
-               return error;
-
-       if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
-               error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
-                                        XFS_BTNUM_FINO);
-               if (error)
-                       return error;
-       }
-       /*
-        * Log allocation group header fields
-        */
-       xfs_ialloc_log_agi(tp, agbp,
-               XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
-       /*
-        * Modify/log superblock values for inode count and inode free count.
-        */
-       xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
-       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
-       *alloc = 1;
-       return 0;
-}
-
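A short worked example of the alignment/slop invariant described in the comments inside xfs_ialloc_ag_alloc(). The numbers are purely illustrative (say m_ialloc_blks = 8 and a cluster alignment of 4 blocks), not taken from the source:

    /*
     *   attempt 1 (THIS_BNO, exact): minlen = 8, alignment = 1, minalignslop = 3
     *                                worst case: 8 + 1 + 3 = 12 blocks
     *   attempt 2 (NEAR_BNO):        minlen = 8, alignment = 4, minalignslop = 0
     *                                worst case: 8 + 4 + 0 = 12 blocks
     *
     * Resetting minalignslop after the exact attempt keeps the total
     * (len + alignment + slop) from growing, so a freelist fixed up for the
     * first attempt still covers the fallback attempts.
     */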
-STATIC xfs_agnumber_t
-xfs_ialloc_next_ag(
-       xfs_mount_t     *mp)
-{
-       xfs_agnumber_t  agno;
-
-       spin_lock(&mp->m_agirotor_lock);
-       agno = mp->m_agirotor;
-       if (++mp->m_agirotor >= mp->m_maxagi)
-               mp->m_agirotor = 0;
-       spin_unlock(&mp->m_agirotor_lock);
-
-       return agno;
-}
-
-/*
- * Select an allocation group to look for a free inode in, based on the parent
- * inode and the mode.  Return the allocation group number.
- */
-STATIC xfs_agnumber_t
-xfs_ialloc_ag_select(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_ino_t       parent,         /* parent directory inode number */
-       umode_t         mode,           /* bits set to indicate file type */
-       int             okalloc)        /* ok to allocate more space */
-{
-       xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
-       xfs_agnumber_t  agno;           /* current ag number */
-       int             flags;          /* alloc buffer locking flags */
-       xfs_extlen_t    ineed;          /* blocks needed for inode allocation */
-       xfs_extlen_t    longest = 0;    /* longest extent available */
-       xfs_mount_t     *mp;            /* mount point structure */
-       int             needspace;      /* file mode implies space allocated */
-       xfs_perag_t     *pag;           /* per allocation group data */
-       xfs_agnumber_t  pagno;          /* parent (starting) ag number */
-       int             error;
-
-       /*
-        * Files of these types need at least one block if length > 0
-        * (and they won't fit in the inode, but that's hard to figure out).
-        */
-       needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
-       mp = tp->t_mountp;
-       agcount = mp->m_maxagi;
-       if (S_ISDIR(mode))
-               pagno = xfs_ialloc_next_ag(mp);
-       else {
-               pagno = XFS_INO_TO_AGNO(mp, parent);
-               if (pagno >= agcount)
-                       pagno = 0;
-       }
-
-       ASSERT(pagno < agcount);
-
-       /*
-        * Loop through allocation groups, looking for one with a little
-        * free space in it.  Note we don't look for free inodes, exactly.
-        * Instead, we also account for the fact that blocks must be
-        * allocated for new inodes if none are currently free in a
-        * candidate group.
-        */
-       agno = pagno;
-       flags = XFS_ALLOC_FLAG_TRYLOCK;
-       for (;;) {
-               pag = xfs_perag_get(mp, agno);
-               if (!pag->pagi_inodeok) {
-                       xfs_ialloc_next_ag(mp);
-                       goto nextag;
-               }
-
-               if (!pag->pagi_init) {
-                       error = xfs_ialloc_pagi_init(mp, tp, agno);
-                       if (error)
-                               goto nextag;
-               }
-
-               if (pag->pagi_freecount) {
-                       xfs_perag_put(pag);
-                       return agno;
-               }
-
-               if (!okalloc)
-                       goto nextag;
-
-               if (!pag->pagf_init) {
-                       error = xfs_alloc_pagf_init(mp, tp, agno, flags);
-                       if (error)
-                               goto nextag;
-               }
-
-               /*
-                * Is there enough free space for the file plus a block of
-                * inodes (if we need to allocate some)?
-                */
-               ineed = mp->m_ialloc_blks;
-               longest = pag->pagf_longest;
-               if (!longest)
-                       longest = pag->pagf_flcount > 0;
-
-               if (pag->pagf_freeblks >= needspace + ineed &&
-                   longest >= ineed) {
-                       xfs_perag_put(pag);
-                       return agno;
-               }
-nextag:
-               xfs_perag_put(pag);
-               /*
-                * No point in iterating over the rest, if we're shutting
-                * down.
-                */
-               if (XFS_FORCED_SHUTDOWN(mp))
-                       return NULLAGNUMBER;
-               agno++;
-               if (agno >= agcount)
-                       agno = 0;
-               if (agno == pagno) {
-                       if (flags == 0)
-                               return NULLAGNUMBER;
-                       flags = 0;
-               }
-       }
-}
-
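The scan in xfs_ialloc_ag_select() makes at most two passes over the allocation groups. A schematic of the flag handling, for illustration only:

    /*
     *   pass 1: flags = XFS_ALLOC_FLAG_TRYLOCK  (AGs whose AGF cannot be read
     *                                            without blocking are skipped)
     *   pass 2: flags = 0                       (blocking AGF reads are allowed)
     *
     * If the second pass wraps back around to the starting AG without finding
     * a suitable group, NULLAGNUMBER is returned.
     */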
-/*
- * Try to retrieve the next record to the left/right from the current one.
- */
-STATIC int
-xfs_ialloc_next_rec(
-       struct xfs_btree_cur    *cur,
-       xfs_inobt_rec_incore_t  *rec,
-       int                     *done,
-       int                     left)
-{
-       int                     error;
-       int                     i;
-
-       if (left)
-               error = xfs_btree_decrement(cur, 0, &i);
-       else
-               error = xfs_btree_increment(cur, 0, &i);
-
-       if (error)
-               return error;
-       *done = !i;
-       if (i) {
-               error = xfs_inobt_get_rec(cur, rec, &i);
-               if (error)
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_ialloc_get_rec(
-       struct xfs_btree_cur    *cur,
-       xfs_agino_t             agino,
-       xfs_inobt_rec_incore_t  *rec,
-       int                     *done)
-{
-       int                     error;
-       int                     i;
-
-       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
-       if (error)
-               return error;
-       *done = !i;
-       if (i) {
-               error = xfs_inobt_get_rec(cur, rec, &i);
-               if (error)
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-       }
-
-       return 0;
-}
-
-/*
- * Allocate an inode using the inobt-only algorithm.
- */
-STATIC int
-xfs_dialloc_ag_inobt(
-       struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       xfs_ino_t               parent,
-       xfs_ino_t               *inop)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
-       xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
-       xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
-       xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
-       struct xfs_perag        *pag;
-       struct xfs_btree_cur    *cur, *tcur;
-       struct xfs_inobt_rec_incore rec, trec;
-       xfs_ino_t               ino;
-       int                     error;
-       int                     offset;
-       int                     i, j;
-
-       pag = xfs_perag_get(mp, agno);
-
-       ASSERT(pag->pagi_init);
-       ASSERT(pag->pagi_inodeok);
-       ASSERT(pag->pagi_freecount > 0);
-
- restart_pagno:
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
-       /*
-        * If pagino is 0 (this is the root inode allocation) use newino.
-        * This must work because we've just allocated some.
-        */
-       if (!pagino)
-               pagino = be32_to_cpu(agi->agi_newino);
-
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error0;
-
-       /*
-        * If in the same AG as the parent, try to get near the parent.
-        */
-       if (pagno == agno) {
-               int             doneleft;       /* done, to the left */
-               int             doneright;      /* done, to the right */
-               int             searchdistance = 10;
-
-               error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-               error = xfs_inobt_get_rec(cur, &rec, &j);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
-
-               if (rec.ir_freecount > 0) {
-                       /*
-                        * Found a free inode in the same chunk
-                        * as the parent, done.
-                        */
-                       goto alloc_inode;
-               }
-
-               /*
-                * In the same AG as parent, but parent's chunk is full.
-                */
-
-               /* duplicate the cursor, search left & right simultaneously */
-               error = xfs_btree_dup_cursor(cur, &tcur);
-               if (error)
-                       goto error0;
-
-               /*
-                * Skip to the last blocks looked up if this is the same
-                * parent inode.
-                */
-               if (pagino != NULLAGINO &&
-                   pag->pagl_pagino == pagino &&
-                   pag->pagl_leftrec != NULLAGINO &&
-                   pag->pagl_rightrec != NULLAGINO) {
-                       error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
-                                                  &trec, &doneleft);
-                       if (error)
-                               goto error1;
-
-                       error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
-                                                  &rec, &doneright);
-                       if (error)
-                               goto error1;
-               } else {
-                       /* search left with tcur, back up 1 record */
-                       error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
-                       if (error)
-                               goto error1;
-
-                       /* search right with cur, go forward 1 record. */
-                       error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
-                       if (error)
-                               goto error1;
-               }
-
-               /*
-                * Loop until we find an inode chunk with a free inode.
-                */
-               while (!doneleft || !doneright) {
-                       int     useleft;  /* using left inode chunk this time */
-
-                       if (!--searchdistance) {
-                               /*
-                                * Not in range - save last search
-                                * location and allocate a new inode
-                                */
-                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-                               pag->pagl_leftrec = trec.ir_startino;
-                               pag->pagl_rightrec = rec.ir_startino;
-                               pag->pagl_pagino = pagino;
-                               goto newino;
-                       }
-
-                       /* figure out the closer block if both are valid. */
-                       if (!doneleft && !doneright) {
-                               useleft = pagino -
-                                (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
-                                 rec.ir_startino - pagino;
-                       } else {
-                               useleft = !doneleft;
-                       }
-
-                       /* free inodes to the left? */
-                       if (useleft && trec.ir_freecount) {
-                               rec = trec;
-                               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-                               cur = tcur;
-
-                               pag->pagl_leftrec = trec.ir_startino;
-                               pag->pagl_rightrec = rec.ir_startino;
-                               pag->pagl_pagino = pagino;
-                               goto alloc_inode;
-                       }
-
-                       /* free inodes to the right? */
-                       if (!useleft && rec.ir_freecount) {
-                               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-
-                               pag->pagl_leftrec = trec.ir_startino;
-                               pag->pagl_rightrec = rec.ir_startino;
-                               pag->pagl_pagino = pagino;
-                               goto alloc_inode;
-                       }
-
-                       /* get next record to check */
-                       if (useleft) {
-                               error = xfs_ialloc_next_rec(tcur, &trec,
-                                                                &doneleft, 1);
-                       } else {
-                               error = xfs_ialloc_next_rec(cur, &rec,
-                                                                &doneright, 0);
-                       }
-                       if (error)
-                               goto error1;
-               }
-
-               /*
-                * We've reached the end of the btree. Because we only
-                * search a small chunk of the btree on each pass, there
-                * are obviously free inodes closer to the parent inode
-                * than we are now. Restart the search.
-                */
-               pag->pagl_pagino = NULLAGINO;
-               pag->pagl_leftrec = NULLAGINO;
-               pag->pagl_rightrec = NULLAGINO;
-               xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-               goto restart_pagno;
-       }
-
-       /*
-        * In a different AG from the parent.
-        * See if the most recently allocated block has any free inodes.
-        */
-newino:
-       if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
-               error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
-                                        XFS_LOOKUP_EQ, &i);
-               if (error)
-                       goto error0;
-
-               if (i == 1) {
-                       error = xfs_inobt_get_rec(cur, &rec, &j);
-                       if (error)
-                               goto error0;
-
-                       if (j == 1 && rec.ir_freecount > 0) {
-                               /*
-                                * The last chunk allocated in the group
-                                * still has a free inode.
-                                */
-                               goto alloc_inode;
-                       }
-               }
-       }
-
-       /*
-        * None left in the most recently allocated chunk, search the whole AG
-        */
-       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
-       if (error)
-               goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-
-       for (;;) {
-               error = xfs_inobt_get_rec(cur, &rec, &i);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-               if (rec.ir_freecount > 0)
-                       break;
-               error = xfs_btree_increment(cur, 0, &i);
-               if (error)
-                       goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       }
-
-alloc_inode:
-       offset = xfs_lowbit64(rec.ir_free);
-       ASSERT(offset >= 0);
-       ASSERT(offset < XFS_INODES_PER_CHUNK);
-       ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
-                                  XFS_INODES_PER_CHUNK) == 0);
-       ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
-       rec.ir_free &= ~XFS_INOBT_MASK(offset);
-       rec.ir_freecount--;
-       error = xfs_inobt_update(cur, &rec);
-       if (error)
-               goto error0;
-       be32_add_cpu(&agi->agi_freecount, -1);
-       xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
-       pag->pagi_freecount--;
-
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error0;
-
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
-       xfs_perag_put(pag);
-       *inop = ino;
-       return 0;
-error1:
-       xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
-error0:
-       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-       xfs_perag_put(pag);
-       return error;
-}
-
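A worked example of the "closer block" test in the left/right search of xfs_dialloc_ag_inobt(), using hypothetical record values and XFS_INODES_PER_CHUNK = 64:

    /*
     *   pagino           = 500
     *   trec.ir_startino = 320   (left chunk covers inodes 320..383)
     *   rec.ir_startino  = 576   (right chunk covers inodes 576..639)
     *
     *   left distance  = pagino - (trec.ir_startino + 64 - 1) = 500 - 383 = 117
     *   right distance = rec.ir_startino - pagino             = 576 - 500 =  76
     *
     * 117 < 76 is false, so useleft is 0 and the search moves right, towards
     * the closer chunk.
     */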
-/*
- * Use the free inode btree to allocate an inode based on distance from the
- * parent. Note that the provided cursor may be deleted and replaced.
- */
-STATIC int
-xfs_dialloc_ag_finobt_near(
-       xfs_agino_t                     pagino,
-       struct xfs_btree_cur            **ocur,
-       struct xfs_inobt_rec_incore     *rec)
-{
-       struct xfs_btree_cur            *lcur = *ocur;  /* left search cursor */
-       struct xfs_btree_cur            *rcur;  /* right search cursor */
-       struct xfs_inobt_rec_incore     rrec;
-       int                             error;
-       int                             i, j;
-
-       error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
-       if (error)
-               return error;
-
-       if (i == 1) {
-               error = xfs_inobt_get_rec(lcur, rec, &i);
-               if (error)
-                       return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
-
-               /*
-                * See if we've landed in the parent inode record. The finobt
-                * only tracks chunks with at least one free inode, so record
-                * existence is enough.
-                */
-               if (pagino >= rec->ir_startino &&
-                   pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
-                       return 0;
-       }
-
-       error = xfs_btree_dup_cursor(lcur, &rcur);
-       if (error)
-               return error;
-
-       error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
-       if (error)
-               goto error_rcur;
-       if (j == 1) {
-               error = xfs_inobt_get_rec(rcur, &rrec, &j);
-               if (error)
-                       goto error_rcur;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
-       }
-
-       XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
-       if (i == 1 && j == 1) {
-               /*
-                * Both the left and right records are valid. Choose the closer
-                * inode chunk to the target.
-                */
-               if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
-                   (rrec.ir_startino - pagino)) {
-                       *rec = rrec;
-                       xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
-                       *ocur = rcur;
-               } else {
-                       xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
-               }
-       } else if (j == 1) {
-               /* only the right record is valid */
-               *rec = rrec;
-               xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
-               *ocur = rcur;
-       } else if (i == 1) {
-               /* only the left record is valid */
-               xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
-       }
-
-       return 0;
-
-error_rcur:
-       xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Use the free inode btree to find a free inode based on a newino hint. If
- * there is no hint (agi_newino is NULLAGINO), find the first free inode in
- * the AG.
- */
-STATIC int
-xfs_dialloc_ag_finobt_newino(
-       struct xfs_agi                  *agi,
-       struct xfs_btree_cur            *cur,
-       struct xfs_inobt_rec_incore     *rec)
-{
-       int error;
-       int i;
-
-       if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
-               error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
-                                        XFS_LOOKUP_EQ, &i);
-               if (error)
-                       return error;
-               if (i == 1) {
-                       error = xfs_inobt_get_rec(cur, rec, &i);
-                       if (error)
-                               return error;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
-
-                       return 0;
-               }
-       }
-
-       /*
-        * Find the first inode available in the AG.
-        */
-       error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
-
-       error = xfs_inobt_get_rec(cur, rec, &i);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
-
-       return 0;
-}
-
-/*
- * Update the inobt based on a modification made to the finobt. Also ensure that
- * the records from both trees are equivalent post-modification.
- */
-STATIC int
-xfs_dialloc_ag_update_inobt(
-       struct xfs_btree_cur            *cur,   /* inobt cursor */
-       struct xfs_inobt_rec_incore     *frec,  /* finobt record */
-       int                             offset) /* inode offset */
-{
-       struct xfs_inobt_rec_incore     rec;
-       int                             error;
-       int                             i;
-
-       error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
-
-       error = xfs_inobt_get_rec(cur, &rec, &i);
-       if (error)
-               return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
-       ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
-                                  XFS_INODES_PER_CHUNK) == 0);
-
-       rec.ir_free &= ~XFS_INOBT_MASK(offset);
-       rec.ir_freecount--;
-
-       XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
-                                 (rec.ir_freecount == frec->ir_freecount));
-
-       error = xfs_inobt_update(cur, &rec);
-       if (error)
-               return error;
-
-       return 0;
-}
-
-/*
- * Allocate an inode using the free inode btree, if available. Otherwise, fall
- * back to the inobt search algorithm.
- *
- * The caller selected an AG for us, and made sure that free inodes are
- * available.
- */
-STATIC int
-xfs_dialloc_ag(
-       struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       xfs_ino_t               parent,
-       xfs_ino_t               *inop)
-{
-       struct xfs_mount                *mp = tp->t_mountp;
-       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
-       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
-       xfs_agnumber_t                  pagno = XFS_INO_TO_AGNO(mp, parent);
-       xfs_agino_t                     pagino = XFS_INO_TO_AGINO(mp, parent);
-       struct xfs_perag                *pag;
-       struct xfs_btree_cur            *cur;   /* finobt cursor */
-       struct xfs_btree_cur            *icur;  /* inobt cursor */
-       struct xfs_inobt_rec_incore     rec;
-       xfs_ino_t                       ino;
-       int                             error;
-       int                             offset;
-       int                             i;
-
-       if (!xfs_sb_version_hasfinobt(&mp->m_sb))
-               return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);
-
-       pag = xfs_perag_get(mp, agno);
-
-       /*
-        * If pagino is 0 (this is the root inode allocation) use newino.
-        * This must work because we've just allocated some.
-        */
-       if (!pagino)
-               pagino = be32_to_cpu(agi->agi_newino);
-
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
-
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error_cur;
-
-       /*
-        * The search algorithm depends on whether we're in the same AG as the
-        * parent. If so, find the closest available inode to the parent. If
-        * not, consider the agi hint or find the first free inode in the AG.
-        */
-       if (agno == pagno)
-               error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
-       else
-               error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
-       if (error)
-               goto error_cur;
-
-       offset = xfs_lowbit64(rec.ir_free);
-       ASSERT(offset >= 0);
-       ASSERT(offset < XFS_INODES_PER_CHUNK);
-       ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
-                                  XFS_INODES_PER_CHUNK) == 0);
-       ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
-
-       /*
-        * Modify or remove the finobt record.
-        */
-       rec.ir_free &= ~XFS_INOBT_MASK(offset);
-       rec.ir_freecount--;
-       if (rec.ir_freecount)
-               error = xfs_inobt_update(cur, &rec);
-       else
-               error = xfs_btree_delete(cur, &i);
-       if (error)
-               goto error_cur;
-
-       /*
-        * The finobt has now been updated appropriately. We haven't updated the
-        * agi and superblock yet, so we can create an inobt cursor and validate
-        * the original freecount. If all is well, make the equivalent update to
-        * the inobt using the finobt record and offset information.
-        */
-       icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
-
-       error = xfs_check_agi_freecount(icur, agi);
-       if (error)
-               goto error_icur;
-
-       error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
-       if (error)
-               goto error_icur;
-
-       /*
-        * Both trees have now been updated. We must update the perag and
-        * superblock before we can check the freecount for each btree.
-        */
-       be32_add_cpu(&agi->agi_freecount, -1);
-       xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
-       pag->pagi_freecount--;
-
-       xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
-
-       error = xfs_check_agi_freecount(icur, agi);
-       if (error)
-               goto error_icur;
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error_icur;
-
-       xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       xfs_perag_put(pag);
-       *inop = ino;
-       return 0;
-
-error_icur:
-       xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
-error_cur:
-       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-       xfs_perag_put(pag);
-       return error;
-}
-
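To make the free-mask manipulation in xfs_dialloc_ag() (and in the inobt-only path above) concrete, a small worked example with hypothetical values:

    /*
     *   rec.ir_startino = 128
     *   rec.ir_free     = 0x0000000000000030   (inodes 4 and 5 of the chunk free)
     *
     *   offset = xfs_lowbit64(rec.ir_free);            -> 4
     *   ino    = XFS_AGINO_TO_INO(mp, agno, 128 + 4);  -> agino 132 in this AG
     *   rec.ir_free &= ~XFS_INOBT_MASK(4);             -> 0x20, one inode left
     *   rec.ir_freecount--;                            -> 1
     */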
-/*
- * Allocate an inode on disk.
- *
- * Mode is used to tell whether the new inode will need space, and whether it
- * is a directory.
- *
- * This function is designed to be called twice if it has to do an allocation
- * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
- * If an inode is available without having to perform an allocation, an inode
- * number is returned.  In this case, *IO_agbp is set to NULL.  If an allocation
- * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
- * The caller should then commit the current transaction, allocate a
- * new transaction, and call xfs_dialloc() again, passing in the previous value
- * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
- * buffer is locked across the two calls, the second call is guaranteed to have
- * a free inode available.
- *
- * Once we successfully pick an inode its number is returned and the on-disk
- * data structures are updated.  The inode itself is not read in, since doing so
- * would break ordering constraints with xfs_reclaim.
- */
-int
-xfs_dialloc(
-       struct xfs_trans        *tp,
-       xfs_ino_t               parent,
-       umode_t                 mode,
-       int                     okalloc,
-       struct xfs_buf          **IO_agbp,
-       xfs_ino_t               *inop)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-       struct xfs_buf          *agbp;
-       xfs_agnumber_t          agno;
-       int                     error;
-       int                     ialloced;
-       int                     noroom = 0;
-       xfs_agnumber_t          start_agno;
-       struct xfs_perag        *pag;
-
-       if (*IO_agbp) {
-               /*
-                * If the caller passes in a pointer to the AGI buffer,
-                * continue where we left off before.  In this case, we
-                * know that the allocation group has free inodes.
-                */
-               agbp = *IO_agbp;
-               goto out_alloc;
-       }
-
-       /*
-        * We do not have an agbp, so select an initial allocation
-        * group for inode allocation.
-        */
-       start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
-       if (start_agno == NULLAGNUMBER) {
-               *inop = NULLFSINO;
-               return 0;
-       }
-
-       /*
-        * If we have already hit the ceiling of inode blocks then clear
-        * okalloc so we scan all available agi structures for a free
-        * inode.
-        */
-       if (mp->m_maxicount &&
-           mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
-               noroom = 1;
-               okalloc = 0;
-       }
-
-       /*
-        * Loop until we find an allocation group that either has free inodes
-        * or in which we can allocate some inodes.  Iterate through the
-        * allocation groups upward, wrapping at the end.
-        */
-       agno = start_agno;
-       for (;;) {
-               pag = xfs_perag_get(mp, agno);
-               if (!pag->pagi_inodeok) {
-                       xfs_ialloc_next_ag(mp);
-                       goto nextag;
-               }
-
-               if (!pag->pagi_init) {
-                       error = xfs_ialloc_pagi_init(mp, tp, agno);
-                       if (error)
-                               goto out_error;
-               }
-
-               /*
-                * Do a first racy fast path check if this AG is usable.
-                */
-               if (!pag->pagi_freecount && !okalloc)
-                       goto nextag;
-
-               /*
-                * Then read in the AGI buffer and recheck with the AGI buffer
-                * lock held.
-                */
-               error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-               if (error)
-                       goto out_error;
-
-               if (pag->pagi_freecount) {
-                       xfs_perag_put(pag);
-                       goto out_alloc;
-               }
-
-               if (!okalloc)
-                       goto nextag_relse_buffer;
-
-               error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
-               if (error) {
-                       xfs_trans_brelse(tp, agbp);
-
-                       if (error != ENOSPC)
-                               goto out_error;
-
-                       xfs_perag_put(pag);
-                       *inop = NULLFSINO;
-                       return 0;
-               }
-
-               if (ialloced) {
-                       /*
-                        * We successfully allocated some inodes, return
-                        * the current context to the caller so that it
-                        * can commit the current transaction and call
-                        * us again where we left off.
-                        */
-                       ASSERT(pag->pagi_freecount > 0);
-                       xfs_perag_put(pag);
-
-                       *IO_agbp = agbp;
-                       *inop = NULLFSINO;
-                       return 0;
-               }
-
-nextag_relse_buffer:
-               xfs_trans_brelse(tp, agbp);
-nextag:
-               xfs_perag_put(pag);
-               if (++agno == mp->m_sb.sb_agcount)
-                       agno = 0;
-               if (agno == start_agno) {
-                       *inop = NULLFSINO;
-                       return noroom ? ENOSPC : 0;
-               }
-       }
-
-out_alloc:
-       *IO_agbp = NULL;
-       return xfs_dialloc_ag(tp, agbp, parent, inop);
-out_error:
-       xfs_perag_put(pag);
-       return error;
-}
-
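A hypothetical caller illustrating the two-phase calling convention described in the comment above xfs_dialloc(). Only xfs_dialloc() is a real interface here; the transaction helpers are invented placeholders for whatever transaction management the real caller does:

    static int example_alloc_inode(struct xfs_mount *mp, xfs_ino_t parent,
                                   umode_t mode, xfs_ino_t *inop)
    {
            struct xfs_trans        *tp;
            struct xfs_buf          *agibp = NULL; /* the IO_agbp context */
            int                     error;

            tp = example_start_ichange_trans(mp);           /* hypothetical */

            /* First call: *IO_agbp must be NULL. */
            error = xfs_dialloc(tp, parent, mode, 1, &agibp, inop);
            if (error)
                    return error;

            if (agibp) {
                    /*
                     * xfs_dialloc() allocated a new inode chunk and handed back
                     * the locked AGI buffer. Commit this transaction, start a
                     * new one, and call again with the same AGI buffer; the
                     * second call is guaranteed to find a free inode.
                     */
                    tp = example_roll_trans(tp);            /* hypothetical */
                    error = xfs_dialloc(tp, parent, mode, 1, &agibp, inop);
                    if (error)
                            return error;
            }

            /* *inop now holds the new inode number, or NULLFSINO. */
            return example_commit_trans(tp);                /* hypothetical */
    }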
-STATIC int
-xfs_difree_inobt(
-       struct xfs_mount                *mp,
-       struct xfs_trans                *tp,
-       struct xfs_buf                  *agbp,
-       xfs_agino_t                     agino,
-       struct xfs_bmap_free            *flist,
-       int                             *deleted,
-       xfs_ino_t                       *first_ino,
-       struct xfs_inobt_rec_incore     *orec)
-{
-       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
-       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
-       struct xfs_perag                *pag;
-       struct xfs_btree_cur            *cur;
-       struct xfs_inobt_rec_incore     rec;
-       int                             ilen;
-       int                             error;
-       int                             i;
-       int                             off;
-
-       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-       ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
-
-       /*
-        * Initialize the cursor.
-        */
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
-
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error0;
-
-       /*
-        * Look for the entry describing this inode.
-        */
-       if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
-               xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
-                       __func__, error);
-               goto error0;
-       }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       error = xfs_inobt_get_rec(cur, &rec, &i);
-       if (error) {
-               xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
-                       __func__, error);
-               goto error0;
-       }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-       /*
-        * Get the offset in the inode chunk.
-        */
-       off = agino - rec.ir_startino;
-       ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
-       ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
-       /*
-        * Mark the inode free & increment the count.
-        */
-       rec.ir_free |= XFS_INOBT_MASK(off);
-       rec.ir_freecount++;
-
-       /*
-        * When an inode cluster is free, it becomes eligible for removal
-        */
-       if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
-           (rec.ir_freecount == mp->m_ialloc_inos)) {
-               *deleted = 1;
-               *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
-
-               /*
-                * Remove the inode cluster from the AGI B+Tree, adjust the
-                * AGI and Superblock inode counts, and mark the disk space
-                * to be freed when the transaction is committed.
-                */
-               ilen = mp->m_ialloc_inos;
-               be32_add_cpu(&agi->agi_count, -ilen);
-               be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
-               xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
-               pag = xfs_perag_get(mp, agno);
-               pag->pagi_freecount -= ilen - 1;
-               xfs_perag_put(pag);
-               xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
-               xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
-
-               if ((error = xfs_btree_delete(cur, &i))) {
-                       xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
-                               __func__, error);
-                       goto error0;
-               }
-
-               xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
-                                 XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
-                                 mp->m_ialloc_blks, flist, mp);
-       } else {
-               *deleted = 0;
-
-               error = xfs_inobt_update(cur, &rec);
-               if (error) {
-                       xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
-                               __func__, error);
-                       goto error0;
-               }
-
-               /*
-                * Change the inode free counts and log the ag/sb changes.
-                */
-               be32_add_cpu(&agi->agi_freecount, 1);
-               xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
-               pag = xfs_perag_get(mp, agno);
-               pag->pagi_freecount++;
-               xfs_perag_put(pag);
-               xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
-       }
-
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error0;
-
-       *orec = rec;
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       return 0;
-
-error0:
-       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Free an inode in the free inode btree.
- */
-STATIC int
-xfs_difree_finobt(
-       struct xfs_mount                *mp,
-       struct xfs_trans                *tp,
-       struct xfs_buf                  *agbp,
-       xfs_agino_t                     agino,
-       struct xfs_inobt_rec_incore     *ibtrec) /* inobt record */
-{
-       struct xfs_agi                  *agi = XFS_BUF_TO_AGI(agbp);
-       xfs_agnumber_t                  agno = be32_to_cpu(agi->agi_seqno);
-       struct xfs_btree_cur            *cur;
-       struct xfs_inobt_rec_incore     rec;
-       int                             offset = agino - ibtrec->ir_startino;
-       int                             error;
-       int                             i;
-
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);
-
-       error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
-       if (error)
-               goto error;
-       if (i == 0) {
-               /*
-                * If the record does not exist in the finobt, we must have just
-                * freed an inode in a previously fully allocated chunk. If not,
-                * something is out of sync.
-                */
-               XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
-
-               error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
-                                            ibtrec->ir_free, &i);
-               if (error)
-                       goto error;
-               ASSERT(i == 1);
-
-               goto out;
-       }
-
-       /*
-        * Read and update the existing record. We could just copy the ibtrec
-        * across here, but that would defeat the purpose of having redundant
-        * metadata. By making the modifications independently, we can catch
-        * corruptions that we wouldn't see if we just copied from one record
-        * to another.
-        */
-       error = xfs_inobt_get_rec(cur, &rec, &i);
-       if (error)
-               goto error;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error);
-
-       rec.ir_free |= XFS_INOBT_MASK(offset);
-       rec.ir_freecount++;
-
-       XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
-                               (rec.ir_freecount == ibtrec->ir_freecount),
-                               error);
-
-       /*
-        * The content of inobt records should always match between the inobt
-        * and finobt. The lifecycle of records in the finobt is different from
-        * the inobt in that the finobt only tracks records with at least one
-        * free inode. Hence, if all of the inodes are free and we aren't
-        * keeping inode chunks permanently on disk, remove the record.
-        * Otherwise, update the record with the new information.
-        */
-       if (rec.ir_freecount == mp->m_ialloc_inos &&
-           !(mp->m_flags & XFS_MOUNT_IKEEP)) {
-               error = xfs_btree_delete(cur, &i);
-               if (error)
-                       goto error;
-               ASSERT(i == 1);
-       } else {
-               error = xfs_inobt_update(cur, &rec);
-               if (error)
-                       goto error;
-       }
-
-out:
-       error = xfs_check_agi_freecount(cur, agi);
-       if (error)
-               goto error;
-
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       return 0;
-
-error:
-       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
-       return error;
-}
-
-/*
- * Free a disk inode.  Carefully avoids touching the incore inode; all
- * incore manipulations are the caller's responsibility.
- * The on-disk inode is not changed by this operation, only the
- * btree (free inode mask) is changed.
- */
-int
-xfs_difree(
-       struct xfs_trans        *tp,            /* transaction pointer */
-       xfs_ino_t               inode,          /* inode to be freed */
-       struct xfs_bmap_free    *flist,         /* extents to free */
-       int                     *deleted,/* set if inode cluster was deleted */
-       xfs_ino_t               *first_ino)/* first inode in deleted cluster */
-{
-       /* REFERENCED */
-       xfs_agblock_t           agbno;  /* block number containing inode */
-       struct xfs_buf          *agbp;  /* buffer for allocation group header */
-       xfs_agino_t             agino;  /* allocation group inode number */
-       xfs_agnumber_t          agno;   /* allocation group number */
-       int                     error;  /* error return value */
-       struct xfs_mount        *mp;    /* mount structure for filesystem */
-       struct xfs_inobt_rec_incore rec;/* btree record */
-
-       mp = tp->t_mountp;
-
-       /*
-        * Break up inode number into its components.
-        */
-       agno = XFS_INO_TO_AGNO(mp, inode);
-       if (agno >= mp->m_sb.sb_agcount)  {
-               xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
-                       __func__, agno, mp->m_sb.sb_agcount);
-               ASSERT(0);
-               return EINVAL;
-       }
-       agino = XFS_INO_TO_AGINO(mp, inode);
-       if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
-               xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
-                       __func__, (unsigned long long)inode,
-                       (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
-               ASSERT(0);
-               return EINVAL;
-       }
-       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
-       if (agbno >= mp->m_sb.sb_agblocks)  {
-               xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
-                       __func__, agbno, mp->m_sb.sb_agblocks);
-               ASSERT(0);
-               return EINVAL;
-       }
-       /*
-        * Get the allocation group header.
-        */
-       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-       if (error) {
-               xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
-                       __func__, error);
-               return error;
-       }
-
-       /*
-        * Fix up the inode allocation btree.
-        */
-       error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
-                                &rec);
-       if (error)
-               goto error0;
-
-       /*
-        * Fix up the free inode btree.
-        */
-       if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-               error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
-               if (error)
-                       goto error0;
-       }
-
-       return 0;
-
-error0:
-       return error;
-}
-
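A hypothetical caller of xfs_difree() showing how its outputs are meant to be consumed. The transaction and the extent-free list are assumed to have been set up by the caller; only xfs_difree() itself is a real interface here:

    static int example_free_disk_inode(struct xfs_trans *tp, xfs_ino_t ino,
                                       struct xfs_bmap_free *flist)
    {
            xfs_ino_t       first_ino;
            int             deleted;
            int             error;

            error = xfs_difree(tp, ino, flist, &deleted, &first_ino);
            if (error)
                    return error;

            /*
             * If the whole inode chunk became free, @deleted is set, @first_ino
             * names the first inode of the removed chunk, and the chunk's blocks
             * have been queued on @flist to be freed when the transaction
             * commits. The incore inode is untouched and remains the caller's
             * responsibility.
             */
            return 0;
    }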
-STATIC int
-xfs_imap_lookup(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       xfs_agnumber_t          agno,
-       xfs_agino_t             agino,
-       xfs_agblock_t           agbno,
-       xfs_agblock_t           *chunk_agbno,
-       xfs_agblock_t           *offset_agbno,
-       int                     flags)
-{
-       struct xfs_inobt_rec_incore rec;
-       struct xfs_btree_cur    *cur;
-       struct xfs_buf          *agbp;
-       int                     error;
-       int                     i;
-
-       error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
-       if (error) {
-               xfs_alert(mp,
-                       "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
-                       __func__, error, agno);
-               return error;
-       }
-
-       /*
-        * Lookup the inode record for the given agino. If the record cannot be
-        * found, then it's an invalid inode number and we should abort. Once
-        * we have a record, we need to ensure it contains the inode number
-        * we are looking up.
-        */
-       cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
-       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
-       if (!error) {
-               if (i)
-                       error = xfs_inobt_get_rec(cur, &rec, &i);
-               if (!error && i == 0)
-                       error = EINVAL;
-       }
-
-       xfs_trans_brelse(tp, agbp);
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
-       if (error)
-               return error;
-
-       /* check that the returned record contains the required inode */
-       if (rec.ir_startino > agino ||
-           rec.ir_startino + mp->m_ialloc_inos <= agino)
-               return EINVAL;
-
-       /* for untrusted inodes check it is allocated first */
-       if ((flags & XFS_IGET_UNTRUSTED) &&
-           (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
-               return EINVAL;
-
-       *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
-       *offset_agbno = agbno - *chunk_agbno;
-       return 0;
-}
-
-/*
- * Return the location of the inode in imap, for mapping it into a buffer.
- */
-int
-xfs_imap(
-       xfs_mount_t      *mp,   /* file system mount structure */
-       xfs_trans_t      *tp,   /* transaction pointer */
-       xfs_ino_t       ino,    /* inode to locate */
-       struct xfs_imap *imap,  /* location map structure */
-       uint            flags)  /* flags for inode btree lookup */
-{
-       xfs_agblock_t   agbno;  /* block number of inode in the alloc group */
-       xfs_agino_t     agino;  /* inode number within alloc group */
-       xfs_agnumber_t  agno;   /* allocation group number */
-       int             blks_per_cluster; /* num blocks per inode cluster */
-       xfs_agblock_t   chunk_agbno;    /* first block in inode chunk */
-       xfs_agblock_t   cluster_agbno;  /* first block in inode cluster */
-       int             error;  /* error code */
-       int             offset; /* index of inode in its buffer */
-       xfs_agblock_t   offset_agbno;   /* blks from chunk start to inode */
-
-       ASSERT(ino != NULLFSINO);
-
-       /*
-        * Split up the inode number into its parts.
-        */
-       agno = XFS_INO_TO_AGNO(mp, ino);
-       agino = XFS_INO_TO_AGINO(mp, ino);
-       agbno = XFS_AGINO_TO_AGBNO(mp, agino);
-       if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
-           ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
-#ifdef DEBUG
-               /*
-                * Don't output diagnostic information for untrusted inodes
-                * as they can be invalid without implying corruption.
-                */
-               if (flags & XFS_IGET_UNTRUSTED)
-                       return EINVAL;
-               if (agno >= mp->m_sb.sb_agcount) {
-                       xfs_alert(mp,
-                               "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
-                               __func__, agno, mp->m_sb.sb_agcount);
-               }
-               if (agbno >= mp->m_sb.sb_agblocks) {
-                       xfs_alert(mp,
-               "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
-                               __func__, (unsigned long long)agbno,
-                               (unsigned long)mp->m_sb.sb_agblocks);
-               }
-               if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
-                       xfs_alert(mp,
-               "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
-                               __func__, ino,
-                               XFS_AGINO_TO_INO(mp, agno, agino));
-               }
-               xfs_stack_trace();
-#endif /* DEBUG */
-               return EINVAL;
-       }
-
-       blks_per_cluster = xfs_icluster_size_fsb(mp);
-
-       /*
-        * For bulkstat and handle lookups, we have an untrusted inode number
-        * that we have to verify is valid. We cannot do this just by reading
-        * the inode buffer as it may have been unlinked and removed leaving
-        * inodes in stale state on disk. Hence we have to do a btree lookup
-        * in all cases where an untrusted inode number is passed.
-        */
-       if (flags & XFS_IGET_UNTRUSTED) {
-               error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
-                                       &chunk_agbno, &offset_agbno, flags);
-               if (error)
-                       return error;
-               goto out_map;
-       }
-
-       /*
-        * If the inode cluster size is the same as the blocksize or
-        * smaller, we get to the buffer by simple arithmetic.
-        */
-       if (blks_per_cluster == 1) {
-               offset = XFS_INO_TO_OFFSET(mp, ino);
-               ASSERT(offset < mp->m_sb.sb_inopblock);
-
-               imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
-               imap->im_len = XFS_FSB_TO_BB(mp, 1);
-               imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
-               return 0;
-       }
-
-       /*
-        * If the inode chunks are aligned then use simple maths to
-        * find the location. Otherwise we have to do a btree
-        * lookup to find the location.
-        */
-       if (mp->m_inoalign_mask) {
-               offset_agbno = agbno & mp->m_inoalign_mask;
-               chunk_agbno = agbno - offset_agbno;
-       } else {
-               error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
-                                       &chunk_agbno, &offset_agbno, flags);
-               if (error)
-                       return error;
-       }
-
-out_map:
-       ASSERT(agbno >= chunk_agbno);
-       cluster_agbno = chunk_agbno +
-               ((offset_agbno / blks_per_cluster) * blks_per_cluster);
-       offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
-               XFS_INO_TO_OFFSET(mp, ino);
-
-       imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
-       imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
-       imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
-
-       /*
-        * If the inode number maps to a block outside the bounds
-        * of the file system then return an error rather than calling
-        * read_buf and panicking when we get an error from the
-        * driver.
-        */
-       if ((imap->im_blkno + imap->im_len) >
-           XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-               xfs_alert(mp,
-       "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
-                       __func__, (unsigned long long) imap->im_blkno,
-                       (unsigned long long) imap->im_len,
-                       XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-               return EINVAL;
-       }
-       return 0;
-}
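
The out_map arithmetic is easier to follow with concrete numbers. A minimal sketch, assuming a 4-block inode cluster and 16 inodes per block (both values, and the block numbers, are invented for the example):

#include <stdio.h>

int main(void)
{
        int blks_per_cluster = 4;       /* xfs_icluster_size_fsb() result, assumed */
        int inopblock = 16;             /* inodes per block, assumed */
        int chunk_agbno = 128;          /* first block of the inode chunk */
        int agbno = 134;                /* block holding the wanted inode */
        int ino_offset_in_block = 5;    /* XFS_INO_TO_OFFSET() result, assumed */

        int offset_agbno = agbno - chunk_agbno;                         /* 6 */
        int cluster_agbno = chunk_agbno +
                (offset_agbno / blks_per_cluster) * blks_per_cluster;   /* 132 */
        int offset = (agbno - cluster_agbno) * inopblock +
                ino_offset_in_block;                                    /* 37 */

        printf("cluster starts at agbno %d, inode is index %d within it\n",
               cluster_agbno, offset);
        return 0;
}
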
-
-/*
- * Compute and fill in the value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
-       xfs_mount_t     *mp)            /* file system mount structure */
-{
-       int             level;
-       uint            maxblocks;
-       uint            maxleafents;
-       int             minleafrecs;
-       int             minnoderecs;
-
-       maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
-               XFS_INODES_PER_CHUNK_LOG;
-       minleafrecs = mp->m_alloc_mnr[0];
-       minnoderecs = mp->m_alloc_mnr[1];
-       maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
-       for (level = 1; maxblocks > 1; level++)
-               maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
-       mp->m_in_maxlevels = level;
-}
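
The loop above repeatedly divides the worst-case record count by the minimum per-block fanout until a single block suffices. A standalone version with illustrative numbers (2^17 leaf entries and a fanout of 16, not real geometry):

#include <stdio.h>

int main(void)
{
        unsigned maxleafents = 1u << 17;                /* worst-case leaf entries, assumed */
        unsigned minleafrecs = 16, minnoderecs = 16;    /* minimum fanout, assumed */
        unsigned maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
        int level;

        for (level = 1; maxblocks > 1; level++)
                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;

        printf("max btree height = %d\n", level);       /* 5 for these numbers */
        return 0;
}
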
-
-/*
- * Log specified fields for the ag hdr (inode section). The growth of the agi
- * structure over time requires that we interpret the buffer as two logical
- * regions delineated by the end of the unlinked list. This is due to the size
- * of the hash table and its location in the middle of the agi.
- *
- * For example, a request to log a field before agi_unlinked and a field after
- * agi_unlinked could cause us to log the entire hash table and use an excessive
- * amount of log space. To avoid this behavior, log the region up through
- * agi_unlinked in one call and the region after agi_unlinked through the end of
- * the structure in another.
- */
-void
-xfs_ialloc_log_agi(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *bp,            /* allocation group header buffer */
-       int             fields)         /* bitmask of fields to log */
-{
-       int                     first;          /* first byte number */
-       int                     last;           /* last byte number */
-       static const short      offsets[] = {   /* field starting offsets */
-                                       /* keep in sync with bit definitions */
-               offsetof(xfs_agi_t, agi_magicnum),
-               offsetof(xfs_agi_t, agi_versionnum),
-               offsetof(xfs_agi_t, agi_seqno),
-               offsetof(xfs_agi_t, agi_length),
-               offsetof(xfs_agi_t, agi_count),
-               offsetof(xfs_agi_t, agi_root),
-               offsetof(xfs_agi_t, agi_level),
-               offsetof(xfs_agi_t, agi_freecount),
-               offsetof(xfs_agi_t, agi_newino),
-               offsetof(xfs_agi_t, agi_dirino),
-               offsetof(xfs_agi_t, agi_unlinked),
-               offsetof(xfs_agi_t, agi_free_root),
-               offsetof(xfs_agi_t, agi_free_level),
-               sizeof(xfs_agi_t)
-       };
-#ifdef DEBUG
-       xfs_agi_t               *agi;   /* allocation group header */
-
-       agi = XFS_BUF_TO_AGI(bp);
-       ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
-#endif
-
-       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
-
-       /*
-        * Compute byte offsets for the first and last fields in the first
-        * region and log the agi buffer. This only logs up through
-        * agi_unlinked.
-        */
-       if (fields & XFS_AGI_ALL_BITS_R1) {
-               xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
-                                 &first, &last);
-               xfs_trans_log_buf(tp, bp, first, last);
-       }
-
-       /*
-        * Mask off the bits in the first region and calculate the first and
-        * last field offsets for any bits in the second region.
-        */
-       fields &= ~XFS_AGI_ALL_BITS_R1;
-       if (fields) {
-               xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
-                                 &first, &last);
-               xfs_trans_log_buf(tp, bp, first, last);
-       }
-}
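
To make the two-region split concrete: with an offsets table like the one above, logging one field from each side of the unlinked hash table as a single byte range would drag the whole table into the log. A toy model follows; the field layout, sizes and log_range() helper are invented, and only the shape of the first/last computation is meant to resemble xfs_btree_offsets().

#include <stdio.h>

/* fields 0-3: 4-byte fields, field 4: a 64-slot hash table, field 5: 4 bytes */
static const int offsets[] = { 0, 4, 8, 12, 16, 16 + 64 * 4, 16 + 64 * 4 + 4 };

static void log_range(int fields, int lo_field, int hi_field)
{
        int first = -1, last = -1, i;

        for (i = lo_field; i < hi_field; i++) {
                if (!(fields & (1 << i)))
                        continue;
                if (first < 0)
                        first = offsets[i];
                last = offsets[i + 1];  /* a field ends where the next begins */
        }
        if (first >= 0)
                printf("log bytes [%d, %d)\n", first, last);
}

int main(void)
{
        int fields = (1 << 1) | (1 << 5);       /* one field each side of the table */

        log_range(fields, 0, 6);        /* one region: [4, 276), drags in the table */
        log_range(fields, 0, 5);        /* region 1:   [4, 8)                       */
        log_range(fields, 5, 6);        /* region 2:   [272, 276)                   */
        return 0;
}
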
-
-#ifdef DEBUG
-STATIC void
-xfs_check_agi_unlinked(
-       struct xfs_agi          *agi)
-{
-       int                     i;
-
-       for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
-               ASSERT(agi->agi_unlinked[i]);
-}
-#else
-#define xfs_check_agi_unlinked(agi)
-#endif
-
-static bool
-xfs_agi_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-       /*
-        * Validate the magic number of the agi block.
-        */
-       if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
-               return false;
-       if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
-               return false;
-
-       /*
-        * during growfs operations, the perag is not fully initialised,
-        * so we can't use it for any useful checking. growfs ensures we can't
-        * use it by using uncached buffers that don't have the perag attached
-        * so we can detect and avoid this problem.
-        */
-       if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
-               return false;
-
-       xfs_check_agi_unlinked(agi);
-       return true;
-}
-
-static void
-xfs_agi_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
-                               XFS_ERRTAG_IALLOC_READ_AGI,
-                               XFS_RANDOM_IALLOC_READ_AGI))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_agi_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       if (!xfs_agi_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (bip)
-               XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_agi_buf_ops = {
-       .verify_read = xfs_agi_read_verify,
-       .verify_write = xfs_agi_write_verify,
-};
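
The ops table above pairs a read-side verifier (checksum first, then structure) with a write-side verifier (structure first, then a fresh checksum before the buffer goes to disk). A toy rendition of that pairing, with a plain byte array and a trivial additive checksum standing in for xfs_buf and CRC32c:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_buf { uint8_t data[64]; int error; };

struct toy_buf_ops {
        void (*verify_read)(struct toy_buf *);
        void (*verify_write)(struct toy_buf *);
};

static uint8_t toy_cksum(const uint8_t *p, size_t len)
{
        uint8_t sum = 0;

        while (len--)
                sum += *p++;
        return sum;
}

static void toy_read_verify(struct toy_buf *bp)
{
        if (toy_cksum(bp->data, 63) != bp->data[63])    /* checksum first */
                bp->error = 1;                          /* EFSBADCRC stand-in */
        else if (bp->data[0] != 0x58)                   /* then structural checks */
                bp->error = 2;                          /* EFSCORRUPTED stand-in */
}

static void toy_write_verify(struct toy_buf *bp)
{
        if (bp->data[0] != 0x58) {                      /* refuse to write garbage */
                bp->error = 2;
                return;
        }
        bp->data[63] = toy_cksum(bp->data, 63);         /* re-checksum before I/O */
}

static const struct toy_buf_ops toy_ops = {
        .verify_read  = toy_read_verify,
        .verify_write = toy_write_verify,
};

int main(void)
{
        struct toy_buf b;

        memset(&b, 0, sizeof(b));
        b.data[0] = 0x58;
        toy_ops.verify_write(&b);       /* as if the buffer were about to hit disk */
        toy_ops.verify_read(&b);        /* as if it had just been read back */
        printf("error = %d\n", b.error);        /* 0 means both verifiers passed */
        return 0;
}
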
-
-/*
- * Read in the allocation group header (inode allocation section)
- */
-int
-xfs_read_agi(
-       struct xfs_mount        *mp,    /* file system mount structure */
-       struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_agnumber_t          agno,   /* allocation group number */
-       struct xfs_buf          **bpp)  /* allocation group hdr buf */
-{
-       int                     error;
-
-       trace_xfs_read_agi(mp, agno);
-
-       ASSERT(agno != NULLAGNUMBER);
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                       XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-                       XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
-       if (error)
-               return error;
-
-       xfs_buf_set_ref(*bpp, XFS_AGI_REF);
-       return 0;
-}
-
-int
-xfs_ialloc_read_agi(
-       struct xfs_mount        *mp,    /* file system mount structure */
-       struct xfs_trans        *tp,    /* transaction pointer */
-       xfs_agnumber_t          agno,   /* allocation group number */
-       struct xfs_buf          **bpp)  /* allocation group hdr buf */
-{
-       struct xfs_agi          *agi;   /* allocation group header */
-       struct xfs_perag        *pag;   /* per allocation group data */
-       int                     error;
-
-       trace_xfs_ialloc_read_agi(mp, agno);
-
-       error = xfs_read_agi(mp, tp, agno, bpp);
-       if (error)
-               return error;
-
-       agi = XFS_BUF_TO_AGI(*bpp);
-       pag = xfs_perag_get(mp, agno);
-       if (!pag->pagi_init) {
-               pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
-               pag->pagi_count = be32_to_cpu(agi->agi_count);
-               pag->pagi_init = 1;
-       }
-
-       /*
-        * It's possible for these to be out of sync if
-        * we are in the middle of a forced shutdown.
-        */
-       ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
-               XFS_FORCED_SHUTDOWN(mp));
-       xfs_perag_put(pag);
-       return 0;
-}
-
-/*
- * Read in the agi to initialise the per-ag data in the mount structure
- */
-int
-xfs_ialloc_pagi_init(
-       xfs_mount_t     *mp,            /* file system mount structure */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_agnumber_t  agno)           /* allocation group number */
-{
-       xfs_buf_t       *bp = NULL;
-       int             error;
-
-       error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
-       if (error)
-               return error;
-       if (bp)
-               xfs_trans_brelse(tp, bp);
-       return 0;
-}
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
deleted file mode 100644 (file)
index 726f83a..0000000
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_trans.h"
-
-
-STATIC int
-xfs_inobt_get_minrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       return cur->bc_mp->m_inobt_mnr[level != 0];
-}
-
-STATIC struct xfs_btree_cur *
-xfs_inobt_dup_cursor(
-       struct xfs_btree_cur    *cur)
-{
-       return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
-                       cur->bc_private.a.agbp, cur->bc_private.a.agno,
-                       cur->bc_btnum);
-}
-
-STATIC void
-xfs_inobt_set_root(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *nptr,
-       int                     inc)    /* level change */
-{
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
-
-       agi->agi_root = nptr->s;
-       be32_add_cpu(&agi->agi_level, inc);
-       xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
-}
-
-STATIC void
-xfs_finobt_set_root(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *nptr,
-       int                     inc)    /* level change */
-{
-       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
-
-       agi->agi_free_root = nptr->s;
-       be32_add_cpu(&agi->agi_free_level, inc);
-       xfs_ialloc_log_agi(cur->bc_tp, agbp,
-                          XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
-}
-
-STATIC int
-xfs_inobt_alloc_block(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *start,
-       union xfs_btree_ptr     *new,
-       int                     *stat)
-{
-       xfs_alloc_arg_t         args;           /* block allocation args */
-       int                     error;          /* error return value */
-       xfs_agblock_t           sbno = be32_to_cpu(start->s);
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
-       memset(&args, 0, sizeof(args));
-       args.tp = cur->bc_tp;
-       args.mp = cur->bc_mp;
-       args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
-       args.minlen = 1;
-       args.maxlen = 1;
-       args.prod = 1;
-       args.type = XFS_ALLOCTYPE_NEAR_BNO;
-
-       error = xfs_alloc_vextent(&args);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-               return error;
-       }
-       if (args.fsbno == NULLFSBLOCK) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-               *stat = 0;
-               return 0;
-       }
-       ASSERT(args.len == 1);
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-
-       new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
-       *stat = 1;
-       return 0;
-}
-
-STATIC int
-xfs_inobt_free_block(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp)
-{
-       xfs_fsblock_t           fsbno;
-       int                     error;
-
-       fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
-       error = xfs_free_extent(cur->bc_tp, fsbno, 1);
-       if (error)
-               return error;
-
-       xfs_trans_binval(cur->bc_tp, bp);
-       return error;
-}
-
-STATIC int
-xfs_inobt_get_maxrecs(
-       struct xfs_btree_cur    *cur,
-       int                     level)
-{
-       return cur->bc_mp->m_inobt_mxr[level != 0];
-}
-
-STATIC void
-xfs_inobt_init_key_from_rec(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       key->inobt.ir_startino = rec->inobt.ir_startino;
-}
-
-STATIC void
-xfs_inobt_init_rec_from_key(
-       union xfs_btree_key     *key,
-       union xfs_btree_rec     *rec)
-{
-       rec->inobt.ir_startino = key->inobt.ir_startino;
-}
-
-STATIC void
-xfs_inobt_init_rec_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *rec)
-{
-       rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
-       rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
-       rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
-}
-
-/*
- * initial value of ptr for lookup
- */
-STATIC void
-xfs_inobt_init_ptr_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
-
-       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
-
-       ptr->s = agi->agi_root;
-}
-
-STATIC void
-xfs_finobt_init_ptr_from_cur(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
-
-       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
-       ptr->s = agi->agi_free_root;
-}
-
-STATIC __int64_t
-xfs_inobt_key_diff(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *key)
-{
-       return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
-                         cur->bc_rec.i.ir_startino;
-}
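
The generic btree walk only consumes the sign of this difference. A small self-contained illustration of a sign-driven LE lookup over sorted startino values (the array contents and helper names are arbitrary):

#include <stdint.h>
#include <stdio.h>

static int64_t key_diff(uint32_t candidate, uint32_t wanted)
{
        return (int64_t)candidate - wanted;     /* only the sign is used */
}

static int lookup_le(const uint32_t *keys, int n, uint32_t wanted)
{
        int lo = 0, hi = n - 1, found = -1;

        while (lo <= hi) {
                int mid = (lo + hi) / 2;

                if (key_diff(keys[mid], wanted) <= 0) { /* candidate <= wanted */
                        found = mid;
                        lo = mid + 1;
                } else {
                        hi = mid - 1;
                }
        }
        return found;   /* index of the largest key <= wanted, or -1 */
}

int main(void)
{
        uint32_t startinos[] = { 64, 128, 256, 512 };   /* sorted chunk starts */

        printf("LE lookup of 300 -> index %d\n", lookup_le(startinos, 4, 300));
        return 0;
}
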
-
-static int
-xfs_inobt_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-       struct xfs_perag        *pag = bp->b_pag;
-       unsigned int            level;
-
-       /*
-        * During growfs operations, we can't verify the exact owner as the
-        * perag is not fully initialised and hence not attached to the buffer.
-        *
-        * Similarly, during log recovery we will have a perag structure
-        * attached, but the agi information will not yet have been initialised
-        * from the on disk AGI. We don't currently use any of this information,
-        * but beware of the landmine (i.e. need to check pag->pagi_init) if we
-        * ever do.
-        */
-       switch (block->bb_magic) {
-       case cpu_to_be32(XFS_IBT_CRC_MAGIC):
-       case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
-               if (!xfs_sb_version_hascrc(&mp->m_sb))
-                       return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
-                       return false;
-               if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
-                       return false;
-               if (pag &&
-                   be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
-                       return false;
-               /* fall through */
-       case cpu_to_be32(XFS_IBT_MAGIC):
-       case cpu_to_be32(XFS_FIBT_MAGIC):
-               break;
-       default:
-               return 0;
-       }
-
-       /* numrecs and level verification */
-       level = be16_to_cpu(block->bb_level);
-       if (level >= mp->m_in_maxlevels)
-               return false;
-       if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
-               return false;
-
-       /* sibling pointer verification */
-       if (!block->bb_u.s.bb_leftsib ||
-           (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
-       if (!block->bb_u.s.bb_rightsib ||
-           (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
-            block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
-               return false;
-
-       return true;
-}
-
-static void
-xfs_inobt_read_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_btree_sblock_verify_crc(bp))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_inobt_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_verifier_error(bp);
-       }
-}
-
-static void
-xfs_inobt_write_verify(
-       struct xfs_buf  *bp)
-{
-       if (!xfs_inobt_verify(bp)) {
-               trace_xfs_btree_corrupt(bp, _RET_IP_);
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-       xfs_btree_sblock_calc_crc(bp);
-
-}
-
-const struct xfs_buf_ops xfs_inobt_buf_ops = {
-       .verify_read = xfs_inobt_read_verify,
-       .verify_write = xfs_inobt_write_verify,
-};
-
-#if defined(DEBUG) || defined(XFS_WARN)
-STATIC int
-xfs_inobt_keys_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_key     *k1,
-       union xfs_btree_key     *k2)
-{
-       return be32_to_cpu(k1->inobt.ir_startino) <
-               be32_to_cpu(k2->inobt.ir_startino);
-}
-
-STATIC int
-xfs_inobt_recs_inorder(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_rec     *r1,
-       union xfs_btree_rec     *r2)
-{
-       return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
-               be32_to_cpu(r2->inobt.ir_startino);
-}
-#endif /* DEBUG */
-
-static const struct xfs_btree_ops xfs_inobt_ops = {
-       .rec_len                = sizeof(xfs_inobt_rec_t),
-       .key_len                = sizeof(xfs_inobt_key_t),
-
-       .dup_cursor             = xfs_inobt_dup_cursor,
-       .set_root               = xfs_inobt_set_root,
-       .alloc_block            = xfs_inobt_alloc_block,
-       .free_block             = xfs_inobt_free_block,
-       .get_minrecs            = xfs_inobt_get_minrecs,
-       .get_maxrecs            = xfs_inobt_get_maxrecs,
-       .init_key_from_rec      = xfs_inobt_init_key_from_rec,
-       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
-       .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
-       .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
-       .key_diff               = xfs_inobt_key_diff,
-       .buf_ops                = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
-       .keys_inorder           = xfs_inobt_keys_inorder,
-       .recs_inorder           = xfs_inobt_recs_inorder,
-#endif
-};
-
-static const struct xfs_btree_ops xfs_finobt_ops = {
-       .rec_len                = sizeof(xfs_inobt_rec_t),
-       .key_len                = sizeof(xfs_inobt_key_t),
-
-       .dup_cursor             = xfs_inobt_dup_cursor,
-       .set_root               = xfs_finobt_set_root,
-       .alloc_block            = xfs_inobt_alloc_block,
-       .free_block             = xfs_inobt_free_block,
-       .get_minrecs            = xfs_inobt_get_minrecs,
-       .get_maxrecs            = xfs_inobt_get_maxrecs,
-       .init_key_from_rec      = xfs_inobt_init_key_from_rec,
-       .init_rec_from_key      = xfs_inobt_init_rec_from_key,
-       .init_rec_from_cur      = xfs_inobt_init_rec_from_cur,
-       .init_ptr_from_cur      = xfs_finobt_init_ptr_from_cur,
-       .key_diff               = xfs_inobt_key_diff,
-       .buf_ops                = &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
-       .keys_inorder           = xfs_inobt_keys_inorder,
-       .recs_inorder           = xfs_inobt_recs_inorder,
-#endif
-};
-
-/*
- * Allocate a new inode btree cursor.
- */
-struct xfs_btree_cur *                         /* new inode btree cursor */
-xfs_inobt_init_cursor(
-       struct xfs_mount        *mp,            /* file system mount point */
-       struct xfs_trans        *tp,            /* transaction pointer */
-       struct xfs_buf          *agbp,          /* buffer for agi structure */
-       xfs_agnumber_t          agno,           /* allocation group number */
-       xfs_btnum_t             btnum)          /* ialloc or free ino btree */
-{
-       struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
-       struct xfs_btree_cur    *cur;
-
-       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-
-       cur->bc_tp = tp;
-       cur->bc_mp = mp;
-       cur->bc_btnum = btnum;
-       if (btnum == XFS_BTNUM_INO) {
-               cur->bc_nlevels = be32_to_cpu(agi->agi_level);
-               cur->bc_ops = &xfs_inobt_ops;
-       } else {
-               cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
-               cur->bc_ops = &xfs_finobt_ops;
-       }
-
-       cur->bc_blocklog = mp->m_sb.sb_blocklog;
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
-       cur->bc_private.a.agbp = agbp;
-       cur->bc_private.a.agno = agno;
-
-       return cur;
-}
-
-/*
- * Calculate number of records in an inobt btree block.
- */
-int
-xfs_inobt_maxrecs(
-       struct xfs_mount        *mp,
-       int                     blocklen,
-       int                     leaf)
-{
-       blocklen -= XFS_INOBT_BLOCK_LEN(mp);
-
-       if (leaf)
-               return blocklen / sizeof(xfs_inobt_rec_t);
-       return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
-}
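
For a sense of scale, assuming a 4096-byte block, the 16-byte short-form header of the non-CRC case, 16-byte records and 4-byte keys and pointers, the arithmetic works out as below; CRC-enabled blocks carry a larger header, so both counts drop.

#include <stdio.h>

int main(void)
{
        int blocklen = 4096 - 16;               /* block minus short-form header, assumed */
        int rec_len = 16, key_len = 4, ptr_len = 4;

        printf("leaf records per block: %d\n", blocklen / rec_len);             /* 255 */
        printf("node entries per block: %d\n", blocklen / (key_len + ptr_len)); /* 510 */
        return 0;
}
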
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
deleted file mode 100644 (file)
index 1e5366d..0000000
+++ /dev/null
@@ -1,479 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_icache.h"
-#include "xfs_trans.h"
-#include "xfs_ialloc.h"
-#include "xfs_dinode.h"
-
-/*
- * Check that none of the inodes in the buffer have a next
- * unlinked field of 0.
- */
-#if defined(DEBUG)
-void
-xfs_inobp_check(
-       xfs_mount_t     *mp,
-       xfs_buf_t       *bp)
-{
-       int             i;
-       int             j;
-       xfs_dinode_t    *dip;
-
-       j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-       for (i = 0; i < j; i++) {
-               dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-                                       i * mp->m_sb.sb_inodesize);
-               if (!dip->di_next_unlinked)  {
-                       xfs_alert(mp,
-       "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
-                               i, (long long)bp->b_bn);
-               }
-       }
-}
-#endif
-
-/*
- * If we are doing readahead on an inode buffer, we might be in log recovery
- * reading an inode allocation buffer that hasn't yet been replayed, and hence
- * has not had the inode cores stamped into it. For readahead, then, the
- * buffer may be invalid.
- *
- * If the readahead buffer is invalid, we don't want to mark it with an error,
- * but we do want to clear the DONE status of the buffer so that a followup read
- * will re-read it from disk. This ensures that we don't get unnecessary
- * warnings during log recovery and we don't get unnecessary panics on debug
- * kernels.
- */
-static void
-xfs_inode_buf_verify(
-       struct xfs_buf  *bp,
-       bool            readahead)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       int             i;
-       int             ni;
-
-       /*
-        * Validate the magic number and version of every inode in the buffer
-        */
-       ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
-       for (i = 0; i < ni; i++) {
-               int             di_ok;
-               xfs_dinode_t    *dip;
-
-               dip = (struct xfs_dinode *)xfs_buf_offset(bp,
-                                       (i << mp->m_sb.sb_inodelog));
-               di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-                           XFS_DINODE_GOOD_VERSION(dip->di_version);
-               if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-                                               XFS_ERRTAG_ITOBP_INOTOBP,
-                                               XFS_RANDOM_ITOBP_INOTOBP))) {
-                       if (readahead) {
-                               bp->b_flags &= ~XBF_DONE;
-                               return;
-                       }
-
-                       xfs_buf_ioerror(bp, EFSCORRUPTED);
-                       xfs_verifier_error(bp);
-#ifdef DEBUG
-                       xfs_alert(mp,
-                               "bad inode magic/vsn daddr %lld #%d (magic=%x)",
-                               (unsigned long long)bp->b_bn, i,
-                               be16_to_cpu(dip->di_magic));
-#endif
-               }
-       }
-       xfs_inobp_check(mp, bp);
-}
-
-
-static void
-xfs_inode_buf_read_verify(
-       struct xfs_buf  *bp)
-{
-       xfs_inode_buf_verify(bp, false);
-}
-
-static void
-xfs_inode_buf_readahead_verify(
-       struct xfs_buf  *bp)
-{
-       xfs_inode_buf_verify(bp, true);
-}
-
-static void
-xfs_inode_buf_write_verify(
-       struct xfs_buf  *bp)
-{
-       xfs_inode_buf_verify(bp, false);
-}
-
-const struct xfs_buf_ops xfs_inode_buf_ops = {
-       .verify_read = xfs_inode_buf_read_verify,
-       .verify_write = xfs_inode_buf_write_verify,
-};
-
-const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
-       .verify_read = xfs_inode_buf_readahead_verify,
-       .verify_write = xfs_inode_buf_write_verify,
-};
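
The readahead variant differs from the demand-read path only in how a bad buffer is reported, as the comment at the top of these verifiers explains: it drops the done flag rather than recording an error. A toy of that policy (the flag and error values are stand-ins, not the real XBF_DONE or EFSCORRUPTED handling):

#include <stdbool.h>
#include <stdio.h>

struct toy_buf { bool done; int error; };

static void verify(struct toy_buf *bp, bool contents_ok, bool readahead)
{
        if (contents_ok)
                return;
        if (readahead)
                bp->done = false;       /* quietly force a future re-read */
        else
                bp->error = 1;          /* hard error on a demand read */
}

int main(void)
{
        struct toy_buf ra = { .done = true }, rd = { .done = true };

        verify(&ra, false, true);       /* bad speculative readahead */
        verify(&rd, false, false);      /* bad blocking read */
        printf("readahead: done=%d error=%d, read: done=%d error=%d\n",
               ra.done, ra.error, rd.done, rd.error);
        return 0;
}
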
-
-
-/*
- * This routine is called to map an inode to the buffer containing the on-disk
- * version of the inode.  It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
- * pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and dipp are
- * undefined.
- */
-int
-xfs_imap_to_bp(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_imap         *imap,
-       struct xfs_dinode       **dipp,
-       struct xfs_buf          **bpp,
-       uint                    buf_flags,
-       uint                    iget_flags)
-{
-       struct xfs_buf          *bp;
-       int                     error;
-
-       buf_flags |= XBF_UNMAPPED;
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
-                                  (int)imap->im_len, buf_flags, &bp,
-                                  &xfs_inode_buf_ops);
-       if (error) {
-               if (error == EAGAIN) {
-                       ASSERT(buf_flags & XBF_TRYLOCK);
-                       return error;
-               }
-
-               if (error == EFSCORRUPTED &&
-                   (iget_flags & XFS_IGET_UNTRUSTED))
-                       return EINVAL;
-
-               xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
-                       __func__, error);
-               return error;
-       }
-
-       *bpp = bp;
-       *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
-       return 0;
-}
-
-void
-xfs_dinode_from_disk(
-       xfs_icdinode_t          *to,
-       xfs_dinode_t            *from)
-{
-       to->di_magic = be16_to_cpu(from->di_magic);
-       to->di_mode = be16_to_cpu(from->di_mode);
-       to->di_version = from->di_version;
-       to->di_format = from->di_format;
-       to->di_onlink = be16_to_cpu(from->di_onlink);
-       to->di_uid = be32_to_cpu(from->di_uid);
-       to->di_gid = be32_to_cpu(from->di_gid);
-       to->di_nlink = be32_to_cpu(from->di_nlink);
-       to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
-       to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
-       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-       to->di_flushiter = be16_to_cpu(from->di_flushiter);
-       to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
-       to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
-       to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
-       to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
-       to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
-       to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
-       to->di_size = be64_to_cpu(from->di_size);
-       to->di_nblocks = be64_to_cpu(from->di_nblocks);
-       to->di_extsize = be32_to_cpu(from->di_extsize);
-       to->di_nextents = be32_to_cpu(from->di_nextents);
-       to->di_anextents = be16_to_cpu(from->di_anextents);
-       to->di_forkoff = from->di_forkoff;
-       to->di_aformat  = from->di_aformat;
-       to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
-       to->di_dmstate  = be16_to_cpu(from->di_dmstate);
-       to->di_flags    = be16_to_cpu(from->di_flags);
-       to->di_gen      = be32_to_cpu(from->di_gen);
-
-       if (to->di_version == 3) {
-               to->di_changecount = be64_to_cpu(from->di_changecount);
-               to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
-               to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
-               to->di_flags2 = be64_to_cpu(from->di_flags2);
-               to->di_ino = be64_to_cpu(from->di_ino);
-               to->di_lsn = be64_to_cpu(from->di_lsn);
-               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-               uuid_copy(&to->di_uuid, &from->di_uuid);
-       }
-}
-
-void
-xfs_dinode_to_disk(
-       xfs_dinode_t            *to,
-       xfs_icdinode_t          *from)
-{
-       to->di_magic = cpu_to_be16(from->di_magic);
-       to->di_mode = cpu_to_be16(from->di_mode);
-       to->di_version = from->di_version;
-       to->di_format = from->di_format;
-       to->di_onlink = cpu_to_be16(from->di_onlink);
-       to->di_uid = cpu_to_be32(from->di_uid);
-       to->di_gid = cpu_to_be32(from->di_gid);
-       to->di_nlink = cpu_to_be32(from->di_nlink);
-       to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
-       to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
-       memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-       to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
-       to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
-       to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
-       to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
-       to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
-       to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
-       to->di_size = cpu_to_be64(from->di_size);
-       to->di_nblocks = cpu_to_be64(from->di_nblocks);
-       to->di_extsize = cpu_to_be32(from->di_extsize);
-       to->di_nextents = cpu_to_be32(from->di_nextents);
-       to->di_anextents = cpu_to_be16(from->di_anextents);
-       to->di_forkoff = from->di_forkoff;
-       to->di_aformat = from->di_aformat;
-       to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
-       to->di_dmstate = cpu_to_be16(from->di_dmstate);
-       to->di_flags = cpu_to_be16(from->di_flags);
-       to->di_gen = cpu_to_be32(from->di_gen);
-
-       if (from->di_version == 3) {
-               to->di_changecount = cpu_to_be64(from->di_changecount);
-               to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
-               to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
-               to->di_flags2 = cpu_to_be64(from->di_flags2);
-               to->di_ino = cpu_to_be64(from->di_ino);
-               to->di_lsn = cpu_to_be64(from->di_lsn);
-               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-               uuid_copy(&to->di_uuid, &from->di_uuid);
-               to->di_flushiter = 0;
-       } else {
-               to->di_flushiter = cpu_to_be16(from->di_flushiter);
-       }
-}
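
The two converters are exact mirrors, so converting to disk format and back must reproduce every in-core field. A tiny standalone check of that round trip, using an unconditional 32-bit byte swap where the kernel would use cpu_to_be32/be32_to_cpu (which are no-ops on big-endian hosts):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t swap32(uint32_t v)
{
        return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
               ((v & 0x00ff0000u) >> 8)  | (v >> 24);
}

int main(void)
{
        uint32_t incore = 0x12345678;           /* e.g. an in-core di_gen value */
        uint32_t ondisk = swap32(incore);       /* "to_disk" direction */

        assert(swap32(ondisk) == incore);       /* "from_disk" undoes it exactly */
        printf("incore %#x <-> ondisk %#x\n", incore, ondisk);
        return 0;
}
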
-
-static bool
-xfs_dinode_verify(
-       struct xfs_mount        *mp,
-       struct xfs_inode        *ip,
-       struct xfs_dinode       *dip)
-{
-       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
-               return false;
-
-       /* only version 3 or greater inodes are extensively verified here */
-       if (dip->di_version < 3)
-               return true;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
-       if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             XFS_DINODE_CRC_OFF))
-               return false;
-       if (be64_to_cpu(dip->di_ino) != ip->i_ino)
-               return false;
-       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
-               return false;
-       return true;
-}
-
-void
-xfs_dinode_calc_crc(
-       struct xfs_mount        *mp,
-       struct xfs_dinode       *dip)
-{
-       __uint32_t              crc;
-
-       if (dip->di_version < 3)
-               return;
-
-       ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
-       crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
-                             XFS_DINODE_CRC_OFF);
-       dip->di_crc = xfs_end_cksum(crc);
-}
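
The point of the helper above is that writer and verifier agree on treating the CRC field itself as zero while covering the rest of the structure. A toy with a trivial checksum in place of CRC32c and an invented struct layout; only the skip-the-field idea carries over.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_dinode {
        uint16_t magic;
        uint8_t  version;
        uint8_t  pad;
        uint32_t crc;           /* contributes as zero to its own checksum */
        uint64_t size;
};

static uint32_t toy_cksum(const void *obj, size_t len, size_t crc_off)
{
        const uint8_t *p = obj;
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i < len; i++) {
                uint8_t byte = p[i];

                if (i >= crc_off && i < crc_off + sizeof(uint32_t))
                        byte = 0;       /* the stored checksum never covers itself */
                sum = sum * 31 + byte;
        }
        return sum;
}

int main(void)
{
        struct toy_dinode d;
        size_t off = offsetof(struct toy_dinode, crc);

        memset(&d, 0, sizeof(d));
        d.magic = 0x494e;       /* "IN", like the dinode magic */
        d.version = 3;
        d.size = 4096;

        d.crc = toy_cksum(&d, sizeof(d), off);          /* writer side */
        printf("stored %#x, verifier recomputes %#x\n", /* identical by design */
               d.crc, toy_cksum(&d, sizeof(d), off));
        return 0;
}
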
-
-/*
- * Read the disk inode attributes into the in-core inode structure.
- *
- * For version 5 superblocks, if we are initialising a new inode and we are not
- * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
- * inode core with a random generation number. If we are keeping inodes around,
- * we need to read the inode cluster to get the existing generation number off
- * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
- * format) then log recovery is dependent on the di_flushiter field being
- * initialised from the current on-disk value and hence we must also read the
- * inode off disk.
- */
-int
-xfs_iread(
-       xfs_mount_t     *mp,
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       uint            iget_flags)
-{
-       xfs_buf_t       *bp;
-       xfs_dinode_t    *dip;
-       int             error;
-
-       /*
-        * Fill in the location information in the in-core inode.
-        */
-       error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
-       if (error)
-               return error;
-
-       /* shortcut IO on inode allocation if possible */
-       if ((iget_flags & XFS_IGET_CREATE) &&
-           xfs_sb_version_hascrc(&mp->m_sb) &&
-           !(mp->m_flags & XFS_MOUNT_IKEEP)) {
-               /* initialise the on-disk inode core */
-               memset(&ip->i_d, 0, sizeof(ip->i_d));
-               ip->i_d.di_magic = XFS_DINODE_MAGIC;
-               ip->i_d.di_gen = prandom_u32();
-               if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                       ip->i_d.di_version = 3;
-                       ip->i_d.di_ino = ip->i_ino;
-                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
-               } else
-                       ip->i_d.di_version = 2;
-               return 0;
-       }
-
-       /*
-        * Get pointers to the on-disk inode and the buffer containing it.
-        */
-       error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
-       if (error)
-               return error;
-
-       /* even unallocated inodes are verified */
-       if (!xfs_dinode_verify(mp, ip, dip)) {
-               xfs_alert(mp, "%s: validation failed for inode %lld",
-                               __func__, ip->i_ino);
-
-               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
-               error = EFSCORRUPTED;
-               goto out_brelse;
-       }
-
-       /*
-        * If the on-disk inode is already linked to a directory
-        * entry, copy all of the inode into the in-core inode.
-        * xfs_iformat_fork() handles copying in the inode format
-        * specific information.
-        * Otherwise, just get the truly permanent information.
-        */
-       if (dip->di_mode) {
-               xfs_dinode_from_disk(&ip->i_d, dip);
-               error = xfs_iformat_fork(ip, dip);
-               if (error)  {
-#ifdef DEBUG
-                       xfs_alert(mp, "%s: xfs_iformat() returned error %d",
-                               __func__, error);
-#endif /* DEBUG */
-                       goto out_brelse;
-               }
-       } else {
-               /*
-                * Partial initialisation of the in-core inode. Just the bits
-                * that xfs_ialloc won't overwrite or relies on being correct.
-                */
-               ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
-               ip->i_d.di_version = dip->di_version;
-               ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
-               ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
-
-               if (dip->di_version == 3) {
-                       ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
-                       uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
-               }
-
-               /*
-                * Make sure to pull in the mode here as well in
-                * case the inode is released without being used.
-                * This ensures that xfs_inactive() will see that
-                * the inode is already free and not try to mess
-                * with the uninitialized part of it.
-                */
-               ip->i_d.di_mode = 0;
-       }
-
-       /*
-        * Automatically convert version 1 inode formats in memory to version 2
-        * inode format. If the inode is modified, it will get logged and
-        * rewritten as a version 2 inode. We can do this because we set the
-        * superblock feature bit for v2 inodes unconditionally during mount
-        * and it means the rest of the code can assume the inode version is 2
-        * or higher.
-        */
-       if (ip->i_d.di_version == 1) {
-               ip->i_d.di_version = 2;
-               memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-               ip->i_d.di_nlink = ip->i_d.di_onlink;
-               ip->i_d.di_onlink = 0;
-               xfs_set_projid(ip, 0);
-       }
-
-       ip->i_delayed_blks = 0;
-
-       /*
-        * Mark the buffer containing the inode as something to keep
-        * around for a while.  This helps to keep recently accessed
-        * meta-data in-core longer.
-        */
-       xfs_buf_set_ref(bp, XFS_INO_REF);
-
-       /*
-        * Use xfs_trans_brelse() to release the buffer containing the on-disk
-        * inode, because it was acquired with xfs_trans_read_buf() in
-        * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
-        * brelse().  If we're within a transaction, then xfs_trans_brelse()
-        * will only release the buffer if it is not dirty within the
-        * transaction.  It will be OK to release the buffer in this case,
-        * because inodes on disk are never destroyed and we will be locking the
-        * new in-core inode before putting it in the cache where other
-        * processes can find it.  Thus we don't have to worry about the inode
-        * being changed just because we released the buffer.
-        */
- out_brelse:
-       xfs_trans_brelse(tp, bp);
-       return error;
-}
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
deleted file mode 100644 (file)
index 2a124e9..0000000
+++ /dev/null
@@ -1,1906 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/log2.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-
-kmem_zone_t *xfs_ifork_zone;
-
-STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
-STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
-STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
-
-#ifdef DEBUG
-/*
- * Make sure that the extents in the given memory buffer
- * are valid.
- */
-void
-xfs_validate_extents(
-       xfs_ifork_t             *ifp,
-       int                     nrecs,
-       xfs_exntfmt_t           fmt)
-{
-       xfs_bmbt_irec_t         irec;
-       xfs_bmbt_rec_host_t     rec;
-       int                     i;
-
-       for (i = 0; i < nrecs; i++) {
-               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-               rec.l0 = get_unaligned(&ep->l0);
-               rec.l1 = get_unaligned(&ep->l1);
-               xfs_bmbt_get_all(&rec, &irec);
-               if (fmt == XFS_EXTFMT_NOSTATE)
-                       ASSERT(irec.br_state == XFS_EXT_NORM);
-       }
-}
-#else /* DEBUG */
-#define xfs_validate_extents(ifp, nrecs, fmt)
-#endif /* DEBUG */
-
-
-/*
- * Move inode type and inode format specific information from the
- * on-disk inode to the in-core inode.  For fifos, devs, and sockets
- * this means set if_rdev to the proper value.  For files, directories,
- * and symlinks this means to bring in the in-line data or extent
- * pointers.  For a file in B-tree format, only the root is immediately
- * brought in-core.  The rest will be in-lined in if_extents when it
- * is first referenced (see xfs_iread_extents()).
- */
-int
-xfs_iformat_fork(
-       xfs_inode_t             *ip,
-       xfs_dinode_t            *dip)
-{
-       xfs_attr_shortform_t    *atp;
-       int                     size;
-       int                     error = 0;
-       xfs_fsize_t             di_size;
-
-       if (unlikely(be32_to_cpu(dip->di_nextents) +
-                    be16_to_cpu(dip->di_anextents) >
-                    be64_to_cpu(dip->di_nblocks))) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
-                       (unsigned long long)ip->i_ino,
-                       (int)(be32_to_cpu(dip->di_nextents) +
-                             be16_to_cpu(dip->di_anextents)),
-                       (unsigned long long)
-                               be64_to_cpu(dip->di_nblocks));
-               XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return EFSCORRUPTED;
-       }
-
-       if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
-               xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
-                       (unsigned long long)ip->i_ino,
-                       dip->di_forkoff);
-               XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return EFSCORRUPTED;
-       }
-
-       if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
-                    !ip->i_mount->m_rtdev_targp)) {
-               xfs_warn(ip->i_mount,
-                       "corrupt dinode %Lu, has realtime flag set.",
-                       ip->i_ino);
-               XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
-                                    XFS_ERRLEVEL_LOW, ip->i_mount, dip);
-               return EFSCORRUPTED;
-       }
-
-       switch (ip->i_d.di_mode & S_IFMT) {
-       case S_IFIFO:
-       case S_IFCHR:
-       case S_IFBLK:
-       case S_IFSOCK:
-               if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
-                       XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
-                                             ip->i_mount, dip);
-                       return EFSCORRUPTED;
-               }
-               ip->i_d.di_size = 0;
-               ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
-               break;
-
-       case S_IFREG:
-       case S_IFLNK:
-       case S_IFDIR:
-               switch (dip->di_format) {
-               case XFS_DINODE_FMT_LOCAL:
-                       /*
-                        * no local regular files yet
-                        */
-                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
-                               xfs_warn(ip->i_mount,
-                       "corrupt inode %Lu (local format for regular file).",
-                                       (unsigned long long) ip->i_ino);
-                               XFS_CORRUPTION_ERROR("xfs_iformat(4)",
-                                                    XFS_ERRLEVEL_LOW,
-                                                    ip->i_mount, dip);
-                               return EFSCORRUPTED;
-                       }
-
-                       di_size = be64_to_cpu(dip->di_size);
-                       if (unlikely(di_size < 0 ||
-                                    di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-                               xfs_warn(ip->i_mount,
-                       "corrupt inode %Lu (bad size %Ld for local inode).",
-                                       (unsigned long long) ip->i_ino,
-                                       (long long) di_size);
-                               XFS_CORRUPTION_ERROR("xfs_iformat(5)",
-                                                    XFS_ERRLEVEL_LOW,
-                                                    ip->i_mount, dip);
-                               return EFSCORRUPTED;
-                       }
-
-                       size = (int)di_size;
-                       error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
-                       break;
-               case XFS_DINODE_FMT_EXTENTS:
-                       error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
-                       break;
-               case XFS_DINODE_FMT_BTREE:
-                       error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
-                       break;
-               default:
-                       XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
-                                        ip->i_mount);
-                       return EFSCORRUPTED;
-               }
-               break;
-
-       default:
-               XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
-               return EFSCORRUPTED;
-       }
-       if (error) {
-               return error;
-       }
-       if (!XFS_DFORK_Q(dip))
-               return 0;
-
-       ASSERT(ip->i_afp == NULL);
-       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
-
-       switch (dip->di_aformat) {
-       case XFS_DINODE_FMT_LOCAL:
-               atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
-               size = be16_to_cpu(atp->hdr.totsize);
-
-               if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
-                       xfs_warn(ip->i_mount,
-                               "corrupt inode %Lu (bad attr fork size %Ld).",
-                               (unsigned long long) ip->i_ino,
-                               (long long) size);
-                       XFS_CORRUPTION_ERROR("xfs_iformat(8)",
-                                            XFS_ERRLEVEL_LOW,
-                                            ip->i_mount, dip);
-                       return EFSCORRUPTED;
-               }
-
-               error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
-               break;
-       case XFS_DINODE_FMT_EXTENTS:
-               error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
-               break;
-       case XFS_DINODE_FMT_BTREE:
-               error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
-               break;
-       default:
-               error = EFSCORRUPTED;
-               break;
-       }
-       if (error) {
-               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
-               ip->i_afp = NULL;
-               xfs_idestroy_fork(ip, XFS_DATA_FORK);
-       }
-       return error;
-}
-
-/*
- * The file is in-lined in the on-disk inode.
- * If it fits into if_inline_data, then copy
- * it there, otherwise allocate a buffer for it
- * and copy the data there.  Either way, set
- * if_data to point at the data.
- * If we allocate a buffer for the data, make
- * sure that its size is a multiple of 4 and
- * record the real size in i_real_bytes.
- */
-STATIC int
-xfs_iformat_local(
-       xfs_inode_t     *ip,
-       xfs_dinode_t    *dip,
-       int             whichfork,
-       int             size)
-{
-       xfs_ifork_t     *ifp;
-       int             real_size;
-
-       /*
-        * If the size is unreasonable, then something
-        * is wrong and we just bail out rather than crash in
-        * kmem_alloc() or memcpy() below.
-        */
-       if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-               xfs_warn(ip->i_mount,
-       "corrupt inode %Lu (bad size %d for local fork, size = %d).",
-                       (unsigned long long) ip->i_ino, size,
-                       XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
-               XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return EFSCORRUPTED;
-       }
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       real_size = 0;
-       if (size == 0)
-               ifp->if_u1.if_data = NULL;
-       else if (size <= sizeof(ifp->if_u2.if_inline_data))
-               ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-       else {
-               real_size = roundup(size, 4);
-               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
-       }
-       ifp->if_bytes = size;
-       ifp->if_real_bytes = real_size;
-       if (size)
-               memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
-       ifp->if_flags &= ~XFS_IFEXTENTS;
-       ifp->if_flags |= XFS_IFINLINE;
-       return 0;
-}
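
The inline-versus-heap decision above is small enough to capture on its own. Below is a minimal user-space sketch (made-up struct and helper names, not the kernel types) of the rule xfs_iformat_local() applies: data that fits the in-inode buffer stays inline, anything larger gets a heap buffer rounded up to a multiple of 4 so it can be logged on word boundaries.

#include <stdlib.h>
#include <string.h>

#define INLINE_BYTES 32			/* stand-in for sizeof(if_inline_data) */

struct fork_data {
	char	inline_buf[INLINE_BYTES];	/* storage inside the "inode" */
	char	*data;				/* points at inline_buf or heap */
	int	bytes;				/* valid bytes */
	int	real_bytes;			/* heap allocation size, 0 if inline */
};

/* Copy 'size' bytes of local-format fork data, mirroring the logic above. */
static int fork_set_local(struct fork_data *f, const void *src, int size)
{
	f->real_bytes = 0;
	if (size == 0) {
		f->data = NULL;
	} else if (size <= INLINE_BYTES) {
		f->data = f->inline_buf;
	} else {
		f->real_bytes = (size + 3) & ~3;	/* roundup(size, 4) */
		f->data = malloc(f->real_bytes);
		if (!f->data)
			return -1;
	}
	f->bytes = size;
	if (size)
		memcpy(f->data, src, size);
	return 0;
}
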
-
-/*
- * The file consists of a set of extents all
- * of which fit into the on-disk inode.
- * If there are few enough extents to fit into
- * the if_inline_ext, then copy them there.
- * Otherwise allocate a buffer for them and copy
- * them into it.  Either way, set if_extents
- * to point at the extents.
- */
-STATIC int
-xfs_iformat_extents(
-       xfs_inode_t     *ip,
-       xfs_dinode_t    *dip,
-       int             whichfork)
-{
-       xfs_bmbt_rec_t  *dp;
-       xfs_ifork_t     *ifp;
-       int             nex;
-       int             size;
-       int             i;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       nex = XFS_DFORK_NEXTENTS(dip, whichfork);
-       size = nex * (uint)sizeof(xfs_bmbt_rec_t);
-
-       /*
-        * If the number of extents is unreasonable, then something
-        * is wrong and we just bail out rather than crash in
-        * kmem_alloc() or memcpy() below.
-        */
-       if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-               xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
-                       (unsigned long long) ip->i_ino, nex);
-               XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
-                                    ip->i_mount, dip);
-               return EFSCORRUPTED;
-       }
-
-       ifp->if_real_bytes = 0;
-       if (nex == 0)
-               ifp->if_u1.if_extents = NULL;
-       else if (nex <= XFS_INLINE_EXTS)
-               ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-       else
-               xfs_iext_add(ifp, 0, nex);
-
-       ifp->if_bytes = size;
-       if (size) {
-               dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
-               xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
-               for (i = 0; i < nex; i++, dp++) {
-                       xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-                       ep->l0 = get_unaligned_be64(&dp->l0);
-                       ep->l1 = get_unaligned_be64(&dp->l1);
-               }
-               XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
-               if (whichfork != XFS_DATA_FORK ||
-                   XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) {
-                       if (unlikely(xfs_check_nostate_extents(ifp, 0, nex))) {
-                               XFS_ERROR_REPORT("xfs_iformat_extents(2)",
-                                                XFS_ERRLEVEL_LOW,
-                                                ip->i_mount);
-                               return EFSCORRUPTED;
-                       }
-               }
-       }
-       ifp->if_flags |= XFS_IFEXTENTS;
-       return 0;
-}
-
-/*
- * The file has too many extents to fit into
- * the inode, so they are in B-tree format.
- * Allocate a buffer for the root of the B-tree
- * and copy the root into it.  The i_extents
- * field will remain NULL until all of the
- * extents are read in (when they are needed).
- */
-STATIC int
-xfs_iformat_btree(
-       xfs_inode_t             *ip,
-       xfs_dinode_t            *dip,
-       int                     whichfork)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_bmdr_block_t        *dfp;
-       xfs_ifork_t             *ifp;
-       /* REFERENCED */
-       int                     nrecs;
-       int                     size;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
-       size = XFS_BMAP_BROOT_SPACE(mp, dfp);
-       nrecs = be16_to_cpu(dfp->bb_numrecs);
-
-       /*
-        * Blow out if the fork has fewer extents than can fit in the
-        * fork (the fork shouldn't be in btree format), the root btree
-        * block has more records than can fit into the fork, or the
-        * number of extents is greater than the number of blocks.
-        */
-       if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
-                                       XFS_IFORK_MAXEXT(ip, whichfork) ||
-                    XFS_BMDR_SPACE_CALC(nrecs) >
-                                       XFS_DFORK_SIZE(dip, mp, whichfork) ||
-                    XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-               xfs_warn(mp, "corrupt inode %Lu (btree).",
-                                       (unsigned long long) ip->i_ino);
-               XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
-                                        mp, dip);
-               return EFSCORRUPTED;
-       }
-
-       ifp->if_broot_bytes = size;
-       ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
-       ASSERT(ifp->if_broot != NULL);
-       /*
-        * Copy and convert from the on-disk structure
-        * to the in-memory structure.
-        */
-       xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
-                        ifp->if_broot, size);
-       ifp->if_flags &= ~XFS_IFEXTENTS;
-       ifp->if_flags |= XFS_IFBROOT;
-
-       return 0;
-}
-
-/*
- * Read in extents from a btree-format inode.
- * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
- */
-int
-xfs_iread_extents(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       int             whichfork)
-{
-       int             error;
-       xfs_ifork_t     *ifp;
-       xfs_extnum_t    nextents;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
-               XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
-                                ip->i_mount);
-               return EFSCORRUPTED;
-       }
-       nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-
-       /*
-        * We know that the size is valid (it's checked in iformat_btree)
-        */
-       ifp->if_bytes = ifp->if_real_bytes = 0;
-       ifp->if_flags |= XFS_IFEXTENTS;
-       xfs_iext_add(ifp, 0, nextents);
-       error = xfs_bmap_read_extents(tp, ip, whichfork);
-       if (error) {
-               xfs_iext_destroy(ifp);
-               ifp->if_flags &= ~XFS_IFEXTENTS;
-               return error;
-       }
-       xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
-       return 0;
-}
-
-/*
- * Reallocate the space for if_broot based on the number of records
- * being added or deleted as indicated in rec_diff.  Move the records
- * and pointers in if_broot to fit the new size.  When shrinking this
- * will eliminate holes between the records and pointers created by
- * the caller.  When growing this will create holes to be filled in
- * by the caller.
- *
- * The caller must not request to add more records than would fit in
- * the on-disk inode root.  If the if_broot is currently NULL, then
- * if we are adding records, one will be allocated.  The caller must also
- * not request that the number of records go below zero, although
- * it can go to zero.
- *
- * ip -- the inode whose if_broot area is changing
- * rec_diff -- the change in the number of records, positive or negative,
- *      requested for the if_broot array.
- */
-void
-xfs_iroot_realloc(
-       xfs_inode_t             *ip,
-       int                     rec_diff,
-       int                     whichfork)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     cur_max;
-       xfs_ifork_t             *ifp;
-       struct xfs_btree_block  *new_broot;
-       int                     new_max;
-       size_t                  new_size;
-       char                    *np;
-       char                    *op;
-
-       /*
-        * Handle the degenerate case quietly.
-        */
-       if (rec_diff == 0) {
-               return;
-       }
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (rec_diff > 0) {
-               /*
-                * If there wasn't any memory allocated before, just
-                * allocate it now and get out.
-                */
-               if (ifp->if_broot_bytes == 0) {
-                       new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
-                       ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
-                       ifp->if_broot_bytes = (int)new_size;
-                       return;
-               }
-
-               /*
-                * If there is already an existing if_broot, then we need
-                * to realloc() it and shift the pointers to their new
-                * location.  The records don't change location because
-                * they are kept butted up against the btree block header.
-                */
-               cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
-               new_max = cur_max + rec_diff;
-               new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
-               ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
-                               XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
-                               KM_SLEEP | KM_NOFS);
-               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-                                                    ifp->if_broot_bytes);
-               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-                                                    (int)new_size);
-               ifp->if_broot_bytes = (int)new_size;
-               ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-                       XFS_IFORK_SIZE(ip, whichfork));
-               memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
-               return;
-       }
-
-       /*
-        * rec_diff is less than 0.  In this case, we are shrinking the
-        * if_broot buffer.  It must already exist.  If we go to zero
-        * records, just get rid of the root and clear the status bit.
-        */
-       ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
-       cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
-       new_max = cur_max + rec_diff;
-       ASSERT(new_max >= 0);
-       if (new_max > 0)
-               new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
-       else
-               new_size = 0;
-       if (new_size > 0) {
-               new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
-               /*
-                * First copy over the btree block header.
-                */
-               memcpy(new_broot, ifp->if_broot,
-                       XFS_BMBT_BLOCK_LEN(ip->i_mount));
-       } else {
-               new_broot = NULL;
-               ifp->if_flags &= ~XFS_IFBROOT;
-       }
-
-       /*
-        * Only copy the records and pointers if there are any.
-        */
-       if (new_max > 0) {
-               /*
-                * First copy the records.
-                */
-               op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
-               np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
-               memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
-
-               /*
-                * Then copy the pointers.
-                */
-               op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-                                                    ifp->if_broot_bytes);
-               np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
-                                                    (int)new_size);
-               memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
-       }
-       kmem_free(ifp->if_broot);
-       ifp->if_broot = new_broot;
-       ifp->if_broot_bytes = (int)new_size;
-       if (ifp->if_broot)
-               ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-                       XFS_IFORK_SIZE(ip, whichfork));
-       return;
-}
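
A sketch of the grow path in xfs_iroot_realloc(): the record area stays butted up against the header, so after realloc() only the pointer array at the tail has to be shifted out to its new offset. The layout and sizes below are simplified assumptions for illustration, not the on-disk bmbt root format, and the helper names are made up.

#include <stdlib.h>
#include <string.h>

#define HDR_BYTES	16	/* stand-in for the btree block header */
#define REC_BYTES	16	/* stand-in for sizeof(xfs_bmbt_rec_t) */
#define PTR_BYTES	8	/* stand-in for sizeof(xfs_dfsbno_t) */

static size_t block_size(int maxrecs)
{
	/* [header][maxrecs records][maxrecs pointers] */
	return HDR_BYTES + (size_t)maxrecs * (REC_BYTES + PTR_BYTES);
}

/* Grow the root by rec_diff records; assumes rec_diff > 0 (the grow case). */
static char *grow_root(char *root, int cur_max, int rec_diff)
{
	int	new_max = cur_max + rec_diff;
	char	*nroot = realloc(root, block_size(new_max));

	if (!nroot)
		return NULL;
	/* pointers sit after the record area; shift them to the new offset */
	memmove(nroot + HDR_BYTES + (size_t)new_max * REC_BYTES,
		nroot + HDR_BYTES + (size_t)cur_max * REC_BYTES,
		(size_t)cur_max * PTR_BYTES);
	return nroot;
}
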
-
-/*
- * This is called when the amount of space needed for if_data
- * is increased or decreased.  The change in size is indicated by
- * the number of bytes that need to be added or deleted in the
- * byte_diff parameter.
- *
- * If the amount of space needed has decreased below the size of the
- * inline buffer, then switch to using the inline buffer.  Otherwise,
- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
- * to what is needed.
- *
- * ip -- the inode whose if_data area is changing
- * byte_diff -- the change in the number of bytes, positive or negative,
- *      requested for the if_data array.
- */
-void
-xfs_idata_realloc(
-       xfs_inode_t     *ip,
-       int             byte_diff,
-       int             whichfork)
-{
-       xfs_ifork_t     *ifp;
-       int             new_size;
-       int             real_size;
-
-       if (byte_diff == 0) {
-               return;
-       }
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       new_size = (int)ifp->if_bytes + byte_diff;
-       ASSERT(new_size >= 0);
-
-       if (new_size == 0) {
-               if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-                       kmem_free(ifp->if_u1.if_data);
-               }
-               ifp->if_u1.if_data = NULL;
-               real_size = 0;
-       } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
-               /*
-                * If the data now fits in if_inline_data, copy it in
-                * from the malloc'd buffer and free that buffer.
-                */
-               if (ifp->if_u1.if_data == NULL) {
-                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-                       ASSERT(ifp->if_real_bytes != 0);
-                       memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
-                             new_size);
-                       kmem_free(ifp->if_u1.if_data);
-                       ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-               }
-               real_size = 0;
-       } else {
-               /*
-                * Stuck with malloc/realloc.
-                * For inline data, the underlying buffer must be
-                * a multiple of 4 bytes in size so that it can be
-                * logged and stay on word boundaries.  We enforce
-                * that here.
-                */
-               real_size = roundup(new_size, 4);
-               if (ifp->if_u1.if_data == NULL) {
-                       ASSERT(ifp->if_real_bytes == 0);
-                       ifp->if_u1.if_data = kmem_alloc(real_size,
-                                                       KM_SLEEP | KM_NOFS);
-               } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-                       /*
-                        * Only do the realloc if the underlying size
-                        * is really changing.
-                        */
-                       if (ifp->if_real_bytes != real_size) {
-                               ifp->if_u1.if_data =
-                                       kmem_realloc(ifp->if_u1.if_data,
-                                                       real_size,
-                                                       ifp->if_real_bytes,
-                                                       KM_SLEEP | KM_NOFS);
-                       }
-               } else {
-                       ASSERT(ifp->if_real_bytes == 0);
-                       ifp->if_u1.if_data = kmem_alloc(real_size,
-                                                       KM_SLEEP | KM_NOFS);
-                       memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
-                               ifp->if_bytes);
-               }
-       }
-       ifp->if_real_bytes = real_size;
-       ifp->if_bytes = new_size;
-       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-}
-
-void
-xfs_idestroy_fork(
-       xfs_inode_t     *ip,
-       int             whichfork)
-{
-       xfs_ifork_t     *ifp;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       if (ifp->if_broot != NULL) {
-               kmem_free(ifp->if_broot);
-               ifp->if_broot = NULL;
-       }
-
-       /*
-        * If the format is local, then we can't have an extents
-        * array so just look for an inline data array.  If we're
-        * not local then we may or may not have an extents list,
-        * so check and free it up if we do.
-        */
-       if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-               if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
-                   (ifp->if_u1.if_data != NULL)) {
-                       ASSERT(ifp->if_real_bytes != 0);
-                       kmem_free(ifp->if_u1.if_data);
-                       ifp->if_u1.if_data = NULL;
-                       ifp->if_real_bytes = 0;
-               }
-       } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
-                  ((ifp->if_flags & XFS_IFEXTIREC) ||
-                   ((ifp->if_u1.if_extents != NULL) &&
-                    (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
-               ASSERT(ifp->if_real_bytes != 0);
-               xfs_iext_destroy(ifp);
-       }
-       ASSERT(ifp->if_u1.if_extents == NULL ||
-              ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
-       ASSERT(ifp->if_real_bytes == 0);
-       if (whichfork == XFS_ATTR_FORK) {
-               kmem_zone_free(xfs_ifork_zone, ip->i_afp);
-               ip->i_afp = NULL;
-       }
-}
-
-/*
- * Convert in-core extents to on-disk form
- *
- * For either the data or attr fork in extent format, we need to endian convert
- * the in-core extent as we place them into the on-disk inode.
- *
- * In the case of the data fork, the in-core and on-disk fork sizes can be
- * different due to delayed allocation extents. We only copy on-disk extents
- * here, so callers must always use the physical fork size to determine the
- * size of the buffer passed to this routine.  We will return the size actually
- * used.
- */
-int
-xfs_iextents_copy(
-       xfs_inode_t             *ip,
-       xfs_bmbt_rec_t          *dp,
-       int                     whichfork)
-{
-       int                     copied;
-       int                     i;
-       xfs_ifork_t             *ifp;
-       int                     nrecs;
-       xfs_fsblock_t           start_block;
-
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(ifp->if_bytes > 0);
-
-       nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
-       ASSERT(nrecs > 0);
-
-       /*
-        * There are some delayed allocation extents in the
-        * inode, so copy the extents one at a time and skip
-        * the delayed ones.  There must be at least one
-        * non-delayed extent.
-        */
-       copied = 0;
-       for (i = 0; i < nrecs; i++) {
-               xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-               start_block = xfs_bmbt_get_startblock(ep);
-               if (isnullstartblock(start_block)) {
-                       /*
-                        * It's a delayed allocation extent, so skip it.
-                        */
-                       continue;
-               }
-
-               /* Translate to on disk format */
-               put_unaligned_be64(ep->l0, &dp->l0);
-               put_unaligned_be64(ep->l1, &dp->l1);
-               dp++;
-               copied++;
-       }
-       ASSERT(copied != 0);
-       xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
-
-       return (copied * (uint)sizeof(xfs_bmbt_rec_t));
-}
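
The only per-record transformation above is the host-to-big-endian conversion of the two 64-bit extent words. Portable stand-ins for get_unaligned_be64()/put_unaligned_be64() (illustrative helpers, not the kernel implementations) look like this:

#include <stdint.h>

/* Load a big-endian 64-bit value from a possibly unaligned buffer. */
static uint64_t be64_load(const void *p)
{
	const unsigned char *b = p;
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | b[i];	/* most significant byte first */
	return v;
}

/* Store a 64-bit value as big-endian bytes, alignment-safe. */
static void be64_store(uint64_t v, void *p)
{
	unsigned char *b = p;
	int i;

	for (i = 7; i >= 0; i--) {
		b[i] = (unsigned char)(v & 0xff);
		v >>= 8;
	}
}
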
-
-/*
- * Each of the following cases stores data into the same region
- * of the on-disk inode, so only one of them can be valid at
- * any given time. While it is possible to have conflicting formats
- * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
- * in EXTENTS format, this can only happen when the fork has
- * changed formats after being modified but before being flushed.
- * In these cases, the format always takes precedence, because the
- * format indicates the current state of the fork.
- */
-void
-xfs_iflush_fork(
-       xfs_inode_t             *ip,
-       xfs_dinode_t            *dip,
-       xfs_inode_log_item_t    *iip,
-       int                     whichfork)
-{
-       char                    *cp;
-       xfs_ifork_t             *ifp;
-       xfs_mount_t             *mp;
-       static const short      brootflag[2] =
-               { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
-       static const short      dataflag[2] =
-               { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
-       static const short      extflag[2] =
-               { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
-
-       if (!iip)
-               return;
-       ifp = XFS_IFORK_PTR(ip, whichfork);
-       /*
-        * This can happen if we gave up in iformat in an error path,
-        * for the attribute fork.
-        */
-       if (!ifp) {
-               ASSERT(whichfork == XFS_ATTR_FORK);
-               return;
-       }
-       cp = XFS_DFORK_PTR(dip, whichfork);
-       mp = ip->i_mount;
-       switch (XFS_IFORK_FORMAT(ip, whichfork)) {
-       case XFS_DINODE_FMT_LOCAL:
-               if ((iip->ili_fields & dataflag[whichfork]) &&
-                   (ifp->if_bytes > 0)) {
-                       ASSERT(ifp->if_u1.if_data != NULL);
-                       ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-                       memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
-               }
-               break;
-
-       case XFS_DINODE_FMT_EXTENTS:
-               ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
-                      !(iip->ili_fields & extflag[whichfork]));
-               if ((iip->ili_fields & extflag[whichfork]) &&
-                   (ifp->if_bytes > 0)) {
-                       ASSERT(xfs_iext_get_ext(ifp, 0));
-                       ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
-                       (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
-                               whichfork);
-               }
-               break;
-
-       case XFS_DINODE_FMT_BTREE:
-               if ((iip->ili_fields & brootflag[whichfork]) &&
-                   (ifp->if_broot_bytes > 0)) {
-                       ASSERT(ifp->if_broot != NULL);
-                       ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-                               XFS_IFORK_SIZE(ip, whichfork));
-                       xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
-                               (xfs_bmdr_block_t *)cp,
-                               XFS_DFORK_SIZE(dip, mp, whichfork));
-               }
-               break;
-
-       case XFS_DINODE_FMT_DEV:
-               if (iip->ili_fields & XFS_ILOG_DEV) {
-                       ASSERT(whichfork == XFS_DATA_FORK);
-                       xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
-               }
-               break;
-
-       case XFS_DINODE_FMT_UUID:
-               if (iip->ili_fields & XFS_ILOG_UUID) {
-                       ASSERT(whichfork == XFS_DATA_FORK);
-                       memcpy(XFS_DFORK_DPTR(dip),
-                              &ip->i_df.if_u2.if_uuid,
-                              sizeof(uuid_t));
-               }
-               break;
-
-       default:
-               ASSERT(0);
-               break;
-       }
-}
-
-/*
- * Return a pointer to the extent record at file index idx.
- */
-xfs_bmbt_rec_host_t *
-xfs_iext_get_ext(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    idx)            /* index of target extent */
-{
-       ASSERT(idx >= 0);
-       ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
-
-       if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
-               return ifp->if_u1.if_ext_irec->er_extbuf;
-       } else if (ifp->if_flags & XFS_IFEXTIREC) {
-               xfs_ext_irec_t  *erp;           /* irec pointer */
-               int             erp_idx = 0;    /* irec index */
-               xfs_extnum_t    page_idx = idx; /* ext index in target list */
-
-               erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
-               return &erp->er_extbuf[page_idx];
-       } else if (ifp->if_bytes) {
-               return &ifp->if_u1.if_extents[idx];
-       } else {
-               return NULL;
-       }
-}
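
When the fork has grown into the indirection array, a file-wide extent index has to be translated into a (page, offset-within-page) pair; that is what xfs_iext_idx_to_irec() does with a binary search over er_extoff. A toy linear version with made-up types is enough to show the translation, assuming pages are ordered by extoff:

#include <stddef.h>
#include <stdint.h>

struct rec { uint64_t l0, l1; };	/* simplified extent record */

struct irec {				/* a page of records plus its offset */
	struct rec	*buf;		/* extent records in this page */
	int		extoff;		/* file-wide index of buf[0] */
	int		extcount;	/* records in use in this page */
};

/* Return the record at file-wide index 'idx', or NULL if out of range. */
static struct rec *irec_lookup(struct irec *irecs, int nlists, int idx)
{
	int i;

	for (i = 0; i < nlists; i++) {
		if (idx < irecs[i].extoff + irecs[i].extcount)
			return &irecs[i].buf[idx - irecs[i].extoff];
	}
	return NULL;
}
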
-
-/*
- * Insert new item(s) into the extent records for incore inode
- * fork 'ifp'.  'count' new items are inserted at index 'idx'.
- */
-void
-xfs_iext_insert(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_extnum_t    idx,            /* starting index of new items */
-       xfs_extnum_t    count,          /* number of inserted items */
-       xfs_bmbt_irec_t *new,           /* items to insert */
-       int             state)          /* type of extent conversion */
-{
-       xfs_ifork_t     *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
-       xfs_extnum_t    i;              /* extent record index */
-
-       trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
-
-       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-       xfs_iext_add(ifp, idx, count);
-       for (i = idx; i < idx + count; i++, new++)
-               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be increased. The ext_diff parameter stores the
- * number of new extents being added and the idx parameter contains
- * the extent index where the new extents will be added. If the new
- * extents are being appended, then we just need to (re)allocate and
- * initialize the space. Otherwise, if the new extents are being
- * inserted into the middle of the existing entries, a bit more work
- * is required to make room for the new extents to be inserted. The
- * caller is responsible for filling in the new extent entries upon
- * return.
- */
-void
-xfs_iext_add(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    idx,            /* index to begin adding exts */
-       int             ext_diff)       /* number of extents to add */
-{
-       int             byte_diff;      /* new bytes being added */
-       int             new_size;       /* size of extents after adding */
-       xfs_extnum_t    nextents;       /* number of extents in file */
-
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       ASSERT((idx >= 0) && (idx <= nextents));
-       byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
-       new_size = ifp->if_bytes + byte_diff;
-       /*
-        * If the new number of extents (nextents + ext_diff)
-        * fits inside the inode, then continue to use the inline
-        * extent buffer.
-        */
-       if (nextents + ext_diff <= XFS_INLINE_EXTS) {
-               if (idx < nextents) {
-                       memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
-                               &ifp->if_u2.if_inline_ext[idx],
-                               (nextents - idx) * sizeof(xfs_bmbt_rec_t));
-                       memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
-               }
-               ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-               ifp->if_real_bytes = 0;
-       }
-       /*
-        * Otherwise use a linear (direct) extent list.
-        * If the extents are currently inside the inode,
-        * xfs_iext_realloc_direct will switch us from
-        * inline to direct extent allocation mode.
-        */
-       else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
-               xfs_iext_realloc_direct(ifp, new_size);
-               if (idx < nextents) {
-                       memmove(&ifp->if_u1.if_extents[idx + ext_diff],
-                               &ifp->if_u1.if_extents[idx],
-                               (nextents - idx) * sizeof(xfs_bmbt_rec_t));
-                       memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
-               }
-       }
-       /* Indirection array */
-       else {
-               xfs_ext_irec_t  *erp;
-               int             erp_idx = 0;
-               int             page_idx = idx;
-
-               ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
-               if (ifp->if_flags & XFS_IFEXTIREC) {
-                       erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
-               } else {
-                       xfs_iext_irec_init(ifp);
-                       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-                       erp = ifp->if_u1.if_ext_irec;
-               }
-               /* Extents fit in target extent page */
-               if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
-                       if (page_idx < erp->er_extcount) {
-                               memmove(&erp->er_extbuf[page_idx + ext_diff],
-                                       &erp->er_extbuf[page_idx],
-                                       (erp->er_extcount - page_idx) *
-                                       sizeof(xfs_bmbt_rec_t));
-                               memset(&erp->er_extbuf[page_idx], 0, byte_diff);
-                       }
-                       erp->er_extcount += ext_diff;
-                       xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-               }
-               /* Insert a new extent page */
-               else if (erp) {
-                       xfs_iext_add_indirect_multi(ifp,
-                               erp_idx, page_idx, ext_diff);
-               }
-               /*
-                * If extent(s) are being appended to the last page in
-                * the indirection array and the new extent(s) don't fit
-                * in the page, then erp is NULL and erp_idx is set to
-                * the next index needed in the indirection array.
-                */
-               else {
-                       uint    count = ext_diff;
-
-                       while (count) {
-                               erp = xfs_iext_irec_new(ifp, erp_idx);
-                               erp->er_extcount = min(count, XFS_LINEAR_EXTS);
-                               count -= erp->er_extcount;
-                               if (count)
-                                       erp_idx++;
-                       }
-               }
-       }
-       ifp->if_bytes = new_size;
-}
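
The inline and direct cases above share one core move: open a zeroed gap of ext_diff records at idx with memmove() and let the caller fill it. A self-contained sketch of that pattern (made-up helper and record type; the array must already have room for nrecs + count records):

#include <string.h>
#include <stdint.h>

struct rec { uint64_t l0, l1; };	/* stand-in for xfs_bmbt_rec_t */

/* Open a zeroed gap of 'count' records at 'idx' in an array of 'nrecs'. */
static void make_gap(struct rec *recs, int nrecs, int idx, int count)
{
	if (idx < nrecs)
		memmove(&recs[idx + count], &recs[idx],
			(size_t)(nrecs - idx) * sizeof(struct rec));
	memset(&recs[idx], 0, (size_t)count * sizeof(struct rec));
}
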
-
-/*
- * This is called when incore extents are being added to the indirection
- * array and the new extents do not fit in the target extent list. The
- * erp_idx parameter contains the irec index for the target extent list
- * in the indirection array, and the idx parameter contains the extent
- * index within the list. The number of extents being added is stored
- * in the count parameter.
- *
- *    |-------|   |-------|
- *    |       |   |       |    idx - number of extents before idx
- *    |  idx  |   | count |
- *    |       |   |       |    count - number of extents being inserted at idx
- *    |-------|   |-------|
- *    | count |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_add_indirect_multi(
-       xfs_ifork_t     *ifp,                   /* inode fork pointer */
-       int             erp_idx,                /* target extent irec index */
-       xfs_extnum_t    idx,                    /* index within target list */
-       int             count)                  /* new extents being added */
-{
-       int             byte_diff;              /* new bytes being added */
-       xfs_ext_irec_t  *erp;                   /* pointer to irec entry */
-       xfs_extnum_t    ext_diff;               /* number of extents to add */
-       xfs_extnum_t    ext_cnt;                /* new extents still needed */
-       xfs_extnum_t    nex2;                   /* extents after idx + count */
-       xfs_bmbt_rec_t  *nex2_ep = NULL;        /* temp list for nex2 extents */
-       int             nlists;                 /* number of irec's (lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       erp = &ifp->if_u1.if_ext_irec[erp_idx];
-       nex2 = erp->er_extcount - idx;
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-       /*
-        * Save second part of target extent list
-        * (all extents past idx in the target list).
-        */
-       if (nex2) {
-               byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-               nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
-               memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
-               erp->er_extcount -= nex2;
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
-               memset(&erp->er_extbuf[idx], 0, byte_diff);
-       }
-
-       /*
-        * Add the new extents to the end of the target
-        * list, then allocate new irec record(s) and
-        * extent buffer(s) as needed to store the rest
-        * of the new extents.
-        */
-       ext_cnt = count;
-       ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
-       if (ext_diff) {
-               erp->er_extcount += ext_diff;
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-               ext_cnt -= ext_diff;
-       }
-       while (ext_cnt) {
-               erp_idx++;
-               erp = xfs_iext_irec_new(ifp, erp_idx);
-               ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
-               erp->er_extcount = ext_diff;
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-               ext_cnt -= ext_diff;
-       }
-
-       /* Add nex2 extents back to indirection array */
-       if (nex2) {
-               xfs_extnum_t    ext_avail;
-               int             i;
-
-               byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-               ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-               i = 0;
-               /*
-                * If nex2 extents fit in the current page, append
-                * nex2_ep after the new extents.
-                */
-               if (nex2 <= ext_avail) {
-                       i = erp->er_extcount;
-               }
-               /*
-                * Otherwise, check if space is available in the
-                * next page.
-                */
-               else if ((erp_idx < nlists - 1) &&
-                        (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
-                         ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
-                       erp_idx++;
-                       erp++;
-                       /* Create a hole for nex2 extents */
-                       memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
-                               erp->er_extcount * sizeof(xfs_bmbt_rec_t));
-               }
-               /*
-                * Final choice, create a new extent page for
-                * nex2 extents.
-                */
-               else {
-                       erp_idx++;
-                       erp = xfs_iext_irec_new(ifp, erp_idx);
-               }
-               memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-               kmem_free(nex2_ep);
-               erp->er_extcount += nex2;
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
-       }
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be decreased. The ext_diff parameter stores the
- * number of extents to be removed and the idx parameter contains
- * the extent index where the extents will be removed from.
- *
- * If the amount of space needed has decreased below the linear
- * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
- * extent array.  Otherwise, use kmem_realloc() to adjust the
- * size to what is needed.
- */
-void
-xfs_iext_remove(
-       xfs_inode_t     *ip,            /* incore inode pointer */
-       xfs_extnum_t    idx,            /* index to begin removing exts */
-       int             ext_diff,       /* number of extents to remove */
-       int             state)          /* type of extent conversion */
-{
-       xfs_ifork_t     *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
-       xfs_extnum_t    nextents;       /* number of extents in file */
-       int             new_size;       /* size of extents after removal */
-
-       trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
-
-       ASSERT(ext_diff > 0);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
-
-       if (new_size == 0) {
-               xfs_iext_destroy(ifp);
-       } else if (ifp->if_flags & XFS_IFEXTIREC) {
-               xfs_iext_remove_indirect(ifp, idx, ext_diff);
-       } else if (ifp->if_real_bytes) {
-               xfs_iext_remove_direct(ifp, idx, ext_diff);
-       } else {
-               xfs_iext_remove_inline(ifp, idx, ext_diff);
-       }
-       ifp->if_bytes = new_size;
-}
-
-/*
- * This removes ext_diff extents from the inline buffer, beginning
- * at extent index idx.
- */
-void
-xfs_iext_remove_inline(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    idx,            /* index to begin removing exts */
-       int             ext_diff)       /* number of extents to remove */
-{
-       int             nextents;       /* number of extents in file */
-
-       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-       ASSERT(idx < XFS_INLINE_EXTS);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       ASSERT(((nextents - ext_diff) > 0) &&
-               (nextents - ext_diff) < XFS_INLINE_EXTS);
-
-       if (idx + ext_diff < nextents) {
-               memmove(&ifp->if_u2.if_inline_ext[idx],
-                       &ifp->if_u2.if_inline_ext[idx + ext_diff],
-                       (nextents - (idx + ext_diff)) *
-                        sizeof(xfs_bmbt_rec_t));
-               memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
-                       0, ext_diff * sizeof(xfs_bmbt_rec_t));
-       } else {
-               memset(&ifp->if_u2.if_inline_ext[idx], 0,
-                       ext_diff * sizeof(xfs_bmbt_rec_t));
-       }
-}
-
-/*
- * This removes ext_diff extents from a linear (direct) extent list,
- * beginning at extent index idx. If the extents are being removed
- * from the end of the list (i.e. truncate) then we just need to re-
- * allocate the list to remove the extra space. Otherwise, if the
- * extents are being removed from the middle of the existing extent
- * entries, then we first need to move the extent records beginning
- * at idx + ext_diff up in the list to overwrite the records being
- * removed, then remove the extra space via kmem_realloc.
- */
-void
-xfs_iext_remove_direct(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    idx,            /* index to begin removing exts */
-       int             ext_diff)       /* number of extents to remove */
-{
-       xfs_extnum_t    nextents;       /* number of extents in file */
-       int             new_size;       /* size of extents after removal */
-
-       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-       new_size = ifp->if_bytes -
-               (ext_diff * sizeof(xfs_bmbt_rec_t));
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-
-       if (new_size == 0) {
-               xfs_iext_destroy(ifp);
-               return;
-       }
-       /* Move extents up in the list (if needed) */
-       if (idx + ext_diff < nextents) {
-               memmove(&ifp->if_u1.if_extents[idx],
-                       &ifp->if_u1.if_extents[idx + ext_diff],
-                       (nextents - (idx + ext_diff)) *
-                        sizeof(xfs_bmbt_rec_t));
-       }
-       memset(&ifp->if_u1.if_extents[nextents - ext_diff],
-               0, ext_diff * sizeof(xfs_bmbt_rec_t));
-       /*
-        * Reallocate the direct extent list. If the extents
-        * will fit inside the inode then xfs_iext_realloc_direct
-        * will switch from direct to inline extent allocation
-        * mode for us.
-        */
-       xfs_iext_realloc_direct(ifp, new_size);
-       ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being removed from the
- * indirection array and the extents being removed span multiple extent
- * buffers. The idx parameter contains the file extent index where we
- * want to begin removing extents, and the count parameter contains
- * how many extents need to be removed.
- *
- *    |-------|   |-------|
- *    | nex1  |   |       |    nex1 - number of extents before idx
- *    |-------|   | count |
- *    |       |   |       |    count - number of extents being removed at idx
- *    | count |   |-------|
- *    |       |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_remove_indirect(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    idx,            /* index to begin removing extents */
-       int             count)          /* number of extents to remove */
-{
-       xfs_ext_irec_t  *erp;           /* indirection array pointer */
-       int             erp_idx = 0;    /* indirection array index */
-       xfs_extnum_t    ext_cnt;        /* extents left to remove */
-       xfs_extnum_t    ext_diff;       /* extents to remove in current list */
-       xfs_extnum_t    nex1;           /* number of extents before idx */
-       xfs_extnum_t    nex2;           /* extents after idx + count */
-       int             page_idx = idx; /* index in target extent list */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
-       ASSERT(erp != NULL);
-       nex1 = page_idx;
-       ext_cnt = count;
-       while (ext_cnt) {
-               nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
-               ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
-               /*
-                * Check for deletion of entire list;
-                * xfs_iext_irec_remove() updates extent offsets.
-                */
-               if (ext_diff == erp->er_extcount) {
-                       xfs_iext_irec_remove(ifp, erp_idx);
-                       ext_cnt -= ext_diff;
-                       nex1 = 0;
-                       if (ext_cnt) {
-                               ASSERT(erp_idx < ifp->if_real_bytes /
-                                       XFS_IEXT_BUFSZ);
-                               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                               nex1 = 0;
-                               continue;
-                       } else {
-                               break;
-                       }
-               }
-               /* Move extents up (if needed) */
-               if (nex2) {
-                       memmove(&erp->er_extbuf[nex1],
-                               &erp->er_extbuf[nex1 + ext_diff],
-                               nex2 * sizeof(xfs_bmbt_rec_t));
-               }
-               /* Zero out rest of page */
-               memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
-                       ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
-               /* Update remaining counters */
-               erp->er_extcount -= ext_diff;
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
-               ext_cnt -= ext_diff;
-               nex1 = 0;
-               erp_idx++;
-               erp++;
-       }
-       ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
-       xfs_iext_irec_compact(ifp);
-}
-
-/*
- * Create, destroy, or resize a linear (direct) block of extents.
- */
-void
-xfs_iext_realloc_direct(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             new_size)       /* new size of extents after adding */
-{
-       int             rnew_size;      /* real new size of extents */
-
-       rnew_size = new_size;
-
-       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
-               ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
-                (new_size != ifp->if_real_bytes)));
-
-       /* Free extent records */
-       if (new_size == 0) {
-               xfs_iext_destroy(ifp);
-       }
-       /* Resize direct extent list and zero any new bytes */
-       else if (ifp->if_real_bytes) {
-               /* Check if extents will fit inside the inode */
-               if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
-                       xfs_iext_direct_to_inline(ifp, new_size /
-                               (uint)sizeof(xfs_bmbt_rec_t));
-                       ifp->if_bytes = new_size;
-                       return;
-               }
-               if (!is_power_of_2(new_size)) {
-                       rnew_size = roundup_pow_of_two(new_size);
-               }
-               if (rnew_size != ifp->if_real_bytes) {
-                       ifp->if_u1.if_extents =
-                               kmem_realloc(ifp->if_u1.if_extents,
-                                               rnew_size,
-                                               ifp->if_real_bytes, KM_NOFS);
-               }
-               if (rnew_size > ifp->if_real_bytes) {
-                       memset(&ifp->if_u1.if_extents[ifp->if_bytes /
-                               (uint)sizeof(xfs_bmbt_rec_t)], 0,
-                               rnew_size - ifp->if_real_bytes);
-               }
-       }
-       /* Switch from the inline extent buffer to a direct extent list */
-       else {
-               if (!is_power_of_2(new_size)) {
-                       rnew_size = roundup_pow_of_two(new_size);
-               }
-               xfs_iext_inline_to_direct(ifp, rnew_size);
-       }
-       ifp->if_real_bytes = rnew_size;
-       ifp->if_bytes = new_size;
-}
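
The direct list grows by rounding the requested byte count up to the next power of two, so repeated single-extent additions trigger a logarithmic number of reallocations rather than one per extent, and a realloc only happens when the rounded size actually differs from if_real_bytes. A portable equivalent of the roundup_pow_of_two() step (assuming a non-zero request; made-up helper name):

#include <stddef.h>

/* Round n up to the next power of two (n > 0). */
static size_t roundup_pow2(size_t n)
{
	size_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}
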
-
-/*
- * Switch from linear (direct) extent records to inline buffer.
- */
-void
-xfs_iext_direct_to_inline(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    nextents)       /* number of extents in file */
-{
-       ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-       ASSERT(nextents <= XFS_INLINE_EXTS);
-       /*
-        * The inline buffer was zeroed when we switched
-        * from inline to direct extent allocation mode,
-        * so we don't need to clear it here.
-        */
-       memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
-               nextents * sizeof(xfs_bmbt_rec_t));
-       kmem_free(ifp->if_u1.if_extents);
-       ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-       ifp->if_real_bytes = 0;
-}
-
-/*
- * Switch from inline buffer to linear (direct) extent records.
- * new_size should already be rounded up to the next power of 2
- * by the caller (when appropriate), so use new_size as it is.
- * However, since new_size may be rounded up, we can't update
- * if_bytes here. It is the caller's responsibility to update
- * if_bytes upon return.
- */
-void
-xfs_iext_inline_to_direct(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             new_size)       /* new size of extent buffer, in bytes */
-{
-       ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
-       memset(ifp->if_u1.if_extents, 0, new_size);
-       if (ifp->if_bytes) {
-               memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
-                       ifp->if_bytes);
-               memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-                       sizeof(xfs_bmbt_rec_t));
-       }
-       ifp->if_real_bytes = new_size;
-}
-
-/*
- * Resize an extent indirection array to new_size bytes.
- */
-STATIC void
-xfs_iext_realloc_indirect(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             new_size)       /* new indirection array size */
-{
-       int             nlists;         /* number of irec's (ex lists) */
-       int             size;           /* current indirection array size */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       size = nlists * sizeof(xfs_ext_irec_t);
-       ASSERT(ifp->if_real_bytes);
-       ASSERT((new_size >= 0) && (new_size != size));
-       if (new_size == 0) {
-               xfs_iext_destroy(ifp);
-       } else {
-               ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
-                       kmem_realloc(ifp->if_u1.if_ext_irec,
-                               new_size, size, KM_NOFS);
-       }
-}
-
-/*
- * Switch from indirection array to linear (direct) extent allocations.
- */
-STATIC void
-xfs_iext_indirect_to_direct(
-        xfs_ifork_t    *ifp)           /* inode fork pointer */
-{
-       xfs_bmbt_rec_host_t *ep;        /* extent record pointer */
-       xfs_extnum_t    nextents;       /* number of extents in file */
-       int             size;           /* size of file extents */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       ASSERT(nextents <= XFS_LINEAR_EXTS);
-       size = nextents * sizeof(xfs_bmbt_rec_t);
-
-       xfs_iext_irec_compact_pages(ifp);
-       ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
-
-       ep = ifp->if_u1.if_ext_irec->er_extbuf;
-       kmem_free(ifp->if_u1.if_ext_irec);
-       ifp->if_flags &= ~XFS_IFEXTIREC;
-       ifp->if_u1.if_extents = ep;
-       ifp->if_bytes = size;
-       if (nextents < XFS_LINEAR_EXTS) {
-               xfs_iext_realloc_direct(ifp, size);
-       }
-}
-
-/*
- * Free incore file extents.
- */
-void
-xfs_iext_destroy(
-       xfs_ifork_t     *ifp)           /* inode fork pointer */
-{
-       if (ifp->if_flags & XFS_IFEXTIREC) {
-               int     erp_idx;
-               int     nlists;
-
-               nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-               for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-                       xfs_iext_irec_remove(ifp, erp_idx);
-               }
-               ifp->if_flags &= ~XFS_IFEXTIREC;
-       } else if (ifp->if_real_bytes) {
-               kmem_free(ifp->if_u1.if_extents);
-       } else if (ifp->if_bytes) {
-               memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-                       sizeof(xfs_bmbt_rec_t));
-       }
-       ifp->if_u1.if_extents = NULL;
-       ifp->if_real_bytes = 0;
-       ifp->if_bytes = 0;
-}
-
-/*
- * Return a pointer to the extent record for file system block bno.
- */
-xfs_bmbt_rec_host_t *                  /* pointer to found extent record */
-xfs_iext_bno_to_ext(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_fileoff_t   bno,            /* block number to search for */
-       xfs_extnum_t    *idxp)          /* index of target extent */
-{
-       xfs_bmbt_rec_host_t *base;      /* pointer to first extent */
-       xfs_filblks_t   blockcount = 0; /* number of blocks in extent */
-       xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
-       xfs_ext_irec_t  *erp = NULL;    /* indirection array pointer */
-       int             high;           /* upper boundary in search */
-       xfs_extnum_t    idx = 0;        /* index of target extent */
-       int             low;            /* lower boundary in search */
-       xfs_extnum_t    nextents;       /* number of file extents */
-       xfs_fileoff_t   startoff = 0;   /* start offset of extent */
-
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       if (nextents == 0) {
-               *idxp = 0;
-               return NULL;
-       }
-       low = 0;
-       if (ifp->if_flags & XFS_IFEXTIREC) {
-               /* Find target extent list */
-               int     erp_idx = 0;
-               erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
-               base = erp->er_extbuf;
-               high = erp->er_extcount - 1;
-       } else {
-               base = ifp->if_u1.if_extents;
-               high = nextents - 1;
-       }
-       /* Binary search extent records */
-       while (low <= high) {
-               idx = (low + high) >> 1;
-               ep = base + idx;
-               startoff = xfs_bmbt_get_startoff(ep);
-               blockcount = xfs_bmbt_get_blockcount(ep);
-               if (bno < startoff) {
-                       high = idx - 1;
-               } else if (bno >= startoff + blockcount) {
-                       low = idx + 1;
-               } else {
-                       /* Convert back to file-based extent index */
-                       if (ifp->if_flags & XFS_IFEXTIREC) {
-                               idx += erp->er_extoff;
-                       }
-                       *idxp = idx;
-                       return ep;
-               }
-       }
-       /* Convert back to file-based extent index */
-       if (ifp->if_flags & XFS_IFEXTIREC) {
-               idx += erp->er_extoff;
-       }
-       if (bno >= startoff + blockcount) {
-               if (++idx == nextents) {
-                       ep = NULL;
-               } else {
-                       ep = xfs_iext_get_ext(ifp, idx);
-               }
-       }
-       *idxp = idx;
-       return ep;
-}
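
The lookup above is a binary search over extents sorted by start offset, with the "point at the next extent when bno lands in a hole" convention handled after the loop. A compact user-space version under the same assumptions (sorted, non-overlapping extents; simplified struct and function name):

#include <stdint.h>
#include <stddef.h>

struct ext { uint64_t startoff, blockcount; };	/* simplified extent */

/*
 * Return the index of the extent containing block 'bno', or the index of
 * the first extent starting after 'bno' (which may be 'n') when 'bno'
 * falls in a hole.
 */
static size_t ext_lookup(const struct ext *x, size_t n, uint64_t bno)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (bno < x[mid].startoff)
			hi = mid;
		else if (bno >= x[mid].startoff + x[mid].blockcount)
			lo = mid + 1;
		else
			return mid;	/* bno falls inside this extent */
	}
	return lo;			/* first extent past the hole */
}
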
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record for filesystem block bno. Store the index of the
- * target irec in *erp_idxp.
- */
-xfs_ext_irec_t *                       /* pointer to found extent record */
-xfs_iext_bno_to_irec(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_fileoff_t   bno,            /* block number to search for */
-       int             *erp_idxp)      /* irec index of target ext list */
-{
-       xfs_ext_irec_t  *erp = NULL;    /* indirection array pointer */
-       xfs_ext_irec_t  *erp_next;      /* next indirection array entry */
-       int             erp_idx;        /* indirection array index */
-       int             nlists;         /* number of extent irec's (lists) */
-       int             high;           /* binary search upper limit */
-       int             low;            /* binary search lower limit */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       erp_idx = 0;
-       low = 0;
-       high = nlists - 1;
-       while (low <= high) {
-               erp_idx = (low + high) >> 1;
-               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-               erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
-               if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
-                       high = erp_idx - 1;
-               } else if (erp_next && bno >=
-                          xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
-                       low = erp_idx + 1;
-               } else {
-                       break;
-               }
-       }
-       *erp_idxp = erp_idx;
-       return erp;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record at file extent index *idxp. Store the index of the
- * target irec in *erp_idxp and store the page index of the target
- * extent record in *idxp.
- */
-xfs_ext_irec_t *
-xfs_iext_idx_to_irec(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       xfs_extnum_t    *idxp,          /* extent index (file -> page) */
-       int             *erp_idxp,      /* pointer to target irec */
-       int             realloc)        /* new bytes were just added */
-{
-       xfs_ext_irec_t  *prev;          /* pointer to previous irec */
-       xfs_ext_irec_t  *erp = NULL;    /* pointer to current irec */
-       int             erp_idx;        /* indirection array index */
-       int             nlists;         /* number of irec's (ex lists) */
-       int             high;           /* binary search upper limit */
-       int             low;            /* binary search lower limit */
-       xfs_extnum_t    page_idx = *idxp; /* extent index in target list */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       ASSERT(page_idx >= 0);
-       ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
-       ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
-
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       erp_idx = 0;
-       low = 0;
-       high = nlists - 1;
-
-       /* Binary search extent irec's */
-       while (low <= high) {
-               erp_idx = (low + high) >> 1;
-               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-               prev = erp_idx > 0 ? erp - 1 : NULL;
-               if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
-                    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
-                       high = erp_idx - 1;
-               } else if (page_idx > erp->er_extoff + erp->er_extcount ||
-                          (page_idx == erp->er_extoff + erp->er_extcount &&
-                           !realloc)) {
-                       low = erp_idx + 1;
-               } else if (page_idx == erp->er_extoff + erp->er_extcount &&
-                          erp->er_extcount == XFS_LINEAR_EXTS) {
-                       ASSERT(realloc);
-                       page_idx = 0;
-                       erp_idx++;
-                       erp = erp_idx < nlists ? erp + 1 : NULL;
-                       break;
-               } else {
-                       page_idx -= erp->er_extoff;
-                       break;
-               }
-       }
-       *idxp = page_idx;
-       *erp_idxp = erp_idx;
-       return erp;
-}
-
-/*
- * Allocate and initialize an indirection array once the space needed
- * for incore extents increases above XFS_IEXT_BUFSZ.
- */
-void
-xfs_iext_irec_init(
-       xfs_ifork_t     *ifp)           /* inode fork pointer */
-{
-       xfs_ext_irec_t  *erp;           /* indirection array pointer */
-       xfs_extnum_t    nextents;       /* number of extents in file */
-
-       ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       ASSERT(nextents <= XFS_LINEAR_EXTS);
-
-       erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
-
-       if (nextents == 0) {
-               ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-       } else if (!ifp->if_real_bytes) {
-               xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
-       } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
-               xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
-       }
-       erp->er_extbuf = ifp->if_u1.if_extents;
-       erp->er_extcount = nextents;
-       erp->er_extoff = 0;
-
-       ifp->if_flags |= XFS_IFEXTIREC;
-       ifp->if_real_bytes = XFS_IEXT_BUFSZ;
-       ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
-       ifp->if_u1.if_ext_irec = erp;
-
-       return;
-}
-
-/*
- * Allocate and initialize a new entry in the indirection array.
- */
-xfs_ext_irec_t *
-xfs_iext_irec_new(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             erp_idx)        /* index for new irec */
-{
-       xfs_ext_irec_t  *erp;           /* indirection array pointer */
-       int             i;              /* loop counter */
-       int             nlists;         /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-       /* Resize indirection array */
-       xfs_iext_realloc_indirect(ifp, ++nlists *
-                                 sizeof(xfs_ext_irec_t));
-       /*
-        * Move records down in the array so the
-        * new page can use erp_idx.
-        */
-       erp = ifp->if_u1.if_ext_irec;
-       for (i = nlists - 1; i > erp_idx; i--) {
-               memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
-       }
-       ASSERT(i == erp_idx);
-
-       /* Initialize new extent record */
-       erp = ifp->if_u1.if_ext_irec;
-       erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-       ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-       memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
-       erp[erp_idx].er_extcount = 0;
-       erp[erp_idx].er_extoff = erp_idx > 0 ?
-               erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
-       return (&erp[erp_idx]);
-}
-
-/*
- * Remove a record from the indirection array.
- */
-void
-xfs_iext_irec_remove(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             erp_idx)        /* irec index to remove */
-{
-       xfs_ext_irec_t  *erp;           /* indirection array pointer */
-       int             i;              /* loop counter */
-       int             nlists;         /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       erp = &ifp->if_u1.if_ext_irec[erp_idx];
-       if (erp->er_extbuf) {
-               xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-                       -erp->er_extcount);
-               kmem_free(erp->er_extbuf);
-       }
-       /* Compact extent records */
-       erp = ifp->if_u1.if_ext_irec;
-       for (i = erp_idx; i < nlists - 1; i++) {
-               memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
-       }
-       /*
-        * Manually free the last extent record from the indirection
-        * array.  A call to xfs_iext_realloc_indirect() with a size
-        * of zero would result in a call to xfs_iext_destroy() which
-        * would in turn call this function again, creating a nasty
-        * infinite loop.
-        */
-       if (--nlists) {
-               xfs_iext_realloc_indirect(ifp,
-                       nlists * sizeof(xfs_ext_irec_t));
-       } else {
-               kmem_free(ifp->if_u1.if_ext_irec);
-       }
-       ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-}
-
-/*
- * This is called to clean up large amounts of unused memory allocated
- * by the indirection array.  Before compacting anything though, verify
- * that the indirection array is still needed and switch back to the
- * linear extent list (or even the inline buffer) if possible.  The
- * compaction policy is as follows:
- *
- *    Full Compaction: Extents fit into a single page (or inline buffer)
- * Partial Compaction: Extents occupy less than 50% of allocated space
- *      No Compaction: Extents occupy at least 50% of allocated space
- */
-void
-xfs_iext_irec_compact(
-       xfs_ifork_t     *ifp)           /* inode fork pointer */
-{
-       xfs_extnum_t    nextents;       /* number of extents in file */
-       int             nlists;         /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-
-       if (nextents == 0) {
-               xfs_iext_destroy(ifp);
-       } else if (nextents <= XFS_INLINE_EXTS) {
-               xfs_iext_indirect_to_direct(ifp);
-               xfs_iext_direct_to_inline(ifp, nextents);
-       } else if (nextents <= XFS_LINEAR_EXTS) {
-               xfs_iext_indirect_to_direct(ifp);
-       } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
-               xfs_iext_irec_compact_pages(ifp);
-       }
-}
-
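The policy above boils down to a pair of threshold checks on the extent count. A minimal standalone sketch of the decision (not the kernel code; the INLINE_EXTS/LINEAR_EXTS values are illustrative assumptions, the real constants depend on the inline buffer and XFS_IEXT_BUFSZ):

#include <stdio.h>

/* Illustrative capacities; the real values derive from the fork layout. */
#define INLINE_EXTS	2	/* extents that fit in the inline buffer */
#define LINEAR_EXTS	256	/* extents that fit in one extent page */

static const char *compact_policy(int nextents, int nlists)
{
	if (nextents == 0)
		return "destroy the extent list";
	if (nextents <= INLINE_EXTS)
		return "full compaction into the inline buffer";
	if (nextents <= LINEAR_EXTS)
		return "full compaction into a single direct page";
	if (nextents < (nlists * LINEAR_EXTS) / 2)	/* under 50% utilised */
		return "partial compaction (merge neighbouring pages)";
	return "no compaction";
}

int main(void)
{
	printf("%s\n", compact_policy(2, 1));	/* -> inline buffer */
	printf("%s\n", compact_policy(300, 4));	/* -> partial compaction */
	printf("%s\n", compact_policy(600, 4));	/* -> no compaction */
	return 0;
}
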
-/*
- * Combine extents from neighboring extent pages.
- */
-void
-xfs_iext_irec_compact_pages(
-       xfs_ifork_t     *ifp)           /* inode fork pointer */
-{
-       xfs_ext_irec_t  *erp, *erp_next;/* pointers to irec entries */
-       int             erp_idx = 0;    /* indirection array index */
-       int             nlists;         /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       while (erp_idx < nlists - 1) {
-               erp = &ifp->if_u1.if_ext_irec[erp_idx];
-               erp_next = erp + 1;
-               if (erp_next->er_extcount <=
-                   (XFS_LINEAR_EXTS - erp->er_extcount)) {
-                       memcpy(&erp->er_extbuf[erp->er_extcount],
-                               erp_next->er_extbuf, erp_next->er_extcount *
-                               sizeof(xfs_bmbt_rec_t));
-                       erp->er_extcount += erp_next->er_extcount;
-                       /*
-                        * Free page before removing extent record
-                        * so er_extoffs don't get modified in
-                        * xfs_iext_irec_remove.
-                        */
-                       kmem_free(erp_next->er_extbuf);
-                       erp_next->er_extbuf = NULL;
-                       xfs_iext_irec_remove(ifp, erp_idx + 1);
-                       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-               } else {
-                       erp_idx++;
-               }
-       }
-}
-
-/*
- * This is called to update the er_extoff field in the indirection
- * array when extents have been added or removed from one of the
- * extent lists. erp_idx contains the irec index to begin updating
- * at and ext_diff contains the number of extents that were added
- * or removed.
- */
-void
-xfs_iext_irec_update_extoffs(
-       xfs_ifork_t     *ifp,           /* inode fork pointer */
-       int             erp_idx,        /* irec index to update */
-       int             ext_diff)       /* number of new extents */
-{
-       int             i;              /* loop counter */
-       int             nlists;         /* number of irec's (ex lists) */
-
-       ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-       for (i = erp_idx; i < nlists; i++) {
-               ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
-       }
-}
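Taken together, the functions above maintain one invariant: each indirection entry's er_extoff is the running total of er_extcount over all earlier entries, so a file-wide extent index maps to a (page, in-page index) pair. A minimal sketch of that mapping, done linearly for clarity where xfs_iext_idx_to_irec binary-searches (the page sizes below are made-up numbers):

#include <stdio.h>

/* one entry per extent page; er_extoff is the file-wide index of its first extent */
struct irec {
	int er_extoff;
	int er_extcount;
};

static int idx_to_page(const struct irec *erp, int nlists, int idx, int *page_idx)
{
	for (int i = 0; i < nlists; i++) {
		if (idx < erp[i].er_extoff + erp[i].er_extcount) {
			*page_idx = idx - erp[i].er_extoff;
			return i;
		}
	}
	return -1;	/* index beyond the last extent */
}

int main(void)
{
	struct irec erp[] = { { 0, 256 }, { 256, 100 }, { 356, 256 } };
	int page_idx;
	int page = idx_to_page(erp, 3, 300, &page_idx);

	printf("extent 300 lives in page %d at offset %d\n", page, page_idx);
	return 0;
}
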
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c
deleted file mode 100644 (file)
index ee7e0e8..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2013 Jie Liu.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_ag.h"
-#include "xfs_sb.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_trans_space.h"
-#include "xfs_inode.h"
-#include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_bmap_btree.h"
-
-/*
- * Calculate the maximum length in bytes that would be required for a local
- * attribute value as large attributes out of line are not logged.
- */
-STATIC int
-xfs_log_calc_max_attrsetm_res(
-       struct xfs_mount        *mp)
-{
-       int                     size;
-       int                     nblks;
-
-       size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -
-              MAXNAMELEN - 1;
-       nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
-       nblks += XFS_B_TO_FSB(mp, size);
-       nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
-
-       return  M_RES(mp)->tr_attrsetm.tr_logres +
-               M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
-}
-
-/*
- * Iterate over the log space reservation table to figure out and return
- * the maximum one in terms of the pre-calculated values which were done
- * at mount time.
- */
-STATIC void
-xfs_log_get_max_trans_res(
-       struct xfs_mount        *mp,
-       struct xfs_trans_res    *max_resp)
-{
-       struct xfs_trans_res    *resp;
-       struct xfs_trans_res    *end_resp;
-       int                     log_space = 0;
-       int                     attr_space;
-
-       attr_space = xfs_log_calc_max_attrsetm_res(mp);
-
-       resp = (struct xfs_trans_res *)M_RES(mp);
-       end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
-       for (; resp < end_resp; resp++) {
-               int             tmp = resp->tr_logcount > 1 ?
-                                     resp->tr_logres * resp->tr_logcount :
-                                     resp->tr_logres;
-               if (log_space < tmp) {
-                       log_space = tmp;
-                       *max_resp = *resp;              /* struct copy */
-               }
-       }
-
-       if (attr_space > log_space) {
-               *max_resp = M_RES(mp)->tr_attrsetm;     /* struct copy */
-               max_resp->tr_logres = attr_space;
-       }
-}
-
-/*
- * Calculate the minimum valid log size for the given superblock configuration.
- * Used to calculate the minimum log size at mkfs time, and to determine if
- * the log is large enough or not at mount time. Returns the minimum size in
- * filesystem block size units.
- */
-int
-xfs_log_calc_minimum_size(
-       struct xfs_mount        *mp)
-{
-       struct xfs_trans_res    tres = {0};
-       int                     max_logres;
-       int                     min_logblks = 0;
-       int                     lsunit = 0;
-
-       xfs_log_get_max_trans_res(mp, &tres);
-
-       max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
-       if (tres.tr_logcount > 1)
-               max_logres *= tres.tr_logcount;
-
-       if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
-               lsunit = BTOBB(mp->m_sb.sb_logsunit);
-
-       /*
-        * Two factors should be taken into account for calculating the minimum
-        * log space.
-        * 1) The fundamental limitation is that no single transaction can be
-        *    larger than half size of the log.
-        *
-        *    From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR
-        *    define, which is set to 3. That means we can definitely fit
-        *    maximally sized 2 transactions in the log. We'll use this same
-        *    value here.
-        *
-        * 2) If the lsunit option is specified, a transaction requires 2 LSU
-        *    for the reservation because there are two log writes that can
-        *    require padding - the transaction data and the commit record which
-        *    are written separately and both can require padding to the LSU.
-        *    Consider that we can have an active CIL reservation holding 2*LSU,
-        *    but the CIL is not over a push threshold; in this case, if we
-        *    don't have enough log space for at least one new transaction,
-        *    which includes another 2*LSU in the reservation, we will wait
-        *    forever in the log space grant procedure, i.e.
-        *    xlog_grant_head_wait().
-        *
-        *    Hence the log size needs to be able to contain two maximally sized
-        *    and padded transactions, which is (2 * (2 * LSU + maxlres)).
-        *
-        * Also, the log size should be a multiple of the log stripe unit;
-        * round it up to the lsunit boundary if lsunit is specified.
-        */
-       if (lsunit) {
-               min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
-                             2 * lsunit;
-       } else
-               min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
-       min_logblks *= XFS_MIN_LOG_FACTOR;
-
-       return XFS_BB_TO_FSB(mp, min_logblks);
-}
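To put the comment above into numbers, here is a sketch of the same formula with made-up figures; max_logres and the 32k log stripe unit are assumptions, and BTOBB/roundup_64 are open-coded:

#include <stdio.h>
#include <stdint.h>

#define BBSIZE		512	/* basic block size in bytes */
#define MIN_LOG_FACTOR	3	/* same idea as XFS_MIN_LOG_FACTOR */

static uint64_t btobb(uint64_t bytes)	/* bytes -> 512-byte basic blocks */
{
	return (bytes + BBSIZE - 1) / BBSIZE;
}

static uint64_t roundup64(uint64_t x, uint64_t y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	uint64_t max_logres = 512 * 1024;	/* largest reservation in bytes (assumed) */
	uint64_t lsunit = btobb(32 * 1024);	/* 32k log stripe unit, in basic blocks */

	/* one padded transaction: reservation rounded to the stripe unit, plus 2 LSU */
	uint64_t one_trans = roundup64(btobb(max_logres), lsunit) + 2 * lsunit;
	uint64_t min_logblks = one_trans * MIN_LOG_FACTOR;

	printf("minimum log size: %llu basic blocks (%llu KiB)\n",
	       (unsigned long long)min_logblks,
	       (unsigned long long)(min_logblks * BBSIZE / 1024));
	return 0;
}

With these numbers the result is 3456 basic blocks (1728 KiB); the real function then converts to filesystem blocks with XFS_BB_TO_FSB.
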
diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c
deleted file mode 100644 (file)
index f4dd697..0000000
+++ /dev/null
@@ -1,973 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
-#include "xfs_dinode.h"
-#include "xfs_rtalloc.h"
-
-
-/*
- * Realtime allocator bitmap functions shared with userspace.
- */
-
-/*
- * Get a buffer for the bitmap or summary file block specified.
- * The buffer is returned read and locked.
- */
-int
-xfs_rtbuf_get(
-       xfs_mount_t     *mp,            /* file system mount structure */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   block,          /* block number in bitmap or summary */
-       int             issum,          /* is summary not bitmap */
-       xfs_buf_t       **bpp)          /* output: buffer for the block */
-{
-       xfs_buf_t       *bp;            /* block buffer, result */
-       xfs_inode_t     *ip;            /* bitmap or summary inode */
-       xfs_bmbt_irec_t map;
-       int             nmap = 1;
-       int             error;          /* error value */
-
-       ip = issum ? mp->m_rsumip : mp->m_rbmip;
-
-       error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
-       if (error)
-               return error;
-
-       ASSERT(map.br_startblock != NULLFSBLOCK);
-       error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                  XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                                  mp->m_bsize, 0, &bp, NULL);
-       if (error)
-               return error;
-       *bpp = bp;
-       return 0;
-}
-
-/*
- * Searching backward from start to limit, find the first block whose
- * allocated/free state is different from start's.
- */
-int
-xfs_rtfind_back(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   start,          /* starting block to look at */
-       xfs_rtblock_t   limit,          /* last block to look at */
-       xfs_rtblock_t   *rtblock)       /* out: start block found */
-{
-       xfs_rtword_t    *b;             /* current word in buffer */
-       int             bit;            /* bit number in the word */
-       xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
-       xfs_rtword_t    *bufp;          /* starting word in buffer */
-       int             error;          /* error value */
-       xfs_rtblock_t   firstbit;       /* first useful bit in the word */
-       xfs_rtblock_t   i;              /* current bit number rel. to start */
-       xfs_rtblock_t   len;            /* length of inspected area */
-       xfs_rtword_t    mask;           /* mask of relevant bits for value */
-       xfs_rtword_t    want;           /* mask for "good" values */
-       xfs_rtword_t    wdiff;          /* difference from wanted value */
-       int             word;           /* word number in the buffer */
-
-       /*
-        * Compute and read in starting bitmap block for starting block.
-        */
-       block = XFS_BITTOBLOCK(mp, start);
-       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
-       if (error) {
-               return error;
-       }
-       bufp = bp->b_addr;
-       /*
-        * Get the first word's index & point to it.
-        */
-       word = XFS_BITTOWORD(mp, start);
-       b = &bufp[word];
-       bit = (int)(start & (XFS_NBWORD - 1));
-       len = start - limit + 1;
-       /*
-        * Compute match value, based on the bit at start: if 1 (free)
-        * then all-ones, else all-zeroes.
-        */
-       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
-       /*
-        * If the starting position is not word-aligned, deal with the
-        * partial word.
-        */
-       if (bit < XFS_NBWORD - 1) {
-               /*
-                * Calculate first (leftmost) bit number to look at,
-                * and mask for all the relevant bits in this word.
-                */
-               firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0);
-               mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) <<
-                       firstbit;
-               /*
-                * Calculate the difference between the value there
-                * and what we're looking for.
-                */
-               if ((wdiff = (*b ^ want) & mask)) {
-                       /*
-                        * Different.  Mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i = bit - XFS_RTHIBIT(wdiff);
-                       *rtblock = start - i + 1;
-                       return 0;
-               }
-               i = bit - firstbit + 1;
-               /*
-                * Go on to previous block if that's where the previous word is
-                * and we need the previous word.
-                */
-               if (--word == -1 && i < len) {
-                       /*
-                        * If done with this block, get the previous one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       bufp = bp->b_addr;
-                       word = XFS_BLOCKWMASK(mp);
-                       b = &bufp[word];
-               } else {
-                       /*
-                        * Go on to the previous word in the buffer.
-                        */
-                       b--;
-               }
-       } else {
-               /*
-                * Starting on a word boundary, no partial word.
-                */
-               i = 0;
-       }
-       /*
-        * Loop over whole words in buffers.  When we use up one buffer
-        * we move on to the previous one.
-        */
-       while (len - i >= XFS_NBWORD) {
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = *b ^ want)) {
-                       /*
-                        * Different, mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
-                       *rtblock = start - i + 1;
-                       return 0;
-               }
-               i += XFS_NBWORD;
-               /*
-                * Go on to previous block if that's where the previous word is
-                * and we need the previous word.
-                */
-               if (--word == -1 && i < len) {
-                       /*
-                        * If done with this block, get the previous one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       bufp = bp->b_addr;
-                       word = XFS_BLOCKWMASK(mp);
-                       b = &bufp[word];
-               } else {
-                       /*
-                        * Go on to the previous word in the buffer.
-                        */
-                       b--;
-               }
-       }
-       /*
-        * If not ending on a word boundary, deal with the last
-        * (partial) word.
-        */
-       if (len - i) {
-               /*
-                * Calculate first (leftmost) bit number to look at,
-                * and mask for all the relevant bits in this word.
-                */
-               firstbit = XFS_NBWORD - (len - i);
-               mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit;
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = (*b ^ want) & mask)) {
-                       /*
-                        * Different, mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
-                       *rtblock = start - i + 1;
-                       return 0;
-               } else
-                       i = len;
-       }
-       /*
-        * No match, return that we scanned the whole area.
-        */
-       xfs_trans_brelse(tp, bp);
-       *rtblock = start - i + 1;
-       return 0;
-}
-
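The search above hinges on one trick: build want as all-ones or all-zeroes from the state of the starting bit, then word ^ want has bits set exactly where the state differs, and the lowest (or, going backwards, highest) such bit marks the boundary. A small self-contained demonstration of the forward case (the word value is arbitrary and __builtin_ctz is a GCC/Clang builtin):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t word = 0x0000F0FF;	/* bitmap word: 1 = free, 0 = allocated */
	int	 bit  = 3;		/* starting block corresponds to bit 3 */

	/* all-ones if the starting block is free, all-zeroes if allocated */
	uint32_t want = (word & (1u << bit)) ? ~0u : 0u;

	/* only bits at or above the starting bit matter going forward */
	uint32_t wdiff = (word ^ want) & (~0u << bit);

	if (wdiff)
		printf("first block in the other state: bit %d\n",
		       __builtin_ctz(wdiff));
	else
		printf("whole word matches the starting state\n");
	return 0;
}

Here bits 0-7 are free and bit 8 is the first allocated block, so it prints bit 8.
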
-/*
- * Searching forward from start to limit, find the first block whose
- * allocated/free state is different from start's.
- */
-int
-xfs_rtfind_forw(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   start,          /* starting block to look at */
-       xfs_rtblock_t   limit,          /* last block to look at */
-       xfs_rtblock_t   *rtblock)       /* out: start block found */
-{
-       xfs_rtword_t    *b;             /* current word in buffer */
-       int             bit;            /* bit number in the word */
-       xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
-       xfs_rtword_t    *bufp;          /* starting word in buffer */
-       int             error;          /* error value */
-       xfs_rtblock_t   i;              /* current bit number rel. to start */
-       xfs_rtblock_t   lastbit;        /* last useful bit in the word */
-       xfs_rtblock_t   len;            /* length of inspected area */
-       xfs_rtword_t    mask;           /* mask of relevant bits for value */
-       xfs_rtword_t    want;           /* mask for "good" values */
-       xfs_rtword_t    wdiff;          /* difference from wanted value */
-       int             word;           /* word number in the buffer */
-
-       /*
-        * Compute and read in starting bitmap block for starting block.
-        */
-       block = XFS_BITTOBLOCK(mp, start);
-       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
-       if (error) {
-               return error;
-       }
-       bufp = bp->b_addr;
-       /*
-        * Get the first word's index & point to it.
-        */
-       word = XFS_BITTOWORD(mp, start);
-       b = &bufp[word];
-       bit = (int)(start & (XFS_NBWORD - 1));
-       len = limit - start + 1;
-       /*
-        * Compute match value, based on the bit at start: if 1 (free)
-        * then all-ones, else all-zeroes.
-        */
-       want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
-       /*
-        * If the starting position is not word-aligned, deal with the
-        * partial word.
-        */
-       if (bit) {
-               /*
-                * Calculate last (rightmost) bit number to look at,
-                * and mask for all the relevant bits in this word.
-                */
-               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
-               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
-               /*
-                * Calculate the difference between the value there
-                * and what we're looking for.
-                */
-               if ((wdiff = (*b ^ want) & mask)) {
-                       /*
-                        * Different.  Mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i = XFS_RTLOBIT(wdiff) - bit;
-                       *rtblock = start + i - 1;
-                       return 0;
-               }
-               i = lastbit - bit;
-               /*
-                * Go on to next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * If done with this block, get the next one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer.
-                        */
-                       b++;
-               }
-       } else {
-               /*
-                * Starting on a word boundary, no partial word.
-                */
-               i = 0;
-       }
-       /*
-        * Loop over whole words in buffers.  When we use up one buffer
-        * we move on to the next one.
-        */
-       while (len - i >= XFS_NBWORD) {
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = *b ^ want)) {
-                       /*
-                        * Different, mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_RTLOBIT(wdiff);
-                       *rtblock = start + i - 1;
-                       return 0;
-               }
-               i += XFS_NBWORD;
-               /*
-                * Go on to next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * If done with this block, get the next one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer.
-                        */
-                       b++;
-               }
-       }
-       /*
-        * If not ending on a word boundary, deal with the last
-        * (partial) word.
-        */
-       if ((lastbit = len - i)) {
-               /*
-                * Calculate mask for all the relevant bits in this word.
-                */
-               mask = ((xfs_rtword_t)1 << lastbit) - 1;
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = (*b ^ want) & mask)) {
-                       /*
-                        * Different, mark where we are and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_RTLOBIT(wdiff);
-                       *rtblock = start + i - 1;
-                       return 0;
-               } else
-                       i = len;
-       }
-       /*
-        * No match, return that we scanned the whole area.
-        */
-       xfs_trans_brelse(tp, bp);
-       *rtblock = start + i - 1;
-       return 0;
-}
-
-/*
- * Read and modify the summary information for a given extent size,
- * bitmap block combination.
- * Keeps track of a current summary block, so we don't keep reading
- * it from the buffer cache.
- */
-int
-xfs_rtmodify_summary(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       int             log,            /* log2 of extent size */
-       xfs_rtblock_t   bbno,           /* bitmap block number */
-       int             delta,          /* change to make to summary info */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
-       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
-{
-       xfs_buf_t       *bp;            /* buffer for the summary block */
-       int             error;          /* error value */
-       xfs_fsblock_t   sb;             /* summary fsblock */
-       int             so;             /* index into the summary file */
-       xfs_suminfo_t   *sp;            /* pointer to returned data */
-
-       /*
-        * Compute entry number in the summary file.
-        */
-       so = XFS_SUMOFFS(mp, log, bbno);
-       /*
-        * Compute the block number in the summary file.
-        */
-       sb = XFS_SUMOFFSTOBLOCK(mp, so);
-       /*
-        * If we have an old buffer, and the block number matches, use that.
-        */
-       if (rbpp && *rbpp && *rsb == sb)
-               bp = *rbpp;
-       /*
-        * Otherwise we have to get the buffer.
-        */
-       else {
-               /*
-                * If there was an old one, get rid of it first.
-                */
-               if (rbpp && *rbpp)
-                       xfs_trans_brelse(tp, *rbpp);
-               error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
-               if (error) {
-                       return error;
-               }
-               /*
-                * Remember this buffer and block for the next call.
-                */
-               if (rbpp) {
-                       *rbpp = bp;
-                       *rsb = sb;
-               }
-       }
-       /*
-        * Point to the summary information, modify and log it.
-        */
-       sp = XFS_SUMPTR(mp, bp, so);
-       *sp += delta;
-       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
-               (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
-       return 0;
-}
-
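For orientation, the summary file acts as a two-dimensional array of free-extent counters: one row per log2(extent length) size class, one column per bitmap block, flattened into the file. A sketch of the index arithmetic under that assumption (the row-major layout, block size and counter size here are illustrative, not lifted from the headers):

#include <stdio.h>

/* flatten (size class, bitmap block) into a summary-file entry index */
static int sumoffs(int log, int bbno, int rbmblocks)
{
	return log * rbmblocks + bbno;	/* row-major: one row per size class */
}

int main(void)
{
	int rbmblocks = 100;		/* bitmap blocks in the rt bitmap file (assumed) */
	int per_block = 4096 / 4;	/* 4k blocks holding 4-byte counters (assumed) */

	/* counter for free extents of length ~2^5 starting in bitmap block 42 */
	int so = sumoffs(5, 42, rbmblocks);

	printf("entry %d -> summary block %d, slot %d\n",
	       so, so / per_block, so % per_block);
	return 0;
}
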
-/*
- * Set the given range of bitmap bits to the given value.
- * Do whatever I/O and logging is required.
- */
-int
-xfs_rtmodify_range(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   start,          /* starting block to modify */
-       xfs_extlen_t    len,            /* length of extent to modify */
-       int             val)            /* 1 for free, 0 for allocated */
-{
-       xfs_rtword_t    *b;             /* current word in buffer */
-       int             bit;            /* bit number in the word */
-       xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
-       xfs_rtword_t    *bufp;          /* starting word in buffer */
-       int             error;          /* error value */
-       xfs_rtword_t    *first;         /* first used word in the buffer */
-       int             i;              /* current bit number rel. to start */
-       int             lastbit;        /* last useful bit in word */
-       xfs_rtword_t    mask;           /* mask of relevant bits for value */
-       int             word;           /* word number in the buffer */
-
-       /*
-        * Compute starting bitmap block number.
-        */
-       block = XFS_BITTOBLOCK(mp, start);
-       /*
-        * Read the bitmap block, and point to its data.
-        */
-       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
-       if (error) {
-               return error;
-       }
-       bufp = bp->b_addr;
-       /*
-        * Compute the starting word's address, and starting bit.
-        */
-       word = XFS_BITTOWORD(mp, start);
-       first = b = &bufp[word];
-       bit = (int)(start & (XFS_NBWORD - 1));
-       /*
-        * 0 (allocated) => all zeroes; 1 (free) => all ones.
-        */
-       val = -val;
-       /*
-        * If not starting on a word boundary, deal with the first
-        * (partial) word.
-        */
-       if (bit) {
-               /*
-                * Compute first bit not changed and mask of relevant bits.
-                */
-               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
-               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
-               /*
-                * Set/clear the active bits.
-                */
-               if (val)
-                       *b |= mask;
-               else
-                       *b &= ~mask;
-               i = lastbit - bit;
-               /*
-                * Go on to the next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * Log the changed part of this block.
-                        * Get the next one.
-                        */
-                       xfs_trans_log_buf(tp, bp,
-                               (uint)((char *)first - (char *)bufp),
-                               (uint)((char *)b - (char *)bufp));
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       first = b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer
-                        */
-                       b++;
-               }
-       } else {
-               /*
-                * Starting on a word boundary, no partial word.
-                */
-               i = 0;
-       }
-       /*
-        * Loop over whole words in buffers.  When we use up one buffer
-        * we move on to the next one.
-        */
-       while (len - i >= XFS_NBWORD) {
-               /*
-                * Set the word value correctly.
-                */
-               *b = val;
-               i += XFS_NBWORD;
-               /*
-                * Go on to the next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * Log the changed part of this block.
-                        * Get the next one.
-                        */
-                       xfs_trans_log_buf(tp, bp,
-                               (uint)((char *)first - (char *)bufp),
-                               (uint)((char *)b - (char *)bufp));
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       first = b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer
-                        */
-                       b++;
-               }
-       }
-       /*
-        * If not ending on a word boundary, deal with the last
-        * (partial) word.
-        */
-       if ((lastbit = len - i)) {
-               /*
-                * Compute a mask of relevant bits.
-                */
-               bit = 0;
-               mask = ((xfs_rtword_t)1 << lastbit) - 1;
-               /*
-                * Set/clear the active bits.
-                */
-               if (val)
-                       *b |= mask;
-               else
-                       *b &= ~mask;
-               b++;
-       }
-       /*
-        * Log any remaining changed bytes.
-        */
-       if (b > first)
-               xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
-                       (uint)((char *)b - (char *)bufp - 1));
-       return 0;
-}
-
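Both the first and last partial words above are handled with the same mask expression, (((type)1 << nbits) - 1) << startbit, OR-ed in to free a range and AND-ed out (inverted) to allocate it. A tiny self-contained check of that arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t word = 0;
	int	 bit = 5, lastbit = 12;		/* touch bits 5..11 of a 32-bit word */

	uint32_t mask = (((uint32_t)1 << (lastbit - bit)) - 1) << bit;

	word |= mask;			/* val != 0: mark the range free */
	printf("set:   0x%08x\n", (unsigned int)word);
	word &= ~mask;			/* val == 0: mark it allocated   */
	printf("clear: 0x%08x\n", (unsigned int)word);
	return 0;
}
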
-/*
- * Mark an extent specified by start and len freed.
- * Updates all the summary information as well as the bitmap.
- */
-int
-xfs_rtfree_range(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   start,          /* starting block to free */
-       xfs_extlen_t    len,            /* length to free */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
-       xfs_fsblock_t   *rsb)           /* in/out: summary block number */
-{
-       xfs_rtblock_t   end;            /* end of the freed extent */
-       int             error;          /* error value */
-       xfs_rtblock_t   postblock;      /* first block freed > end */
-       xfs_rtblock_t   preblock;       /* first block freed < start */
-
-       end = start + len - 1;
-       /*
-        * Modify the bitmap to mark this extent freed.
-        */
-       error = xfs_rtmodify_range(mp, tp, start, len, 1);
-       if (error) {
-               return error;
-       }
-       /*
-        * Assume we're freeing out of the middle of an allocated extent.
-        * We need to find the beginning and end of the extent so we can
-        * properly update the summary.
-        */
-       error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
-       if (error) {
-               return error;
-       }
-       /*
-        * Find the next allocated block (end of allocated extent).
-        */
-       error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
-               &postblock);
-       if (error)
-               return error;
-       /*
-        * If there are blocks not being freed at the front of the
-        * old extent, add summary data for them to be allocated.
-        */
-       if (preblock < start) {
-               error = xfs_rtmodify_summary(mp, tp,
-                       XFS_RTBLOCKLOG(start - preblock),
-                       XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
-               if (error) {
-                       return error;
-               }
-       }
-       /*
-        * If there are blocks not being freed at the end of the
-        * old extent, add summary data for them to be allocated.
-        */
-       if (postblock > end) {
-               error = xfs_rtmodify_summary(mp, tp,
-                       XFS_RTBLOCKLOG(postblock - end),
-                       XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
-               if (error) {
-                       return error;
-               }
-       }
-       /*
-        * Increment the summary information corresponding to the entire
-        * (new) free extent.
-        */
-       error = xfs_rtmodify_summary(mp, tp,
-               XFS_RTBLOCKLOG(postblock + 1 - preblock),
-               XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
-       return error;
-}
-
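The summary bookkeeping above works in size classes keyed by XFS_RTBLOCKLOG, i.e. the log2 of a free run's length: the two runs that already existed on either side of the freed range lose a count in their old classes, and the merged run gains one in its new class. A rough sketch of that arithmetic (the neighbouring run lengths are invented, and floor(log2) is assumed for the class function):

#include <stdio.h>

/* floor(log2(len)): the summary size class for a free run of 'len' rtextents */
static int size_class(unsigned long long len)
{
	int log = -1;

	while (len) {
		len >>= 1;
		log++;
	}
	return log;
}

int main(void)
{
	/* freeing 10 extents that merge with 3 free before and 5 free after */
	unsigned long long pre = 3, freed = 10, post = 5;

	printf("decrement class %d (old front run)\n", size_class(pre));
	printf("decrement class %d (old back run)\n", size_class(post));
	printf("increment class %d (merged run of %llu)\n",
	       size_class(pre + freed + post), pre + freed + post);
	return 0;
}
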
-/*
- * Check that the given range is either all allocated (val = 0) or
- * all free (val = 1).
- */
-int
-xfs_rtcheck_range(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   start,          /* starting block number of extent */
-       xfs_extlen_t    len,            /* length of extent */
-       int             val,            /* 1 for free, 0 for allocated */
-       xfs_rtblock_t   *new,           /* out: first block not matching */
-       int             *stat)          /* out: 1 for matches, 0 for not */
-{
-       xfs_rtword_t    *b;             /* current word in buffer */
-       int             bit;            /* bit number in the word */
-       xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
-       xfs_rtword_t    *bufp;          /* starting word in buffer */
-       int             error;          /* error value */
-       xfs_rtblock_t   i;              /* current bit number rel. to start */
-       xfs_rtblock_t   lastbit;        /* last useful bit in word */
-       xfs_rtword_t    mask;           /* mask of relevant bits for value */
-       xfs_rtword_t    wdiff;          /* difference from wanted value */
-       int             word;           /* word number in the buffer */
-
-       /*
-        * Compute starting bitmap block number
-        */
-       block = XFS_BITTOBLOCK(mp, start);
-       /*
-        * Read the bitmap block.
-        */
-       error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
-       if (error) {
-               return error;
-       }
-       bufp = bp->b_addr;
-       /*
-        * Compute the starting word's address, and starting bit.
-        */
-       word = XFS_BITTOWORD(mp, start);
-       b = &bufp[word];
-       bit = (int)(start & (XFS_NBWORD - 1));
-       /*
-        * 0 (allocated) => all zeroes; 1 (free) => all ones.
-        */
-       val = -val;
-       /*
-        * If not starting on a word boundary, deal with the first
-        * (partial) word.
-        */
-       if (bit) {
-               /*
-                * Compute first bit not examined.
-                */
-               lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
-               /*
-                * Mask of relevant bits.
-                */
-               mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = (*b ^ val) & mask)) {
-                       /*
-                        * Different, compute first wrong bit and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i = XFS_RTLOBIT(wdiff) - bit;
-                       *new = start + i;
-                       *stat = 0;
-                       return 0;
-               }
-               i = lastbit - bit;
-               /*
-                * Go on to next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * If done with this block, get the next one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer.
-                        */
-                       b++;
-               }
-       } else {
-               /*
-                * Starting on a word boundary, no partial word.
-                */
-               i = 0;
-       }
-       /*
-        * Loop over whole words in buffers.  When we use up one buffer
-        * we move on to the next one.
-        */
-       while (len - i >= XFS_NBWORD) {
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = *b ^ val)) {
-                       /*
-                        * Different, compute first wrong bit and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_RTLOBIT(wdiff);
-                       *new = start + i;
-                       *stat = 0;
-                       return 0;
-               }
-               i += XFS_NBWORD;
-               /*
-                * Go on to next block if that's where the next word is
-                * and we need the next word.
-                */
-               if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
-                       /*
-                        * If done with this block, get the next one.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
-                       if (error) {
-                               return error;
-                       }
-                       b = bufp = bp->b_addr;
-                       word = 0;
-               } else {
-                       /*
-                        * Go on to the next word in the buffer.
-                        */
-                       b++;
-               }
-       }
-       /*
-        * If not ending on a word boundary, deal with the last
-        * (partial) word.
-        */
-       if ((lastbit = len - i)) {
-               /*
-                * Mask of relevant bits.
-                */
-               mask = ((xfs_rtword_t)1 << lastbit) - 1;
-               /*
-                * Compute difference between actual and desired value.
-                */
-               if ((wdiff = (*b ^ val) & mask)) {
-                       /*
-                        * Different, compute first wrong bit and return.
-                        */
-                       xfs_trans_brelse(tp, bp);
-                       i += XFS_RTLOBIT(wdiff);
-                       *new = start + i;
-                       *stat = 0;
-                       return 0;
-               } else
-                       i = len;
-       }
-       /*
-        * Successful, return.
-        */
-       xfs_trans_brelse(tp, bp);
-       *new = start + i;
-       *stat = 1;
-       return 0;
-}
-
-#ifdef DEBUG
-/*
- * Check that the given extent (block range) is allocated already.
- */
-STATIC int                             /* error */
-xfs_rtcheck_alloc_range(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   bno,            /* starting block number of extent */
-       xfs_extlen_t    len)            /* length of extent */
-{
-       xfs_rtblock_t   new;            /* dummy for xfs_rtcheck_range */
-       int             stat;
-       int             error;
-
-       error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat);
-       if (error)
-               return error;
-       ASSERT(stat);
-       return 0;
-}
-#else
-#define xfs_rtcheck_alloc_range(m,t,b,l)       (0)
-#endif
-/*
- * Free an extent in the realtime subvolume.  Length is expressed in
- * realtime extents, as is the block number.
- */
-int                                    /* error */
-xfs_rtfree_extent(
-       xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_rtblock_t   bno,            /* starting block number to free */
-       xfs_extlen_t    len)            /* length of extent freed */
-{
-       int             error;          /* error value */
-       xfs_mount_t     *mp;            /* file system mount structure */
-       xfs_fsblock_t   sb;             /* summary file block number */
-       xfs_buf_t       *sumbp = NULL;  /* summary file block buffer */
-
-       mp = tp->t_mountp;
-
-       ASSERT(mp->m_rbmip->i_itemp != NULL);
-       ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
-
-       error = xfs_rtcheck_alloc_range(mp, tp, bno, len);
-       if (error)
-               return error;
-
-       /*
-        * Free the range of realtime blocks.
-        */
-       error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
-       if (error) {
-               return error;
-       }
-       /*
-        * Mark more blocks free in the superblock.
-        */
-       xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
-       /*
-        * If we've now freed all the blocks, reset the file sequence
-        * number to 0.
-        */
-       if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
-           mp->m_sb.sb_rextents) {
-               if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
-                       mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-               *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
-               xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
-       }
-       return 0;
-}
-
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
deleted file mode 100644 (file)
index 23c2f25..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * Copyright (c) 2012-2013 Red Hat, Inc.
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_shared.h"
-#include "xfs_trans_resv.h"
-#include "xfs_ag.h"
-#include "xfs_sb.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_cksum.h"
-#include "xfs_trans.h"
-#include "xfs_buf_item.h"
-
-
-/*
- * Each contiguous block has a header, so it is not just a simple pathlen
- * to FSB conversion.
- */
-int
-xfs_symlink_blocks(
-       struct xfs_mount *mp,
-       int             pathlen)
-{
-       int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-
-       return (pathlen + buflen - 1) / buflen;
-}
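/*
 * Illustrative sketch, not part of the original file: on a CRC-enabled
 * filesystem with 4096-byte blocks, XFS_SYMLINK_BUF_SPACE() leaves roughly
 * 4096 - sizeof(struct xfs_dsymlink_hdr) bytes of payload per block, so even
 * a MAXPATHLEN-sized (1024-byte) target fits in a single remote block:
 *
 *	blocks = xfs_symlink_blocks(mp, 1024);	// 1 for 4k blocks
 *
 * Smaller block sizes may need more than one remote block for long targets.
 */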
-
-int
-xfs_symlink_hdr_set(
-       struct xfs_mount        *mp,
-       xfs_ino_t               ino,
-       uint32_t                offset,
-       uint32_t                size,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return 0;
-
-       dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
-       dsl->sl_offset = cpu_to_be32(offset);
-       dsl->sl_bytes = cpu_to_be32(size);
-       uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
-       dsl->sl_owner = cpu_to_be64(ino);
-       dsl->sl_blkno = cpu_to_be64(bp->b_bn);
-       bp->b_ops = &xfs_symlink_buf_ops;
-
-       return sizeof(struct xfs_dsymlink_hdr);
-}
-
-/*
- * Checking of the symlink header is split into two parts: the verifier does
- * CRC, location and bounds checking; the unpacking function checks the path
- * parameters and owner.
- */
-bool
-xfs_symlink_hdr_ok(
-       xfs_ino_t               ino,
-       uint32_t                offset,
-       uint32_t                size,
-       struct xfs_buf          *bp)
-{
-       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-
-       if (offset != be32_to_cpu(dsl->sl_offset))
-               return false;
-       if (size != be32_to_cpu(dsl->sl_bytes))
-               return false;
-       if (ino != be64_to_cpu(dsl->sl_owner))
-               return false;
-
-       /* ok */
-       return true;
-}
-
-static bool
-xfs_symlink_verify(
-       struct xfs_buf          *bp)
-{
-       struct xfs_mount        *mp = bp->b_target->bt_mount;
-       struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return false;
-       if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
-               return false;
-       if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
-               return false;
-       if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
-               return false;
-       if (be32_to_cpu(dsl->sl_offset) +
-                               be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
-               return false;
-       if (dsl->sl_owner == 0)
-               return false;
-
-       return true;
-}
-
-static void
-xfs_symlink_read_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-
-       /* no verification of non-crc buffers */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
-               xfs_buf_ioerror(bp, EFSBADCRC);
-       else if (!xfs_symlink_verify(bp))
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-
-       if (bp->b_error)
-               xfs_verifier_error(bp);
-}
-
-static void
-xfs_symlink_write_verify(
-       struct xfs_buf  *bp)
-{
-       struct xfs_mount *mp = bp->b_target->bt_mount;
-       struct xfs_buf_log_item *bip = bp->b_fspriv;
-
-       /* no verification of non-crc buffers */
-       if (!xfs_sb_version_hascrc(&mp->m_sb))
-               return;
-
-       if (!xfs_symlink_verify(bp)) {
-               xfs_buf_ioerror(bp, EFSCORRUPTED);
-               xfs_verifier_error(bp);
-               return;
-       }
-
-       if (bip) {
-               struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-               dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-       }
-       xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
-}
-
-const struct xfs_buf_ops xfs_symlink_buf_ops = {
-       .verify_read = xfs_symlink_read_verify,
-       .verify_write = xfs_symlink_write_verify,
-};
-
-void
-xfs_symlink_local_to_remote(
-       struct xfs_trans        *tp,
-       struct xfs_buf          *bp,
-       struct xfs_inode        *ip,
-       struct xfs_ifork        *ifp)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       char                    *buf;
-
-       if (!xfs_sb_version_hascrc(&mp->m_sb)) {
-               bp->b_ops = NULL;
-               memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
-               return;
-       }
-
-       /*
-        * As this symlink fits in an inode literal area, it must also fit in
-        * the smallest buffer the filesystem supports.
-        */
-       ASSERT(BBTOB(bp->b_length) >=
-                       ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
-
-       bp->b_ops = &xfs_symlink_buf_ops;
-
-       buf = bp->b_addr;
-       buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
-       memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
-}
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
deleted file mode 100644 (file)
index f2bda7c..0000000
+++ /dev/null
@@ -1,894 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_qm.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-
-/*
- * A buffer has a format structure overhead in the log in addition
- * to the data, so we need to take this into account when reserving
- * space in a transaction for a buffer.  Round the space required up
- * to a multiple of 128 bytes so that we don't change the historical
- * reservation that has been used for this overhead.
- */
-STATIC uint
-xfs_buf_log_overhead(void)
-{
-       return round_up(sizeof(struct xlog_op_header) +
-                       sizeof(struct xfs_buf_log_format), 128);
-}
-
-/*
- * Calculate the transaction log reservation per item in bytes.
- *
- * The nbufs argument indicates the number of items that will be changed
- * in a transaction; size tells how many bytes should be reserved per item.
- */
-STATIC uint
-xfs_calc_buf_res(
-       uint            nbufs,
-       uint            size)
-{
-       return nbufs * (size + xfs_buf_log_overhead());
-}
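/*
 * Hypothetical worked example (assumed sizes, not taken from this file):
 * if the op header plus buf log format header round up to the historical
 * 128 bytes, then logging three 512-byte sector buffers reserves
 *
 *	xfs_calc_buf_res(3, 512) == 3 * (512 + 128) == 1920 bytes
 */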
-
-/*
- * Logging inodes is really tricksy. They are logged in memory format,
- * which means that what we write into the log doesn't directly translate into
- * the amount of space they use on disk.
- *
- * Case in point - btree format forks in memory format use more space than the
- * on-disk format. In memory, the buffer contains a normal btree block header so
- * the btree code can treat it as though it is just another generic buffer.
- * However, when we write it to the inode fork, we don't write all of this
- * header as it isn't needed. e.g. the root is only ever in the inode, so
- * there's no need for sibling pointers which would waste 16 bytes of space.
- *
- * Hence when we have an inode with a maximally sized btree format fork, the
- * amount of information we actually log is greater than the size of the inode
- * on disk. We therefore need an inode reservation function that calculates all
- * this correctly. So, we log:
- *
- * - 4 log op headers for object
- *     - for the ilf, the inode core and 2 forks
- * - inode log format object
- * - the inode core
- * - two inode forks containing bmap btree root blocks.
- *     - the btree data contained by both forks will fit into the inode size,
- *       hence when combined with the inode core above, we have a total of the
- *       actual inode size.
- *     - the BMBT headers need to be accounted separately, as they are
- *       additional to the records and pointers that fit inside the inode
- *       forks.
- */
-STATIC uint
-xfs_calc_inode_res(
-       struct xfs_mount        *mp,
-       uint                    ninodes)
-{
-       return ninodes *
-               (4 * sizeof(struct xlog_op_header) +
-                sizeof(struct xfs_inode_log_format) +
-                mp->m_sb.sb_inodesize +
-                2 * XFS_BMBT_BLOCK_LEN(mp));
-}
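/*
 * Hypothetical worked example with assumed sizes (256-byte inodes, ~12-byte
 * op headers, a ~56-byte inode log format and ~24-byte non-CRC bmbt block
 * headers):
 *
 *	xfs_calc_inode_res(mp, 1)
 *		~= 4 * 12 + 56 + 256 + 2 * 24 = 408 bytes per inode
 *
 * The exact figure depends on sb_inodesize and on whether CRC-enabled
 * (larger) btree block headers are in use.
 */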
-
-/*
- * The free inode btree is a conditional feature, and its log reservation
- * requirements differ slightly from those of the traditional inode allocation
- * btree. The finobt tracks records for inode chunks with at least one free
- * inode. A record can be removed from the tree for an inode allocation
- * or free and thus the finobt reservation is unconditional across:
- *
- *     - inode allocation
- *     - inode free
- *     - inode chunk allocation
- *
- * The 'modify' param indicates whether to include the record modification
- * scenario. The 'alloc' param indicates whether to include the reservation
- * for free space btree modifications made on behalf of finobt modifications;
- * this is required only for transactions that do not already account for
- * free space btree modifications.
- *
- * the free inode btree: max depth * block size
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the free inode btree entry: block size
- */
-STATIC uint
-xfs_calc_finobt_res(
-       struct xfs_mount        *mp,
-       int                     alloc,
-       int                     modify)
-{
-       uint res;
-
-       if (!xfs_sb_version_hasfinobt(&mp->m_sb))
-               return 0;
-
-       res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
-       if (alloc)
-               res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 
-                                       XFS_FSB_TO_B(mp, 1));
-       if (modify)
-               res += (uint)XFS_FSB_TO_B(mp, 1);
-
-       return res;
-}
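/*
 * The (alloc, modify) arguments are exercised by the callers later in this
 * file: xfs_calc_create_resv_modify() passes (1, 1) because it does not
 * otherwise reserve for free space btree changes, xfs_calc_icreate_resv_alloc()
 * passes (0, 0) since it already accounts for the allocation btrees, and
 * xfs_calc_ifree_reservation() passes (0, 1) for a record modification only.
 */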
-
-/*
- * Various log reservation values.
- *
- * These are based on the size of the file system block because that is what
- * most transactions manipulate.  Each adds in an additional 128 bytes per
- * item logged to try to account for the overhead of the transaction mechanism.
- *
- * Note:  Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish() call.
- * This is because the number in the worst case is quite high and quite
- * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
- * extents in only a single AG at a time.  This will require changes to the
- * EFI code as well, however, so that the EFI for the extents not freed is
- * logged again in each transaction.  See SGI PV #261917.
- *
- * Reservation functions here avoid a huge stack in xfs_trans_init due to
- * register overflow from temporaries in the calculations.
- */
-
-
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
- *    the inode getting the new extents: inode size
- *    the inode's bmap btree: max depth * block size
- *    the agfs of the ags from which the extents are allocated: 2 * sector
- *    the superblock free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- *    the agfs of the ags containing the blocks: 2 * sector size
- *    the agfls of the ags containing the blocks: 2 * sector size
- *    the super block free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_write_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 1) +
-                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-                                     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * In truncating a file we free up to two extents at once.  We can modify:
- *    the inode being truncated: inode size
- *    the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_itruncate_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 1) +
-                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                   xfs_calc_buf_res(5, 0) +
-                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                    XFS_FSB_TO_B(mp, 1)) +
-                   xfs_calc_buf_res(2 + mp->m_ialloc_blks +
-                                    mp->m_in_maxlevels, 0)));
-}
-
-/*
- * In renaming files we can modify:
- *    the four inodes involved: 4 * inode size
- *    the two directory btrees: 2 * (max depth + v2) * dir block size
- *    the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- *     of bmap blocks) giving:
- *    the agf for the ags in which the blocks live: 3 * sector size
- *    the agfl for the ags in which the blocks live: 3 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_rename_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 4) +
-                    xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
-                                     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For removing an inode from the unlinked list, we can modify:
- *    the agi hash list and counters: sector size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- */
-STATIC uint
-xfs_calc_iunlink_remove_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-              max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
-}
-
-/*
- * For creating a link to an inode:
- *    the parent directory inode: inode size
- *    the linked inode: inode size
- *    the directory btree could split: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- *    the agf for the ag in which the blocks live: sector size
- *    the agfl for the ag in which the blocks live: sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_link_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_iunlink_remove_reservation(mp) +
-               MAX((xfs_calc_inode_res(mp, 2) +
-                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For adding an inode to the unlinked list we can modify:
- *    the agi hash list: sector size
- *    the unlinked inode: inode size
- */
-STATIC uint
-xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_inode_res(mp, 1);
-}
-
-/*
- * For removing a directory entry we can modify:
- *    the parent directory inode: inode size
- *    the removed inode: inode size
- *    the directory btree could join: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_remove_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_iunlink_add_reservation(mp) +
-               MAX((xfs_calc_inode_res(mp, 1) +
-                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-                                     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For create, break it into the two cases that the transaction
- * covers: the modify case - allocation done by modification of the state
- * of existing inodes - and the allocation case.
- */
-
-/*
- * For create we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: block size
- *    the superblock for the nlink flag: sector size
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode's bmap btree: (max depth + v2) * block size
- *    the finobt (record modification and allocation btrees)
- */
-STATIC uint
-xfs_calc_create_resv_modify(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_inode_res(mp, 2) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               (uint)XFS_FSB_TO_B(mp, 1) +
-               xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 1, 1);
-}
-
-/*
- * For create we can allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: mp->m_ialloc_blks * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-               mp->m_sb.sb_sectsize +
-               xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX(xfs_calc_create_resv_alloc(mp),
-                   xfs_calc_create_resv_modify(mp));
-}
-
-/*
- * For icreate we can allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- *    the finobt (record insertion)
- */
-STATIC uint
-xfs_calc_icreate_resv_alloc(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-               mp->m_sb.sb_sectsize +
-               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 0, 0);
-}
-
-STATIC uint
-xfs_calc_icreate_reservation(xfs_mount_t *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX(xfs_calc_icreate_resv_alloc(mp),
-                   xfs_calc_create_resv_modify(mp));
-}
-
-STATIC uint
-xfs_calc_create_reservation(
-       struct xfs_mount        *mp)
-{
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return xfs_calc_icreate_reservation(mp);
-       return __xfs_calc_create_reservation(mp);
-
-}
-
-STATIC uint
-xfs_calc_create_tmpfile_reservation(
-       struct xfs_mount        *mp)
-{
-       uint    res = XFS_DQUOT_LOGRES(mp);
-
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               res += xfs_calc_icreate_resv_alloc(mp);
-       else
-               res += xfs_calc_create_resv_alloc(mp);
-
-       return res + xfs_calc_iunlink_add_reservation(mp);
-}
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-STATIC uint
-xfs_calc_mkdir_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_create_reservation(mp);
-}
-
-
-/*
- * Making a new symlink is the same as creating a new file, but
- * with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
- */
-STATIC uint
-xfs_calc_symlink_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_create_reservation(mp) +
-              xfs_calc_buf_res(1, MAXPATHLEN);
-}
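/*
 * Worked example (assuming the per-item overhead rounds to 128 bytes):
 * MAXPATHLEN is 1024 on XFS, so the remote symlink data adds
 *
 *	xfs_calc_buf_res(1, MAXPATHLEN) == 1024 + 128 == 1152 bytes
 *
 * on top of the create reservation.
 */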
-
-/*
- * In freeing an inode we can modify:
- *    the inode being freed: inode size
- *    the super block free inode counter: sector size
- *    the agi hash list and counters: sector size
- *    the inode btree entry: block size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- *    the finobt (record insertion, removal or modification)
- */
-STATIC uint
-xfs_calc_ifree_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_iunlink_remove_reservation(mp) +
-               xfs_calc_buf_res(1, 0) +
-               xfs_calc_buf_res(2 + mp->m_ialloc_blks +
-                                mp->m_in_maxlevels, 0) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_finobt_res(mp, 0, 1);
-}
-
-/*
- * When only changing the inode we log the inode and possibly the superblock.
- * We also add a bit of slop for the transaction stuff.
- */
-STATIC uint
-xfs_calc_ichange_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-
-}
-
-/*
- * Growing the data section of the filesystem.
- *     superblock
- *     agi and agf
- *     allocation btrees
- */
-STATIC uint
-xfs_calc_growdata_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- *     superblock: sector size
- *     agf of the ag from which the extent is allocated: sector size
- *     bmap btree for bitmap/summary inode: max depth * blocksize
- *     bitmap/summary inode: inode size
- *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-STATIC uint
-xfs_calc_growrtalloc_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- *     one bitmap/summary block: blocksize
- */
-STATIC uint
-xfs_calc_growrtzero_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- *     superblock: sector size
- *     bitmap inode: inode size
- *     summary inode: inode size
- *     one bitmap block: blocksize
- *     summary blocks: new summary size
- */
-STATIC uint
-xfs_calc_growrtfree_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_inode_res(mp, 2) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
-               xfs_calc_buf_res(1, mp->m_rsumsize);
-}
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- *     inode
- */
-STATIC uint
-xfs_calc_swrite_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_inode_res(mp, 1);
-}
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- *     inode
- */
-STATIC uint
-xfs_calc_writeid_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_inode_res(mp, 1);
-}
-
-/*
- * Converting the inode from non-attributed to attributed.
- *     the inode being converted: inode size
- *     agf block and superblock (for block allocation)
- *     the new block (directory sized)
- *     bmap blocks for the new directory block
- *     allocation btrees
- */
-STATIC uint
-xfs_calc_addafork_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
-               xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
-                                XFS_FSB_TO_B(mp, 1)) +
-               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Removing the attribute fork of a file
- *    the inode being truncated: inode size
- *    the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_attrinval_reservation(
-       struct xfs_mount        *mp)
-{
-       return MAX((xfs_calc_inode_res(mp, 1) +
-                   xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
-                                    XFS_FSB_TO_B(mp, 1))),
-                  (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-                   xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
-                                    XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * Setting an attribute at mount time.
- *     the inode getting the attribute
- *     the superblock for allocations
- *     the agfs extents are allocated from
- *     the attribute btree * max depth
- *     the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime
- * (see below).
- */
-STATIC uint
-xfs_calc_attrsetm_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               xfs_calc_inode_res(mp, 1) +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Setting an attribute at runtime, transaction space unit per block.
- *     the superblock for allocations: sector size
- *     the inode bmap btree could join or split: max depth * block size
- * Since the runtime attribute transaction space is dependent on the total
- * blocks needed for the 1st bmap, here we calculate the space unit for
- * one block so that the caller can figure out the total space according
- * to the attribute extent length in blocks by:
- *     ext * M_RES(mp)->tr_attrsetrt.tr_logres
- */
-STATIC uint
-xfs_calc_attrsetrt_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-               xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
-                                XFS_FSB_TO_B(mp, 1));
-}
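/*
 * Sketch of the intended use (hypothetical caller code, following the
 * "ext * M_RES(mp)->tr_attrsetrt.tr_logres" note above): an attribute set
 * spanning nblks blocks scales the per-block unit into its own transaction
 * reservation, roughly:
 *
 *	struct xfs_trans_res tres = M_RES(mp)->tr_attrsetm;
 *
 *	tres.tr_logres += M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
 *
 * combining the mount-time portion with the runtime, size-dependent part.
 */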
-
-/*
- * Removing an attribute.
- *    the inode: inode size
- *    the attribute btree could join: max depth * block size
- *    the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_attrrm_reservation(
-       struct xfs_mount        *mp)
-{
-       return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_inode_res(mp, 1) +
-                    xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    (uint)XFS_FSB_TO_B(mp,
-                                       XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
-                    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
-                   (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-                                     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-STATIC uint
-xfs_calc_clear_agi_bucket_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * Clearing the quotaflags in the superblock.
- *     the super block for changing quota flags: sector size
- */
-STATIC uint
-xfs_calc_qm_sbchange_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * Adjusting quota limits.
- *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
- */
-STATIC uint
-xfs_calc_qm_setqlim_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
-}
-
-/*
- * Allocating quota on disk if needed.
- *     the write transaction log space for quota file extent allocation
- *     the unit of quota allocation: one system block size
- */
-STATIC uint
-xfs_calc_qm_dqalloc_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_write_reservation(mp) +
-               xfs_calc_buf_res(1,
-                       XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
-}
-
-/*
- * Turning off quotas.
- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
- *    the superblock for the quota flags: sector size
- */
-STATIC uint
-xfs_calc_qm_quotaoff_reservation(
-       struct xfs_mount        *mp)
-{
-       return sizeof(struct xfs_qoff_logitem) * 2 +
-               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * End of turning off quotas.
- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
- */
-STATIC uint
-xfs_calc_qm_quotaoff_end_reservation(
-       struct xfs_mount        *mp)
-{
-       return sizeof(struct xfs_qoff_logitem) * 2;
-}
-
-/*
- * Syncing the incore super block changes to disk.
- *     the super block to reflect the changes: sector size
- */
-STATIC uint
-xfs_calc_sb_reservation(
-       struct xfs_mount        *mp)
-{
-       return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-void
-xfs_trans_resv_calc(
-       struct xfs_mount        *mp,
-       struct xfs_trans_resv   *resp)
-{
-       /*
-        * The following transactions are logged in physical format and
-        * require a permanent reservation on space.
-        */
-       resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
-       resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
-       resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
-       resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
-       resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
-       resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
-       resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
-       resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
-       resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
-       resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
-       resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
-       resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
-       resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
-       resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
-       resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_create_tmpfile.tr_logres =
-                       xfs_calc_create_tmpfile_reservation(mp);
-       resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
-       resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
-       resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
-       resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
-       resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
-       resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
-       resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
-       resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
-       resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
-       resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
-       resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
-       resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
-       resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
-       resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
-       resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
-       resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
-       resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
-       resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
-       /*
-        * The following transactions are logged in logical format with
-        * a default log count.
-        */
-       resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp);
-       resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-       resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
-       resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-       resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
-       resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-       resp->tr_qm_equotaoff.tr_logres =
-               xfs_calc_qm_quotaoff_end_reservation(mp);
-       resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-       resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
-       resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
-
-       /* The following transactions are logged in logical format */
-       resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
-       resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-       resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
-       resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
-       resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
-       resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
-       resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
-       resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
-}
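
/*
 * Sketch of how a caller consumes these values (hypothetical helper, for
 * illustration only): once xfs_trans_resv_calc() has filled mp->m_resv at
 * mount time, transaction setup picks a precomputed reservation through the
 * M_RES() macro, e.g.
 */
static inline int
example_reserve_write_trans(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	uint			blocks)
{
	/* reserve log space and data blocks, no realtime extents */
	return xfs_trans_reserve(tp, &M_RES(mp)->tr_write, blocks, 0);
}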