[XFS] Implement the di_extsize allocator hint for non-realtime files as
author Nathan Scott <nathans@sgi.com>
Wed, 11 Jan 2006 04:28:28 +0000 (15:28 +1100)
committer Nathan Scott <nathans@sgi.com>
Wed, 11 Jan 2006 04:28:28 +0000 (15:28 +1100)
well.  Also provides a mechanism for inheriting this property from the
parent directory for new files.

SGI-PV: 945264
SGI-Modid: xfs-linux-melb:xfs-kern:24367a

Signed-off-by: Nathan Scott <nathans@sgi.com>
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap.h
fs/xfs/xfs_dinode.h
fs/xfs/xfs_fs.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_vnodeops.c

index e415a4698e9c3e30836f5c2578f40f2f921592c0..8a32d65211b081fad7edfac85a95eec5303a7b23 100644 (file)
@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
        return 0; /* keep gcc quite */
 }
 
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ *
+ * On entry the values behind offp and lenp describe the requested file
+ * range.  On a successful, non-trivial return they are overwritten with
+ * a range whose start has been rounded down and whose length has been
+ * rounded up to extsz, then shifted and/or trimmed to stay clear of the
+ * neighbouring records described by prevp and gotp.
+ *
+ * The values behind offp and lenp are left untouched, and 0 is returned,
+ * when convert is set or when a non-delalloc, non-eof request already
+ * lies entirely inside the record at gotp.
+ *
+ * Returns 0 on success.  For realtime requests (rt != 0) it returns
+ * XFS_ERROR(EINVAL) when the aligned range cannot be made a multiple of
+ * mp->m_sb.sb_rextsize while still covering the original request.
+ *
+ * DEBUG builds additionally assert that the final range ends at or
+ * before gotp (unless eof) and starts at or after the end of prevp.
+ */
+STATIC int
+xfs_bmap_extsize_align(
+       xfs_mount_t     *mp,            /* mount; supplies sb_rextsize */
+       xfs_bmbt_irec_t *gotp,          /* next extent pointer */
+       xfs_bmbt_irec_t *prevp,         /* previous extent pointer */
+       xfs_extlen_t    extsz,          /* align to this extent size */
+       int             rt,             /* is this a realtime inode? */
+       int             eof,            /* is extent at end-of-file? */
+       int             delay,          /* creating delalloc extent? */
+       int             convert,        /* overwriting unwritten extent? */
+       xfs_fileoff_t   *offp,          /* in/out: aligned offset */
+       xfs_extlen_t    *lenp)          /* in/out: aligned length */
+{
+       xfs_fileoff_t   orig_off;       /* original offset */
+       xfs_extlen_t    orig_alen;      /* original length */
+       xfs_fileoff_t   orig_end;       /* original off+len */
+       xfs_fileoff_t   nexto;          /* next file offset */
+       xfs_fileoff_t   prevo;          /* previous file offset */
+       xfs_fileoff_t   align_off;      /* temp for offset */
+       xfs_extlen_t    align_alen;     /* temp for length */
+       xfs_extlen_t    temp;           /* temp for calculations */
+
+       if (convert)
+               return 0;
+
+       orig_off = align_off = *offp;
+       orig_alen = align_alen = *lenp;
+       orig_end = orig_off + orig_alen;
+
+       /*
+        * If this request overlaps an existing extent, then don't
+        * attempt to perform any additional alignment.
+        *
+        * The test below is that [orig_off, orig_end) lies wholly
+        * within the range of gotp, and it is only applied to
+        * requests that are neither delalloc nor at eof.
+        */
+       if (!delay && !eof &&
+           (orig_off >= gotp->br_startoff) &&
+           (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+               return 0;
+       }
+
+       /*
+        * If the file offset is unaligned vs. the extent size
+        * we need to align it.  This will be possible unless
+        * the file was previously written with a kernel that didn't
+        * perform this alignment, or if a truncate shot us in the
+        * foot.
+        */
+       temp = do_mod(orig_off, extsz);
+       if (temp) {
+               align_alen += temp;
+               align_off -= temp;
+       }
+       /*
+        * Same adjustment for the end of the requested area.
+        */
+       if ((temp = (align_alen % extsz))) {
+               align_alen += extsz - temp;
+       }
+       /*
+        * If the previous block overlaps with this proposed allocation
+        * then move the start forward without adjusting the length.
+        *
+        * prevo is the lowest offset the aligned start may take: the
+        * end of the previous record, or its start when that record
+        * is a hole (HOLESTARTBLOCK), or 0 when there is no previous
+        * record at all.  The start is only moved if alignment
+        * actually changed it.
+        */
+       if (prevp->br_startoff != NULLFILEOFF) {
+               if (prevp->br_startblock == HOLESTARTBLOCK)
+                       prevo = prevp->br_startoff;
+               else
+                       prevo = prevp->br_startoff + prevp->br_blockcount;
+       } else
+               prevo = 0;
+       if (align_off != orig_off && align_off < prevo)
+               align_off = prevo;
+       /*
+        * If the next block overlaps with this proposed allocation
+        * then move the start back without adjusting the length,
+        * but not before offset 0.
+        * This may of course make the start overlap previous block,
+        * and if we hit the offset 0 limit then the next block
+        * can still overlap too.
+        *
+        * nexto is the limit for the aligned end: the start of the
+        * next record, except that its end is used when a delalloc
+        * request sees a hole record or a non-delalloc request sees
+        * a delalloc record.  It is NULLFILEOFF at eof or when there
+        * is no next record.
+        */
+       if (!eof && gotp->br_startoff != NULLFILEOFF) {
+               if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+                   (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+                       nexto = gotp->br_startoff + gotp->br_blockcount;
+               else
+                       nexto = gotp->br_startoff;
+       } else
+               nexto = NULLFILEOFF;
+       if (!eof &&
+           align_off + align_alen != orig_end &&
+           align_off + align_alen > nexto)
+               align_off = nexto > align_alen ? nexto - align_alen : 0;
+       /*
+        * If we're now overlapping the next or previous extent that
+        * means we can't fit an extsz piece in this hole.  Just move
+        * the start forward to the first valid spot and set
+        * the length so we hit the end.
+        */
+       if (align_off != orig_off && align_off < prevo)
+               align_off = prevo;
+       if (align_off + align_alen != orig_end &&
+           align_off + align_alen > nexto &&
+           nexto != NULLFILEOFF) {
+               ASSERT(nexto > prevo);
+               align_alen = nexto - align_off;
+       }
+
+       /*
+        * If realtime, and the result isn't a multiple of the realtime
+        * extent size we need to remove blocks until it is.
+        *
+        * When no such trimming is needed (not realtime, or already a
+        * multiple) the else branch only asserts that the aligned
+        * range still covers the original request.
+        */
+       if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+               /*
+                * We're not covering the original request, or
+                * we won't be able to once we fix the length.
+                */
+               if (orig_off < align_off ||
+                   orig_end > align_off + align_alen ||
+                   align_alen - temp < orig_alen)
+                       return XFS_ERROR(EINVAL);
+               /*
+                * Try to fix it by moving the start up.
+                */
+               if (align_off + temp <= orig_off) {
+                       align_alen -= temp;
+                       align_off += temp;
+               }
+               /*
+                * Try to fix it by moving the end in.
+                */
+               else if (align_off + align_alen - temp >= orig_end)
+                       align_alen -= temp;
+               /*
+                * Set the start to the minimum then trim the length.
+                */
+               else {
+                       align_alen -= orig_off - align_off;
+                       align_off = orig_off;
+                       align_alen -= align_alen % mp->m_sb.sb_rextsize;
+               }
+               /*
+                * Result doesn't cover the request, fail it.
+                */
+               if (orig_off < align_off || orig_end > align_off + align_alen)
+                       return XFS_ERROR(EINVAL);
+       } else {
+               ASSERT(orig_off >= align_off);
+               ASSERT(orig_end <= align_off + align_alen);
+       }
+
+#ifdef DEBUG
+       if (!eof && gotp->br_startoff != NULLFILEOFF)
+               ASSERT(align_off + align_alen <= gotp->br_startoff);
+       if (prevp->br_startoff != NULLFILEOFF)
+               ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+
+       *lenp = align_alen;
+       *offp = align_off;
+       return 0;
+}
+
 #define XFS_ALLOC_GAP_UNITS    4
 
 /*
  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
  * It figures out where to ask the underlying allocator to put the new extent.
  */
-STATIC int                             /* error */
+STATIC int
 xfs_bmap_alloc(
        xfs_bmalloca_t  *ap)            /* bmap alloc argument struct */
 {
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
        xfs_mount_t     *mp;            /* mount point structure */
        int             nullfb;         /* true if ap->firstblock isn't set */
        int             rt;             /* true if inode is realtime */
-#ifdef __KERNEL__
-       xfs_extlen_t    prod=0;         /* product factor for allocators */
-       xfs_extlen_t    ralen=0;        /* realtime allocation length */
-#endif
+       xfs_extlen_t    prod = 0;       /* product factor for allocators */
+       xfs_extlen_t    ralen = 0;      /* realtime allocation length */
+       xfs_extlen_t    align;          /* minimum allocation alignment */
+       xfs_rtblock_t   rtx;
 
 #define        ISVALID(x,y)    \
        (rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
        nullfb = ap->firstblock == NULLFSBLOCK;
        rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
        if (rt) {
-               xfs_extlen_t    extsz;          /* file extent size for rt */
-               xfs_fileoff_t   nexto;          /* next file offset */
-               xfs_extlen_t    orig_alen;      /* original ap->alen */
-               xfs_fileoff_t   orig_end;       /* original off+len */
-               xfs_fileoff_t   orig_off;       /* original ap->off */
-               xfs_extlen_t    mod_off;        /* modulus calculations */
-               xfs_fileoff_t   prevo;          /* previous file offset */
-               xfs_rtblock_t   rtx;            /* realtime extent number */
-               xfs_extlen_t    temp;           /* temp for rt calculations */
-
-               /*
-                * Set prod to match the realtime extent size.
-                */
-               if (!(extsz = ap->ip->i_d.di_extsize))
-                       extsz = mp->m_sb.sb_rextsize;
-               prod = extsz / mp->m_sb.sb_rextsize;
-               orig_off = ap->off;
-               orig_alen = ap->alen;
-               orig_end = orig_off + orig_alen;
-               /*
-                * If the file offset is unaligned vs. the extent size
-                * we need to align it.  This will be possible unless
-                * the file was previously written with a kernel that didn't
-                * perform this alignment.
-                */
-               mod_off = do_mod(orig_off, extsz);
-               if (mod_off) {
-                       ap->alen += mod_off;
-                       ap->off -= mod_off;
-               }
-               /*
-                * Same adjustment for the end of the requested area.
-                */
-               if ((temp = (ap->alen % extsz)))
-                       ap->alen += extsz - temp;
-               /*
-                * If the previous block overlaps with this proposed allocation
-                * then move the start forward without adjusting the length.
-                */
-               prevo =
-                       ap->prevp->br_startoff == NULLFILEOFF ?
-                               0 :
-                               (ap->prevp->br_startoff +
-                                ap->prevp->br_blockcount);
-               if (ap->off != orig_off && ap->off < prevo)
-                       ap->off = prevo;
-               /*
-                * If the next block overlaps with this proposed allocation
-                * then move the start back without adjusting the length,
-                * but not before offset 0.
-                * This may of course make the start overlap previous block,
-                * and if we hit the offset 0 limit then the next block
-                * can still overlap too.
-                */
-               nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
-                       NULLFILEOFF : ap->gotp->br_startoff;
-               if (!ap->eof &&
-                   ap->off + ap->alen != orig_end &&
-                   ap->off + ap->alen > nexto)
-                       ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
-               /*
-                * If we're now overlapping the next or previous extent that
-                * means we can't fit an extsz piece in this hole.  Just move
-                * the start forward to the first valid spot and set
-                * the length so we hit the end.
-                */
-               if ((ap->off != orig_off && ap->off < prevo) ||
-                   (ap->off + ap->alen != orig_end &&
-                    ap->off + ap->alen > nexto)) {
-                       ap->off = prevo;
-                       ap->alen = nexto - prevo;
-               }
-               /*
-                * If the result isn't a multiple of rtextents we need to
-                * remove blocks until it is.
-                */
-               if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
-                       /*
-                        * We're not covering the original request, or
-                        * we won't be able to once we fix the length.
-                        */
-                       if (orig_off < ap->off ||
-                           orig_end > ap->off + ap->alen ||
-                           ap->alen - temp < orig_alen)
-                               return XFS_ERROR(EINVAL);
-                       /*
-                        * Try to fix it by moving the start up.
-                        */
-                       if (ap->off + temp <= orig_off) {
-                               ap->alen -= temp;
-                               ap->off += temp;
-                       }
-                       /*
-                        * Try to fix it by moving the end in.
-                        */
-                       else if (ap->off + ap->alen - temp >= orig_end)
-                               ap->alen -= temp;
-                       /*
-                        * Set the start to the minimum then trim the length.
-                        */
-                       else {
-                               ap->alen -= orig_off - ap->off;
-                               ap->off = orig_off;
-                               ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
-                       }
-                       /*
-                        * Result doesn't cover the request, fail it.
-                        */
-                       if (orig_off < ap->off || orig_end > ap->off + ap->alen)
-                               return XFS_ERROR(EINVAL);
-               }
+               align = ap->ip->i_d.di_extsize ?
+                       ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+               /* Set prod to match the extent size */
+               prod = align / mp->m_sb.sb_rextsize;
+
+               error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+                                               align, rt, ap->eof, 0,
+                                               ap->conv, &ap->off, &ap->alen);
+               if (error)
+                       return error;
+               ASSERT(ap->alen);
                ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+
                /*
                 * If the offset & length are not perfectly aligned
                 * then kill prod, it will just get us in trouble.
                 */
-               if (do_mod(ap->off, extsz) || ap->alen % extsz)
+               if (do_mod(ap->off, align) || ap->alen % align)
                        prod = 1;
                /*
                 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
                        ap->rval = rtx * mp->m_sb.sb_rextsize;
                } else
                        ap->rval = 0;
+       } else {
+               align = (ap->userdata && ap->ip->i_d.di_extsize &&
+                       (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+                       ap->ip->i_d.di_extsize : 0;
+               if (unlikely(align)) {
+                       error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+                                                       align, rt,
+                                                       ap->eof, 0, ap->conv,
+                                                       &ap->off, &ap->alen);
+                       ASSERT(!error);
+                       ASSERT(ap->alen);
+               }
+               if (nullfb)
+                       ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+               else
+                       ap->rval = ap->firstblock;
        }
-#else
-       if (rt)
-               ap->rval = 0;
-#endif /* __KERNEL__ */
-       else if (nullfb)
-               ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-       else
-               ap->rval = ap->firstblock;
+
        /*
         * If allocating at eof, and there's a previous real block,
         * try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
                        args.total = ap->total;
                        args.minlen = ap->minlen;
                }
-               if (ap->ip->i_d.di_extsize) {
+               if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+                           (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
                        args.prod = ap->ip->i_d.di_extsize;
                        if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
                                args.mod = (xfs_extlen_t)(args.prod - args.mod);
-               } else if (mp->m_sb.sb_blocksize >= NBPP) {
+               } else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
                        args.prod = 1;
                        args.mod = 0;
                } else {
@@ -4590,6 +4663,7 @@ xfs_bmapi(
        char            contig;         /* allocation must be one extent */
        char            delay;          /* this request is for delayed alloc */
        char            exact;          /* don't do all of wasdelayed extent */
+       char            convert;        /* unwritten extent I/O completion */
        xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
        int             error;          /* error return */
        xfs_bmbt_irec_t got;            /* current extent list record */
@@ -4643,7 +4717,7 @@ xfs_bmapi(
        }
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-       rt = XFS_IS_REALTIME_INODE(ip);
+       rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(ifp->if_ext_max ==
               XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4728,7 @@ xfs_bmapi(
        delay = (flags & XFS_BMAPI_DELAY) != 0;
        trim = (flags & XFS_BMAPI_ENTIRE) == 0;
        userdata = (flags & XFS_BMAPI_METADATA) == 0;
+       convert = (flags & XFS_BMAPI_CONVERT) != 0;
        exact = (flags & XFS_BMAPI_EXACT) != 0;
        rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
        contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,16 +4823,26 @@ xfs_bmapi(
                        }
                        minlen = contig ? alen : 1;
                        if (delay) {
-                               xfs_extlen_t    extsz = 0;
+                               xfs_extlen_t    extsz;
 
                                /* Figure out the extent size, adjust alen */
                                if (rt) {
                                        if (!(extsz = ip->i_d.di_extsize))
                                                extsz = mp->m_sb.sb_rextsize;
-                                       alen = roundup(alen, extsz);
-                                       extsz = alen / mp->m_sb.sb_rextsize;
+                               } else {
+                                       extsz = ip->i_d.di_extsize;
+                               }
+                               if (extsz) {
+                                       error = xfs_bmap_extsize_align(mp,
+                                                       &got, &prev, extsz,
+                                                       rt, eof, delay, convert,
+                                                       &aoff, &alen);
+                                       ASSERT(!error);
                                }
 
+                               if (rt)
+                                       extsz = alen / mp->m_sb.sb_rextsize;
+
                                /*
                                 * Make a transaction-less quota reservation for
                                 * delayed allocation blocks. This number gets
@@ -4785,14 +4870,15 @@ xfs_bmapi(
                                        xfs_bmap_worst_indlen(ip, alen);
                                ASSERT(indlen > 0);
 
-                               if (rt)
+                               if (rt) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FREXTENTS,
                                                        -(extsz), rsvd);
-                               else
+                               } else {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(alen), rsvd);
+                               }
                                if (!error) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
@@ -4811,6 +4897,7 @@ xfs_bmapi(
                                if (error) {
                                        if (XFS_IS_QUOTA_ON(ip->i_mount))
                                                /* unreserve the blocks now */
+                                               (void)
                                                XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
                                                        mp, NULL, ip,
                                                        (long)alen, 0, rt ?
@@ -4849,6 +4936,7 @@ xfs_bmapi(
                                bma.firstblock = *firstblock;
                                bma.alen = alen;
                                bma.off = aoff;
+                               bma.conv = convert;
                                bma.wasdel = wasdelay;
                                bma.minlen = minlen;
                                bma.low = flist->xbf_low;
@@ -5270,8 +5358,7 @@ xfs_bunmapi(
                return 0;
        }
        XFS_STATS_INC(xs_blk_unmap);
-       isrt = (whichfork == XFS_DATA_FORK) &&
-              (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
        start = bno;
        bno = start + len - 1;
        ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5530,7 @@ xfs_bunmapi(
                }
                if (wasdel) {
                        ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-                       /* Update realtim/data freespace, unreserve quota */
+                       /* Update realtime/data freespace, unreserve quota */
                        if (isrt) {
                                xfs_filblks_t rtexts;
 
@@ -5451,14 +5538,14 @@ xfs_bunmapi(
                                do_div(rtexts, mp->m_sb.sb_rextsize);
                                xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
                                                (int)rtexts, rsvd);
-                               XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
-                                       -((long)del.br_blockcount), 0,
+                               (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+                                       NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
                                xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
                                                (int)del.br_blockcount, rsvd);
-                               XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
-                                       -((long)del.br_blockcount), 0,
+                               (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+                                       NULL, ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
                        }
                        ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5739,9 @@ xfs_getbmap(
                   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
                return XFS_ERROR(EINVAL);
        if (whichfork == XFS_DATA_FORK) {
-               if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+               if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+                               (XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+                   ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
                        prealloced = 1;
                        fixlen = XFS_MAXIOFFSET(mp);
                } else {
index 2e0717a01309ee5b75ab8ba521aceb31900cd77e..12cc63dfc2c472edfd0831893cf58be5cb2c4107 100644 (file)
@@ -62,6 +62,10 @@ typedef      struct xfs_bmap_free
 #define        XFS_BMAPI_IGSTATE       0x200   /* Ignore state - */
                                        /* combine contig. space */
 #define        XFS_BMAPI_CONTIG        0x400   /* must allocate only one extent */
+/*     XFS_BMAPI_DIRECT_IO     0x800   */
+#define XFS_BMAPI_CONVERT      0x1000  /* unwritten extent conversion - */
+                                       /* need write cache flushing and no */
+                                       /* additional allocation alignments */
 
 #define        XFS_BMAPI_AFLAG(w)      xfs_bmapi_aflag(w)
 static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
        char                    wasdel; /* replacing a delayed allocation */
        char                    userdata;/* set if is user data */
        char                    low;    /* low on space, using seq'l ags */
-       char                    aeof;   /* allocated space at eof */
+       char                    aeof;   /* allocated space at eof */
+       char                    conv;   /* overwriting unwritten extents */
 } xfs_bmalloca_t;
 
 #ifdef __KERNEL__
index c5a0e537ff1ab93c8184f0d0ef55dcdf7845dc1a..f697aab8a3d2708c6f2045a0179c8402bf7bf7cd 100644 (file)
@@ -246,8 +246,10 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOATIME_BIT   6     /* do not update atime */
 #define XFS_DIFLAG_NODUMP_BIT    7     /* do not dump */
 #define XFS_DIFLAG_RTINHERIT_BIT 8     /* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT  9  /* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT  10  /* disallow symlink creation */
+#define XFS_DIFLAG_PROJINHERIT_BIT   9 /* create with parents projid */
+#define XFS_DIFLAG_NOSYMLINKS_BIT   10 /* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT      11 /* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +261,14 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_RTINHERIT     (1 << XFS_DIFLAG_RTINHERIT_BIT)
 #define XFS_DIFLAG_PROJINHERIT   (1 << XFS_DIFLAG_PROJINHERIT_BIT)
 #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
         XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
-        XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+        XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+        XFS_DIFLAG_EXTSZINHERIT)
 
 #endif /* __XFS_DINODE_H__ */
index ba096f80f48d5883217d90abc3943550bf64c018..3280f49496ba1b0dc8701f92a613497a99287239 100644 (file)
@@ -65,6 +65,8 @@ struct fsxattr {
 #define XFS_XFLAG_RTINHERIT    0x00000100      /* create with rt bit set */
 #define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
 #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
 
 /*
index df0d4572d70a8a7b18fdfbbd62d4bfb2caccfc32..e486c7d244c24fb5266bff7c314a546b17e10cb3 100644 (file)
@@ -809,6 +809,10 @@ _xfs_dic2xflags(
                        flags |= XFS_XFLAG_PROJINHERIT;
                if (di_flags & XFS_DIFLAG_NOSYMLINKS)
                        flags |= XFS_XFLAG_NOSYMLINKS;
+               if (di_flags & XFS_DIFLAG_EXTSIZE)
+                       flags |= XFS_XFLAG_EXTSIZE;
+               if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+                       flags |= XFS_XFLAG_EXTSZINHERIT;
        }
 
        return flags;
@@ -1192,11 +1196,19 @@ xfs_ialloc(
                        if ((mode & S_IFMT) == S_IFDIR) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
-                       } else {
+                               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                                       ip->i_d.di_extsize = pip->i_d.di_extsize;
+                               }
+                       } else if ((mode & S_IFMT) == S_IFREG) {
                                if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                }
+                               if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+                                       di_flags |= XFS_DIFLAG_EXTSIZE;
+                                       ip->i_d.di_extsize = pip->i_d.di_extsize;
+                               }
                        }
                        if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
                            xfs_inherit_noatime)
@@ -1262,7 +1274,7 @@ xfs_isize_check(
        if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
                return;
 
-       if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+       if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
                return;
 
        nimaps = 2;
index 45a77a3a6c07ed60b2a4fe041c6496c5ded3e1a5..5ecf3e3e86aaa4feab769541b0a7a5c1802a73cd 100644 (file)
@@ -263,7 +263,7 @@ phase2:
        case BMAPI_WRITE:
                /* If we found an extent, return it */
                if (nimaps &&
-                   (imap.br_startblock != HOLESTARTBLOCK) && 
+                   (imap.br_startblock != HOLESTARTBLOCK) &&
                    (imap.br_startblock != DELAYSTARTBLOCK)) {
                        xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
                                        offset, count, iomapp, &imap, flags);
@@ -317,6 +317,58 @@ out:
        return XFS_ERROR(error);
 }
 
+STATIC int                              /* 0, or error from XFS_BMAP_EOF */
+xfs_iomap_eof_align_last_fsb(           /* round *last_fsb up for an EOF write */
+       xfs_mount_t     *mp,            /* mount: m_swidth, m_dalign, m_flags */
+       xfs_iocore_t    *io,            /* I/O core: io_flags, XFS_BMAP_EOF arg */
+       xfs_fsize_t     isize,          /* file size compared to stripe geometry */
+       xfs_extlen_t    extsize,        /* extent size to align to, 0 if none */
+       xfs_fileoff_t   *last_fsb)      /* in/out: last block of the request */
+{
+       xfs_fileoff_t   new_last_fsb = 0;       /* 0: no rounding selected */
+       xfs_extlen_t    align;                  /* alignment used for extsize */
+       int             eof, error;
+
+       if (io->io_flags & XFS_IOCORE_RT)
+               ;       /* XFS_IOCORE_RT set: skip both stripe roundings below */
+       /*
+        * If mounted with the "-o swalloc" option, round up the allocation
+        * request to a stripe width boundary if the file size is >=
+        * stripe width and we are allocating past the allocation eof.
+        */
+       else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+               (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+               new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
+       /*
+        * Round up the allocation request to a stripe unit (m_dalign) boundary
+        * if the file size is >= stripe unit size, and we are allocating past
+        * the allocation eof.
+        */
+       else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+               new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+
+       /*
+        * With a non-zero extsize (rt extent size or di_extsize hint), round
+        * *last_fsb up to align. NOTE(review): align can be an offset - intended?
+        */
+       if (extsize) {
+               if (new_last_fsb)
+                       align = roundup_64(new_last_fsb, extsize);
+               else
+                       align = extsize;
+               new_last_fsb = roundup_64(*last_fsb, align);
+       }
+
+       if (new_last_fsb) {
+               error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+               if (error)
+                       return error;
+               if (eof)        /* adopt the rounded end only when eof is set */
+                       *last_fsb = new_last_fsb;
+       }
+       return 0;
+}
+
 STATIC int
 xfs_flush_space(
        xfs_inode_t     *ip,
@@ -363,19 +415,20 @@ xfs_iomap_write_direct(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-       xfs_filblks_t   count_fsb;
+       xfs_filblks_t   count_fsb, resaligned;
        xfs_fsblock_t   firstfsb;
+       xfs_extlen_t    extsz, temp;
+       xfs_fsize_t     isize;
        int             nimaps;
-       int             error;
        int             bmapi_flag;
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        xfs_bmap_free_t free_list;
-       xfs_filblks_t   qblocks, resblks;
+       uint            qblocks, resblks, resrtextents;
        int             committed;
-       int             resrtextents;
+       int             error;
 
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -385,37 +438,52 @@ xfs_iomap_write_direct(
        if (error)
                return XFS_ERROR(error);
 
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-       count_fsb = last_fsb - offset_fsb;
-       if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
-               xfs_fileoff_t   map_last_fsb;
-
-               map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-               if (map_last_fsb < last_fsb) {
-                       last_fsb = map_last_fsb;
-                       count_fsb = last_fsb - offset_fsb;
-               }
-               ASSERT(count_fsb > 0);
+       rt = XFS_IS_REALTIME_INODE(ip);
+       if (unlikely(rt)) {
+               if (!(extsz = ip->i_d.di_extsize))
+                       extsz = mp->m_sb.sb_rextsize;
+       } else {
+               extsz = ip->i_d.di_extsize;
        }
 
-       /*
-        * Determine if reserving space on the data or realtime partition.
-        */
-       if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-               xfs_extlen_t    extsz;
+       isize = ip->i_d.di_size;
+       if (io->io_new_size > isize)
+               isize = io->io_new_size;
 
-               if (!(extsz = ip->i_d.di_extsize))
-                       extsz = mp->m_sb.sb_rextsize;
-               resrtextents = qblocks = (count_fsb + extsz - 1);
-               do_div(resrtextents, mp->m_sb.sb_rextsize);
-               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-               quota_flag = XFS_QMOPT_RES_RTBLKS;
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+       if ((offset + count) > isize) {
+               error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+                                                       &last_fsb);
+               if (error)
+                       goto error_out;
        } else {
-               resrtextents = 0;
-               resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
-               quota_flag = XFS_QMOPT_RES_REGBLKS;
+               if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+                       last_fsb = MIN(last_fsb, (xfs_fileoff_t)
+                                       ret_imap->br_blockcount +
+                                       ret_imap->br_startoff);
        }
+       count_fsb = last_fsb - offset_fsb;
+       ASSERT(count_fsb > 0);
+
+       resaligned = count_fsb;
+       if (unlikely(extsz)) {
+               if ((temp = do_mod(offset_fsb, extsz)))
+                       resaligned += temp;
+               if ((temp = do_mod(resaligned, extsz)))
+                       resaligned += extsz - temp;
+       }
+
+       if (unlikely(rt)) {
+               resrtextents = qblocks = resaligned;
+               resrtextents /= mp->m_sb.sb_rextsize;
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+               quota_flag = XFS_QMOPT_RES_RTBLKS;
+       } else {
+               resrtextents = 0;
+               resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+               quota_flag = XFS_QMOPT_RES_REGBLKS;
+       }
 
        /*
         * Allocate and setup the transaction
@@ -426,7 +494,6 @@ xfs_iomap_write_direct(
                        XFS_WRITE_LOG_RES(mp), resrtextents,
                        XFS_TRANS_PERM_LOG_RES,
                        XFS_WRITE_LOG_COUNT);
-
        /*
         * Check for running out of space, note: need lock to return
         */
@@ -436,20 +503,20 @@ xfs_iomap_write_direct(
        if (error)
                goto error_out;
 
-       if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
-               error = (EDQUOT);
+       error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+                                             qblocks, 0, quota_flag);
+       if (error)
                goto error1;
-       }
 
-       bmapi_flag = XFS_BMAPI_WRITE;
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);
 
-       if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+       bmapi_flag = XFS_BMAPI_WRITE;
+       if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
                bmapi_flag |= XFS_BMAPI_PREALLOC;
 
        /*
-        * Issue the bmapi() call to allocate the blocks
+        * Issue the xfs_bmapi() call to allocate the blocks
         */
        XFS_BMAP_INIT(&free_list, &firstfsb);
        nimaps = 1;
@@ -501,6 +568,62 @@ error_out:
        return XFS_ERROR(error);
 }
 
+/*
+ * Decide whether a write that reaches past end of file should have its
+ * allocation extended out to the file system's write iosize (any extra
+ * space is cleaned up when the file is closed in xfs_inactive()).
+ * *prealloc is set to 1 only if the write ends beyond isize, is not
+ * BMAPI_SYNC, and all mappings from its last block to XFS_MAXIOFFSET are
+ * holes or delalloc; else 0.  Returns 0 or the XFS_BMAPI error.
+ *
+ * Sync writes flush delayed allocate space to free room near the
+ * filesystem full boundary - preallocation hurts in that situation.
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+       xfs_mount_t     *mp,            /* mount, used for unit conversions */
+       xfs_iocore_t    *io,            /* I/O core handed to XFS_BMAPI */
+       xfs_fsize_t     isize,          /* file size the write is compared to */
+       xfs_off_t       offset,         /* start offset of the write */
+       size_t          count,          /* length of the write */
+       int             ioflag,         /* BMAPI_SYNC here disables prealloc */
+       xfs_bmbt_irec_t *imap,          /* scratch array for returned mappings */
+       int             nimaps,         /* number of entries in imap[] */
+       int             *prealloc)      /* out: 1 to preallocate, else 0 */
+{
+       xfs_fileoff_t   start_fsb;      /* next file block to look up */
+       xfs_filblks_t   count_fsb;      /* blocks still to be scanned */
+       xfs_fsblock_t   firstblock;
+       int             n, error, imaps;
+
+       *prealloc = 0;                  /* default: no preallocation */
+       if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+               return 0;
+
+       /*
+        * If there are any real blocks past eof, then don't
+        * do any speculative allocation.
+        */
+       start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+       count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+       while (count_fsb > 0) {
+               imaps = nimaps;         /* XFS_BMAPI takes &imaps; reset each pass */
+               error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+                                 0, &firstblock, 0, imap, &imaps, NULL);
+               if (error)
+                       return error;
+               for (n = 0; n < imaps; n++) {
+                       if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+                           (imap[n].br_startblock != DELAYSTARTBLOCK))
+                               return 0;       /* real block: leave *prealloc 0 */
+                       start_fsb += imap[n].br_blockcount;
+                       count_fsb -= imap[n].br_blockcount;
+               }
+       }
+       *prealloc = 1;
+       return 0;
+}
+
 int
 xfs_iomap_write_delay(
        xfs_inode_t     *ip,
@@ -514,13 +637,15 @@ xfs_iomap_write_delay(
        xfs_iocore_t    *io = &ip->i_iocore;
        xfs_fileoff_t   offset_fsb;
        xfs_fileoff_t   last_fsb;
-       xfs_fsize_t     isize;
+       xfs_off_t       aligned_offset;
+       xfs_fileoff_t   ioalign;
        xfs_fsblock_t   firstblock;
+       xfs_extlen_t    extsz;
+       xfs_fsize_t     isize;
        int             nimaps;
-       int             error;
        xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-       int             aeof;
-       int             fsynced = 0;
+       int             prealloc, fsynced = 0;
+       int             error;
 
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
 
@@ -528,152 +653,57 @@ xfs_iomap_write_delay(
         * Make sure that the dquots are there. This doesn't hold
         * the ilock across a disk read.
         */
-
        error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
        if (error)
                return XFS_ERROR(error);
 
+       if (XFS_IS_REALTIME_INODE(ip)) {
+               if (!(extsz = ip->i_d.di_extsize))
+                       extsz = mp->m_sb.sb_rextsize;
+       } else {
+               extsz = ip->i_d.di_extsize;
+       }
+
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
 retry:
        isize = ip->i_d.di_size;
-       if (io->io_new_size > isize) {
+       if (io->io_new_size > isize)
                isize = io->io_new_size;
-       }
 
-       aeof = 0;
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-       /*
-        * If the caller is doing a write at the end of the file,
-        * then extend the allocation (and the buffer used for the write)
-        * out to the file system's write iosize.  We clean up any extra
-        * space left over when the file is closed in xfs_inactive().
-        *
-        * For sync writes, we are flushing delayed allocate space to
-        * try to make additional space available for allocation near
-        * the filesystem full boundary - preallocation hurts in that
-        * situation, of course.
-        */
-       if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
-               xfs_off_t       aligned_offset;
-               xfs_filblks_t   count_fsb;
-               unsigned int    iosize;
-               xfs_fileoff_t   ioalign;
-               int             n;
-               xfs_fileoff_t   start_fsb;
+       error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+                               ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+       if (error)
+               return error;
 
-               /*
-                * If there are any real blocks past eof, then don't
-                * do any speculative allocation.
-                */
-               start_fsb = XFS_B_TO_FSBT(mp,
-                                       ((xfs_ufsize_t)(offset + count - 1)));
-               count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-               while (count_fsb > 0) {
-                       nimaps = XFS_WRITE_IMAPS;
-                       error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
-                                       0, &firstblock, 0, imap, &nimaps, NULL);
-                       if (error) {
-                               return error;
-                       }
-                       for (n = 0; n < nimaps; n++) {
-                               if ( !(io->io_flags & XFS_IOCORE_RT)  && 
-                                       !imap[n].br_startblock) {
-                                       cmn_err(CE_PANIC,"Access to block "
-                                               "zero:  fs <%s> inode: %lld "
-                                               "start_block : %llx start_off "
-                                               ": %llx blkcnt : %llx "
-                                               "extent-state : %x \n",
-                                               (ip->i_mount)->m_fsname,
-                                               (long long)ip->i_ino,
-                                               imap[n].br_startblock,
-                                               imap[n].br_startoff,
-                                               imap[n].br_blockcount,
-                                               imap[n].br_state);
-                               }
-                               if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
-                                   (imap[n].br_startblock != DELAYSTARTBLOCK)) {
-                                       goto write_map;
-                               }
-                               start_fsb += imap[n].br_blockcount;
-                               count_fsb -= imap[n].br_blockcount;
-                       }
-               }
-               iosize = mp->m_writeio_blocks;
+       if (prealloc) {
                aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-               last_fsb = ioalign + iosize;
-               aeof = 1;
+               last_fsb = ioalign + mp->m_writeio_blocks;
+       } else {
+               last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
        }
-write_map:
-       nimaps = XFS_WRITE_IMAPS;
-       firstblock = NULLFSBLOCK;
 
-       /*
-        * If mounted with the "-o swalloc" option, roundup the allocation
-        * request to a stripe width boundary if the file size is >=
-        * stripe width and we are allocating past the allocation eof.
-        */
-       if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 
-           && (mp->m_flags & XFS_MOUNT_SWALLOC)
-           && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
-               int eof;
-               xfs_fileoff_t new_last_fsb;
-
-               new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
-               error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-               if (error) {
-                       return error;
-               }
-               if (eof) {
-                       last_fsb = new_last_fsb;
-               }
-       /*
-        * Roundup the allocation request to a stripe unit (m_dalign) boundary
-        * if the file size is >= stripe unit size, and we are allocating past
-        * the allocation eof.
-        */
-       } else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
-                  (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
-               int eof;
-               xfs_fileoff_t new_last_fsb;
-               new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
-               error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-               if (error) {
-                       return error;
-               }
-               if (eof) {
-                       last_fsb = new_last_fsb;
-               }
-       /*
-        * Round up the allocation request to a real-time extent boundary
-        * if the file is on the real-time subvolume.
-        */
-       } else if (io->io_flags & XFS_IOCORE_RT && aeof) {
-               int eof;
-               xfs_fileoff_t new_last_fsb;
-
-               new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
-               error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
-               if (error) {
+       if (prealloc || extsz) {
+               error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+                                                       &last_fsb);
+               if (error)
                        return error;
-               }
-               if (eof)
-                       last_fsb = new_last_fsb;
        }
+
+       nimaps = XFS_WRITE_IMAPS;
+       firstblock = NULLFSBLOCK;
        error = xfs_bmapi(NULL, ip, offset_fsb,
                          (xfs_filblks_t)(last_fsb - offset_fsb),
                          XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
                          XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
                          &nimaps, NULL);
-       /*
-        * This can be EDQUOT, if nimaps == 0
-        */
-       if (error && (error != ENOSPC)) {
+       if (error && (error != ENOSPC))
                return XFS_ERROR(error);
-       }
+
        /*
         * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-        * then we must have run out of space.
+        * then we must have run out of space - flush delalloc, and retry.
         */
        if (nimaps == 0) {
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -685,9 +715,7 @@ write_map:
                goto retry;
        }
 
-       *ret_imap = imap[0];
-       *nmaps = 1;
-       if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
+       if (!(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
                cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
                         "start_block : %llx start_off : %llx blkcnt : %llx "
                         "extent-state : %x \n",
@@ -696,6 +724,10 @@ write_map:
                         ret_imap->br_startblock, ret_imap->br_startoff,
                         ret_imap->br_blockcount,ret_imap->br_state);
        }
+
+       *ret_imap = imap[0];
+       *nmaps = 1;
+
        return 0;
 }
 
@@ -868,17 +900,17 @@ xfs_iomap_write_unwritten(
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_iocore_t    *io = &ip->i_iocore;
-       xfs_trans_t     *tp;
        xfs_fileoff_t   offset_fsb;
        xfs_filblks_t   count_fsb;
        xfs_filblks_t   numblks_fsb;
-       xfs_bmbt_irec_t imap;
+       xfs_fsblock_t   firstfsb;
+       int             nimaps;
+       xfs_trans_t     *tp;
+       xfs_bmbt_irec_t imap;
+       xfs_bmap_free_t free_list;
+       uint            resblks;
        int             committed;
        int             error;
-       int             nres;
-       int             nimaps;
-       xfs_fsblock_t   firstfsb;
-       xfs_bmap_free_t free_list;
 
        xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
                                &ip->i_iocore, offset, count);
@@ -887,9 +919,9 @@ xfs_iomap_write_unwritten(
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
 
-       do {
-               nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+       resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
 
+       do {
                /*
                 * set up a transaction to convert the range of extents
                 * from unwritten to real. Do allocations in a loop until
@@ -897,7 +929,7 @@ xfs_iomap_write_unwritten(
                 */
 
                tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
-               error = xfs_trans_reserve(tp, nres,
+               error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
                                XFS_WRITE_LOG_COUNT);
@@ -916,7 +948,7 @@ xfs_iomap_write_unwritten(
                XFS_BMAP_INIT(&free_list, &firstfsb);
                nimaps = 1;
                error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-                                 XFS_BMAPI_WRITE, &firstfsb,
+                                 XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
                                  1, &imap, &nimaps, &free_list);
                if (error)
                        goto error_on_bmapi_transaction;
@@ -930,7 +962,7 @@ xfs_iomap_write_unwritten(
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        goto error0;
-               
+
                if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
                        cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
                                "inode: %lld start_block : %llx start_off : "
index 5f6dce3b4fd63b3d4252024c50af75c3cf4d9121..a2b422c984f2af2266c10adba6f819f2f899b077 100644 (file)
@@ -540,24 +540,6 @@ xfs_setattr(
                        goto error_return;
                }
 
-               /*
-                * Can't set extent size unless the file is marked, or
-                * about to be marked as a realtime file.
-                *
-                * This check will be removed when fixed size extents
-                * with buffered data writes is implemented.
-                *
-                */
-               if ((mask & XFS_AT_EXTSIZE)                     &&
-                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                    vap->va_extsize) &&
-                   (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
-                      ((mask & XFS_AT_XFLAGS) &&
-                       (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
-                       code = XFS_ERROR(EINVAL);
-                       goto error_return;
-               }
-
                /*
                 * Can't change realtime flag if any extents are allocated.
                 */
@@ -820,13 +802,17 @@ xfs_setattr(
                                        di_flags |= XFS_DIFLAG_RTINHERIT;
                                if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
                                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-                       } else {
+                               if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
+                                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+                       } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
                                if (vap->va_xflags & XFS_XFLAG_REALTIME) {
                                        di_flags |= XFS_DIFLAG_REALTIME;
                                        ip->i_iocore.io_flags |= XFS_IOCORE_RT;
                                } else {
                                        ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
                                }
+                               if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
+                                       di_flags |= XFS_DIFLAG_EXTSIZE;
                        }
                        ip->i_d.di_flags = di_flags;
                }
@@ -1568,7 +1554,8 @@ xfs_release(
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-                   (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
+                   (!(ip->i_d.di_flags &
+                               (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
                                return (error);
                        /* Update linux inode block count after free above */
@@ -1644,9 +1631,10 @@ xfs_inactive(
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
-                    (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-                   (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
-                    (ip->i_delayed_blks != 0))) {
+                     (ip->i_df.if_flags & XFS_IFEXTENTS) &&
+                    (!(ip->i_d.di_flags &
+                               (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+                     (ip->i_delayed_blks != 0)))) {
                        if ((error = xfs_inactive_free_eofblocks(mp, ip)))
                                return (VN_INACTIVE_CACHE);
                        /* Update linux inode block count after free above */
@@ -3998,42 +3986,36 @@ xfs_alloc_file_space(
        int                     alloc_type,
        int                     attr_flags)
 {
+       xfs_mount_t             *mp = ip->i_mount;
+       xfs_off_t               count;
        xfs_filblks_t           allocated_fsb;
        xfs_filblks_t           allocatesize_fsb;
-       int                     committed;
-       xfs_off_t               count;
-       xfs_filblks_t           datablocks;
-       int                     error;
+       xfs_extlen_t            extsz, temp;
+       xfs_fileoff_t           startoffset_fsb;
        xfs_fsblock_t           firstfsb;
-       xfs_bmap_free_t         free_list;
-       xfs_bmbt_irec_t         *imapp;
-       xfs_bmbt_irec_t         imaps[1];
-       xfs_mount_t             *mp;
-       int                     numrtextents;
-       int                     reccount;
-       uint                    resblks;
+       int                     nimaps;
+       int                     bmapi_flag;
+       int                     quota_flag;
        int                     rt;
-       int                     rtextsize;
-       xfs_fileoff_t           startoffset_fsb;
        xfs_trans_t             *tp;
-       int                     xfs_bmapi_flags;
+       xfs_bmbt_irec_t         imaps[1], *imapp;
+       xfs_bmap_free_t         free_list;
+       uint                    qblocks, resblks, resrtextents;
+       int                     committed;
+       int                     error;
 
        vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
-       mp = ip->i_mount;
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
 
-       /*
-        * determine if this is a realtime file
-        */
-       if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
-               if (ip->i_d.di_extsize)
-                       rtextsize = ip->i_d.di_extsize;
-               else
-                       rtextsize = mp->m_sb.sb_rextsize;
-       } else
-               rtextsize = 0;
+       rt = XFS_IS_REALTIME_INODE(ip);
+       if (unlikely(rt)) {
+               if (!(extsz = ip->i_d.di_extsize))
+                       extsz = mp->m_sb.sb_rextsize;
+       } else {
+               extsz = ip->i_d.di_extsize;
+       }
 
        if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
                return error;
@@ -4044,8 +4026,8 @@ xfs_alloc_file_space(
        count = len;
        error = 0;
        imapp = &imaps[0];
-       reccount = 1;
-       xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+       nimaps = 1;
+       bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 
@@ -4066,43 +4048,51 @@ xfs_alloc_file_space(
        }
 
        /*
-        * allocate file space until done or until there is an error
+        * Allocate file space until done or until there is an error
         */
 retry:
        while (allocatesize_fsb && !error) {
+               xfs_fileoff_t   s, e;
+
                /*
-                * determine if reserving space on
-                * the data or realtime partition.
+                * Determine space reservations for data/realtime.
                 */
-               if (rt) {
-                       xfs_fileoff_t s, e;
-
+               if (unlikely(extsz)) {
                        s = startoffset_fsb;
-                       do_div(s, rtextsize);
-                       s *= rtextsize;
-                       e = roundup_64(startoffset_fsb + allocatesize_fsb,
-                               rtextsize);
-                       numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
-                       datablocks = 0;
+                       do_div(s, extsz);
+                       s *= extsz;
+                       e = startoffset_fsb + allocatesize_fsb;
+                       if ((temp = do_mod(startoffset_fsb, extsz)))
+                               e += temp;
+                       if ((temp = do_mod(e, extsz)))
+                               e += extsz - temp;
+               } else {
+                       s = 0;
+                       e = allocatesize_fsb;
+               }
+
+               if (unlikely(rt)) {
+                       resrtextents = qblocks = (uint)(e - s);
+                       resrtextents /= mp->m_sb.sb_rextsize;
+                       resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+                       quota_flag = XFS_QMOPT_RES_RTBLKS;
                } else {
-                       datablocks = allocatesize_fsb;
-                       numrtextents = 0;
+                       resrtextents = 0;
+                       resblks = qblocks = \
+                               XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+                       quota_flag = XFS_QMOPT_RES_REGBLKS;
                }
 
                /*
-                * allocate and setup the transaction
+                * Allocate and setup the transaction.
                 */
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
-               error = xfs_trans_reserve(tp,
-                                         resblks,
-                                         XFS_WRITE_LOG_RES(mp),
-                                         numrtextents,
+               error = xfs_trans_reserve(tp, resblks,
+                                         XFS_WRITE_LOG_RES(mp), resrtextents,
                                          XFS_TRANS_PERM_LOG_RES,
                                          XFS_WRITE_LOG_COUNT);
-
                /*
-                * check for running out of space
+                * Check for running out of space
                 */
                if (error) {
                        /*
@@ -4113,8 +4103,8 @@ retry:
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-               error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-                               ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+               error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+                                                     qblocks, 0, quota_flag);
                if (error)
                        goto error1;
 
@@ -4122,19 +4112,19 @@ retry:
                xfs_trans_ihold(tp, ip);
 
                /*
-                * issue the bmapi() call to allocate the blocks
+                * Issue the xfs_bmapi() call to allocate the blocks
                 */
                XFS_BMAP_INIT(&free_list, &firstfsb);
                error = xfs_bmapi(tp, ip, startoffset_fsb,
-                                 allocatesize_fsb, xfs_bmapi_flags,
-                                 &firstfsb, 0, imapp, &reccount,
+                                 allocatesize_fsb, bmapi_flag,
+                                 &firstfsb, 0, imapp, &nimaps,
                                  &free_list);
                if (error) {
                        goto error0;
                }
 
                /*
-                * complete the transaction
+                * Complete the transaction
                 */
                error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
                if (error) {
@@ -4149,7 +4139,7 @@ retry:
 
                allocated_fsb = imapp->br_blockcount;
 
-               if (reccount == 0) {
+               if (nimaps == 0) {
                        error = XFS_ERROR(ENOSPC);
                        break;
                }
@@ -4172,9 +4162,11 @@ dmapi_enospc_check:
 
        return error;
 
- error0:
+error0:        /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
        xfs_bmap_cancel(&free_list);
- error1:
+       XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+
+error1:        /* Cancel transaction and unlock inode */
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        goto dmapi_enospc_check;
@@ -4419,8 +4411,8 @@ xfs_free_file_space(
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-                               ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-                               XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+                               ip->i_udquot, ip->i_gdquot, resblks, 0,
+                               XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto error1;