xfs: swalloc doesn't align allocations properly
authorDave Chinner <dchinner@redhat.com>
Thu, 12 Dec 2013 05:34:36 +0000 (16:34 +1100)
committerBen Myers <bpm@sgi.com>
Tue, 17 Dec 2013 15:30:12 +0000 (09:30 -0600)
When swalloc is specified as a mount option, allocations are
supposed to be aligned to the stripe width rather than the stripe
unit of the underlying filesystem. However, it does not do this.

What the implementation does is round up the allocation size to a
stripe width, hence ensuring that all allocations span a full stripe
width. It does not, however, ensure that that allocation is aligned
to a stripe width, and hence the allocations can span multiple
underlying stripes and so still see RMW cycles for things like
direct IO on MD RAID.

So, if the swalloc mount option is set, change the allocation
alignment in xfs_bmap_btalloc() to use the stripe width rather than
the stripe unit.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
fs/xfs/xfs_bmap.c

index 8401f11f378f989ea2a2ed70a9f744b8b89ced69..3b2c14b6f0fb13efd7a3aa89abc05d0a8334a5ff 100644 (file)
@@ -3648,10 +3648,19 @@ xfs_bmap_btalloc(
        int             isaligned;
        int             tryagain;
        int             error;
+       int             stripe_align;
 
        ASSERT(ap->length);
 
        mp = ap->ip->i_mount;
+
+       /* stripe alignment for allocation is determined by mount parameters */
+       stripe_align = 0;
+       if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+               stripe_align = mp->m_swidth;
+       else if (mp->m_dalign)
+               stripe_align = mp->m_dalign;
+
        align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
        if (unlikely(align)) {
                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3660,6 +3669,8 @@ xfs_bmap_btalloc(
                ASSERT(!error);
                ASSERT(ap->length);
        }
+
+
        nullfb = *ap->firstblock == NULLFSBLOCK;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
        if (nullfb) {
@@ -3735,7 +3746,7 @@ xfs_bmap_btalloc(
         */
        if (!ap->flist->xbf_low && ap->aeof) {
                if (!ap->offset) {
-                       args.alignment = mp->m_dalign;
+                       args.alignment = stripe_align;
                        atype = args.type;
                        isaligned = 1;
                        /*
@@ -3760,13 +3771,13 @@ xfs_bmap_btalloc(
                         * of minlen+alignment+slop doesn't go up
                         * between the calls.
                         */
-                       if (blen > mp->m_dalign && blen <= args.maxlen)
-                               nextminlen = blen - mp->m_dalign;
+                       if (blen > stripe_align && blen <= args.maxlen)
+                               nextminlen = blen - stripe_align;
                        else
                                nextminlen = args.minlen;
-                       if (nextminlen + mp->m_dalign > args.minlen + 1)
+                       if (nextminlen + stripe_align > args.minlen + 1)
                                args.minalignslop =
-                                       nextminlen + mp->m_dalign -
+                                       nextminlen + stripe_align -
                                        args.minlen - 1;
                        else
                                args.minalignslop = 0;
@@ -3788,7 +3799,7 @@ xfs_bmap_btalloc(
                 */
                args.type = atype;
                args.fsbno = ap->blkno;
-               args.alignment = mp->m_dalign;
+               args.alignment = stripe_align;
                args.minlen = nextminlen;
                args.minalignslop = 0;
                isaligned = 1;