IB/ipath: Performance optimization for CPU differences
author Ralph Campbell <ralph.campbell@qlogic.com>
Tue, 24 Jul 2007 20:55:39 +0000 (13:55 -0700)
committer Roland Dreier <rolandd@cisco.com>
Wed, 10 Oct 2007 03:04:14 +0000 (20:04 -0700)
Different processors have different ordering restrictions for write
combining.  By taking advantage of this, we can eliminate some write
barriers when writing to the send buffers.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_iba6120.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_verbs.c

index cf25cdab02f9dd3efa5af610d200cf6211b9b85a..4137c7770f1bce083d7342508005c9497e5a91cd 100644 (file)
@@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                           dd->ipath_unit, plen - 1, pbufn);
 
        if (dp.pbc_wd == 0)
-               /* Legacy operation, use computed pbc_wd */
                dp.pbc_wd = plen;
-
-       /* we have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order */
        writeq(dp.pbc_wd, piobuf);
-       ipath_flush_wc();
-       /* copy all by the trigger word, then flush, so it's written
+       /*
+        * Copy all by the trigger word, then flush, so it's written
         * to chip before trigger word, then write trigger word, then
-        * flush again, so packet is sent. */
-       __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
-       ipath_flush_wc();
-       __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+        * flush again, so packet is sent.
+        */
+       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+               ipath_flush_wc();
+               __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+               ipath_flush_wc();
+               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+       } else
+               __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+
        ipath_flush_wc();
 
        ret = sizeof(dp);
index 5b6ac9a1a7095ba28ec986ecd89a90e4939fdb02..a324c6f7aeba11524dc56f7f6a28fb7abd64f357 100644 (file)
@@ -1273,6 +1273,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
 static int ipath_pe_early_init(struct ipath_devdata *dd)
 {
        dd->ipath_flags |= IPATH_4BYTE_TID;
+       if (ipath_unordered_wc())
+               dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
 
        /*
         * For openfabrics, we need to be able to handle an IB header of
index 7a7966f7e4fff96cc5a2027072fb15893126d927..d983f92b9bcbe2569393f9189c1bf1e224fd4c7a 100644 (file)
@@ -724,6 +724,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_LINKACTIVE    0x200
                /* link current state is unknown */
 #define IPATH_LINKUNK       0x400
+               /* Write combining flush needed for PIO */
+#define IPATH_PIO_FLUSH_WC  0x1000
                /* no IB cable, or no device on IB cable */
 #define IPATH_NOCABLE       0x4000
                /* Supports port zero per packet receive interrupts via
index 16aa61fd80856419957fdbe7ec6dcbb0054de026..559d4a662937c9c7346c4a3cb00da891149b307f 100644 (file)
@@ -631,7 +631,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 #endif
 
 static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length)
+                   u32 length, unsigned flush_wc)
 {
        u32 extra = 0;
        u32 data = 0;
@@ -757,11 +757,14 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
        }
        /* Update address before sending packet. */
        update_sge(ss, length);
-       /* must flush early everything before trigger word */
-       ipath_flush_wc();
-       __raw_writel(last, piobuf);
-       /* be sure trigger word is written */
-       ipath_flush_wc();
+       if (flush_wc) {
+               /* must flush early everything before trigger word */
+               ipath_flush_wc();
+               __raw_writel(last, piobuf);
+               /* be sure trigger word is written */
+               ipath_flush_wc();
+       } else
+               __raw_writel(last, piobuf);
 }
 
 /**
@@ -776,6 +779,7 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
                     u32 *hdr, u32 len, struct ipath_sge_state *ss)
 {
        u32 __iomem *piobuf;
+       unsigned flush_wc;
        u32 plen;
        int ret;
 
@@ -799,47 +803,55 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
         * or WC buffer can be written out of order.
         */
        writeq(plen, piobuf);
-       ipath_flush_wc();
        piobuf += 2;
+
+       flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
        if (len == 0) {
                /*
                 * If there is just the header portion, must flush before
                 * writing last word of header for correctness, and after
                 * the last header word (trigger word).
                 */
-               __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
+               if (flush_wc) {
+                       ipath_flush_wc();
+                       __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+                       ipath_flush_wc();
+                       __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+                       ipath_flush_wc();
+               } else
+                       __iowrite32_copy(piobuf, hdr, hdrwords);
+               goto done;
        }
 
+       if (flush_wc)
+               ipath_flush_wc();
        __iowrite32_copy(piobuf, hdr, hdrwords);
        piobuf += hdrwords;
 
        /* The common case is aligned and contained in one segment. */
        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 w;
+               u32 dwords;
                u32 *addr = (u32 *) ss->sge.vaddr;
 
                /* Update address before sending packet. */
                update_sge(ss, len);
                /* Need to round up for the last dword in the packet. */
-               w = (len + 3) >> 2;
-               __iowrite32_copy(piobuf, addr, w - 1);
-               /* must flush early everything before trigger word */
-               ipath_flush_wc();
-               __raw_writel(addr[w - 1], piobuf + w - 1);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
+               dwords = (len + 3) >> 2;
+               if (flush_wc) {
+                       __iowrite32_copy(piobuf, addr, dwords - 1);
+                       /* must flush early everything before trigger word */
+                       ipath_flush_wc();
+                       __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+                       /* be sure trigger word is written */
+                       ipath_flush_wc();
+               } else
+                       __iowrite32_copy(piobuf, addr, dwords);
+               goto done;
        }
-       copy_io(piobuf, ss, len);
+       copy_io(piobuf, ss, len, flush_wc);
+done:
        ret = 0;
-
 bail:
        return ret;
 }