net: fec: Add software TSO support
author Nimrod Andy <B38611@freescale.com>
Thu, 12 Jun 2014 00:16:23 +0000 (08:16 +0800)
committer David S. Miller <davem@davemloft.net>
Thu, 12 Jun 2014 18:01:57 +0000 (11:01 -0700)
Add software TSO support for FEC.
This feature improves outbound throughput.

Tested on an imx6dl sabresd board; iperf TCP tests show:
- 16.2% improvement compared with the FEC SG patch
- 82% improvement compared with a driver without the SG and TSO patches

$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec

During testing, the CPU load was 30%.
Since the imx6dl FEC bandwidth is limited by the SoC system bus bandwidth,
this level of performance with SW TSO is a milestone.

CC: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Laight <David.Laight@ACULAB.COM>
CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c

index e7ce14d8d3c3f02340bc5fb8046477dea0de9f74..671d080105a7e08c5e20456a5ad38b29f6704e19 100644 (file)
@@ -299,6 +299,12 @@ struct fec_enet_private {
        unsigned short bufdesc_size;
        unsigned short tx_ring_size;
        unsigned short rx_ring_size;
+       unsigned short tx_stop_threshold;
+       unsigned short tx_wake_threshold;
+
+       /* Software TSO */
+       char *tso_hdrs;
+       dma_addr_t tso_hdrs_dma;
 
        struct  platform_device *pdev;
 
index bea00a8d6c99b6d663c20a134220b75a14632290..38d9d276ab8b8c006fe13f1aa76eac2e55a2d775 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
 #define FEC_PAUSE_FLAG_AUTONEG 0x1
 #define FEC_PAUSE_FLAG_ENABLE  0x2
 
+#define TSO_HEADER_SIZE                128 /* bytes reserved per Tx ring entry for one TSO segment header */
+/* Max number of allowed TCP segments for software TSO */
+#define FEC_MAX_TSO_SEGS       100
+#define FEC_MAX_SKB_DESCS      (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) /* used as the Tx queue stop threshold */
+/* True when DMA address addr lies within the tx_ring_size * TSO_HEADER_SIZE bytes starting at tso_hdrs_dma. NOTE(review): txq and addr are not parenthesized in the expansion. */
+#define IS_TSO_HEADER(txq, addr) \
+       ((addr >= txq->tso_hdrs_dma) && \
+       (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
+
 static int mii_cnt;
 
 static inline
@@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
        unsigned short buflen;
        unsigned int estatus = 0;
        unsigned int index;
+       int entries_free;
        int ret;
 
+       entries_free = fec_enet_get_free_txdesc_num(fep);
+       if (entries_free < MAX_SKB_FRAGS + 1) {
+               dev_kfree_skb_any(skb);
+               if (net_ratelimit())
+                       netdev_err(ndev, "NOT enough BD for SG!\n");
+               return NETDEV_TX_OK;
+       }
+
        /* Protocol checksum off-load for TCP and UDP. */
        if (fec_enet_clear_csum(skb, ndev)) {
                dev_kfree_skb_any(skb);
@@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
        return 0;
 }
 
-static netdev_tx_t
-fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static int /* Fill Tx descriptor bdp with one chunk of TSO payload; returns 0, or NETDEV_TX_BUSY if DMA mapping fails. */
+fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
+                       struct bufdesc *bdp, int index, char *data, /* index selects the tx_bounce[] slot; data/size describe the chunk */
+                       int size, bool last_tcp, bool is_last) /* last_tcp: set BD_ENET_TX_LAST on this BD; is_last: set the Tx interrupt flags */
{
        struct fec_enet_private *fep = netdev_priv(ndev);
-       struct bufdesc *bdp;
-       unsigned short  status;
-       int entries_free;
-       int ret;
-
-       /* Fill in a Tx ring entry */
-       bdp = fep->cur_tx;
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+       unsigned short status;
+       unsigned int estatus = 0;
 
        status = bdp->cbd_sc;
+       status &= ~BD_ENET_TX_STATS; /* drop the BD_ENET_TX_STATS bits read back from the descriptor */
 
-       if (status & BD_ENET_TX_READY) {
-               /* Ooops.  All transmit buffers are full.  Bail out.
-                * This should not happen, since ndev->tbusy should be set.
-                */
+       status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+       bdp->cbd_datlen = size;
+       /* Bounce the chunk through tx_bounce[index] when its address has FEC_ALIGNMENT bits set or the device has the SWAP_FRAME quirk. */
+       if (((unsigned long) data) & FEC_ALIGNMENT ||
+               id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+               memcpy(fep->tx_bounce[index], data, size);
+               data = fep->tx_bounce[index];
+
+               if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                       swap_buffer(data, size);
+       }
+       /* Map the (possibly bounced) chunk for device reads and store the resulting address in the descriptor. */
+       bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
+                                       size, DMA_TO_DEVICE);
+       if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
+               dev_kfree_skb_any(skb); /* NOTE(review): skb is freed here yet NETDEV_TX_BUSY is returned below; confirm the caller cannot requeue or reuse it */
                if (net_ratelimit())
-                       netdev_err(ndev, "tx queue full!\n");
+                       netdev_err(ndev, "Tx DMA memory map failed\n");
                return NETDEV_TX_BUSY;
        }
 
-       ret = fec_enet_txq_submit_skb(skb, ndev);
+       if (fep->bufdesc_ex) {
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; /* presumably requests protocol and IP checksum insertion -- TODO confirm */
+               ebdp->cbd_bdu = 0;
+               ebdp->cbd_esc = estatus;
+       }
+
+       /* Handle the last BD specially: last_tcp adds BD_ENET_TX_LAST, is_last adds the interrupt flags */
+       if (last_tcp)
+               status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC); /* BD_ENET_TX_TC was already set above */
+       if (is_last) {
+               status |= BD_ENET_TX_INTR;
+               if (fep->bufdesc_ex)
+                       ebdp->cbd_esc |= BD_ENET_TX_INT;
+       }
+       /* Write the status word, including BD_ENET_TX_READY, only after the other descriptor fields are filled in. */
+       bdp->cbd_sc = status;
+
+       return 0;
+}
+/* Fill Tx descriptor bdp with the headers of one TSO segment, using header slot index; returns 0, or NETDEV_TX_BUSY if DMA mapping of the bounce copy fails. */
+static int
+fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
+                       struct bufdesc *bdp, int index)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(fep->pdev);
+       int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); /* transport-header offset plus TCP header length */
+       struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+       void *bufaddr;
+       unsigned long dmabuf;
+       unsigned short status;
+       unsigned int estatus = 0;
+       /* Start from the descriptor's current status with the BD_ENET_TX_STATS bits cleared. */
+       status = bdp->cbd_sc;
+       status &= ~BD_ENET_TX_STATS;
+       status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
+       /* Slot index of the header area: CPU address in bufaddr, matching DMA address in dmabuf. */
+       bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+       dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
+       if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
+               id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
+               memcpy(fep->tx_bounce[index], skb->data, hdr_len); /* NOTE(review): copies from skb->data, not from the header slot at bufaddr; confirm this is the intended source */
+               bufaddr = fep->tx_bounce[index];
+
+               if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
+                       swap_buffer(bufaddr, hdr_len);
+               /* Only the bounce copy is mapped here; otherwise dmabuf keeps the slot's DMA address computed above. */
+               dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
+                                       hdr_len, DMA_TO_DEVICE);
+               if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
+                       dev_kfree_skb_any(skb); /* NOTE(review): skb is freed here yet NETDEV_TX_BUSY is returned below; confirm the caller cannot requeue or reuse it */
+                       if (net_ratelimit())
+                               netdev_err(ndev, "Tx DMA memory map failed\n");
+                       return NETDEV_TX_BUSY;
+               }
+       }
+       /* Record buffer address and length; BD_ENET_TX_LAST is never set by this function. */
+       bdp->cbd_bufaddr = dmabuf;
+       bdp->cbd_datlen = hdr_len;
+
+       if (fep->bufdesc_ex) {
+               if (skb->ip_summed == CHECKSUM_PARTIAL)
+                       estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; /* presumably requests protocol and IP checksum insertion -- TODO confirm */
+               ebdp->cbd_bdu = 0;
+               ebdp->cbd_esc = estatus;
+       }
+       /* Write the status word, including BD_ENET_TX_READY, after the other descriptor fields. */
+       bdp->cbd_sc = status;
+
+       return 0;
+}
+/* Queue a GSO skb as a series of segments, each one header BD followed by data BDs. Returns 0 once queued, NETDEV_TX_OK if the skb was dropped, or the non-zero code from a failed helper. */
+static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       int total_len, data_left;
+       struct bufdesc *bdp = fep->cur_tx;
+       struct tso_t tso;
+       unsigned int index = 0;
+       int ret;
+       /* Drop the skb unless the ring has more free BDs than tso_count_descs() reports for it. */
+       if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
+               dev_kfree_skb_any(skb);
+               if (net_ratelimit())
+                       netdev_err(ndev, "NOT enough BD for TSO!\n");
+               return NETDEV_TX_OK;
+       }
+
+       /* Protocol checksum off-load for TCP and UDP. */
+       if (fec_enet_clear_csum(skb, ndev)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* Initialize the TSO handler, and prepare the first payload */
+       tso_start(skb, &tso);
+       /* Payload bytes still to be segmented. */
+       total_len = skb->len - hdr_len;
+       while (total_len > 0) { /* one iteration per output segment */
+               char *hdr;
+               /* bdp and index identify the descriptor and header slot used for this segment's headers. */
+               index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+               data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); /* payload of this segment, capped at gso_size */
+               total_len -= data_left;
+
+               /* prepare packet headers: MAC + IP + TCP */
+               hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
+               tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); /* last argument is true for the final segment */
+               ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
+               if (ret)
+                       goto err_release;
+               /* Spread this segment's payload over as many data BDs as needed. */
+               while (data_left > 0) {
+                       int size;
+
+                       size = min_t(int, tso.size, data_left); /* limited by tso.size and by what the segment still needs */
+                       bdp = fec_enet_get_nextdesc(bdp, fep);
+                       index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
+                       ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
+                                                       size, size == data_left,
+                                                       total_len == 0); /* NOTE(review): is_last is true for every data BD of the final segment, not only its last one */
+                       if (ret)
+                               goto err_release;
+
+                       data_left -= size;
+                       tso_build_data(skb, &tso, size);
+               }
+               /* Step past the last data BD so the next segment's header gets its own descriptor. */
+               bdp = fec_enet_get_nextdesc(bdp, fep);
+       }
+
+       /* Save skb pointer in the slot of the last data BD that was filled */
+       fep->tx_skbuff[index] = skb;
+
+       fec_enet_submit_work(bdp, fep);
+
+       skb_tx_timestamp(skb);
+       fep->cur_tx = bdp;
+
+       /* Trigger transmission start */
+       writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+
+       return 0;
+
+err_release:
+       /* TODO: Release all used data descriptors for TSO; until then, descriptors filled before the failure are left as written */
+       return ret;
+}
+
+static netdev_tx_t
+fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       int entries_free;
+       int ret;
+
+       if (skb_is_gso(skb))
+               ret = fec_enet_txq_submit_tso(skb, ndev);
+       else
+               ret = fec_enet_txq_submit_skb(skb, ndev);
        if (ret)
                return ret;
 
        entries_free = fec_enet_get_free_txdesc_num(fep);
-       if (entries_free < MAX_SKB_FRAGS + 1)
+       if (entries_free <= fep->tx_stop_threshold)
                netif_stop_queue(ndev);
 
        return NETDEV_TX_OK;
@@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
        unsigned short status;
        struct  sk_buff *skb;
        int     index = 0;
-       int     entries;
+       int     entries_free;
 
        fep = netdev_priv(ndev);
        bdp = fep->dirty_tx;
@@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev)
                index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
 
                skb = fep->tx_skbuff[index];
-               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
-                               DMA_TO_DEVICE);
+               if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
+                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                                       bdp->cbd_datlen, DMA_TO_DEVICE);
                bdp->cbd_bufaddr = 0;
                if (!skb) {
                        bdp = fec_enet_get_nextdesc(bdp, fep);
@@ -962,9 +1157,11 @@ fec_enet_tx(struct net_device *ndev)
 
                /* Since we have freed up a buffer, the ring is no longer full
                 */
-               entries = fec_enet_get_free_txdesc_num(fep);
-               if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
-                       netif_wake_queue(ndev);
+               if (netif_queue_stopped(ndev)) {
+                       entries_free = fec_enet_get_free_txdesc_num(fep);
+                       if (entries_free >= fep->tx_wake_threshold)
+                               netif_wake_queue(ndev);
+               }
        }
        return;
 }
@@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev)
        fep->tx_ring_size = TX_RING_SIZE;
        fep->rx_ring_size = RX_RING_SIZE;
 
+       fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
+       fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;
+
        if (fep->bufdesc_ex)
                fep->bufdesc_size = sizeof(struct bufdesc_ex);
        else
@@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev)
        if (!cbd_base)
                return -ENOMEM;
 
+       fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
+                                               &fep->tso_hdrs_dma, GFP_KERNEL);
+       if (!fep->tso_hdrs) {
+               dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
+               return -ENOMEM;
+       }
+
        memset(cbd_base, 0, PAGE_SIZE);
 
        fep->netdev = ndev;
@@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev)
                ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;
 
        if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
+               ndev->gso_max_segs = FEC_MAX_TSO_SEGS;
+
                /* enable hw accelerator */
                ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                               | NETIF_F_RXCSUM | NETIF_F_SG);
+                               | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
                fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
        }