net: hns: optimize XGE capability by reducing cpu usage
authoryankejian <yankejian@huawei.com>
Tue, 8 Dec 2015 03:02:31 +0000 (11:02 +0800)
committerDavid S. Miller <davem@davemloft.net>
Wed, 9 Dec 2015 03:06:40 +0000 (22:06 -0500)
here is the patch raising the performance of XGE by:
1)changes the way page management method for enet momery, and
2)reduces the count of rmb, and
3)adds Memory prefetching

Signed-off-by: Kejian Yan <yankejian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_enet.c

index d1f33166e0c3542d2522719954182c8fe3644099..6ca94dc3dda3c2447d59e369fee4eadeba6e0f52 100644 (file)
@@ -341,7 +341,8 @@ struct hnae_queue {
        void __iomem *io_base;
        phys_addr_t phy_base;
        struct hnae_ae_dev *dev;        /* the device who use this queue */
-       struct hnae_ring rx_ring, tx_ring;
+       struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp;
+       struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp;
        struct hnae_handle *handle;
 };
 
@@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
                                       struct hnae_desc_cb *res_cb)
 {
        struct hnae_buf_ops *bops = ring->q->handle->bops;
-       struct hnae_desc_cb tmp_cb = ring->desc_cb[i];
 
        bops->unmap_buffer(ring, &ring->desc_cb[i]);
        ring->desc_cb[i] = *res_cb;
-       *res_cb = tmp_cb;
        ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
        ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
index 77c6edbe26660ccffd62098d15dc0a58fea2e7bd..522b264866b42e68b65e40cd93e28f991a3e678c 100644 (file)
@@ -341,7 +341,6 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask)
        else
                flag = RCB_INT_FLAG_RX;
 
-       hns_rcb_int_clr_hw(ring->q, flag);
        hns_rcb_int_ctrl_hw(ring->q, flag, mask);
 }
 
index cad2663392009d6400a5071de8dff52471e8bde8..5a81dafd725e29b076d89eb151ce8953fdc908a8 100644 (file)
@@ -33,6 +33,7 @@
 
 #define RCB_IRQ_NOT_INITED 0
 #define RCB_IRQ_INITED 1
+#define HNS_BUFFER_SIZE_2048 2048
 
 #define BD_MAX_SEND_SIZE 8191
 #define SKB_TMP_LEN(SKB) \
@@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag,
                return max_size;
 }
 
-static void
-hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset)
+static void hns_nic_reuse_page(struct sk_buff *skb, int i,
+                              struct hnae_ring *ring, int pull_len,
+                              struct hnae_desc_cb *desc_cb)
 {
+       struct hnae_desc *desc;
+       int truesize, size;
+       int last_offset;
+
+       desc = &ring->desc[ring->next_to_clean];
+       size = le16_to_cpu(desc->rx.size);
+
+#if (PAGE_SIZE < 8192)
+       if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+               truesize = hnae_buf_size(ring);
+       } else {
+               truesize = ALIGN(size, L1_CACHE_BYTES);
+               last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+       }
+
+#else
+       truesize = ALIGN(size, L1_CACHE_BYTES);
+       last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+#endif
+
+       skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
+                       size - pull_len, truesize - pull_len);
+
         /* avoid re-using remote pages,flag default unreuse */
        if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) {
+#if (PAGE_SIZE < 8192)
+               if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) {
+                       /* if we are only owner of page we can reuse it */
+                       if (likely(page_count(desc_cb->priv) == 1)) {
+                               /* flip page offset to other buffer */
+                               desc_cb->page_offset ^= truesize;
+
+                               desc_cb->reuse_flag = 1;
+                               /* bump ref count on page before it is given*/
+                               get_page(desc_cb->priv);
+                       }
+                       return;
+               }
+#endif
                /* move offset up to the next cache line */
-               desc_cb->page_offset += tsize;
+               desc_cb->page_offset += truesize;
 
                if (desc_cb->page_offset <= last_offset) {
                        desc_cb->reuse_flag = 1;
@@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
        struct hnae_desc *desc;
        struct hnae_desc_cb *desc_cb;
        unsigned char *va;
-       int bnum, length, size, i, truesize, last_offset;
+       int bnum, length, i;
        int pull_len;
        u32 bnum_flag;
 
-       last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
        desc = &ring->desc[ring->next_to_clean];
        desc_cb = &ring->desc_cb[ring->next_to_clean];
 
@@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
                return -ENOMEM;
        }
 
+       prefetchw(skb->data);
        length = le16_to_cpu(desc->rx.pkt_len);
        bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag);
        priv->ops.get_rxd_bnum(bnum_flag, &bnum);
        *out_bnum = bnum;
 
-       /* we will be copying header into skb->data in
-        * pskb_may_pull so it is in our interest to prefetch
-        * it now to avoid a possible cache miss
-        */
-       prefetchw(skb->data);
-
        if (length <= HNS_RX_HEAD_SIZE) {
                memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
 
@@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
                memcpy(__skb_put(skb, pull_len), va,
                       ALIGN(pull_len, sizeof(long)));
 
-               size = le16_to_cpu(desc->rx.size);
-               truesize = ALIGN(size, L1_CACHE_BYTES);
-               skb_add_rx_frag(skb, 0, desc_cb->priv,
-                               desc_cb->page_offset + pull_len,
-                               size - pull_len, truesize - pull_len);
-
-               hns_nic_reuse_page(desc_cb, truesize, last_offset);
+               hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
                ring_ptr_move_fw(ring, next_to_clean);
 
                if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/
@@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
                for (i = 1; i < bnum; i++) {
                        desc = &ring->desc[ring->next_to_clean];
                        desc_cb = &ring->desc_cb[ring->next_to_clean];
-                       size = le16_to_cpu(desc->rx.size);
-                       truesize = ALIGN(size, L1_CACHE_BYTES);
-                       skb_add_rx_frag(skb, i, desc_cb->priv,
-                                       desc_cb->page_offset,
-                                       size, truesize);
 
-                       hns_nic_reuse_page(desc_cb, truesize, last_offset);
+                       hns_nic_reuse_page(skb, i, ring, 0, desc_cb);
                        ring_ptr_move_fw(ring, next_to_clean);
                }
        }
@@ -750,9 +772,10 @@ recv:
        /* make all data has been write before submit */
        if (recv_pkts < budget) {
                ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
-               rmb(); /*complete read rx ring bd number*/
+
                if (ex_num > clean_count) {
                        num += ex_num - clean_count;
+                       rmb(); /*complete read rx ring bd number*/
                        goto recv;
                }
        }
@@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 
        bytes = 0;
        pkts = 0;
-       while (head != ring->next_to_clean)
+       while (head != ring->next_to_clean) {
                hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
+               /* issue prefetch for next Tx descriptor */
+               prefetch(&ring->desc_cb[ring->next_to_clean]);
+       }
 
        NETIF_TX_UNLOCK(ndev);
 
@@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget)
                        ring_data->ring, 0);
 
                ring_data->fini_process(ring_data);
+               return 0;
        }
 
        return clean_complete;