igb: Support using build_skb in the case that jumbo frames are disabled
author: Alexander Duyck <alexander.h.duyck@intel.com>
Sat, 2 Feb 2013 05:07:11 +0000 (05:07 +0000)
committer: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Fri, 15 Feb 2013 09:11:29 +0000 (01:11 -0800)
This change makes it so that we can enable the use of build_skb for cases
where jumbo frames are disabled.  The advantage to this is that we do not
have to perform a memcpy to populate the header and as a result we see a
significant performance improvement.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c

index 4b78053592baf8650d89a987348138999d73fcab..afdb8bbcf6ce18bda1448de10fa6df87b97b757f 100644 (file)
@@ -275,10 +275,18 @@ struct igb_q_vector {
 enum e1000_ring_flags_t {
        IGB_RING_FLAG_RX_SCTP_CSUM,
        IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
+       IGB_RING_FLAG_RX_BUILD_SKB_ENABLED,
        IGB_RING_FLAG_TX_CTX_IDX,
        IGB_RING_FLAG_TX_DETECT_HANG
 };
 
+#define ring_uses_build_skb(ring) \
+       test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define set_ring_build_skb_enabled(ring) \
+       set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define clear_ring_build_skb_enabled(ring) \
+       clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
 #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
 
 #define IGB_RX_DESC(R, i)          \
index 1aaf19351863e2f8db625518080f8b0e3e7d0641..b070a97376c8a4e2c4a3573fc737109faf8ed134 100644 (file)
@@ -3354,6 +3354,20 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
        wr32(E1000_RXDCTL(reg_idx), rxdctl);
 }
 
+static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+                                 struct igb_ring *rx_ring)
+{
+#define IGB_MAX_BUILD_SKB_SIZE \
+       (SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) - \
+        (NET_SKB_PAD + NET_IP_ALIGN + IGB_TS_HDR_LEN))
+
+       /* set build_skb flag */
+       if (adapter->max_frame_size <= IGB_MAX_BUILD_SKB_SIZE)
+               set_ring_build_skb_enabled(rx_ring);
+       else
+               clear_ring_build_skb_enabled(rx_ring);
+}
+
 /**
  * igb_configure_rx - Configure receive Unit after Reset
  * @adapter: board private structure
@@ -3373,8 +3387,11 @@ static void igb_configure_rx(struct igb_adapter *adapter)
 
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring */
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct igb_ring *rx_ring = adapter->rx_ring[i];
+               igb_set_rx_buffer_len(adapter, rx_ring);
+               igb_configure_rx_ring(adapter, rx_ring);
+       }
 }
 
 /**
@@ -6097,6 +6114,41 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring,
                                         DMA_FROM_DEVICE);
 }
 
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+                                 struct page *page,
+                                 unsigned int truesize)
+{
+       /* avoid re-using remote pages */
+       if (unlikely(page_to_nid(page) != numa_node_id()))
+               return false;
+
+#if (PAGE_SIZE < 8192)
+       /* if we are only owner of page we can reuse it */
+       if (unlikely(page_count(page) != 1))
+               return false;
+
+       /* flip page offset to other buffer */
+       rx_buffer->page_offset ^= IGB_RX_BUFSZ;
+
+       /* since we are the only owner of the page and we need to
+        * increment it, just set the value to 2 in order to avoid
+        * an unnecessary locked operation
+        */
+       atomic_set(&page->_count, 2);
+#else
+       /* move offset up to the next cache line */
+       rx_buffer->page_offset += truesize;
+
+       if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
+               return false;
+
+       /* bump ref count on page before it is given to the stack */
+       get_page(page);
+#endif
+
+       return true;
+}
+
 /**
  * igb_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
@@ -6119,6 +6171,11 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring,
 {
        struct page *page = rx_buffer->page;
        unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = IGB_RX_BUFSZ;
+#else
+       unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
 
        if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
                unsigned char *va = page_address(page) + rx_buffer->page_offset;
@@ -6141,38 +6198,88 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring,
        }
 
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                       rx_buffer->page_offset, size, IGB_RX_BUFSZ);
+                       rx_buffer->page_offset, size, truesize);
 
-       /* avoid re-using remote pages */
-       if (unlikely(page_to_nid(page) != numa_node_id()))
-               return false;
+       return igb_can_reuse_rx_page(rx_buffer, page, truesize);
+}
 
+static struct sk_buff *igb_build_rx_buffer(struct igb_ring *rx_ring,
+                                          union e1000_adv_rx_desc *rx_desc)
+{
+       struct igb_rx_buffer *rx_buffer;
+       struct sk_buff *skb;
+       struct page *page;
+       void *page_addr;
+       unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
 #if (PAGE_SIZE < 8192)
-       /* if we are only owner of page we can reuse it */
-       if (unlikely(page_count(page) != 1))
-               return false;
+       unsigned int truesize = IGB_RX_BUFSZ;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+                               SKB_DATA_ALIGN(NET_SKB_PAD +
+                                              NET_IP_ALIGN +
+                                              size);
+#endif
 
-       /* flip page offset to other buffer */
-       rx_buffer->page_offset ^= IGB_RX_BUFSZ;
+       /* If we spanned a buffer we have a huge mess so test for it */
+       BUG_ON(unlikely(!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)));
 
-       /*
-        * since we are the only owner of the page and we need to
-        * increment it, just set the value to 2 in order to avoid
-        * an unnecessary locked operation
-        */
-       atomic_set(&page->_count, 2);
-#else
-       /* move offset up to the next cache line */
-       rx_buffer->page_offset += SKB_DATA_ALIGN(size);
+       /* Guarantee this function can be used by verifying buffer sizes */
+       BUILD_BUG_ON(SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) < (NET_SKB_PAD +
+                                                       NET_IP_ALIGN +
+                                                       IGB_TS_HDR_LEN +
+                                                       ETH_FRAME_LEN +
+                                                       ETH_FCS_LEN));
 
-       if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
-               return false;
+       rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       page = rx_buffer->page;
+       prefetchw(page);
 
-       /* bump ref count on page before it is given to the stack */
-       get_page(page);
+       page_addr = page_address(page) + rx_buffer->page_offset;
+
+       /* prefetch first cache line of first page */
+       prefetch(page_addr + NET_SKB_PAD + NET_IP_ALIGN);
+#if L1_CACHE_BYTES < 128
+       prefetch(page_addr + L1_CACHE_BYTES + NET_SKB_PAD + NET_IP_ALIGN);
 #endif
 
-       return true;
+       /* build an skb around the page buffer */
+       skb = build_skb(page_addr, truesize);
+       if (unlikely(!skb)) {
+               rx_ring->rx_stats.alloc_failed++;
+               return NULL;
+       }
+
+       /* we are reusing so sync this buffer for CPU use */
+       dma_sync_single_range_for_cpu(rx_ring->dev,
+                                     rx_buffer->dma,
+                                     rx_buffer->page_offset,
+                                     IGB_RX_BUFSZ,
+                                     DMA_FROM_DEVICE);
+
+       /* update pointers within the skb to store the data */
+       skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD);
+       __skb_put(skb, size);
+
+       /* pull timestamp out of packet data */
+       if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+               igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
+               __skb_pull(skb, IGB_TS_HDR_LEN);
+       }
+
+       if (igb_can_reuse_rx_page(rx_buffer, page, truesize)) {
+               /* hand second half of page back to the ring */
+               igb_reuse_rx_page(rx_ring, rx_buffer);
+       } else {
+               /* we are not reusing the buffer so unmap it */
+               dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+                              PAGE_SIZE, DMA_FROM_DEVICE);
+       }
+
+       /* clear contents of buffer_info */
+       rx_buffer->dma = 0;
+       rx_buffer->page = NULL;
+
+       return skb;
 }
 
 static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
@@ -6184,13 +6291,6 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 
        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
 
-       /*
-        * This memory barrier is needed to keep us from reading
-        * any other fields out of the rx_desc until we know the
-        * RXD_STAT_DD bit is set
-        */
-       rmb();
-
        page = rx_buffer->page;
        prefetchw(page);
 
@@ -6590,8 +6690,17 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
                        break;
 
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc until we know the
+                * RXD_STAT_DD bit is set
+                */
+               rmb();
+
                /* retrieve a buffer from the ring */
-               skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
+               if (ring_uses_build_skb(rx_ring))
+                       skb = igb_build_rx_buffer(rx_ring, rx_desc);
+               else
+                       skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
 
                /* exit if we failed to retrieve a buffer */
                if (!skb)
@@ -6678,6 +6787,14 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
        return true;
 }
 
+static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring)
+{
+       if (ring_uses_build_skb(rx_ring))
+               return NET_SKB_PAD + NET_IP_ALIGN;
+       else
+               return 0;
+}
+
 /**
  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
  * @adapter: address of board private structure
@@ -6704,7 +6821,9 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
                 * Refresh the desc even if buffer_addrs didn't change
                 * because each write-back erases this info.
                 */
-               rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+               rx_desc->read.pkt_addr = cpu_to_le64(bi->dma +
+                                                    bi->page_offset +
+                                                    igb_rx_offset(rx_ring));
 
                rx_desc++;
                bi++;