ixgbe: performance tweaks
author: Jesse Brandeburg <jesse.brandeburg@intel.com>
Thu, 3 Dec 2009 11:33:29 +0000 (11:33 +0000)
committer: David S. Miller <davem@davemloft.net>
Thu, 3 Dec 2009 23:43:22 +0000 (15:43 -0800)
Drop variables whose cache lines were being modified simultaneously from several hot paths.
Keep some variables that are modified on hot paths, but make their storage per queue.
Cache-align the start addresses of the DMA data buffers.
Cache-align (by padding) some structures that would otherwise end in the middle of a cache line.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ixgbe/ixgbe.h
drivers/net/ixgbe/ixgbe_ethtool.c
drivers/net/ixgbe/ixgbe_main.c

index 91d80b731352e25596c53c6ce4d4f4bac985b132..8da8eb53508456e09885554ca4e73266aaa00c7a 100644 (file)
@@ -161,10 +161,12 @@ struct ixgbe_ring {
        unsigned long reinit_state;
        u64 rsc_count;                  /* stat for coalesced packets */
        u64 rsc_flush;                  /* stats for flushed packets */
+       u32 restart_queue;              /* track tx queue restarts */
+       u32 non_eop_descs;              /* track hardware descriptor chaining */
 
        unsigned int size;              /* length in bytes */
        dma_addr_t dma;                 /* phys. address of descriptor ring */
-};
+} ____cacheline_internodealigned_in_smp;
 
 enum ixgbe_ring_f_enum {
        RING_F_NONE = 0,
@@ -189,7 +191,7 @@ enum ixgbe_ring_f_enum {
 struct ixgbe_ring_feature {
        int indices;
        int mask;
-};
+} ____cacheline_internodealigned_in_smp;
 
 #define MAX_RX_QUEUES 128
 #define MAX_TX_QUEUES 128
@@ -275,29 +277,25 @@ struct ixgbe_adapter {
        u16 eitr_high;
 
        /* TX */
-       struct ixgbe_ring *tx_ring;     /* One per active queue */
+       struct ixgbe_ring *tx_ring ____cacheline_aligned_in_smp; /* One per active queue */
        int num_tx_queues;
-       u64 restart_queue;
-       u64 hw_csum_tx_good;
-       u64 lsc_int;
-       u64 hw_tso_ctxt;
-       u64 hw_tso6_ctxt;
        u32 tx_timeout_count;
        bool detect_tx_hung;
 
+       u64 restart_queue;
+       u64 lsc_int;
+
        /* RX */
-       struct ixgbe_ring *rx_ring;     /* One per active queue */
+       struct ixgbe_ring *rx_ring ____cacheline_aligned_in_smp; /* One per active queue */
        int num_rx_queues;
        u64 hw_csum_rx_error;
        u64 hw_rx_no_dma_resources;
-       u64 hw_csum_rx_good;
        u64 non_eop_descs;
        int num_msix_vectors;
        int max_msix_q_vectors;         /* true count of q_vectors for device */
        struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
        struct msix_entry *msix_entries;
 
-       u64 rx_hdr_split;
        u32 alloc_rx_page_failed;
        u32 alloc_rx_buff_failed;
 
index 1928d559e65fd4f2c42fcf3f22d80e4d6727c683..06a9d18bbdbc13f22b3a824567e6adbf0ea5e115 100644 (file)
@@ -93,16 +93,11 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = {
        {"tx_restart_queue", IXGBE_STAT(restart_queue)},
        {"rx_long_length_errors", IXGBE_STAT(stats.roc)},
        {"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
-       {"tx_tcp4_seg_ctxt", IXGBE_STAT(hw_tso_ctxt)},
-       {"tx_tcp6_seg_ctxt", IXGBE_STAT(hw_tso6_ctxt)},
        {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
        {"rx_flow_control_xon", IXGBE_STAT(stats.lxonrxc)},
        {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
        {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
-       {"rx_csum_offload_good", IXGBE_STAT(hw_csum_rx_good)},
        {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
-       {"tx_csum_offload_ctxt", IXGBE_STAT(hw_csum_tx_good)},
-       {"rx_header_split", IXGBE_STAT(rx_hdr_split)},
        {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
        {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
        {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
index e3dc68ba4b70374c3ee726eed2a835f2b60bbfb7..db05030a30ec5c01a7a342b9151884903d67ef91 100644 (file)
@@ -413,7 +413,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
                if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
                    !test_bit(__IXGBE_DOWN, &adapter->state)) {
                        netif_wake_subqueue(netdev, tx_ring->queue_index);
-                       ++adapter->restart_queue;
+                       ++tx_ring->restart_queue;
                }
        }
 
@@ -624,7 +624,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter,
 
        /* It must be a TCP or UDP packet with a valid checksum */
        skb->ip_summed = CHECKSUM_UNNECESSARY;
-       adapter->hw_csum_rx_good++;
 }
 
 static inline void ixgbe_release_rx_desc(struct ixgbe_hw *hw,
@@ -681,14 +680,19 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter,
 
                if (!bi->skb) {
                        struct sk_buff *skb;
-                       skb = netdev_alloc_skb_ip_align(adapter->netdev,
-                                                       rx_ring->rx_buf_len);
+                       /* netdev_alloc_skb reserves 32 bytes up front!! */
+                       uint bufsz = rx_ring->rx_buf_len + SMP_CACHE_BYTES;
+                       skb = netdev_alloc_skb(adapter->netdev, bufsz);
 
                        if (!skb) {
                                adapter->alloc_rx_buff_failed++;
                                goto no_buffers;
                        }
 
+                       /* advance the data pointer to the next cache line */
+                       skb_reserve(skb, (PTR_ALIGN(skb->data, SMP_CACHE_BYTES)
+                                         - skb->data));
+
                        bi->skb = skb;
                        bi->dma = pci_map_single(pdev, skb->data,
                                                 rx_ring->rx_buf_len,
@@ -801,8 +805,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                        hdr_info = le16_to_cpu(ixgbe_get_hdr_info(rx_desc));
                        len = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
                               IXGBE_RXDADV_HDRBUFLEN_SHIFT;
-                       if (hdr_info & IXGBE_RXDADV_SPH)
-                               adapter->rx_hdr_split++;
                        if (len > IXGBE_RX_HDR_SIZE)
                                len = IXGBE_RX_HDR_SIZE;
                        upper_len = le16_to_cpu(rx_desc->wb.upper.length);
@@ -812,7 +814,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
                cleaned = true;
                skb = rx_buffer_info->skb;
-               prefetch(skb->data - NET_IP_ALIGN);
+               prefetch(skb->data);
                rx_buffer_info->skb = NULL;
 
                if (rx_buffer_info->dma) {
@@ -884,7 +886,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                                skb->next = next_buffer->skb;
                                skb->next->prev = skb;
                        }
-                       adapter->non_eop_descs++;
+                       rx_ring->non_eop_descs++;
                        goto next_desc;
                }
 
@@ -4511,6 +4513,13 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
                adapter->rsc_total_flush = rsc_flush;
        }
 
+       /* gather some stats to the adapter struct that are per queue */
+       for (i = 0; i < adapter->num_tx_queues; i++)
+               adapter->restart_queue += adapter->tx_ring[i].restart_queue;
+
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               adapter->non_eop_descs += adapter->rx_ring[i].non_eop_descs;
+
        adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
        for (i = 0; i < 8; i++) {
                /* for packet buffers not used, the register should read 0 */
@@ -4893,14 +4902,12 @@ static int ixgbe_tso(struct ixgbe_adapter *adapter,
                                                                 iph->daddr, 0,
                                                                 IPPROTO_TCP,
                                                                 0);
-                       adapter->hw_tso_ctxt++;
                } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
                        ipv6_hdr(skb)->payload_len = 0;
                        tcp_hdr(skb)->check =
                            ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                             &ipv6_hdr(skb)->daddr,
                                             0, IPPROTO_TCP, 0);
-                       adapter->hw_tso6_ctxt++;
                }
 
                i = tx_ring->next_to_use;
@@ -5019,7 +5026,6 @@ static bool ixgbe_tx_csum(struct ixgbe_adapter *adapter,
                tx_buffer_info->time_stamp = jiffies;
                tx_buffer_info->next_to_watch = i;
 
-               adapter->hw_csum_tx_good++;
                i++;
                if (i == tx_ring->count)
                        i = 0;
@@ -5256,8 +5262,6 @@ static void ixgbe_atr(struct ixgbe_adapter *adapter, struct sk_buff *skb,
 static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
                                  struct ixgbe_ring *tx_ring, int size)
 {
-       struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
        netif_stop_subqueue(netdev, tx_ring->queue_index);
        /* Herbert's original patch had:
         *  smp_mb__after_netif_stop_queue();
@@ -5271,7 +5275,7 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
 
        /* A reprieve! - use start_queue because it doesn't call schedule */
        netif_start_subqueue(netdev, tx_ring->queue_index);
-       ++adapter->restart_queue;
+       ++tx_ring->restart_queue;
        return 0;
 }