xps: Improvements in TX queue selection
author Tom Herbert <therbert@google.com>
Sun, 21 Nov 2010 13:17:29 +0000 (13:17 +0000)
committer David S. Miller <davem@davemloft.net>
Wed, 24 Nov 2010 19:44:19 +0000 (11:44 -0800)
In dev_pick_tx, don't do work in calculating the queue index or setting
the index in the sock unless the device has more than one queue.  This
allows the sock to be set only with a queue index of a multi-queue
device, which is desirable if devices are stacked like in a tunnel.

We also allow the mapping of a socket to queue to be changed.  To
maintain in-order packet transmission, a flag (ooo_okay) has been
added to the sk_buff structure.  If a transport layer sets this flag
on a packet, the transmit queue can be changed for the socket.
Presumably, the transport would set this if there was no possibility
of creating out-of-order (OOO) packets (for instance, there are no
packets in flight for the socket).  This patch includes the
modification in TCP output for setting this flag.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/skbuff.h
net/core/dev.c
net/ipv4/tcp_output.c

index e6ba898de61c181cea2cad515ce78cb63719136b..19f37a6ee6c4b609480e388553ba282d95fd5e27 100644 (file)
@@ -386,9 +386,10 @@ struct sk_buff {
 #else
        __u8                    deliver_no_wcard:1;
 #endif
+       __u8                    ooo_okay:1;
        kmemcheck_bitfield_end(flags2);
 
-       /* 0/14 bit hole */
+       /* 0/13 bit hole */
 
 #ifdef CONFIG_NET_DMA
        dma_cookie_t            dma_cookie;
index 381b8e280162f55b654fae40576fcaf86b8f3b56..7b17674a29ec77f18247ddd0cc498d93c59ce85f 100644 (file)
@@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
        int queue_index;
        const struct net_device_ops *ops = dev->netdev_ops;
 
-       if (ops->ndo_select_queue) {
+       if (dev->real_num_tx_queues == 1)
+               queue_index = 0;
+       else if (ops->ndo_select_queue) {
                queue_index = ops->ndo_select_queue(dev, skb);
                queue_index = dev_cap_txqueue(dev, queue_index);
        } else {
                struct sock *sk = skb->sk;
                queue_index = sk_tx_queue_get(sk);
-               if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
-                       queue_index = 0;
-                       if (dev->real_num_tx_queues > 1)
-                               queue_index = skb_tx_hash(dev, skb);
+               if (queue_index < 0 || skb->ooo_okay ||
+                   queue_index >= dev->real_num_tx_queues) {
+                       int old_index = queue_index;
 
-                       if (sk) {
-                               struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+                       queue_index = skb_tx_hash(dev, skb);
+
+                       if (queue_index != old_index && sk) {
+                               struct dst_entry *dst =
+                                   rcu_dereference_check(sk->sk_dst_cache, 1);
 
                                if (dst && skb_dst(skb) == dst)
                                        sk_tx_queue_set(sk, queue_index);
index bb8f547fc7d2268662db8c8a12c276005b5ac113..5f29b2e20e23b524a218a6d2e5956f428509b69e 100644 (file)
@@ -822,8 +822,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                                                           &md5);
        tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-       if (tcp_packets_in_flight(tp) == 0)
+       if (tcp_packets_in_flight(tp) == 0) {
                tcp_ca_event(sk, CA_EVENT_TX_START);
+               skb->ooo_okay = 1;
+       } else
+               skb->ooo_okay = 0;
 
        skb_push(skb, tcp_header_size);
        skb_reset_transport_header(skb);