s390/qeth: use skb_cow_head() for L2 OSA xmit
author: Julian Wiedmann <jwi@linux.vnet.ibm.com>
        Fri, 18 Aug 2017 08:19:10 +0000 (10:19 +0200)
committer: David S. Miller <davem@davemloft.net>
        Fri, 18 Aug 2017 17:21:30 +0000 (10:21 -0700)
Taking a full copy via skb_realloc_headroom() on every xmit is overkill
and wastes CPU time; all we actually need is to push on the qeth_hdr.
So rework the L2 OSA TX path to avoid the copy.
Minor complications arise because struct qeth_hdr must not cross a page
boundary. So add a new helper qeth_push_hdr() that catches this, and
falls back to the hdr cache that we already use for IQDs.

This change uncovered that qeth's TX completion takes rather long.
Now that we no longer free the original skb straight away and thus call
skb->destructor later than before, throughput regresses significantly.
For now, restore old behaviour by adding an explicit skb_orphan(),
and a big TODO to improve the TX completion time.

Tested-by: Nils Hoppmann <niho@de.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c

index 5753fbc485d580fb361c50bb7a6cef64a4fb0df9..59e09854c4f763f0c51683a4b57c7f584ab2aa8f 100644 (file)
@@ -985,6 +985,7 @@ int qeth_set_features(struct net_device *, netdev_features_t);
 int qeth_recover_features(struct net_device *);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 int qeth_vm_request_mac(struct qeth_card *card);
+int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
 
 /* exports for OSN */
 int qeth_osn_assist(struct net_device *, void *, int);
index ffefdd97abca199d7b05e70df4aa4dc2109311a4..bae7440abc01e3560a987b39db440c375488a979 100644 (file)
@@ -3890,6 +3890,34 @@ int qeth_hdr_chk_and_bounce(struct sk_buff *skb, struct qeth_hdr **hdr, int len)
 }
 EXPORT_SYMBOL_GPL(qeth_hdr_chk_and_bounce);
 
+/**
+ * qeth_push_hdr() - push a qeth_hdr onto an skb.
+ * @skb: skb that the qeth_hdr should be pushed onto.
+ * @hdr: double pointer to a qeth_hdr. When returning with >= 0,
+ *      it contains a valid pointer to a qeth_hdr.
+ * @len: length of the hdr that needs to be pushed on.
+ *
+ * Returns the pushed length. If the header can't be pushed on
+ * (eg. because it would cross a page boundary), it is allocated from
+ * the cache instead and 0 is returned.
+ * Error to create the hdr is indicated by returning with < 0.
+ */
+int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len)
+{
+       if (skb_headroom(skb) >= len &&
+           qeth_get_elements_for_range((addr_t)skb->data - len,
+                                       (addr_t)skb->data) == 1) {
+               *hdr = skb_push(skb, len);
+               return len;
+       }
+       /* fall back */
+       *hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
+       if (!*hdr)
+               return -ENOMEM;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(qeth_push_hdr);
+
 static void __qeth_fill_buffer(struct sk_buff *skb,
                               struct qeth_qdio_out_buffer *buf,
                               bool is_first_elem, unsigned int offset)
index c85fadf21b389d64952871aff4888dfe7de9c5c4..760b023eae9562a799f7c7a90a91cbd371d99fd2 100644 (file)
@@ -705,9 +705,11 @@ out:
 static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
                            struct qeth_qdio_out_q *queue, int cast_type)
 {
+       int push_len = sizeof(struct qeth_hdr);
        unsigned int elements, nr_frags;
-       struct sk_buff *skb_copy;
-       struct qeth_hdr *hdr;
+       unsigned int hdr_elements = 0;
+       struct qeth_hdr *hdr = NULL;
+       unsigned int hd_len = 0;
        int rc;
 
        /* fix hardware limitation: as long as we do not have sbal
@@ -727,38 +729,44 @@ static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
        }
        nr_frags = skb_shinfo(skb)->nr_frags;
 
-       /* create a copy with writeable headroom */
-       skb_copy = skb_realloc_headroom(skb, sizeof(struct qeth_hdr));
-       if (!skb_copy)
-               return -ENOMEM;
-       hdr = skb_push(skb_copy, sizeof(struct qeth_hdr));
-       qeth_l2_fill_header(hdr, skb_copy, cast_type,
-                           skb_copy->len - sizeof(*hdr));
-       if (skb_copy->ip_summed == CHECKSUM_PARTIAL)
-               qeth_l2_hdr_csum(card, hdr, skb_copy);
-
-       elements = qeth_get_elements_no(card, skb_copy, 0, 0);
+       rc = skb_cow_head(skb, push_len);
+       if (rc)
+               return rc;
+       push_len = qeth_push_hdr(skb, &hdr, push_len);
+       if (push_len < 0)
+               return push_len;
+       if (!push_len) {
+               /* hdr was allocated from cache */
+               hd_len = sizeof(*hdr);
+               hdr_elements = 1;
+       }
+       qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len);
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               qeth_l2_hdr_csum(card, hdr, skb);
+
+       elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
        if (!elements) {
                rc = -E2BIG;
                goto out;
        }
-       if (qeth_hdr_chk_and_bounce(skb_copy, &hdr, sizeof(*hdr))) {
-               rc = -EINVAL;
-               goto out;
-       }
-       rc = qeth_do_send_packet(card, queue, skb_copy, hdr, 0, 0, elements);
+       elements += hdr_elements;
+
+       /* TODO: remove the skb_orphan() once TX completion is fast enough */
+       skb_orphan(skb);
+       rc = qeth_do_send_packet(card, queue, skb, hdr, 0, hd_len, elements);
 out:
        if (!rc) {
-               /* tx success, free dangling original */
-               dev_kfree_skb_any(skb);
                if (card->options.performance_stats && nr_frags) {
                        card->perf_stats.sg_skbs_sent++;
                        /* nr_frags + skb->data */
                        card->perf_stats.sg_frags_sent += nr_frags + 1;
                }
        } else {
-               /* tx fail, free copy */
-               dev_kfree_skb_any(skb_copy);
+               if (hd_len)
+                       kmem_cache_free(qeth_core_header_cache, hdr);
+               if (rc == -EBUSY)
+                       /* roll back to ETH header */
+                       skb_pull(skb, push_len);
        }
        return rc;
 }
@@ -1011,6 +1019,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                        card->dev->vlan_features |= NETIF_F_RXCSUM;
                }
        }
+       if (card->info.type != QETH_CARD_TYPE_OSN &&
+           card->info.type != QETH_CARD_TYPE_IQD) {
+               card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+               card->dev->needed_headroom = sizeof(struct qeth_hdr);
+       }
+
        card->info.broadcast_capable = 1;
        qeth_l2_request_initial_mac(card);
        card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *