ath10k: bypass htc for htt tx path
author Michal Kazior <michal.kazior@tieto.com>
Thu, 27 Feb 2014 16:50:04 +0000 (18:50 +0200)
committer Kalle Valo <kvalo@qca.qualcomm.com>
Fri, 28 Feb 2014 09:59:17 +0000 (11:59 +0200)
Going through the full htc tx path for htt tx is a
waste of resources. By skipping it, it's possible
to easily submit scatter-gather to the pci hif for
reduced host cpu load and improved performance.

The new approach uses a dma pool to store the
following metadata for each tx request:
 * msdu fragment list
 * htc header
 * htt tx command

The htt tx command contains a msdu prefetch.
Instead of copying it, the original mapped msdu
address is used to submit a second scatter-gather
item to hif to make a complete htt tx command.

The htt tx command itself hands over dma mapped
pointers to msdus and completion of the command
itself doesn't mean the frame has been sent and
can be unmapped/freed. This is why htc tx
completion is skipped for htt tx as all tx related
resources are freed upon htt tx completion
indication event (which also implicitly means htt
tx command itself was completed).

Since each htt tx request now effectively consists
of 2 copy engine items, CE_HTT_H2T_MSG_SRC_NENTRIES
is updated to allow a maximum of
TARGET_10X_NUM_MSDU_DESC msdus to be queued. This
keeps the tx path resource management simple.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
drivers/net/wireless/ath/ath10k/ce.c
drivers/net/wireless/ath/ath10k/ce.h
drivers/net/wireless/ath/ath10k/core.h
drivers/net/wireless/ath/ath10k/htc.c
drivers/net/wireless/ath/ath10k/htt.h
drivers/net/wireless/ath/ath10k/htt_tx.c
drivers/net/wireless/ath/ath10k/pci.c
drivers/net/wireless/ath/ath10k/txrx.c

index a0b1a8cc3393e28e74f098008cd337649cc0a239..a79499c8235009f701073c83b3974d66d183e001 100644 (file)
@@ -1067,9 +1067,9 @@ struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
         *
         * For the lack of a better place do the check here.
         */
-       BUILD_BUG_ON(TARGET_NUM_MSDU_DESC >
+       BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC >
                     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
-       BUILD_BUG_ON(TARGET_10X_NUM_MSDU_DESC >
+       BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC >
                     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
 
        ret = ath10k_pci_wake(ar);
index 322e929437de8737d18d3641d49ab65e12b5f1e5..8eb7f99ed992277b0efb3e7ae4f971b8e4eb7557 100644 (file)
@@ -23,7 +23,7 @@
 
 /* Maximum number of Copy Engine's supported */
 #define CE_COUNT_MAX 8
-#define CE_HTT_H2T_MSG_SRC_NENTRIES 2048
+#define CE_HTT_H2T_MSG_SRC_NENTRIES 4096
 
 /* Descriptor rings must be aligned to this boundary */
 #define CE_DESC_RING_ALIGN     8
index ab7009126c780b5ca28422548fa7ee36af16dba9..0e71979d837cf90888c74e4d85035c2a4d6fd4ef 100644 (file)
@@ -67,9 +67,8 @@ struct ath10k_skb_cb {
        struct {
                u8 tid;
                bool is_offchan;
-
-               u8 frag_len;
-               u8 pad_len;
+               struct ath10k_htt_txbuf *txbuf;
+               u32 txbuf_paddr;
        } __packed htt;
 
        struct {
index 64ab8d642f5f2b15c87ef8a41b1560a41527e73a..7f1bccd3597f1bb2a3b40d5a482f308e99e9e27c 100644 (file)
@@ -202,10 +202,8 @@ static int ath10k_htc_tx_completion_handler(struct ath10k *ar,
        struct ath10k_htc *htc = &ar->htc;
        struct ath10k_htc_ep *ep = &htc->endpoint[eid];
 
-       if (!skb) {
-               ath10k_warn("invalid sk_buff completion - NULL pointer. firmware crashed?\n");
+       if (WARN_ON_ONCE(!skb))
                return 0;
-       }
 
        ath10k_htc_notify_tx_completion(ep, skb);
        /* the skb now belongs to the completion handler */
index 02c009d227a41c39932baadacd84fa3c243cb8fb..2b76cb5d77a4d10b2776818d90db237f38ff98d7 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/bug.h>
 #include <linux/interrupt.h>
+#include <linux/dmapool.h>
 
 #include "htc.h"
 #include "rx_desc.h"
@@ -1188,6 +1189,13 @@ struct htt_rx_info {
        bool mic_err;
 };
 
+struct ath10k_htt_txbuf {
+       struct htt_data_tx_desc_frag frags[2];
+       struct ath10k_htc_hdr htc_hdr;
+       struct htt_cmd_hdr cmd_hdr;
+       struct htt_data_tx_desc cmd_tx;
+} __packed;
+
 struct ath10k_htt {
        struct ath10k *ar;
        enum ath10k_htc_ep_id eid;
@@ -1269,6 +1277,7 @@ struct ath10k_htt {
        struct sk_buff **pending_tx;
        unsigned long *used_msdu_ids; /* bitmap */
        wait_queue_head_t empty_tx_wq;
+       struct dma_pool *tx_pool;
 
        /* set if host-fw communication goes haywire
         * used to avoid further failures */
index f5960c593810f345f34f759da3a69efba4e2a746..20b7a446a9f8f85ee6f1d76bacedcc8df1d3d02b 100644 (file)
@@ -109,6 +109,14 @@ int ath10k_htt_tx_attach(struct ath10k_htt *htt)
                return -ENOMEM;
        }
 
+       htt->tx_pool = dma_pool_create("ath10k htt tx pool", htt->ar->dev,
+                                      sizeof(struct ath10k_htt_txbuf), 4, 0);
+       if (!htt->tx_pool) {
+               kfree(htt->used_msdu_ids);
+               kfree(htt->pending_tx);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -139,6 +147,7 @@ void ath10k_htt_tx_detach(struct ath10k_htt *htt)
        ath10k_htt_tx_cleanup_pending(htt);
        kfree(htt->pending_tx);
        kfree(htt->used_msdu_ids);
+       dma_pool_destroy(htt->tx_pool);
        return;
 }
 
@@ -350,8 +359,7 @@ int ath10k_htt_mgmt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
        memcpy(cmd->mgmt_tx.hdr, msdu->data,
               min_t(int, msdu->len, HTT_MGMT_FRM_HDR_DOWNLOAD_LEN));
 
-       skb_cb->htt.frag_len = 0;
-       skb_cb->htt.pad_len = 0;
+       skb_cb->htt.txbuf = NULL;
 
        res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
        if (res)
@@ -377,19 +385,19 @@ err:
 int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 {
        struct device *dev = htt->ar->dev;
-       struct htt_cmd *cmd;
-       struct htt_data_tx_desc_frag *tx_frags;
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)msdu->data;
        struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(msdu);
-       struct sk_buff *txdesc = NULL;
-       bool use_frags;
-       u8 vdev_id = ATH10K_SKB_CB(msdu)->vdev_id;
-       u8 tid;
-       int prefetch_len, desc_len;
-       int msdu_id = -1;
+       struct ath10k_hif_sg_item sg_items[2];
+       struct htt_data_tx_desc_frag *frags;
+       u8 vdev_id = skb_cb->vdev_id;
+       u8 tid = skb_cb->htt.tid;
+       int prefetch_len;
        int res;
-       u8 flags0;
-       u16 flags1;
+       u8 flags0 = 0;
+       u16 msdu_id, flags1 = 0;
+       dma_addr_t paddr;
+       u32 frags_paddr;
+       bool use_frags;
 
        res = ath10k_htt_tx_inc_pending(htt);
        if (res)
@@ -408,105 +416,109 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
        prefetch_len = min(htt->prefetch_len, msdu->len);
        prefetch_len = roundup(prefetch_len, 4);
 
-       desc_len = sizeof(cmd->hdr) + sizeof(cmd->data_tx) + prefetch_len;
-
-       txdesc = ath10k_htc_alloc_skb(desc_len);
-       if (!txdesc) {
-               res = -ENOMEM;
-               goto err_free_msdu_id;
-       }
-
        /* Since HTT 3.0 there is no separate mgmt tx command. However in case
         * of mgmt tx using TX_FRM there is not tx fragment list. Instead of tx
         * fragment list host driver specifies directly frame pointer. */
        use_frags = htt->target_version_major < 3 ||
                    !ieee80211_is_mgmt(hdr->frame_control);
 
-       if (!IS_ALIGNED((unsigned long)txdesc->data, 4)) {
-               ath10k_warn("htt alignment check failed. dropping packet.\n");
-               res = -EIO;
-               goto err_free_txdesc;
-       }
-
-       if (use_frags) {
-               skb_cb->htt.frag_len = sizeof(*tx_frags) * 2;
-               skb_cb->htt.pad_len = (unsigned long)msdu->data -
-                                     round_down((unsigned long)msdu->data, 4);
-
-               skb_push(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
-       } else {
-               skb_cb->htt.frag_len = 0;
-               skb_cb->htt.pad_len = 0;
-       }
+       skb_cb->htt.txbuf = dma_pool_alloc(htt->tx_pool, GFP_ATOMIC,
+                                          &paddr);
+       if (!skb_cb->htt.txbuf)
+               goto err_free_msdu_id;
+       skb_cb->htt.txbuf_paddr = paddr;
 
        skb_cb->paddr = dma_map_single(dev, msdu->data, msdu->len,
                                       DMA_TO_DEVICE);
        res = dma_mapping_error(dev, skb_cb->paddr);
        if (res)
-               goto err_pull_txfrag;
-
-       if (use_frags) {
-               dma_sync_single_for_cpu(dev, skb_cb->paddr, msdu->len,
-                                       DMA_TO_DEVICE);
-
-               /* tx fragment list must be terminated with zero-entry */
-               tx_frags = (struct htt_data_tx_desc_frag *)msdu->data;
-               tx_frags[0].paddr = __cpu_to_le32(skb_cb->paddr +
-                                                 skb_cb->htt.frag_len +
-                                                 skb_cb->htt.pad_len);
-               tx_frags[0].len   = __cpu_to_le32(msdu->len -
-                                                 skb_cb->htt.frag_len -
-                                                 skb_cb->htt.pad_len);
-               tx_frags[1].paddr = __cpu_to_le32(0);
-               tx_frags[1].len   = __cpu_to_le32(0);
-
-               dma_sync_single_for_device(dev, skb_cb->paddr, msdu->len,
-                                          DMA_TO_DEVICE);
-       }
+               goto err_free_txbuf;
 
-       ath10k_dbg(ATH10K_DBG_HTT, "tx-msdu 0x%llx\n",
-                  (unsigned long long) ATH10K_SKB_CB(msdu)->paddr);
-       ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "tx-msdu: ",
-                       msdu->data, msdu->len);
+       if (likely(use_frags)) {
+               frags = skb_cb->htt.txbuf->frags;
 
-       skb_put(txdesc, desc_len);
-       cmd = (struct htt_cmd *)txdesc->data;
+               frags[0].paddr = __cpu_to_le32(skb_cb->paddr);
+               frags[0].len = __cpu_to_le32(msdu->len);
+               frags[1].paddr = 0;
+               frags[1].len = 0;
+
+               flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
+                            HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
 
-       tid = ATH10K_SKB_CB(msdu)->htt.tid;
+               frags_paddr = skb_cb->htt.txbuf_paddr;
+       } else {
+               flags0 |= SM(ATH10K_HW_TXRX_MGMT,
+                            HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
 
-       ath10k_dbg(ATH10K_DBG_HTT, "htt data tx using tid %hhu\n", tid);
+               frags_paddr = skb_cb->paddr;
+       }
+
+       /* Normally all commands go through HTC which manages tx credits for
+        * each endpoint and notifies when tx is completed.
+        *
+        * HTT endpoint is creditless so there's no need to care about HTC
+        * flags. In that case it is trivial to fill the HTC header here.
+        *
+        * MSDU transmission is considered completed upon HTT event. This
+        * implies no relevant resources can be freed until after the event is
+        * received. That's why HTC tx completion handler itself is ignored by
+        * setting NULL to transfer_context for all sg items.
+        *
+        * There is simply no point in pushing HTT TX_FRM through HTC tx path
+        * as it's a waste of resources. By bypassing HTC it is possible to
+        * avoid extra memory allocations, compress data structures and thus
+        * improve performance. */
+
+       skb_cb->htt.txbuf->htc_hdr.eid = htt->eid;
+       skb_cb->htt.txbuf->htc_hdr.len = __cpu_to_le16(
+                       sizeof(skb_cb->htt.txbuf->cmd_hdr) +
+                       sizeof(skb_cb->htt.txbuf->cmd_tx) +
+                       prefetch_len);
+       skb_cb->htt.txbuf->htc_hdr.flags = 0;
 
-       flags0  = 0;
        if (!ieee80211_has_protected(hdr->frame_control))
                flags0 |= HTT_DATA_TX_DESC_FLAGS0_NO_ENCRYPT;
-       flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
 
-       if (use_frags)
-               flags0 |= SM(ATH10K_HW_TXRX_NATIVE_WIFI,
-                            HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
-       else
-               flags0 |= SM(ATH10K_HW_TXRX_MGMT,
-                            HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
+       flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
 
-       flags1  = 0;
        flags1 |= SM((u16)vdev_id, HTT_DATA_TX_DESC_FLAGS1_VDEV_ID);
        flags1 |= SM((u16)tid, HTT_DATA_TX_DESC_FLAGS1_EXT_TID);
        flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD;
        flags1 |= HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD;
 
-       cmd->hdr.msg_type        = HTT_H2T_MSG_TYPE_TX_FRM;
-       cmd->data_tx.flags0      = flags0;
-       cmd->data_tx.flags1      = __cpu_to_le16(flags1);
-       cmd->data_tx.len         = __cpu_to_le16(msdu->len -
-                                                skb_cb->htt.frag_len -
-                                                skb_cb->htt.pad_len);
-       cmd->data_tx.id          = __cpu_to_le16(msdu_id);
-       cmd->data_tx.frags_paddr = __cpu_to_le32(skb_cb->paddr);
-       cmd->data_tx.peerid      = __cpu_to_le32(HTT_INVALID_PEERID);
-
-       memcpy(cmd->data_tx.prefetch, hdr, prefetch_len);
+       skb_cb->htt.txbuf->cmd_hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FRM;
+       skb_cb->htt.txbuf->cmd_tx.flags0 = flags0;
+       skb_cb->htt.txbuf->cmd_tx.flags1 = __cpu_to_le16(flags1);
+       skb_cb->htt.txbuf->cmd_tx.len = __cpu_to_le16(msdu->len);
+       skb_cb->htt.txbuf->cmd_tx.id = __cpu_to_le16(msdu_id);
+       skb_cb->htt.txbuf->cmd_tx.frags_paddr = __cpu_to_le32(frags_paddr);
+       skb_cb->htt.txbuf->cmd_tx.peerid = __cpu_to_le32(HTT_INVALID_PEERID);
+
+       ath10k_dbg(ATH10K_DBG_HTT,
+                  "htt tx flags0 %hhu flags1 %hu len %d id %hu frags_paddr %08x, msdu_paddr %08x vdev %hhu tid %hhu\n",
+                  flags0, flags1, msdu->len, msdu_id, frags_paddr,
+                  (u32)skb_cb->paddr, vdev_id, tid);
+       ath10k_dbg_dump(ATH10K_DBG_HTT_DUMP, NULL, "htt tx msdu: ",
+                       msdu->data, msdu->len);
 
-       res = ath10k_htc_send(&htt->ar->htc, htt->eid, txdesc);
+       sg_items[0].transfer_id = 0;
+       sg_items[0].transfer_context = NULL;
+       sg_items[0].vaddr = &skb_cb->htt.txbuf->htc_hdr;
+       sg_items[0].paddr = skb_cb->htt.txbuf_paddr +
+                           sizeof(skb_cb->htt.txbuf->frags);
+       sg_items[0].len = sizeof(skb_cb->htt.txbuf->htc_hdr) +
+                         sizeof(skb_cb->htt.txbuf->cmd_hdr) +
+                         sizeof(skb_cb->htt.txbuf->cmd_tx);
+
+       sg_items[1].transfer_id = 0;
+       sg_items[1].transfer_context = NULL;
+       sg_items[1].vaddr = msdu->data;
+       sg_items[1].paddr = skb_cb->paddr;
+       sg_items[1].len = prefetch_len;
+
+       res = ath10k_hif_tx_sg(htt->ar,
+                              htt->ar->htc.endpoint[htt->eid].ul_pipe_id,
+                              sg_items, ARRAY_SIZE(sg_items));
        if (res)
                goto err_unmap_msdu;
 
@@ -514,10 +526,10 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu)
 
 err_unmap_msdu:
        dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE);
-err_pull_txfrag:
-       skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
-err_free_txdesc:
-       dev_kfree_skb_any(txdesc);
+err_free_txbuf:
+       dma_pool_free(htt->tx_pool,
+                     skb_cb->htt.txbuf,
+                     skb_cb->htt.txbuf_paddr);
 err_free_msdu_id:
        spin_lock_bh(&htt->tx_lock);
        htt->pending_tx[msdu_id] = NULL;
index 713c18e8a1a868e738f594e3064d649715a08c8d..2305d583019b56917832850779199c2ac464be53 100644 (file)
@@ -714,6 +714,7 @@ static void ath10k_pci_ce_send_done(struct ath10k_ce_pipe *ce_state)
        while (ath10k_ce_completed_send_next(ce_state, &transfer_context,
                                             &ce_data, &nbytes,
                                             &transfer_id) == 0) {
+               /* no need to call tx completion for NULL pointers */
                if (transfer_context == NULL)
                        continue;
 
@@ -1423,16 +1424,9 @@ static void ath10k_pci_tx_pipe_cleanup(struct ath10k_pci_pipe *pipe_info)
 
        while (ath10k_ce_cancel_send_next(ce_hdl, (void **)&netbuf,
                                          &ce_data, &nbytes, &id) == 0) {
-               /*
-                * Indicate the completion to higer layer to free
-                * the buffer
-                */
-
-               if (!netbuf) {
-                       ath10k_warn("invalid sk_buff on CE %d - NULL pointer. firmware crashed?\n",
-                                   ce_hdl->id);
+               /* no need to call tx completion for NULL pointers */
+               if (!netbuf)
                        continue;
-               }
 
                ar_pci->msg_callbacks_current.tx_completion(ar,
                                                            netbuf,
index fe69899f1a22275239b2348f3afc82dc4717abf9..2993ca772c5676d1edd5988aafe163d9932c944d 100644 (file)
@@ -66,8 +66,10 @@ void ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 
        dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE);
 
-       if (skb_cb->htt.frag_len)
-               skb_pull(msdu, skb_cb->htt.frag_len + skb_cb->htt.pad_len);
+       if (skb_cb->htt.txbuf)
+               dma_pool_free(htt->tx_pool,
+                             skb_cb->htt.txbuf,
+                             skb_cb->htt.txbuf_paddr);
 
        ath10k_report_offchan_tx(htt->ar, msdu);