iwlagn: fix RX skb alignment
author: Johannes Berg <johannes@sipsolutions.net>
Tue, 18 Nov 2008 00:47:21 +0000 (01:47 +0100)
committer: John W. Linville <linville@tuxdriver.com>
Tue, 18 Nov 2008 22:26:26 +0000 (17:26 -0500)
So I dug deeper into the DMA problems I had with iwlagn and a kind soul
helped me in that he said something about pci-e alignment and mentioned
the iwl_rx_allocate function to check for crossing 4KB boundaries. Since
there's 8KB A-MPDU support, crossing 4k boundaries didn't seem like
something the device would fail with, but when I looked into the
function for a minute anyway I stumbled over this little gem:

BUG_ON(rxb->dma_addr & (~DMA_BIT_MASK(36) & 0xff));

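Clearly, that is a totally bogus check; one would hope the compiler
removes it entirely. (Think about it: ~DMA_BIT_MASK(36) has its low 36
bits clear, so ANDing it with 0xff always yields zero and the BUG_ON can
never trigger.)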

After fixing it, I obviously ran into it: nothing guarantees the
alignment the way you want it, because of the way skbs and their
headroom are allocated. I won't explain that here nor double-check that
I'm right, that goes beyond what most of the CC'ed people care about.

So then I came up with the patch below, and so far my system has
survived minutes with 64K pages, when it would previously fail in
seconds. And I haven't seen a single instance of the TX bug either. But
when you see the patch it'll be pretty obvious to you why.

This should fix the following reported kernel bugs:

http://bugzilla.kernel.org/show_bug.cgi?id=11596
http://bugzilla.kernel.org/show_bug.cgi?id=11393
http://bugzilla.kernel.org/show_bug.cgi?id=11983

I haven't checked if there are any elsewhere, but I suppose RHBZ will
have a few instances too...

I'd like to ask anyone who is CC'ed (those are people I know ran into
the bug) to try this patch.

I am convinced that this patch is correct in spirit, but I haven't
understood why, for example, there are so many unmap calls. I'm not
entirely convinced that this is the only bug leading to the TX reply
errors.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
drivers/net/wireless/iwlwifi/iwl-agn.c
drivers/net/wireless/iwlwifi/iwl-dev.h
drivers/net/wireless/iwlwifi/iwl-rx.c

index 6751bb2b8ae2fc271d145eedc6f472e00dabf311..444c5cc05f03c671c2548a4f87ffef55c3c8f0d0 100644 (file)
@@ -1384,7 +1384,7 @@ void iwl_rx_handle(struct iwl_priv *priv)
 
                rxq->queue[i] = NULL;
 
-               pci_dma_sync_single_for_cpu(priv->pci_dev, rxb->dma_addr,
+               pci_dma_sync_single_for_cpu(priv->pci_dev, rxb->aligned_dma_addr,
                                            priv->hw_params.rx_buf_size,
                                            PCI_DMA_FROMDEVICE);
                pkt = (struct iwl_rx_packet *)rxb->skb->data;
@@ -1436,8 +1436,8 @@ void iwl_rx_handle(struct iwl_priv *priv)
                        rxb->skb = NULL;
                }
 
-               pci_unmap_single(priv->pci_dev, rxb->dma_addr,
-                                priv->hw_params.rx_buf_size,
+               pci_unmap_single(priv->pci_dev, rxb->real_dma_addr,
+                                priv->hw_params.rx_buf_size + 256,
                                 PCI_DMA_FROMDEVICE);
                spin_lock_irqsave(&rxq->lock, flags);
                list_add_tail(&rxb->list, &priv->rxq.rx_used);
index c018121085e937dd210d2048f549f6cbcf6b0dfd..9966d4e384ce75d37264345431be2122a5304c68 100644 (file)
@@ -89,7 +89,8 @@ extern struct iwl_cfg iwl5100_abg_cfg;
 #define        DEFAULT_LONG_RETRY_LIMIT  4U
 
 struct iwl_rx_mem_buffer {
-       dma_addr_t dma_addr;
+       dma_addr_t real_dma_addr;
+       dma_addr_t aligned_dma_addr;
        struct sk_buff *skb;
        struct list_head list;
 };
index 7cde9d76ff5df438b335996f1b602cf4e14dd9dc..0509c16dbe758b32e1a23db7da1a09100493b0c8 100644 (file)
@@ -204,7 +204,7 @@ int iwl_rx_queue_restock(struct iwl_priv *priv)
                list_del(element);
 
                /* Point to Rx buffer via next RBD in circular buffer */
-               rxq->bd[rxq->write] = iwl_dma_addr2rbd_ptr(priv, rxb->dma_addr);
+               rxq->bd[rxq->write] = iwl_dma_addr2rbd_ptr(priv, rxb->aligned_dma_addr);
                rxq->queue[rxq->write] = rxb;
                rxq->write = (rxq->write + 1) & RX_QUEUE_MASK;
                rxq->free_count--;
@@ -251,7 +251,7 @@ void iwl_rx_allocate(struct iwl_priv *priv)
                rxb = list_entry(element, struct iwl_rx_mem_buffer, list);
 
                /* Alloc a new receive buffer */
-               rxb->skb = alloc_skb(priv->hw_params.rx_buf_size,
+               rxb->skb = alloc_skb(priv->hw_params.rx_buf_size + 256,
                                __GFP_NOWARN | GFP_ATOMIC);
                if (!rxb->skb) {
                        if (net_ratelimit())
@@ -266,9 +266,17 @@ void iwl_rx_allocate(struct iwl_priv *priv)
                list_del(element);
 
                /* Get physical address of RB/SKB */
-               rxb->dma_addr =
-                   pci_map_single(priv->pci_dev, rxb->skb->data,
-                          priv->hw_params.rx_buf_size, PCI_DMA_FROMDEVICE);
+               rxb->real_dma_addr = pci_map_single(
+                                       priv->pci_dev,
+                                       rxb->skb->data,
+                                       priv->hw_params.rx_buf_size + 256,
+                                       PCI_DMA_FROMDEVICE);
+               /* dma address must be no more than 36 bits */
+               BUG_ON(rxb->real_dma_addr & ~DMA_BIT_MASK(36));
+               /* and also 256 byte aligned! */
+               rxb->aligned_dma_addr = ALIGN(rxb->real_dma_addr, 256);
+               skb_reserve(rxb->skb, rxb->aligned_dma_addr - rxb->real_dma_addr);
+
                list_add_tail(&rxb->list, &rxq->rx_free);
                rxq->free_count++;
        }
@@ -300,8 +308,8 @@ void iwl_rx_queue_free(struct iwl_priv *priv, struct iwl_rx_queue *rxq)
        for (i = 0; i < RX_QUEUE_SIZE + RX_FREE_BUFFERS; i++) {
                if (rxq->pool[i].skb != NULL) {
                        pci_unmap_single(priv->pci_dev,
-                                        rxq->pool[i].dma_addr,
-                                        priv->hw_params.rx_buf_size,
+                                        rxq->pool[i].real_dma_addr,
+                                        priv->hw_params.rx_buf_size + 256,
                                         PCI_DMA_FROMDEVICE);
                        dev_kfree_skb(rxq->pool[i].skb);
                }
@@ -354,8 +362,8 @@ void iwl_rx_queue_reset(struct iwl_priv *priv, struct iwl_rx_queue *rxq)
                 * to an SKB, so we need to unmap and free potential storage */
                if (rxq->pool[i].skb != NULL) {
                        pci_unmap_single(priv->pci_dev,
-                                        rxq->pool[i].dma_addr,
-                                        priv->hw_params.rx_buf_size,
+                                        rxq->pool[i].real_dma_addr,
+                                        priv->hw_params.rx_buf_size + 256,
                                         PCI_DMA_FROMDEVICE);
                        priv->alloc_rxb_skb--;
                        dev_kfree_skb(rxq->pool[i].skb);