[NET]: Make NAPI polling independent of struct net_device objects.
drivers/net/cxgb3/sge.c
1 /*
2 * Copyright (c) 2005-2007 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #include <linux/skbuff.h>
33 #include <linux/netdevice.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_vlan.h>
36 #include <linux/ip.h>
37 #include <linux/tcp.h>
38 #include <linux/dma-mapping.h>
39 #include "common.h"
40 #include "regs.h"
41 #include "sge_defs.h"
42 #include "t3_cpl.h"
43 #include "firmware_exports.h"
44
45 #define USE_GTS 0
46
47 #define SGE_RX_SM_BUF_SIZE 1536
48
49 #define SGE_RX_COPY_THRES 256
50 #define SGE_RX_PULL_LEN 128
51
52 /*
53 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
54 * It must be a divisor of PAGE_SIZE. If set to 0 FL0 will use sk_buffs
55 * directly.
56 */
57 #define FL0_PG_CHUNK_SIZE 2048
58
59 #define SGE_RX_DROP_THRES 16
60
61 /*
62 * Period of the Tx buffer reclaim timer. This timer does not need to run
63 * frequently as Tx buffers are usually reclaimed by new Tx packets.
64 */
65 #define TX_RECLAIM_PERIOD (HZ / 4)
66
67 /* WR size in bytes */
68 #define WR_LEN (WR_FLITS * 8)
69
70 /*
71 * Types of Tx queues in each queue set. Order here matters, do not change.
72 */
73 enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
74
75 /* Values for sge_txq.flags */
76 enum {
77 TXQ_RUNNING = 1 << 0, /* fetch engine is running */
78 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */
79 };
80
81 struct tx_desc {
82 u64 flit[TX_DESC_FLITS];
83 };
84
85 struct rx_desc {
86 __be32 addr_lo;
87 __be32 len_gen;
88 __be32 gen2;
89 __be32 addr_hi;
90 };
91
92 struct tx_sw_desc { /* SW state per Tx descriptor */
93 struct sk_buff *skb;
94 };
95
96 struct rx_sw_desc { /* SW state per Rx descriptor */
97 union {
98 struct sk_buff *skb;
99 struct fl_pg_chunk pg_chunk;
100 };
101 DECLARE_PCI_UNMAP_ADDR(dma_addr);
102 };
103
104 struct rsp_desc { /* response queue descriptor */
105 struct rss_header rss_hdr;
106 __be32 flags;
107 __be32 len_cq;
108 u8 imm_data[47];
109 u8 intr_gen;
110 };
111
112 struct unmap_info { /* packet unmapping info, overlays skb->cb */
113 int sflit; /* start flit of first SGL entry in Tx descriptor */
114 u16 fragidx; /* first page fragment in current Tx descriptor */
115 u16 addr_idx; /* buffer index of first SGL entry in descriptor */
116 u32 len; /* mapped length of skb main body */
117 };
118
119 /*
120 * Holds unmapping information for Tx packets that need deferred unmapping.
121 * This structure lives at skb->head and must be allocated by callers.
122 */
123 struct deferred_unmap_info {
124 struct pci_dev *pdev;
125 dma_addr_t addr[MAX_SKB_FRAGS + 1];
126 };
127
128 /*
129 * Maps a number of flits to the number of Tx descriptors that can hold them.
130 * The formula is
131 *
132 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
133 *
134 * HW allows up to 4 descriptors to be combined into a WR.
135 */
136 static u8 flit_desc_map[] = {
137 0,
138 #if SGE_NUM_GENBITS == 1
139 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
143 #elif SGE_NUM_GENBITS == 2
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
148 #else
149 # error "SGE_NUM_GENBITS must be 1 or 2"
150 #endif
151 };
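/*
 * Worked example (illustrative): with two generation bits WR_FLITS is 15,
 * so a packet needing 20 flits maps to 1 + (20 - 2) / (15 - 1) = 2
 * descriptors, matching flit_desc_map[20] in the table above.
 */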
152
153 static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
154 {
155 return container_of(q, struct sge_qset, fl[qidx]);
156 }
157
158 static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
159 {
160 return container_of(q, struct sge_qset, rspq);
161 }
162
163 static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
164 {
165 return container_of(q, struct sge_qset, txq[qidx]);
166 }
167
168 /**
169 * refill_rspq - replenish an SGE response queue
170 * @adapter: the adapter
171 * @q: the response queue to replenish
172 * @credits: how many new responses to make available
173 *
174 * Replenishes a response queue by making the supplied number of responses
175 * available to HW.
176 */
177 static inline void refill_rspq(struct adapter *adapter,
178 const struct sge_rspq *q, unsigned int credits)
179 {
180 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
181 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
182 }
183
184 /**
185 * need_skb_unmap - does the platform need unmapping of sk_buffs?
186 *
187 * Returns true if the platfrom needs sk_buff unmapping. The compiler
188 * optimizes away unecessary code if this returns true.
189 */
190 static inline int need_skb_unmap(void)
191 {
192 /*
193 * This structure is used to tell if the platform needs buffer
194 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
195 */
196 struct dummy {
197 DECLARE_PCI_UNMAP_ADDR(addr);
198 };
199
200 return sizeof(struct dummy) != 0;
201 }
202
203 /**
204 * unmap_skb - unmap a packet main body and its page fragments
205 * @skb: the packet
206 * @q: the Tx queue containing Tx descriptors for the packet
207 * @cidx: index of Tx descriptor
208 * @pdev: the PCI device
209 *
210 * Unmap the main body of an sk_buff and its page fragments, if any.
211 * Because of the fairly complicated structure of our SGLs and the desire
212 * to conserve space for metadata, we keep the information necessary to
213 * unmap an sk_buff partly in the sk_buff itself (in its cb), and partly
214 * in the Tx descriptors (the physical addresses of the various data
215 * buffers). The send functions initialize the state in skb->cb so we
216 * can unmap the buffers held in the first Tx descriptor here, and we
217 * have enough information at this point to update the state for the next
218 * Tx descriptor.
219 */
220 static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
221 unsigned int cidx, struct pci_dev *pdev)
222 {
223 const struct sg_ent *sgp;
224 struct unmap_info *ui = (struct unmap_info *)skb->cb;
225 int nfrags, frag_idx, curflit, j = ui->addr_idx;
226
227 sgp = (struct sg_ent *)&q->desc[cidx].flit[ui->sflit];
228
229 if (ui->len) {
230 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]), ui->len,
231 PCI_DMA_TODEVICE);
232 ui->len = 0; /* so we know for next descriptor for this skb */
233 j = 1;
234 }
235
236 frag_idx = ui->fragidx;
237 curflit = ui->sflit + 1 + j;
238 nfrags = skb_shinfo(skb)->nr_frags;
239
240 while (frag_idx < nfrags && curflit < WR_FLITS) {
241 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
242 skb_shinfo(skb)->frags[frag_idx].size,
243 PCI_DMA_TODEVICE);
244 j ^= 1;
245 if (j == 0) {
246 sgp++;
247 curflit++;
248 }
249 curflit++;
250 frag_idx++;
251 }
252
253 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
254 ui->fragidx = frag_idx;
255 ui->addr_idx = j;
256 ui->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
257 }
258 }
259
260 /**
261 * free_tx_desc - reclaims Tx descriptors and their buffers
262 * @adapter: the adapter
263 * @q: the Tx queue to reclaim descriptors from
264 * @n: the number of descriptors to reclaim
265 *
266 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
267 * Tx buffers. Called with the Tx queue lock held.
268 */
269 static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
270 unsigned int n)
271 {
272 struct tx_sw_desc *d;
273 struct pci_dev *pdev = adapter->pdev;
274 unsigned int cidx = q->cidx;
275
276 const int need_unmap = need_skb_unmap() &&
277 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
278
279 d = &q->sdesc[cidx];
280 while (n--) {
281 if (d->skb) { /* an SGL is present */
282 if (need_unmap)
283 unmap_skb(d->skb, q, cidx, pdev);
284 if (d->skb->priority == cidx)
285 kfree_skb(d->skb);
286 }
287 ++d;
288 if (++cidx == q->size) {
289 cidx = 0;
290 d = q->sdesc;
291 }
292 }
293 q->cidx = cidx;
294 }
295
296 /**
297 * reclaim_completed_tx - reclaims completed Tx descriptors
298 * @adapter: the adapter
299 * @q: the Tx queue to reclaim completed descriptors from
300 *
301 * Reclaims Tx descriptors that the SGE has indicated it has processed,
302 * and frees the associated buffers if possible. Called with the Tx
303 * queue's lock held.
304 */
305 static inline void reclaim_completed_tx(struct adapter *adapter,
306 struct sge_txq *q)
307 {
308 unsigned int reclaim = q->processed - q->cleaned;
309
310 if (reclaim) {
311 free_tx_desc(adapter, q, reclaim);
312 q->cleaned += reclaim;
313 q->in_use -= reclaim;
314 }
315 }
316
317 /**
318 * should_restart_tx - are there enough resources to restart a Tx queue?
319 * @q: the Tx queue
320 *
321 * Checks if there are enough descriptors to restart a suspended Tx queue.
322 */
323 static inline int should_restart_tx(const struct sge_txq *q)
324 {
325 unsigned int r = q->processed - q->cleaned;
326
327 return q->in_use - r < (q->size >> 1);
328 }
329
330 /**
331 * free_rx_bufs - free the Rx buffers on an SGE free list
332 * @pdev: the PCI device associated with the adapter
333 * @rxq: the SGE free list to clean up
334 *
335 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
336 * this queue should be stopped before calling this function.
337 */
338 static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
339 {
340 unsigned int cidx = q->cidx;
341
342 while (q->credits--) {
343 struct rx_sw_desc *d = &q->sdesc[cidx];
344
345 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
346 q->buf_size, PCI_DMA_FROMDEVICE);
347 if (q->use_pages) {
348 put_page(d->pg_chunk.page);
349 d->pg_chunk.page = NULL;
350 } else {
351 kfree_skb(d->skb);
352 d->skb = NULL;
353 }
354 if (++cidx == q->size)
355 cidx = 0;
356 }
357
358 if (q->pg_chunk.page) {
359 __free_page(q->pg_chunk.page);
360 q->pg_chunk.page = NULL;
361 }
362 }
363
364 /**
365 * add_one_rx_buf - add a packet buffer to a free-buffer list
366 * @va: buffer start VA
367 * @len: the buffer length
368 * @d: the HW Rx descriptor to write
369 * @sd: the SW Rx descriptor to write
370 * @gen: the generation bit value
371 * @pdev: the PCI device associated with the adapter
372 *
373 * Add a buffer of the given length to the supplied HW and SW Rx
374 * descriptors.
375 */
376 static inline void add_one_rx_buf(void *va, unsigned int len,
377 struct rx_desc *d, struct rx_sw_desc *sd,
378 unsigned int gen, struct pci_dev *pdev)
379 {
380 dma_addr_t mapping;
381
382 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
383 pci_unmap_addr_set(sd, dma_addr, mapping);
384
385 d->addr_lo = cpu_to_be32(mapping);
386 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
387 wmb();
388 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
389 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
390 }
391
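/**
 * alloc_pg_chunk - allocate the next page chunk for an FL0 Rx buffer
 * @q: the free list
 * @sd: the SW Rx descriptor to receive the chunk
 * @gfp: gfp flags for a new page allocation
 *
 * Carves the next buffer-sized chunk out of the free list's current page,
 * allocating a fresh page if necessary.  Each outstanding chunk holds its
 * own page reference: get_page() is taken while more chunks remain in the
 * page, and the original allocation reference passes to the chunk that
 * consumes the last piece.
 */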
392 static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
393 {
394 if (!q->pg_chunk.page) {
395 q->pg_chunk.page = alloc_page(gfp);
396 if (unlikely(!q->pg_chunk.page))
397 return -ENOMEM;
398 q->pg_chunk.va = page_address(q->pg_chunk.page);
399 q->pg_chunk.offset = 0;
400 }
401 sd->pg_chunk = q->pg_chunk;
402
403 q->pg_chunk.offset += q->buf_size;
404 if (q->pg_chunk.offset == PAGE_SIZE)
405 q->pg_chunk.page = NULL;
406 else {
407 q->pg_chunk.va += q->buf_size;
408 get_page(q->pg_chunk.page);
409 }
410 return 0;
411 }
412
413 /**
414 * refill_fl - refill an SGE free-buffer list
415 * @adapter: the adapter
416 * @q: the free-list to refill
417 * @n: the number of new buffers to allocate
418 * @gfp: the gfp flags for allocating new buffers
419 *
420 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
421 * allocated with the supplied gfp flags. The caller must ensure that
422 * @n does not exceed the queue's capacity.
423 */
424 static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
425 {
426 void *buf_start;
427 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
428 struct rx_desc *d = &q->desc[q->pidx];
429
430 while (n--) {
431 if (q->use_pages) {
432 if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
433 nomem: q->alloc_failed++;
434 break;
435 }
436 buf_start = sd->pg_chunk.va;
437 } else {
438 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
439
440 if (!skb)
441 goto nomem;
442
443 sd->skb = skb;
444 buf_start = skb->data;
445 }
446
447 add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
448 adap->pdev);
449 d++;
450 sd++;
451 if (++q->pidx == q->size) {
452 q->pidx = 0;
453 q->gen ^= 1;
454 sd = q->sdesc;
455 d = q->desc;
456 }
457 q->credits++;
458 }
459
460 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
461 }
462
463 static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
464 {
465 refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC);
466 }
467
468 /**
469 * recycle_rx_buf - recycle a receive buffer
470 * @adapter: the adapter
471 * @q: the SGE free list
472 * @idx: index of buffer to recycle
473 *
474 * Recycles the specified buffer on the given free list by adding it at
475 * the next available slot on the list.
476 */
477 static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
478 unsigned int idx)
479 {
480 struct rx_desc *from = &q->desc[idx];
481 struct rx_desc *to = &q->desc[q->pidx];
482
483 q->sdesc[q->pidx] = q->sdesc[idx];
484 to->addr_lo = from->addr_lo; /* already big endian */
485 to->addr_hi = from->addr_hi; /* likewise */
486 wmb();
487 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
488 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
489 q->credits++;
490
491 if (++q->pidx == q->size) {
492 q->pidx = 0;
493 q->gen ^= 1;
494 }
495 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
496 }
497
498 /**
499 * alloc_ring - allocate resources for an SGE descriptor ring
500 * @pdev: the PCI device
501 * @nelem: the number of descriptors
502 * @elem_size: the size of each descriptor
503 * @sw_size: the size of the SW state associated with each ring element
504 * @phys: the physical address of the allocated ring
505 * @metadata: address of the array holding the SW state for the ring
506 *
507 * Allocates resources for an SGE descriptor ring, such as Tx queues,
508 * free buffer lists, or response queues. Each SGE ring requires
509 * space for its HW descriptors plus, optionally, space for the SW state
510 * associated with each HW entry (the metadata). The function returns
511 * three values: the virtual address for the HW ring (the return value
512 * of the function), the physical address of the HW ring, and the address
513 * of the SW ring.
514 */
515 static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
516 size_t sw_size, dma_addr_t * phys, void *metadata)
517 {
518 size_t len = nelem * elem_size;
519 void *s = NULL;
520 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
521
522 if (!p)
523 return NULL;
524 if (sw_size) {
525 s = kcalloc(nelem, sw_size, GFP_KERNEL);
526
527 if (!s) {
528 dma_free_coherent(&pdev->dev, len, p, *phys);
529 return NULL;
530 }
531 }
532 if (metadata)
533 *(void **)metadata = s;
534 memset(p, 0, len);
535 return p;
536 }
537
538 /**
539 * free_qset - free the resources of an SGE queue set
540 * @adapter: the adapter owning the queue set
541 * @q: the queue set
542 *
543 * Release the HW and SW resources associated with an SGE queue set, such
544 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
545 * queue set must be quiesced prior to calling this.
546 */
547 void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
548 {
549 int i;
550 struct pci_dev *pdev = adapter->pdev;
551
552 if (q->tx_reclaim_timer.function)
553 del_timer_sync(&q->tx_reclaim_timer);
554
555 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
556 if (q->fl[i].desc) {
557 spin_lock(&adapter->sge.reg_lock);
558 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
559 spin_unlock(&adapter->sge.reg_lock);
560 free_rx_bufs(pdev, &q->fl[i]);
561 kfree(q->fl[i].sdesc);
562 dma_free_coherent(&pdev->dev,
563 q->fl[i].size *
564 sizeof(struct rx_desc), q->fl[i].desc,
565 q->fl[i].phys_addr);
566 }
567
568 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
569 if (q->txq[i].desc) {
570 spin_lock(&adapter->sge.reg_lock);
571 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
572 spin_unlock(&adapter->sge.reg_lock);
573 if (q->txq[i].sdesc) {
574 free_tx_desc(adapter, &q->txq[i],
575 q->txq[i].in_use);
576 kfree(q->txq[i].sdesc);
577 }
578 dma_free_coherent(&pdev->dev,
579 q->txq[i].size *
580 sizeof(struct tx_desc),
581 q->txq[i].desc, q->txq[i].phys_addr);
582 __skb_queue_purge(&q->txq[i].sendq);
583 }
584
585 if (q->rspq.desc) {
586 spin_lock(&adapter->sge.reg_lock);
587 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
588 spin_unlock(&adapter->sge.reg_lock);
589 dma_free_coherent(&pdev->dev,
590 q->rspq.size * sizeof(struct rsp_desc),
591 q->rspq.desc, q->rspq.phys_addr);
592 }
593
594 memset(q, 0, sizeof(*q));
595 }
596
597 /**
598 * init_qset_cntxt - initialize an SGE queue set context info
599 * @qs: the queue set
600 * @id: the queue set id
601 *
602 * Initializes the TIDs and context ids for the queues of a queue set.
603 */
604 static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
605 {
606 qs->rspq.cntxt_id = id;
607 qs->fl[0].cntxt_id = 2 * id;
608 qs->fl[1].cntxt_id = 2 * id + 1;
609 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
610 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
611 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
612 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
613 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
614 }
615
616 /**
617 * sgl_len - calculates the size of an SGL of the given capacity
618 * @n: the number of SGL entries
619 *
620 * Calculates the number of flits needed for a scatter/gather list that
621 * can hold the given number of entries.
622 */
623 static inline unsigned int sgl_len(unsigned int n)
624 {
625 /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
626 return (3 * n) / 2 + (n & 1);
627 }
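/*
 * Example: each struct sg_ent packs two 32-bit lengths and two 64-bit
 * addresses into 3 flits; an odd final entry still needs the length flit
 * plus one address flit, so 1 entry -> 2 flits, 2 -> 3, 3 -> 5, 4 -> 6.
 */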
628
629 /**
630 * flits_to_desc - returns the num of Tx descriptors for the given flits
631 * @n: the number of flits
632 *
633 * Calculates the number of Tx descriptors needed for the supplied number
634 * of flits.
635 */
636 static inline unsigned int flits_to_desc(unsigned int n)
637 {
638 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
639 return flit_desc_map[n];
640 }
641
642 /**
643 * get_packet - return the next ingress packet buffer from a free list
644 * @adap: the adapter that received the packet
645 * @fl: the SGE free list holding the packet
646 * @len: the packet length including any SGE padding
647 * @drop_thres: # of remaining buffers before we start dropping packets
648 *
649 * Get the next packet from a free list and complete setup of the
650 * sk_buff. If the packet is small we make a copy and recycle the
651 * original buffer, otherwise we use the original buffer itself. If a
652 * positive drop threshold is supplied packets are dropped and their
653 * buffers recycled if (a) the number of remaining buffers is under the
654 * threshold and the packet is too big to copy, or (b) the packet should
655 * be copied but there is no memory for the copy.
656 */
657 static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
658 unsigned int len, unsigned int drop_thres)
659 {
660 struct sk_buff *skb = NULL;
661 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
662
663 prefetch(sd->skb->data);
664 fl->credits--;
665
666 if (len <= SGE_RX_COPY_THRES) {
667 skb = alloc_skb(len, GFP_ATOMIC);
668 if (likely(skb != NULL)) {
669 __skb_put(skb, len);
670 pci_dma_sync_single_for_cpu(adap->pdev,
671 pci_unmap_addr(sd, dma_addr), len,
672 PCI_DMA_FROMDEVICE);
673 memcpy(skb->data, sd->skb->data, len);
674 pci_dma_sync_single_for_device(adap->pdev,
675 pci_unmap_addr(sd, dma_addr), len,
676 PCI_DMA_FROMDEVICE);
677 } else if (!drop_thres)
678 goto use_orig_buf;
679 recycle:
680 recycle_rx_buf(adap, fl, fl->cidx);
681 return skb;
682 }
683
684 if (unlikely(fl->credits < drop_thres))
685 goto recycle;
686
687 use_orig_buf:
688 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
689 fl->buf_size, PCI_DMA_FROMDEVICE);
690 skb = sd->skb;
691 skb_put(skb, len);
692 __refill_fl(adap, fl);
693 return skb;
694 }
695
696 /**
697 * get_packet_pg - return the next ingress packet buffer from a free list
698 * @adap: the adapter that received the packet
699 * @fl: the SGE free list holding the packet
700 * @len: the packet length including any SGE padding
701 * @drop_thres: # of remaining buffers before we start dropping packets
702 *
703 * Get the next packet from a free list populated with page chunks.
704 * If the packet is small we make a copy and recycle the original buffer,
705 * otherwise we attach the original buffer as a page fragment to a fresh
706 * sk_buff. If a positive drop threshold is supplied packets are dropped
707 * and their buffers recycled if (a) the number of remaining buffers is
708 * under the threshold and the packet is too big to copy, or (b) there's
709 * no system memory.
710 *
711 * Note: this function is similar to @get_packet but deals with Rx buffers
712 * that are page chunks rather than sk_buffs.
713 */
714 static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
715 unsigned int len, unsigned int drop_thres)
716 {
717 struct sk_buff *skb = NULL;
718 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
719
720 if (len <= SGE_RX_COPY_THRES) {
721 skb = alloc_skb(len, GFP_ATOMIC);
722 if (likely(skb != NULL)) {
723 __skb_put(skb, len);
724 pci_dma_sync_single_for_cpu(adap->pdev,
725 pci_unmap_addr(sd, dma_addr), len,
726 PCI_DMA_FROMDEVICE);
727 memcpy(skb->data, sd->pg_chunk.va, len);
728 pci_dma_sync_single_for_device(adap->pdev,
729 pci_unmap_addr(sd, dma_addr), len,
730 PCI_DMA_FROMDEVICE);
731 } else if (!drop_thres)
732 return NULL;
733 recycle:
734 fl->credits--;
735 recycle_rx_buf(adap, fl, fl->cidx);
736 return skb;
737 }
738
739 if (unlikely(fl->credits <= drop_thres))
740 goto recycle;
741
742 skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
743 if (unlikely(!skb)) {
744 if (!drop_thres)
745 return NULL;
746 goto recycle;
747 }
748
749 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
750 fl->buf_size, PCI_DMA_FROMDEVICE);
751 __skb_put(skb, SGE_RX_PULL_LEN);
752 memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
753 skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
754 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
755 len - SGE_RX_PULL_LEN);
756 skb->len = len;
757 skb->data_len = len - SGE_RX_PULL_LEN;
758 skb->truesize += skb->data_len;
759
760 fl->credits--;
761 /*
762 * We do not refill FLs here, we let the caller do it to overlap a
763 * prefetch.
764 */
765 return skb;
766 }
767
768 /**
769 * get_imm_packet - return the next ingress packet buffer from a response
770 * @resp: the response descriptor containing the packet data
771 *
772 * Return a packet containing the immediate data of the given response.
773 */
774 static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
775 {
776 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
777
778 if (skb) {
779 __skb_put(skb, IMMED_PKT_SIZE);
780 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
781 }
782 return skb;
783 }
784
785 /**
786 * calc_tx_descs - calculate the number of Tx descriptors for a packet
787 * @skb: the packet
788 *
789 * Returns the number of Tx descriptors needed for the given Ethernet
790 * packet. Ethernet packets require addition of WR and CPL headers.
791 */
792 static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
793 {
794 unsigned int flits;
795
796 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
797 return 1;
798
799 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
800 if (skb_shinfo(skb)->gso_size)
801 flits++;
802 return flits_to_desc(flits);
803 }
804
805 /**
806 * make_sgl - populate a scatter/gather list for a packet
807 * @skb: the packet
808 * @sgp: the SGL to populate
809 * @start: start address of skb main body data to include in the SGL
810 * @len: length of skb main body data to include in the SGL
811 * @pdev: the PCI device
812 *
813 * Generates a scatter/gather list for the buffers that make up a packet
814 * and returns the SGL size in 8-byte words. The caller must size the SGL
815 * appropriately.
816 */
817 static inline unsigned int make_sgl(const struct sk_buff *skb,
818 struct sg_ent *sgp, unsigned char *start,
819 unsigned int len, struct pci_dev *pdev)
820 {
821 dma_addr_t mapping;
822 unsigned int i, j = 0, nfrags;
823
824 if (len) {
825 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
826 sgp->len[0] = cpu_to_be32(len);
827 sgp->addr[0] = cpu_to_be64(mapping);
828 j = 1;
829 }
830
831 nfrags = skb_shinfo(skb)->nr_frags;
832 for (i = 0; i < nfrags; i++) {
833 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
834
835 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
836 frag->size, PCI_DMA_TODEVICE);
837 sgp->len[j] = cpu_to_be32(frag->size);
838 sgp->addr[j] = cpu_to_be64(mapping);
839 j ^= 1;
840 if (j == 0)
841 ++sgp;
842 }
843 if (j)
844 sgp->len[j] = 0;
845 return ((nfrags + (len != 0)) * 3) / 2 + j;
846 }
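/*
 * Note: the value make_sgl() returns equals sgl_len(nfrags + (len != 0)),
 * i.e. the SGL size in flits for the entries actually written.
 */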
847
848 /**
849 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
850 * @adap: the adapter
851 * @q: the Tx queue
852 *
853 * Ring the doorbell if a Tx queue is asleep. There is a natural race
854 * where the HW may go to sleep just after we checked; in that case the
855 * interrupt handler will detect the outstanding Tx packet and ring the
856 * doorbell for us.
857 *
858 * When GTS is disabled we unconditionally ring the doorbell.
859 */
860 static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
861 {
862 #if USE_GTS
863 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
864 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
865 set_bit(TXQ_LAST_PKT_DB, &q->flags);
866 t3_write_reg(adap, A_SG_KDOORBELL,
867 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
868 }
869 #else
870 wmb(); /* write descriptors before telling HW */
871 t3_write_reg(adap, A_SG_KDOORBELL,
872 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
873 #endif
874 }
875
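/**
 * wr_gen2 - write the second generation bit of a Tx descriptor
 * @d: the Tx descriptor
 * @gen: the generation bit value
 *
 * When two generation bits are in use the last flit of each Tx descriptor
 * carries a copy of the generation value; writing it after the rest of the
 * descriptor lets the HW tell that the whole descriptor has been written.
 */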
876 static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
877 {
878 #if SGE_NUM_GENBITS == 2
879 d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
880 #endif
881 }
882
883 /**
884 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
885 * @ndesc: number of Tx descriptors spanned by the SGL
886 * @skb: the packet corresponding to the WR
887 * @d: first Tx descriptor to be written
888 * @pidx: index of above descriptors
889 * @q: the SGE Tx queue
890 * @sgl: the SGL
891 * @flits: number of flits to the start of the SGL in the first descriptor
892 * @sgl_flits: the SGL size in flits
893 * @gen: the Tx descriptor generation
894 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
895 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
896 *
897 * Write a work request header and an associated SGL. If the SGL is
898 * small enough to fit into one Tx descriptor it has already been written
899 * and we just need to write the WR header. Otherwise we distribute the
900 * SGL across the number of descriptors it spans.
901 */
902 static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
903 struct tx_desc *d, unsigned int pidx,
904 const struct sge_txq *q,
905 const struct sg_ent *sgl,
906 unsigned int flits, unsigned int sgl_flits,
907 unsigned int gen, unsigned int wr_hi,
908 unsigned int wr_lo)
909 {
910 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
911 struct tx_sw_desc *sd = &q->sdesc[pidx];
912
913 sd->skb = skb;
914 if (need_skb_unmap()) {
915 struct unmap_info *ui = (struct unmap_info *)skb->cb;
916
917 ui->fragidx = 0;
918 ui->addr_idx = 0;
919 ui->sflit = flits;
920 }
921
922 if (likely(ndesc == 1)) {
923 skb->priority = pidx;
924 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
925 V_WR_SGLSFLT(flits)) | wr_hi;
926 wmb();
927 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
928 V_WR_GEN(gen)) | wr_lo;
929 wr_gen2(d, gen);
930 } else {
931 unsigned int ogen = gen;
932 const u64 *fp = (const u64 *)sgl;
933 struct work_request_hdr *wp = wrp;
934
935 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
936 V_WR_SGLSFLT(flits)) | wr_hi;
937
938 while (sgl_flits) {
939 unsigned int avail = WR_FLITS - flits;
940
941 if (avail > sgl_flits)
942 avail = sgl_flits;
943 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
944 sgl_flits -= avail;
945 ndesc--;
946 if (!sgl_flits)
947 break;
948
949 fp += avail;
950 d++;
951 sd++;
952 if (++pidx == q->size) {
953 pidx = 0;
954 gen ^= 1;
955 d = q->desc;
956 sd = q->sdesc;
957 }
958
959 sd->skb = skb;
960 wrp = (struct work_request_hdr *)d;
961 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
962 V_WR_SGLSFLT(1)) | wr_hi;
963 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
964 sgl_flits + 1)) |
965 V_WR_GEN(gen)) | wr_lo;
966 wr_gen2(d, gen);
967 flits = 1;
968 }
969 skb->priority = pidx;
970 wrp->wr_hi |= htonl(F_WR_EOP);
971 wmb();
972 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
973 wr_gen2((struct tx_desc *)wp, ogen);
974 WARN_ON(ndesc != 0);
975 }
976 }
977
978 /**
979 * write_tx_pkt_wr - write a TX_PKT work request
980 * @adap: the adapter
981 * @skb: the packet to send
982 * @pi: the egress interface
983 * @pidx: index of the first Tx descriptor to write
984 * @gen: the generation value to use
985 * @q: the Tx queue
986 * @ndesc: number of descriptors the packet will occupy
987 * @compl: the value of the COMPL bit to use
988 *
989 * Generate a TX_PKT work request to send the supplied packet.
990 */
991 static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
992 const struct port_info *pi,
993 unsigned int pidx, unsigned int gen,
994 struct sge_txq *q, unsigned int ndesc,
995 unsigned int compl)
996 {
997 unsigned int flits, sgl_flits, cntrl, tso_info;
998 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
999 struct tx_desc *d = &q->desc[pidx];
1000 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1001
1002 cpl->len = htonl(skb->len | 0x80000000);
1003 cntrl = V_TXPKT_INTF(pi->port_id);
1004
1005 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1006 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1007
1008 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1009 if (tso_info) {
1010 int eth_type;
1011 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1012
1013 d->flit[2] = 0;
1014 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1015 hdr->cntrl = htonl(cntrl);
1016 eth_type = skb_network_offset(skb) == ETH_HLEN ?
1017 CPL_ETH_II : CPL_ETH_II_VLAN;
1018 tso_info |= V_LSO_ETH_TYPE(eth_type) |
1019 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1020 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1021 hdr->lso_info = htonl(tso_info);
1022 flits = 3;
1023 } else {
1024 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1025 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1026 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1027 cpl->cntrl = htonl(cntrl);
1028
1029 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1030 q->sdesc[pidx].skb = NULL;
1031 if (!skb->data_len)
1032 skb_copy_from_linear_data(skb, &d->flit[2],
1033 skb->len);
1034 else
1035 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1036
1037 flits = (skb->len + 7) / 8 + 2;
1038 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1039 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1040 | F_WR_SOP | F_WR_EOP | compl);
1041 wmb();
1042 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1043 V_WR_TID(q->token));
1044 wr_gen2(d, gen);
1045 kfree_skb(skb);
1046 return;
1047 }
1048
1049 flits = 2;
1050 }
1051
1052 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1053 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1054 if (need_skb_unmap())
1055 ((struct unmap_info *)skb->cb)->len = skb_headlen(skb);
1056
1057 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1058 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1059 htonl(V_WR_TID(q->token)));
1060 }
1061
1062 /**
1063 * t3_eth_xmit - add a packet to the Ethernet Tx queue
1064 * @skb: the packet
1065 * @dev: the egress net device
1066 *
1067 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1068 */
1069 int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1070 {
1071 unsigned int ndesc, pidx, credits, gen, compl;
1072 const struct port_info *pi = netdev_priv(dev);
1073 struct adapter *adap = pi->adapter;
1074 struct sge_qset *qs = pi->qs;
1075 struct sge_txq *q = &qs->txq[TXQ_ETH];
1076
1077 /*
1078 * The chip min packet length is 9 octets but we play it safe and reject
1079 * anything shorter than an Ethernet header.
1080 */
1081 if (unlikely(skb->len < ETH_HLEN)) {
1082 dev_kfree_skb(skb);
1083 return NETDEV_TX_OK;
1084 }
1085
1086 spin_lock(&q->lock);
1087 reclaim_completed_tx(adap, q);
1088
1089 credits = q->size - q->in_use;
1090 ndesc = calc_tx_descs(skb);
1091
1092 if (unlikely(credits < ndesc)) {
1093 if (!netif_queue_stopped(dev)) {
1094 netif_stop_queue(dev);
1095 set_bit(TXQ_ETH, &qs->txq_stopped);
1096 q->stops++;
1097 dev_err(&adap->pdev->dev,
1098 "%s: Tx ring %u full while queue awake!\n",
1099 dev->name, q->cntxt_id & 7);
1100 }
1101 spin_unlock(&q->lock);
1102 return NETDEV_TX_BUSY;
1103 }
1104
1105 q->in_use += ndesc;
1106 if (unlikely(credits - ndesc < q->stop_thres)) {
1107 q->stops++;
1108 netif_stop_queue(dev);
1109 set_bit(TXQ_ETH, &qs->txq_stopped);
1110 #if !USE_GTS
1111 if (should_restart_tx(q) &&
1112 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1113 q->restarts++;
1114 netif_wake_queue(dev);
1115 }
1116 #endif
1117 }
1118
1119 gen = q->gen;
1120 q->unacked += ndesc;
1121 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1122 q->unacked &= 7;
1123 pidx = q->pidx;
1124 q->pidx += ndesc;
1125 if (q->pidx >= q->size) {
1126 q->pidx -= q->size;
1127 q->gen ^= 1;
1128 }
1129
1130 /* update port statistics */
1131 if (skb->ip_summed == CHECKSUM_COMPLETE)
1132 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1133 if (skb_shinfo(skb)->gso_size)
1134 qs->port_stats[SGE_PSTAT_TSO]++;
1135 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1136 qs->port_stats[SGE_PSTAT_VLANINS]++;
1137
1138 dev->trans_start = jiffies;
1139 spin_unlock(&q->lock);
1140
1141 /*
1142 * We do not use Tx completion interrupts to free DMAd Tx packets.
1143 * This is good for performance but means that we rely on new Tx
1144 * packets arriving to run the destructors of completed packets,
1145 * which open up space in their sockets' send queues. Sometimes
1146 * we do not get such new packets causing Tx to stall. A single
1147 * UDP transmitter is a good example of this situation. We have
1148 * a clean up timer that periodically reclaims completed packets
1149 * but it doesn't run often enough (nor do we want it to) to prevent
1150 * lengthy stalls. A solution to this problem is to run the
1151 * destructor early, after the packet is queued but before it's DMAd.
1152 * A downside is that we lie to socket memory accounting, but the amount
1153 * of extra memory is reasonable (limited by the number of Tx
1154 * descriptors), the packets do actually get freed quickly by new
1155 * packets almost always, and for protocols like TCP that wait for
1156 * acks to really free up the data the extra memory is even less.
1157 * On the positive side we run the destructors on the sending CPU
1158 * rather than on a potentially different completing CPU, usually a
1159 * good thing. We also run them without holding our Tx queue lock,
1160 * unlike what reclaim_completed_tx() would otherwise do.
1161 *
1162 * Run the destructor before telling the DMA engine about the packet
1163 * to make sure it doesn't complete and get freed prematurely.
1164 */
1165 if (likely(!skb_shared(skb)))
1166 skb_orphan(skb);
1167
1168 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1169 check_ring_tx_db(adap, q);
1170 return NETDEV_TX_OK;
1171 }
1172
1173 /**
1174 * write_imm - write a packet into a Tx descriptor as immediate data
1175 * @d: the Tx descriptor to write
1176 * @skb: the packet
1177 * @len: the length of packet data to write as immediate data
1178 * @gen: the generation bit value to write
1179 *
1180 * Writes a packet as immediate data into a Tx descriptor. The packet
1181 * contains a work request at its beginning. We must write the packet
1182 * carefully so the SGE doesn't accidentally read it before it's been
1183 * written in its entirety.
1184 */
1185 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1186 unsigned int len, unsigned int gen)
1187 {
1188 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1189 struct work_request_hdr *to = (struct work_request_hdr *)d;
1190
1191 memcpy(&to[1], &from[1], len - sizeof(*from));
1192 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1193 V_WR_BCNTLFLT(len & 7));
1194 wmb();
1195 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1196 V_WR_LEN((len + 7) / 8));
1197 wr_gen2(d, gen);
1198 kfree_skb(skb);
1199 }
1200
1201 /**
1202 * check_desc_avail - check descriptor availability on a send queue
1203 * @adap: the adapter
1204 * @q: the send queue
1205 * @skb: the packet needing the descriptors
1206 * @ndesc: the number of Tx descriptors needed
1207 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1208 *
1209 * Checks if the requested number of Tx descriptors is available on an
1210 * SGE send queue. If the queue is already suspended or not enough
1211 * descriptors are available the packet is queued for later transmission.
1212 * Must be called with the Tx queue locked.
1213 *
1214 * Returns 0 if enough descriptors are available, 1 if there aren't
1215 * enough descriptors and the packet has been queued, and 2 if the caller
1216 * needs to retry because there weren't enough descriptors at the
1217 * beginning of the call but some freed up in the meantime.
1218 */
1219 static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1220 struct sk_buff *skb, unsigned int ndesc,
1221 unsigned int qid)
1222 {
1223 if (unlikely(!skb_queue_empty(&q->sendq))) {
1224 addq_exit:__skb_queue_tail(&q->sendq, skb);
1225 return 1;
1226 }
1227 if (unlikely(q->size - q->in_use < ndesc)) {
1228 struct sge_qset *qs = txq_to_qset(q, qid);
1229
1230 set_bit(qid, &qs->txq_stopped);
1231 smp_mb__after_clear_bit();
1232
1233 if (should_restart_tx(q) &&
1234 test_and_clear_bit(qid, &qs->txq_stopped))
1235 return 2;
1236
1237 q->stops++;
1238 goto addq_exit;
1239 }
1240 return 0;
1241 }
1242
1243 /**
1244 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1245 * @q: the SGE control Tx queue
1246 *
1247 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1248 * that send only immediate data (presently just the control queues) and
1249 * thus do not have any sk_buffs to release.
1250 */
1251 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1252 {
1253 unsigned int reclaim = q->processed - q->cleaned;
1254
1255 q->in_use -= reclaim;
1256 q->cleaned += reclaim;
1257 }
1258
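/**
 * immediate - check if a packet can be sent as immediate data
 * @skb: the packet
 *
 * Returns true if the packet fits entirely within a single work request
 * and has no page fragments, so it can be copied directly into a Tx
 * descriptor instead of being described by an SGL.
 */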
1259 static inline int immediate(const struct sk_buff *skb)
1260 {
1261 return skb->len <= WR_LEN && !skb->data_len;
1262 }
1263
1264 /**
1265 * ctrl_xmit - send a packet through an SGE control Tx queue
1266 * @adap: the adapter
1267 * @q: the control queue
1268 * @skb: the packet
1269 *
1270 * Send a packet through an SGE control Tx queue. Packets sent through
1271 * a control queue must fit entirely as immediate data in a single Tx
1272 * descriptor and have no page fragments.
1273 */
1274 static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1275 struct sk_buff *skb)
1276 {
1277 int ret;
1278 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1279
1280 if (unlikely(!immediate(skb))) {
1281 WARN_ON(1);
1282 dev_kfree_skb(skb);
1283 return NET_XMIT_SUCCESS;
1284 }
1285
1286 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1287 wrp->wr_lo = htonl(V_WR_TID(q->token));
1288
1289 spin_lock(&q->lock);
1290 again:reclaim_completed_tx_imm(q);
1291
1292 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1293 if (unlikely(ret)) {
1294 if (ret == 1) {
1295 spin_unlock(&q->lock);
1296 return NET_XMIT_CN;
1297 }
1298 goto again;
1299 }
1300
1301 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1302
1303 q->in_use++;
1304 if (++q->pidx >= q->size) {
1305 q->pidx = 0;
1306 q->gen ^= 1;
1307 }
1308 spin_unlock(&q->lock);
1309 wmb();
1310 t3_write_reg(adap, A_SG_KDOORBELL,
1311 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1312 return NET_XMIT_SUCCESS;
1313 }
1314
1315 /**
1316 * restart_ctrlq - restart a suspended control queue
1317 * @qs: the queue set containing the control queue
1318 *
1319 * Resumes transmission on a suspended Tx control queue.
1320 */
1321 static void restart_ctrlq(unsigned long data)
1322 {
1323 struct sk_buff *skb;
1324 struct sge_qset *qs = (struct sge_qset *)data;
1325 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1326
1327 spin_lock(&q->lock);
1328 again:reclaim_completed_tx_imm(q);
1329
1330 while (q->in_use < q->size &&
1331 (skb = __skb_dequeue(&q->sendq)) != NULL) {
1332
1333 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1334
1335 if (++q->pidx >= q->size) {
1336 q->pidx = 0;
1337 q->gen ^= 1;
1338 }
1339 q->in_use++;
1340 }
1341
1342 if (!skb_queue_empty(&q->sendq)) {
1343 set_bit(TXQ_CTRL, &qs->txq_stopped);
1344 smp_mb__after_clear_bit();
1345
1346 if (should_restart_tx(q) &&
1347 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1348 goto again;
1349 q->stops++;
1350 }
1351
1352 spin_unlock(&q->lock);
1353 t3_write_reg(qs->adap, A_SG_KDOORBELL,
1354 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1355 }
1356
1357 /*
1358 * Send a management message through control queue 0
1359 */
1360 int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1361 {
1362 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1363 }
1364
1365 /**
1366 * deferred_unmap_destructor - unmap a packet when it is freed
1367 * @skb: the packet
1368 *
1369 * This is the packet destructor used for Tx packets that need to remain
1370 * mapped until they are freed rather than until their Tx descriptors are
1371 * freed.
1372 */
1373 static void deferred_unmap_destructor(struct sk_buff *skb)
1374 {
1375 int i;
1376 const dma_addr_t *p;
1377 const struct skb_shared_info *si;
1378 const struct deferred_unmap_info *dui;
1379 const struct unmap_info *ui = (struct unmap_info *)skb->cb;
1380
1381 dui = (struct deferred_unmap_info *)skb->head;
1382 p = dui->addr;
1383
1384 if (ui->len)
1385 pci_unmap_single(dui->pdev, *p++, ui->len, PCI_DMA_TODEVICE);
1386
1387 si = skb_shinfo(skb);
1388 for (i = 0; i < si->nr_frags; i++)
1389 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1390 PCI_DMA_TODEVICE);
1391 }
1392
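/**
 * setup_deferred_unmapping - record DMA addresses for deferred unmapping
 * @skb: the packet
 * @pdev: the PCI device that mapped the buffers
 * @sgl: the SGL holding the mapped addresses
 * @sgl_flits: the SGL size in flits
 *
 * Copies the bus addresses out of the SGL into the deferred_unmap_info
 * at skb->head, one sg_ent (3 flits) at a time, so that
 * deferred_unmap_destructor() can unmap the buffers when the packet is
 * finally freed.
 */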
1393 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1394 const struct sg_ent *sgl, int sgl_flits)
1395 {
1396 dma_addr_t *p;
1397 struct deferred_unmap_info *dui;
1398
1399 dui = (struct deferred_unmap_info *)skb->head;
1400 dui->pdev = pdev;
1401 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1402 *p++ = be64_to_cpu(sgl->addr[0]);
1403 *p++ = be64_to_cpu(sgl->addr[1]);
1404 }
1405 if (sgl_flits)
1406 *p = be64_to_cpu(sgl->addr[0]);
1407 }
1408
1409 /**
1410 * write_ofld_wr - write an offload work request
1411 * @adap: the adapter
1412 * @skb: the packet to send
1413 * @q: the Tx queue
1414 * @pidx: index of the first Tx descriptor to write
1415 * @gen: the generation value to use
1416 * @ndesc: number of descriptors the packet will occupy
1417 *
1418 * Write an offload work request to send the supplied packet. The packet
1419 * data already carry the work request with most fields populated.
1420 */
1421 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1422 struct sge_txq *q, unsigned int pidx,
1423 unsigned int gen, unsigned int ndesc)
1424 {
1425 unsigned int sgl_flits, flits;
1426 struct work_request_hdr *from;
1427 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1428 struct tx_desc *d = &q->desc[pidx];
1429
1430 if (immediate(skb)) {
1431 q->sdesc[pidx].skb = NULL;
1432 write_imm(d, skb, skb->len, gen);
1433 return;
1434 }
1435
1436 /* Only TX_DATA builds SGLs */
1437
1438 from = (struct work_request_hdr *)skb->data;
1439 memcpy(&d->flit[1], &from[1],
1440 skb_transport_offset(skb) - sizeof(*from));
1441
1442 flits = skb_transport_offset(skb) / 8;
1443 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1444 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1445 skb->tail - skb->transport_header,
1446 adap->pdev);
1447 if (need_skb_unmap()) {
1448 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1449 skb->destructor = deferred_unmap_destructor;
1450 ((struct unmap_info *)skb->cb)->len = (skb->tail -
1451 skb->transport_header);
1452 }
1453
1454 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1455 gen, from->wr_hi, from->wr_lo);
1456 }
1457
1458 /**
1459 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1460 * @skb: the packet
1461 *
1462 * Returns the number of Tx descriptors needed for the given offload
1463 * packet. These packets are already fully constructed.
1464 */
1465 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1466 {
1467 unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
1468
1469 if (skb->len <= WR_LEN && cnt == 0)
1470 return 1; /* packet fits as immediate data */
1471
1472 flits = skb_transport_offset(skb) / 8; /* headers */
1473 if (skb->tail != skb->transport_header)
1474 cnt++;
1475 return flits_to_desc(flits + sgl_len(cnt));
1476 }
1477
1478 /**
1479 * ofld_xmit - send a packet through an offload queue
1480 * @adap: the adapter
1481 * @q: the Tx offload queue
1482 * @skb: the packet
1483 *
1484 * Send an offload packet through an SGE offload queue.
1485 */
1486 static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1487 struct sk_buff *skb)
1488 {
1489 int ret;
1490 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1491
1492 spin_lock(&q->lock);
1493 again:reclaim_completed_tx(adap, q);
1494
1495 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1496 if (unlikely(ret)) {
1497 if (ret == 1) {
1498 skb->priority = ndesc; /* save for restart */
1499 spin_unlock(&q->lock);
1500 return NET_XMIT_CN;
1501 }
1502 goto again;
1503 }
1504
1505 gen = q->gen;
1506 q->in_use += ndesc;
1507 pidx = q->pidx;
1508 q->pidx += ndesc;
1509 if (q->pidx >= q->size) {
1510 q->pidx -= q->size;
1511 q->gen ^= 1;
1512 }
1513 spin_unlock(&q->lock);
1514
1515 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1516 check_ring_tx_db(adap, q);
1517 return NET_XMIT_SUCCESS;
1518 }
1519
1520 /**
1521 * restart_offloadq - restart a suspended offload queue
1522 * @qs: the queue set containing the offload queue
1523 *
1524 * Resumes transmission on a suspended Tx offload queue.
1525 */
1526 static void restart_offloadq(unsigned long data)
1527 {
1528 struct sk_buff *skb;
1529 struct sge_qset *qs = (struct sge_qset *)data;
1530 struct sge_txq *q = &qs->txq[TXQ_OFLD];
1531 const struct port_info *pi = netdev_priv(qs->netdev);
1532 struct adapter *adap = pi->adapter;
1533
1534 spin_lock(&q->lock);
1535 again:reclaim_completed_tx(adap, q);
1536
1537 while ((skb = skb_peek(&q->sendq)) != NULL) {
1538 unsigned int gen, pidx;
1539 unsigned int ndesc = skb->priority;
1540
1541 if (unlikely(q->size - q->in_use < ndesc)) {
1542 set_bit(TXQ_OFLD, &qs->txq_stopped);
1543 smp_mb__after_clear_bit();
1544
1545 if (should_restart_tx(q) &&
1546 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1547 goto again;
1548 q->stops++;
1549 break;
1550 }
1551
1552 gen = q->gen;
1553 q->in_use += ndesc;
1554 pidx = q->pidx;
1555 q->pidx += ndesc;
1556 if (q->pidx >= q->size) {
1557 q->pidx -= q->size;
1558 q->gen ^= 1;
1559 }
1560 __skb_unlink(skb, &q->sendq);
1561 spin_unlock(&q->lock);
1562
1563 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1564 spin_lock(&q->lock);
1565 }
1566 spin_unlock(&q->lock);
1567
1568 #if USE_GTS
1569 set_bit(TXQ_RUNNING, &q->flags);
1570 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1571 #endif
1572 t3_write_reg(adap, A_SG_KDOORBELL,
1573 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1574 }
1575
1576 /**
1577 * queue_set - return the queue set a packet should use
1578 * @skb: the packet
1579 *
1580 * Maps a packet to the SGE queue set it should use. The desired queue
1581 * set is carried in bits 1-3 in the packet's priority.
1582 */
1583 static inline int queue_set(const struct sk_buff *skb)
1584 {
1585 return skb->priority >> 1;
1586 }
1587
1588 /**
1589 * is_ctrl_pkt - return whether an offload packet is a control packet
1590 * @skb: the packet
1591 *
1592 * Determines whether an offload packet should use an OFLD or a CTRL
1593 * Tx queue. This is indicated by bit 0 in the packet's priority.
1594 */
1595 static inline int is_ctrl_pkt(const struct sk_buff *skb)
1596 {
1597 return skb->priority & 1;
1598 }
1599
1600 /**
1601 * t3_offload_tx - send an offload packet
1602 * @tdev: the offload device to send to
1603 * @skb: the packet
1604 *
1605 * Sends an offload packet. We use the packet priority to select the
1606 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1607 * should be sent as regular or control, bits 1-3 select the queue set.
1608 */
1609 int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1610 {
1611 struct adapter *adap = tdev2adap(tdev);
1612 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1613
1614 if (unlikely(is_ctrl_pkt(skb)))
1615 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1616
1617 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1618 }
1619
1620 /**
1621 * offload_enqueue - add an offload packet to an SGE offload receive queue
1622 * @q: the SGE response queue
1623 * @skb: the packet
1624 *
1625 * Add a new offload packet to an SGE response queue's offload packet
1626 * queue. If the packet is the first on the queue it schedules the RX
1627 * softirq to process the queue.
1628 */
1629 static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1630 {
1631 skb->next = skb->prev = NULL;
1632 if (q->rx_tail)
1633 q->rx_tail->next = skb;
1634 else {
1635 struct sge_qset *qs = rspq_to_qset(q);
1636
1637 napi_schedule(&qs->napi);
1638 q->rx_head = skb;
1639 }
1640 q->rx_tail = skb;
1641 }
1642
1643 /**
1644 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1645 * @tdev: the offload device that will be receiving the packets
1646 * @q: the SGE response queue that assembled the bundle
1647 * @skbs: the partial bundle
1648 * @n: the number of packets in the bundle
1649 *
1650 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1651 */
1652 static inline void deliver_partial_bundle(struct t3cdev *tdev,
1653 struct sge_rspq *q,
1654 struct sk_buff *skbs[], int n)
1655 {
1656 if (n) {
1657 q->offload_bundles++;
1658 tdev->recv(tdev, skbs, n);
1659 }
1660 }
1661
1662 /**
1663 * ofld_poll - NAPI handler for offload packets in interrupt mode
1664 * @napi: the NAPI instance doing the polling
1665 * @budget: polling budget
1666 *
1667 * The NAPI handler for offload packets when a response queue is serviced
1668 * by the hard interrupt handler, i.e., when it's operating in non-polling
1669 * mode. Creates small packet batches and sends them through the offload
1670 * receive handler. Batches need to be of modest size as we do prefetches
1671 * on the packets in each.
1672 */
1673 static int ofld_poll(struct napi_struct *napi, int budget)
1674 {
1675 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1676 struct sge_rspq *q = &qs->rspq;
1677 struct adapter *adapter = qs->adap;
1678 int work_done = 0;
1679
1680 while (work_done < budget) {
1681 struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
1682 int ngathered;
1683
1684 spin_lock_irq(&q->lock);
1685 head = q->rx_head;
1686 if (!head) {
1687 napi_complete(napi);
1688 spin_unlock_irq(&q->lock);
1689 return work_done;
1690 }
1691
1692 tail = q->rx_tail;
1693 q->rx_head = q->rx_tail = NULL;
1694 spin_unlock_irq(&q->lock);
1695
1696 for (ngathered = 0; work_done < budget && head; work_done++) {
1697 prefetch(head->data);
1698 skbs[ngathered] = head;
1699 head = head->next;
1700 skbs[ngathered]->next = NULL;
1701 if (++ngathered == RX_BUNDLE_SIZE) {
1702 q->offload_bundles++;
1703 adapter->tdev.recv(&adapter->tdev, skbs,
1704 ngathered);
1705 ngathered = 0;
1706 }
1707 }
1708 if (head) { /* splice remaining packets back onto Rx queue */
1709 spin_lock_irq(&q->lock);
1710 tail->next = q->rx_head;
1711 if (!q->rx_head)
1712 q->rx_tail = tail;
1713 q->rx_head = head;
1714 spin_unlock_irq(&q->lock);
1715 }
1716 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1717 }
1718
1719 return work_done;
1720 }
1721
1722 /**
1723 * rx_offload - process a received offload packet
1724 * @tdev: the offload device receiving the packet
1725 * @rq: the response queue that received the packet
1726 * @skb: the packet
1727 * @rx_gather: a gather list of packets if we are building a bundle
1728 * @gather_idx: index of the next available slot in the bundle
1729 *
1730 * Process an ingress offload packet and add it to the offload ingress
1731 * queue. Returns the index of the next available slot in the bundle.
1732 */
1733 static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1734 struct sk_buff *skb, struct sk_buff *rx_gather[],
1735 unsigned int gather_idx)
1736 {
1737 rq->offload_pkts++;
1738 skb_reset_mac_header(skb);
1739 skb_reset_network_header(skb);
1740 skb_reset_transport_header(skb);
1741
1742 if (rq->polling) {
1743 rx_gather[gather_idx++] = skb;
1744 if (gather_idx == RX_BUNDLE_SIZE) {
1745 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1746 gather_idx = 0;
1747 rq->offload_bundles++;
1748 }
1749 } else
1750 offload_enqueue(rq, skb);
1751
1752 return gather_idx;
1753 }
1754
1755 /**
1756 * restart_tx - check whether to restart suspended Tx queues
1757 * @qs: the queue set to resume
1758 *
1759 * Restarts suspended Tx queues of an SGE queue set if they have enough
1760 * free resources to resume operation.
1761 */
1762 static void restart_tx(struct sge_qset *qs)
1763 {
1764 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1765 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1766 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1767 qs->txq[TXQ_ETH].restarts++;
1768 if (netif_running(qs->netdev))
1769 netif_wake_queue(qs->netdev);
1770 }
1771
1772 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1773 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1774 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1775 qs->txq[TXQ_OFLD].restarts++;
1776 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1777 }
1778 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1779 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1780 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1781 qs->txq[TXQ_CTRL].restarts++;
1782 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1783 }
1784 }
1785
1786 /**
1787 * rx_eth - process an ingress ethernet packet
1788 * @adap: the adapter
1789 * @rq: the response queue that received the packet
1790 * @skb: the packet
1791 * @pad: amount of padding at the start of the buffer
1792 *
1793 * Process an ingress ethernet packet and deliver it to the stack.
1794 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1795 * if it was immediate data in a response.
1796 */
1797 static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
1798 struct sk_buff *skb, int pad)
1799 {
1800 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
1801 struct port_info *pi;
1802
1803 skb_pull(skb, sizeof(*p) + pad);
1804 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
1805 skb->dev->last_rx = jiffies;
1806 pi = netdev_priv(skb->dev);
1807 if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
1808 !p->fragment) {
1809 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
1810 skb->ip_summed = CHECKSUM_UNNECESSARY;
1811 } else
1812 skb->ip_summed = CHECKSUM_NONE;
1813
1814 if (unlikely(p->vlan_valid)) {
1815 struct vlan_group *grp = pi->vlan_grp;
1816
1817 rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++;
1818 if (likely(grp))
1819 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1820 rq->polling);
1821 else
1822 dev_kfree_skb_any(skb);
1823 } else if (rq->polling)
1824 netif_receive_skb(skb);
1825 else
1826 netif_rx(skb);
1827 }
1828
1829 /**
1830 * handle_rsp_cntrl_info - handles control information in a response
1831 * @qs: the queue set corresponding to the response
1832 * @flags: the response control flags
1833 *
1834 * Handles the control information of an SGE response, such as GTS
1835 * indications and completion credits for the queue set's Tx queues.
1836 * HW coalesces credits; we don't do any extra SW coalescing.
1837 */
1838 static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
1839 {
1840 unsigned int credits;
1841
1842 #if USE_GTS
1843 if (flags & F_RSPD_TXQ0_GTS)
1844 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
1845 #endif
1846
1847 credits = G_RSPD_TXQ0_CR(flags);
1848 if (credits)
1849 qs->txq[TXQ_ETH].processed += credits;
1850
1851 credits = G_RSPD_TXQ2_CR(flags);
1852 if (credits)
1853 qs->txq[TXQ_CTRL].processed += credits;
1854
1855 #if USE_GTS
1856 if (flags & F_RSPD_TXQ1_GTS)
1857 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
1858 #endif
1859 credits = G_RSPD_TXQ1_CR(flags);
1860 if (credits)
1861 qs->txq[TXQ_OFLD].processed += credits;
1862 }
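/*
 * Illustrative note (not part of the driver): because the hardware
 * coalesces completion credits, a single response may report several
 * finished descriptors at once.  For example, flags containing
 * V_RSPD_TXQ0_CR(3) add 3 to qs->txq[TXQ_ETH].processed above, while any
 * GTS bits are collected by the caller and handled by check_ring_db()
 * below.
 */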
1863
1864 /**
1865 * check_ring_db - check if we need to ring any doorbells
1866 * @adap: the adapter
1867 * @qs: the queue set whose Tx queues are to be examined
1868 * @sleeping: indicates which Tx queue sent GTS
1869 *
1870 * Checks if some of a queue set's Tx queues need to ring their doorbells
1871 * to resume transmission after idling while they still have unprocessed
1872 * descriptors.
1873 */
1874 static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
1875 unsigned int sleeping)
1876 {
1877 if (sleeping & F_RSPD_TXQ0_GTS) {
1878 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1879
1880 if (txq->cleaned + txq->in_use != txq->processed &&
1881 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1882 set_bit(TXQ_RUNNING, &txq->flags);
1883 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1884 V_EGRCNTX(txq->cntxt_id));
1885 }
1886 }
1887
1888 if (sleeping & F_RSPD_TXQ1_GTS) {
1889 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
1890
1891 if (txq->cleaned + txq->in_use != txq->processed &&
1892 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
1893 set_bit(TXQ_RUNNING, &txq->flags);
1894 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
1895 V_EGRCNTX(txq->cntxt_id));
1896 }
1897 }
1898 }
1899
1900 /**
1901 * is_new_response - check if a response is newly written
1902 * @r: the response descriptor
1903 * @q: the response queue
1904 *
1905 * Returns true if a response descriptor contains a yet unprocessed
1906 * response.
1907 */
1908 static inline int is_new_response(const struct rsp_desc *r,
1909 const struct sge_rspq *q)
1910 {
1911 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1912 }
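/*
 * How the generation scheme works (sketch): q->gen starts at 1 and is
 * toggled each time cidx wraps past the end of the ring (see
 * process_responses() below), while the hardware writes the current
 * generation into each descriptor's intr_gen field.  A descriptor left
 * over from the previous lap therefore carries the stale generation and
 * fails the comparison above.
 */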
1913
1914 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1915 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1916 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1917 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1918 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1919
1920 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1921 #define NOMEM_INTR_DELAY 2500
1922
1923 /**
1924 * process_responses - process responses from an SGE response queue
1925 * @adap: the adapter
1926 * @qs: the queue set to which the response queue belongs
1927 * @budget: how many responses can be processed in this round
1928 *
1929 * Process responses from an SGE response queue up to the supplied budget.
1930 * Responses include received packets as well as credits and other events
1931 * for the queues that belong to the response queue's queue set.
1932 * A negative budget is effectively unlimited.
1933 *
1934 * Additionally choose the interrupt holdoff time for the next interrupt
1935 * on this queue. If the system is under memory shortage use a fairly
1936 * long delay to help recovery.
1937 */
1938 static int process_responses(struct adapter *adap, struct sge_qset *qs,
1939 int budget)
1940 {
1941 struct sge_rspq *q = &qs->rspq;
1942 struct rsp_desc *r = &q->desc[q->cidx];
1943 int budget_left = budget;
1944 unsigned int sleeping = 0;
1945 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
1946 int ngathered = 0;
1947
1948 q->next_holdoff = q->holdoff_tmr;
1949
1950 while (likely(budget_left && is_new_response(r, q))) {
1951 int eth, ethpad = 2;
1952 struct sk_buff *skb = NULL;
1953 u32 len, flags = ntohl(r->flags);
1954 u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
1955
1956 eth = r->rss_hdr.opcode == CPL_RX_PKT;
1957
1958 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
1959 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
1960 if (!skb)
1961 goto no_mem;
1962
1963 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
1964 skb->data[0] = CPL_ASYNC_NOTIF;
1965 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
1966 q->async_notif++;
1967 } else if (flags & F_RSPD_IMM_DATA_VALID) {
1968 skb = get_imm_packet(r);
1969 if (unlikely(!skb)) {
1970 no_mem:
1971 q->next_holdoff = NOMEM_INTR_DELAY;
1972 q->nomem++;
1973 /* consume one credit since we tried */
1974 budget_left--;
1975 break;
1976 }
1977 q->imm_data++;
1978 ethpad = 0;
1979 } else if ((len = ntohl(r->len_cq)) != 0) {
1980 struct sge_fl *fl;
1981
1982 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
1983 if (fl->use_pages) {
1984 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
1985
1986 prefetch(addr);
1987 #if L1_CACHE_BYTES < 128
1988 prefetch(addr + L1_CACHE_BYTES);
1989 #endif
1990 __refill_fl(adap, fl);
1991
1992 skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
1993 eth ? SGE_RX_DROP_THRES : 0);
1994 } else
1995 skb = get_packet(adap, fl, G_RSPD_LEN(len),
1996 eth ? SGE_RX_DROP_THRES : 0);
1997 if (unlikely(!skb)) {
1998 if (!eth)
1999 goto no_mem;
2000 q->rx_drops++;
2001 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2002 __skb_pull(skb, 2);
2003
2004 if (++fl->cidx == fl->size)
2005 fl->cidx = 0;
2006 } else
2007 q->pure_rsps++;
2008
2009 if (flags & RSPD_CTRL_MASK) {
2010 sleeping |= flags & RSPD_GTS_MASK;
2011 handle_rsp_cntrl_info(qs, flags);
2012 }
2013
2014 r++;
2015 if (unlikely(++q->cidx == q->size)) {
2016 q->cidx = 0;
2017 q->gen ^= 1;
2018 r = q->desc;
2019 }
2020 prefetch(r);
2021
2022 if (++q->credits >= (q->size / 4)) {
2023 refill_rspq(adap, q, q->credits);
2024 q->credits = 0;
2025 }
2026
2027 if (likely(skb != NULL)) {
2028 if (eth)
2029 rx_eth(adap, q, skb, ethpad);
2030 else {
2031 /* Preserve the RSS info in csum & priority */
2032 skb->csum = rss_hi;
2033 skb->priority = rss_lo;
2034 ngathered = rx_offload(&adap->tdev, q, skb,
2035 offload_skbs,
2036 ngathered);
2037 }
2038 }
2039 --budget_left;
2040 }
2041
2042 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2043 if (sleeping)
2044 check_ring_db(adap, qs, sleeping);
2045
2046 smp_mb(); /* commit Tx queue .processed updates */
2047 if (unlikely(qs->txq_stopped != 0))
2048 restart_tx(qs);
2049
2050 budget -= budget_left;
2051 return budget;
2052 }
2053
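/*
 * A response is "pure" when it carries no packet data at all: no async
 * notification, no immediate data and a zero length/completion-queue
 * field.  Such responses only convey credits and GTS indications.
 */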
2054 static inline int is_pure_response(const struct rsp_desc *r)
2055 {
2056 u32 n = ntohl(r->flags) & (F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2057
2058 return (n | r->len_cq) == 0;
2059 }
2060
2061 /**
2062 * napi_rx_handler - the NAPI handler for Rx processing
2063 * @napi: the napi instance
2064 * @budget: how many packets we can process in this round
2065 *
2066 * Handler for new data events when using NAPI.
2067 */
2068 static int napi_rx_handler(struct napi_struct *napi, int budget)
2069 {
2070 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2071 struct adapter *adap = qs->adap;
2072 int work_done = process_responses(adap, qs, budget);
2073
2074 if (likely(work_done < budget)) {
2075 napi_complete(napi);
2076
2077 /*
2078 * Because we don't atomically flush the following
2079 * write it is possible that in very rare cases it can
2080 * reach the device in a way that races with a new
2081 * response being written plus an error interrupt
2082 * causing the NAPI interrupt handler below to return
2083 * unhandled status to the OS. To protect against
2084 * this would require flushing the write and doing
2085 * both the write and the flush with interrupts off.
2086 * Way too expensive and unjustifiable given the
2087 * rarity of the race.
2088 *
2089 * The race cannot happen at all with MSI-X.
2090 */
2091 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2092 V_NEWTIMER(qs->rspq.next_holdoff) |
2093 V_NEWINDEX(qs->rspq.cidx));
2094 }
2095 return work_done;
2096 }
2097
2098 /*
2099 * Returns true if the device is already scheduled for polling.
2100 */
2101 static inline int napi_is_scheduled(struct napi_struct *napi)
2102 {
2103 return test_bit(NAPI_STATE_SCHED, &napi->state);
2104 }
2105
2106 /**
2107 * process_pure_responses - process pure responses from a response queue
2108 * @adap: the adapter
2109 * @qs: the queue set owning the response queue
2110 * @r: the first pure response to process
2111 *
2112 * A simpler version of process_responses() that handles only pure (i.e.,
2113 * non data-carrying) responses. Such responses are too lightweight to
2114 * justify calling a softirq under NAPI, so we handle them specially in
2115 * the interrupt handler. The function is called with a pointer to a
2116 * response, which the caller must ensure is a valid pure response.
2117 *
2118 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2119 */
2120 static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2121 struct rsp_desc *r)
2122 {
2123 struct sge_rspq *q = &qs->rspq;
2124 unsigned int sleeping = 0;
2125
2126 do {
2127 u32 flags = ntohl(r->flags);
2128
2129 r++;
2130 if (unlikely(++q->cidx == q->size)) {
2131 q->cidx = 0;
2132 q->gen ^= 1;
2133 r = q->desc;
2134 }
2135 prefetch(r);
2136
2137 if (flags & RSPD_CTRL_MASK) {
2138 sleeping |= flags & RSPD_GTS_MASK;
2139 handle_rsp_cntrl_info(qs, flags);
2140 }
2141
2142 q->pure_rsps++;
2143 if (++q->credits >= (q->size / 4)) {
2144 refill_rspq(adap, q, q->credits);
2145 q->credits = 0;
2146 }
2147 } while (is_new_response(r, q) && is_pure_response(r));
2148
2149 if (sleeping)
2150 check_ring_db(adap, qs, sleeping);
2151
2152 smp_mb(); /* commit Tx queue .processed updates */
2153 if (unlikely(qs->txq_stopped != 0))
2154 restart_tx(qs);
2155
2156 return is_new_response(r, q);
2157 }
2158
2159 /**
2160 * handle_responses - decide what to do with new responses in NAPI mode
2161 * @adap: the adapter
2162 * @q: the response queue
2163 *
2164 * This is used by the NAPI interrupt handlers to decide what to do with
2165 * new SGE responses. If there are no new responses it returns -1. If
2166 * there are new responses and they are pure (i.e., non-data carrying)
2167 * it handles them straight in hard interrupt context as they are very
2168 * cheap and don't deliver any packets. Finally, if there are any data
2169 * signaling responses it schedules the NAPI handler. Returns 1 if it
2170 * schedules NAPI, 0 if all new responses were pure.
2171 *
2172 * The caller must ascertain NAPI is not already running.
2173 */
2174 static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2175 {
2176 struct sge_qset *qs = rspq_to_qset(q);
2177 struct rsp_desc *r = &q->desc[q->cidx];
2178
2179 if (!is_new_response(r, q))
2180 return -1;
2181 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2182 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2183 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2184 return 0;
2185 }
2186 napi_schedule(&qs->napi);
2187 return 1;
2188 }
2189
2190 /*
2191 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2192 * (i.e., response queue serviced in hard interrupt).
2193 */
2194 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2195 {
2196 struct sge_qset *qs = cookie;
2197 struct adapter *adap = qs->adap;
2198 struct sge_rspq *q = &qs->rspq;
2199
2200 spin_lock(&q->lock);
2201 if (process_responses(adap, qs, -1) == 0)
2202 q->unhandled_irqs++;
2203 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2204 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2205 spin_unlock(&q->lock);
2206 return IRQ_HANDLED;
2207 }
2208
2209 /*
2210 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2211 * (i.e., response queue serviced by NAPI polling).
2212 */
2213 irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2214 {
2215 struct sge_qset *qs = cookie;
2216 struct sge_rspq *q = &qs->rspq;
2217
2218 spin_lock(&q->lock);
2219
2220 if (handle_responses(qs->adap, q) < 0)
2221 q->unhandled_irqs++;
2222 spin_unlock(&q->lock);
2223 return IRQ_HANDLED;
2224 }
2225
2226 /*
2227 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2228 * SGE response queues as well as error and other async events as they all use
2229 * the same MSI vector. We use one SGE response queue per port in this mode
2230 * and protect all response queues with queue 0's lock.
2231 */
2232 static irqreturn_t t3_intr_msi(int irq, void *cookie)
2233 {
2234 int new_packets = 0;
2235 struct adapter *adap = cookie;
2236 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2237
2238 spin_lock(&q->lock);
2239
2240 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2241 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2242 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2243 new_packets = 1;
2244 }
2245
2246 if (adap->params.nports == 2 &&
2247 process_responses(adap, &adap->sge.qs[1], -1)) {
2248 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2249
2250 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2251 V_NEWTIMER(q1->next_holdoff) |
2252 V_NEWINDEX(q1->cidx));
2253 new_packets = 1;
2254 }
2255
2256 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2257 q->unhandled_irqs++;
2258
2259 spin_unlock(&q->lock);
2260 return IRQ_HANDLED;
2261 }
2262
2263 static int rspq_check_napi(struct sge_qset *qs)
2264 {
2265 struct sge_rspq *q = &qs->rspq;
2266
2267 if (!napi_is_scheduled(&qs->napi) &&
2268 is_new_response(&q->desc[q->cidx], q)) {
2269 napi_schedule(&qs->napi);
2270 return 1;
2271 }
2272 return 0;
2273 }
2274
2275 /*
2276 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2277 * by NAPI polling). Handles data events from SGE response queues as well as
2278 * error and other async events as they all use the same MSI vector. We use
2279 * one SGE response queue per port in this mode and protect all response
2280 * queues with queue 0's lock.
2281 */
2282 irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2283 {
2284 int new_packets;
2285 struct adapter *adap = cookie;
2286 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2287
2288 spin_lock(&q->lock);
2289
2290 new_packets = rspq_check_napi(&adap->sge.qs[0]);
2291 if (adap->params.nports == 2)
2292 new_packets += rspq_check_napi(&adap->sge.qs[1]);
2293 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2294 q->unhandled_irqs++;
2295
2296 spin_unlock(&q->lock);
2297 return IRQ_HANDLED;
2298 }
2299
2300 /*
2301 * A helper function that processes responses and issues GTS.
2302 */
2303 static inline int process_responses_gts(struct adapter *adap,
2304 struct sge_rspq *rq)
2305 {
2306 int work;
2307
2308 work = process_responses(adap, rspq_to_qset(rq), -1);
2309 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2310 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2311 return work;
2312 }
2313
2314 /*
2315 * The legacy INTx interrupt handler. This needs to handle data events from
2316 * SGE response queues as well as error and other async events as they all use
2317 * the same interrupt pin. We use one SGE response queue per port in this mode
2318 * and protect all response queues with queue 0's lock.
2319 */
2320 static irqreturn_t t3_intr(int irq, void *cookie)
2321 {
2322 int work_done, w0, w1;
2323 struct adapter *adap = cookie;
2324 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2325 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2326
2327 spin_lock(&q0->lock);
2328
2329 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2330 w1 = adap->params.nports == 2 &&
2331 is_new_response(&q1->desc[q1->cidx], q1);
2332
2333 if (likely(w0 | w1)) {
2334 t3_write_reg(adap, A_PL_CLI, 0);
2335 t3_read_reg(adap, A_PL_CLI); /* flush */
2336
2337 if (likely(w0))
2338 process_responses_gts(adap, q0);
2339
2340 if (w1)
2341 process_responses_gts(adap, q1);
2342
2343 work_done = w0 | w1;
2344 } else
2345 work_done = t3_slow_intr_handler(adap);
2346
2347 spin_unlock(&q0->lock);
2348 return IRQ_RETVAL(work_done != 0);
2349 }
2350
2351 /*
2352 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2353 * Handles data events from SGE response queues as well as error and other
2354 * async events as they all use the same interrupt pin. We use one SGE
2355 * response queue per port in this mode and protect all response queues with
2356 * queue 0's lock.
2357 */
2358 static irqreturn_t t3b_intr(int irq, void *cookie)
2359 {
2360 u32 map;
2361 struct adapter *adap = cookie;
2362 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2363
2364 t3_write_reg(adap, A_PL_CLI, 0);
2365 map = t3_read_reg(adap, A_SG_DATA_INTR);
2366
2367 if (unlikely(!map)) /* shared interrupt, most likely */
2368 return IRQ_NONE;
2369
2370 spin_lock(&q0->lock);
2371
2372 if (unlikely(map & F_ERRINTR))
2373 t3_slow_intr_handler(adap);
2374
2375 if (likely(map & 1))
2376 process_responses_gts(adap, q0);
2377
2378 if (map & 2)
2379 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2380
2381 spin_unlock(&q0->lock);
2382 return IRQ_HANDLED;
2383 }
2384
2385 /*
2386 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2387 * Handles data events from SGE response queues as well as error and other
2388 * async events as they all use the same interrupt pin. We use one SGE
2389 * response queue per port in this mode and protect all response queues with
2390 * queue 0's lock.
2391 */
2392 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2393 {
2394 u32 map;
2395 struct adapter *adap = cookie;
2396 struct sge_qset *qs0 = &adap->sge.qs[0];
2397 struct sge_rspq *q0 = &qs0->rspq;
2398
2399 t3_write_reg(adap, A_PL_CLI, 0);
2400 map = t3_read_reg(adap, A_SG_DATA_INTR);
2401
2402 if (unlikely(!map)) /* shared interrupt, most likely */
2403 return IRQ_NONE;
2404
2405 spin_lock(&q0->lock);
2406
2407 if (unlikely(map & F_ERRINTR))
2408 t3_slow_intr_handler(adap);
2409
2410 if (likely(map & 1))
2411 napi_schedule(&qs0->napi);
2412
2413 if (map & 2)
2414 napi_schedule(&adap->sge.qs[1].napi);
2415
2416 spin_unlock(&q0->lock);
2417 return IRQ_HANDLED;
2418 }
2419
2420 /**
2421 * t3_intr_handler - select the top-level interrupt handler
2422 * @adap: the adapter
2423 * @polling: whether using NAPI to service response queues
2424 *
2425 * Selects the top-level interrupt handler based on the type of interrupts
2426 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2427 * response queues.
2428 */
2429 intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
2430 {
2431 if (adap->flags & USING_MSIX)
2432 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2433 if (adap->flags & USING_MSI)
2434 return polling ? t3_intr_msi_napi : t3_intr_msi;
2435 if (adap->params.rev > 0)
2436 return polling ? t3b_intr_napi : t3b_intr;
2437 return t3_intr;
2438 }
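/*
 * Usage sketch (illustrative, not part of this driver): the top-level
 * driver is expected to hand the selected handler to request_irq().  The
 * cookie must match what the handler expects: the adapter for the MSI and
 * INTx handlers, or an individual queue set for the MSI-X handlers.  The
 * helper name, the IRQF_SHARED flag and the "cxgb3" name string below are
 * assumptions made for the example only.
 */
static int example_setup_intr(struct adapter *adap, int polling)
{
	return request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
			   IRQF_SHARED, "cxgb3", adap);
}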
2439
2440 /**
2441 * t3_sge_err_intr_handler - SGE async event interrupt handler
2442 * @adapter: the adapter
2443 *
2444 * Interrupt handler for SGE asynchronous (non-data) events.
2445 */
2446 void t3_sge_err_intr_handler(struct adapter *adapter)
2447 {
2448 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2449
2450 if (status & F_RSPQCREDITOVERFOW)
2451 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2452
2453 if (status & F_RSPQDISABLED) {
2454 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2455
2456 CH_ALERT(adapter,
2457 "packet delivered to disabled response queue "
2458 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2459 }
2460
2461 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2462 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
2463 t3_fatal_err(adapter);
2464 }
2465
2466 /**
2467 * sge_timer_cb - perform periodic maintenance of an SGE qset
2468 * @data: the SGE queue set to maintain
2469 *
2470 * Runs periodically from a timer to perform maintenance of an SGE queue
2471 * set. It performs two tasks:
2472 *
2473 * a) Cleans up any completed Tx descriptors that may still be pending.
2474 * Normal descriptor cleanup happens when new packets are added to a Tx
2475 * queue so this timer is relatively infrequent and does any cleanup only
2476 * if the Tx queue has not seen any new packets in a while. We make a
2477 * best effort attempt to reclaim descriptors, in that we don't wait
2478 * around if we cannot get a queue's lock (which most likely is because
2479 * someone else is queueing new packets and so will also handle the clean
2480 * up). Since control queues use immediate data exclusively we don't
2481 * bother cleaning them up here.
2482 *
2483 * b) Replenishes Rx queues that have run out due to memory shortage.
2484 * Normally new Rx buffers are added when existing ones are consumed but
2485 * when out of memory a queue can become empty. We try to add only a few
2486 * buffers here, the queue will be replenished fully as these new buffers
2487 * are used up if memory shortage has subsided.
2488 */
2489 static void sge_timer_cb(unsigned long data)
2490 {
2491 spinlock_t *lock;
2492 struct sge_qset *qs = (struct sge_qset *)data;
2493 struct adapter *adap = qs->adap;
2494
2495 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2496 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2497 spin_unlock(&qs->txq[TXQ_ETH].lock);
2498 }
2499 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2500 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2501 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2502 }
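	/*
	 * In non-MSI-X modes all response queues are protected by queue 0's
	 * lock (see the MSI/INTx interrupt handlers above), so pick that
	 * lock here as well; with MSI-X each queue set has its own lock.
	 */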
2503 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
2504 &adap->sge.qs[0].rspq.lock;
2505 if (spin_trylock_irq(lock)) {
2506 if (!napi_is_scheduled(&qs->napi)) {
2507 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2508
2509 if (qs->fl[0].credits < qs->fl[0].size)
2510 __refill_fl(adap, &qs->fl[0]);
2511 if (qs->fl[1].credits < qs->fl[1].size)
2512 __refill_fl(adap, &qs->fl[1]);
2513
2514 if (status & (1 << qs->rspq.cntxt_id)) {
2515 qs->rspq.starved++;
2516 if (qs->rspq.credits) {
2517 refill_rspq(adap, &qs->rspq, 1);
2518 qs->rspq.credits--;
2519 qs->rspq.restarted++;
2520 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2521 1 << qs->rspq.cntxt_id);
2522 }
2523 }
2524 }
2525 spin_unlock_irq(lock);
2526 }
2527 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2528 }
2529
2530 /**
2531 * t3_update_qset_coalesce - update coalescing settings for a queue set
2532 * @qs: the SGE queue set
2533 * @p: new queue set parameters
2534 *
2535 * Update the coalescing settings for an SGE queue set. Nothing is done
2536 * if the queue set is not initialized yet.
2537 */
2538 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2539 {
2540 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
2541 qs->rspq.polling = p->polling;
2542 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2543 }
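/*
 * Worked example: the default coalesce_usecs of 5 (see t3_sge_prep() below)
 * programs holdoff_tmr to 50, i.e. 5 us expressed in the hardware's 0.1 us
 * units, the same units as NOMEM_INTR_DELAY above.  A setting of 0 is
 * rounded up to 1 because the holdoff timer may not be zero.
 */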
2544
2545 /**
2546 * t3_sge_alloc_qset - initialize an SGE queue set
2547 * @adapter: the adapter
2548 * @id: the queue set id
2549 * @nports: how many Ethernet ports will be using this queue set
2550 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2551 * @p: configuration parameters for this queue set
2552 * @ntxq: number of Tx queues for the queue set
2553 * @dev: net device associated with this queue set
2554 *
2555 * Allocate resources and initialize an SGE queue set. A queue set
2556 * comprises a response queue, two Rx free-buffer queues, and up to 3
2557 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2558 * queue, offload queue, and control queue.
2559 */
2560 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2561 int irq_vec_idx, const struct qset_params *p,
2562 int ntxq, struct net_device *dev)
2563 {
2564 int i, ret = -ENOMEM;
2565 struct sge_qset *q = &adapter->sge.qs[id];
2566
2567 init_qset_cntxt(q, id);
2568 init_timer(&q->tx_reclaim_timer);
2569 q->tx_reclaim_timer.data = (unsigned long)q;
2570 q->tx_reclaim_timer.function = sge_timer_cb;
2571
2572 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2573 sizeof(struct rx_desc),
2574 sizeof(struct rx_sw_desc),
2575 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2576 if (!q->fl[0].desc)
2577 goto err;
2578
2579 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2580 sizeof(struct rx_desc),
2581 sizeof(struct rx_sw_desc),
2582 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2583 if (!q->fl[1].desc)
2584 goto err;
2585
2586 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2587 sizeof(struct rsp_desc), 0,
2588 &q->rspq.phys_addr, NULL);
2589 if (!q->rspq.desc)
2590 goto err;
2591
2592 for (i = 0; i < ntxq; ++i) {
2593 /*
2594 * The control queue always uses immediate data so does not
2595 * need to keep track of any sk_buffs.
2596 */
2597 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2598
2599 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2600 sizeof(struct tx_desc), sz,
2601 &q->txq[i].phys_addr,
2602 &q->txq[i].sdesc);
2603 if (!q->txq[i].desc)
2604 goto err;
2605
2606 q->txq[i].gen = 1;
2607 q->txq[i].size = p->txq_size[i];
2608 spin_lock_init(&q->txq[i].lock);
2609 skb_queue_head_init(&q->txq[i].sendq);
2610 }
2611
2612 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2613 (unsigned long)q);
2614 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2615 (unsigned long)q);
2616
2617 q->fl[0].gen = q->fl[1].gen = 1;
2618 q->fl[0].size = p->fl_size;
2619 q->fl[1].size = p->jumbo_size;
2620
2621 q->rspq.gen = 1;
2622 q->rspq.size = p->rspq_size;
2623 spin_lock_init(&q->rspq.lock);
2624
2625 q->txq[TXQ_ETH].stop_thres = nports *
2626 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2627
2628 #if FL0_PG_CHUNK_SIZE > 0
2629 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
2630 #else
2631 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
2632 #endif
2633 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2634 q->fl[1].buf_size = is_offload(adapter) ?
2635 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2636 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
2637
2638 spin_lock(&adapter->sge.reg_lock);
2639
2640 /* FL threshold comparison uses < */
2641 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2642 q->rspq.phys_addr, q->rspq.size,
2643 q->fl[0].buf_size, 1, 0);
2644 if (ret)
2645 goto err_unlock;
2646
2647 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2648 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2649 q->fl[i].phys_addr, q->fl[i].size,
2650 q->fl[i].buf_size, p->cong_thres, 1,
2651 0);
2652 if (ret)
2653 goto err_unlock;
2654 }
2655
2656 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2657 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2658 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2659 1, 0);
2660 if (ret)
2661 goto err_unlock;
2662
2663 if (ntxq > 1) {
2664 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2665 USE_GTS, SGE_CNTXT_OFLD, id,
2666 q->txq[TXQ_OFLD].phys_addr,
2667 q->txq[TXQ_OFLD].size, 0, 1, 0);
2668 if (ret)
2669 goto err_unlock;
2670 }
2671
2672 if (ntxq > 2) {
2673 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2674 SGE_CNTXT_CTRL, id,
2675 q->txq[TXQ_CTRL].phys_addr,
2676 q->txq[TXQ_CTRL].size,
2677 q->txq[TXQ_CTRL].token, 1, 0);
2678 if (ret)
2679 goto err_unlock;
2680 }
2681
2682 spin_unlock(&adapter->sge.reg_lock);
2683
2684 q->adap = adapter;
2685 q->netdev = dev;
2686 t3_update_qset_coalesce(q, p);
2687
2688 refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
2689 refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
2690 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2691
2692 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2693 V_NEWTIMER(q->rspq.holdoff_tmr));
2694
2695 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2696 return 0;
2697
2698 err_unlock:
2699 spin_unlock(&adapter->sge.reg_lock);
2700 err:
2701 t3_free_qset(adapter, q);
2702 return ret;
2703 }
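/*
 * Usage sketch (illustrative, not part of this driver): allocating queue
 * set 0 for a single port using the per-qset parameters prepared by
 * t3_sge_prep().  The helper name, the qset index, the use of all three Tx
 * queues and the adap->params.sge.qset location are assumptions made only
 * for the example; the real caller lives in the top-level driver.
 */
static int example_alloc_qset0(struct adapter *adap, struct net_device *dev,
			       int irq_vec_idx)
{
	return t3_sge_alloc_qset(adap, 0, 1 /* nports */, irq_vec_idx,
				 &adap->params.sge.qset[0],
				 3 /* Ethernet, offload and control Tx queues */,
				 dev);
}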
2704
2705 /**
2706 * t3_free_sge_resources - free SGE resources
2707 * @adap: the adapter
2708 *
2709 * Frees resources used by the SGE queue sets.
2710 */
2711 void t3_free_sge_resources(struct adapter *adap)
2712 {
2713 int i;
2714
2715 for (i = 0; i < SGE_QSETS; ++i)
2716 t3_free_qset(adap, &adap->sge.qs[i]);
2717 }
2718
2719 /**
2720 * t3_sge_start - enable SGE
2721 * @adap: the adapter
2722 *
2723 * Enables the SGE for DMAs. This is the last step in starting packet
2724 * transfers.
2725 */
2726 void t3_sge_start(struct adapter *adap)
2727 {
2728 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2729 }
2730
2731 /**
2732 * t3_sge_stop - disable SGE operation
2733 * @adap: the adapter
2734 *
2735 * Disables the DMA engine. This can be called in emergencies (e.g.,
2736 * from error interrupts) or from normal process context. In the latter
2737 * case it also disables any pending queue restart tasklets. Note that
2738 * if it is called in interrupt context it cannot disable the restart
2739 * tasklets because it cannot wait; however, the tasklets will have no effect
2740 * since the doorbells are disabled and the driver will call this again
2741 * later from process context, at which time the tasklets will be stopped
2742 * if they are still running.
2743 */
2744 void t3_sge_stop(struct adapter *adap)
2745 {
2746 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
2747 if (!in_interrupt()) {
2748 int i;
2749
2750 for (i = 0; i < SGE_QSETS; ++i) {
2751 struct sge_qset *qs = &adap->sge.qs[i];
2752
2753 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
2754 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
2755 }
2756 }
2757 }
2758
2759 /**
2760 * t3_sge_init - initialize SGE
2761 * @adap: the adapter
2762 * @p: the SGE parameters
2763 *
2764 * Performs SGE initialization needed every time after a chip reset.
2765 * We do not initialize any of the queue sets here; instead, the top-level
2766 * driver must request those individually. We also do not enable DMA
2767 * here, that should be done after the queues have been set up.
2768 */
2769 void t3_sge_init(struct adapter *adap, struct sge_params *p)
2770 {
2771 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
2772
2773 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
2774 F_CQCRDTCTRL |
2775 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
2776 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
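	/*
	 * Note: V_HOSTPAGESIZE() above is given PAGE_SHIFT - 11, so with 4 KB
	 * pages (PAGE_SHIFT == 12) the field is programmed with the value 1.
	 */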
2777 #if SGE_NUM_GENBITS == 1
2778 ctrl |= F_EGRGENCTRL;
2779 #endif
2780 if (adap->params.rev > 0) {
2781 if (!(adap->flags & (USING_MSIX | USING_MSI)))
2782 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
2783 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
2784 }
2785 t3_write_reg(adap, A_SG_CONTROL, ctrl);
2786 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
2787 V_LORCQDRBTHRSH(512));
2788 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
2789 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
2790 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
2791 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
2792 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
2793 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
2794 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
2795 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
2796 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
2797 }
2798
2799 /**
2800 * t3_sge_prep - one-time SGE initialization
2801 * @adap: the associated adapter
2802 * @p: SGE parameters
2803 *
2804 * Performs one-time initialization of SGE SW state. Includes determining
2805 * defaults for the assorted SGE parameters, which admins can change until
2806 * they are used to initialize the SGE.
2807 */
2808 void __devinit t3_sge_prep(struct adapter *adap, struct sge_params *p)
2809 {
2810 int i;
2811
2812 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
2813 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
2814
2815 for (i = 0; i < SGE_QSETS; ++i) {
2816 struct qset_params *q = p->qset + i;
2817
2818 q->polling = adap->params.rev > 0;
2819 q->coalesce_usecs = 5;
2820 q->rspq_size = 1024;
2821 q->fl_size = 1024;
2822 q->jumbo_size = 512;
2823 q->txq_size[TXQ_ETH] = 1024;
2824 q->txq_size[TXQ_OFLD] = 1024;
2825 q->txq_size[TXQ_CTRL] = 256;
2826 q->cong_thres = 0;
2827 }
2828
2829 spin_lock_init(&adap->sge.reg_lock);
2830 }
2831
2832 /**
2833 * t3_get_desc - dump an SGE descriptor for debugging purposes
2834 * @qs: the queue set
2835 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
2836 * @idx: the descriptor index in the queue
2837 * @data: where to dump the descriptor contents
2838 *
2839 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
2840 * size of the descriptor.
2841 */
2842 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
2843 unsigned char *data)
2844 {
2845 if (qnum >= 6)
2846 return -EINVAL;
2847
2848 if (qnum < 3) {
2849 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
2850 return -EINVAL;
2851 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
2852 return sizeof(struct tx_desc);
2853 }
2854
2855 if (qnum == 3) {
2856 if (!qs->rspq.desc || idx >= qs->rspq.size)
2857 return -EINVAL;
2858 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
2859 return sizeof(struct rsp_desc);
2860 }
2861
2862 qnum -= 4;
2863 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
2864 return -EINVAL;
2865 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
2866 return sizeof(struct rx_desc);
2867 }
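/*
 * Usage sketch (illustrative, not part of this driver): a debug helper
 * would call t3_get_desc() with a buffer large enough for any descriptor
 * type and use the return value as the number of valid bytes.  The helper
 * name and its copy-out interface are assumptions for the example.
 */
static int example_copy_desc(const struct sge_qset *qs, unsigned int qnum,
			     unsigned int idx, unsigned char *out,
			     size_t out_len)
{
	/* sized so it can hold whichever descriptor type is requested */
	unsigned char buf[sizeof(struct tx_desc) + sizeof(struct rsp_desc) +
			  sizeof(struct rx_desc)];
	int len = t3_get_desc(qs, qnum, idx, buf);

	if (len < 0)
		return len;
	memcpy(out, buf, min_t(size_t, out_len, len));
	return len;
}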