/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"
#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE 1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	hfi1_init_ctxt(uctxt->sc);

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

	uctxt->is_vnic = true;
done:
	return ret;
}

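/* allocate_vnic_ctxt - allocate a receive context and PIO send context for VNIC use */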
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned int ctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	for (ctxt = dd->first_dyn_alloc_ctxt;
	     ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
	if (!uctxt) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;

	/* Allocate and enable a PIO send context */
	uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
			     dd->node);
	ret = uctxt->sc ? 0 : -ENOMEM;
	if (ret)
		goto bail;

	dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
		   uctxt->sc->sw_index, uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto bail;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);

	*vnic_ctxt = uctxt;
	return ret;
bail:
	/*
	 * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
	 * release the send_context structure if uctxt->sc is not null
	 */
	dd->rcd[uctxt->ctxt] = NULL;
	hfi1_rcd_put(uctxt);
	dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
	return ret;
}

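/* deallocate_vnic_ctxt - disable a VNIC receive context and return its send context to the user pool */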
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	unsigned long flags;

	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/*
	 * VNIC contexts are allocated from user context pool.
	 * Release them back to user context pool.
	 *
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
	sc_disable(uctxt->sc);

	dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;
	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)
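
/*
 * SUM_GRP_COUNTERS relies on the u64 counters in struct opa_vnic_grp_stats
 * being laid out contiguously from .unicast through .s_1519_max, so a
 * single pointer walk accumulates all of them.
 */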

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}
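
/*
 * Note: each threshold above is the histogram bucket's lower bound less
 * ETH_FCS_LEN (4), e.g. 1519 - 4 = 1515 and 1024 - 4 = 1020, since the
 * on-wire length includes the 4-byte FCS that skb->len does not.
 */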

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}
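
/*
 * Note: PBC_IHCRC_NONE requests no HCRC insertion while
 * PBC_INSERT_BYPASS_ICRC requests ICRC insertion for this bypass packet,
 * which is why the transmit path below reserves ICRC/tail bytes.
 */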

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

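/* hfi1_netdev_start_xmit - transmit handler (.ndo_start_xmit) */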
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 bytes size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;
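	/*
	 * Note: -(x) & 0x7 is the number of bytes needed to round x up to a
	 * multiple of 8, so pad_len covers the 8-byte alignment bytes plus
	 * the OPA_VNIC_ICRC_TAIL_LEN tail bytes themselves.
	 */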

	/*
	 * pkt_len is how much data we have to write, includes header and data.
	 * total_len is the packet length in Dwords plus the PBC; it should
	 * not include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}

	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

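/* hfi1_vnic_get_skb - dequeue the next received skb and trim its tail padding and icrc */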
static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

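/* hfi1_vnic_bypass_rcv - queue an incoming bypass packet on its rx queue and schedule napi */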
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
	if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

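/* hfi1_vnic_up - register the vesw id and enable the rx/tx queues */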
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

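/* hfi1_vnic_down - unregister the vesw id, then disable and drain the queues */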
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

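/* hfi1_vnic_init - allocate the device-wide vnic resources needed by a new vport */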
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		hfi1_rcd_get(dd->vnic.ctxt[i]);
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If the required number of contexts is not
		 * allocated successfully, then the remaining contexts
		 * are released.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);
alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

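/* hfi1_vnic_deinit - release the device-wide vnic resources when the last vport goes away */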
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If vesw_id is being changed, and if the vnic port is up,
	 * reset the vnic port to ensure the new vesw_id gets picked up
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}